muonroi-cli 1.4.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +122 -122
  3. package/dist/packages/agent-harness-core/src/predicate.d.ts +1 -1
  4. package/dist/src/agent-harness/__tests__/mock-model.spec.js +48 -1
  5. package/dist/src/agent-harness/mock-model.d.ts +11 -0
  6. package/dist/src/agent-harness/mock-model.js +21 -0
  7. package/dist/src/cli/cost-forensics.js +12 -12
  8. package/dist/src/council/__tests__/clarification-prompt.test.js +51 -0
  9. package/dist/src/council/__tests__/clarifier-ready-gate.test.js +32 -0
  10. package/dist/src/council/__tests__/decisions-lock.test.js +17 -1
  11. package/dist/src/council/__tests__/oauth-reachable.test.d.ts +1 -0
  12. package/dist/src/council/__tests__/oauth-reachable.test.js +31 -0
  13. package/dist/src/council/__tests__/parse-outcome-fallback.test.js +11 -0
  14. package/dist/src/council/clarifier.js +9 -1
  15. package/dist/src/council/debate.js +5 -1
  16. package/dist/src/council/decisions-lock.js +3 -3
  17. package/dist/src/council/index.js +12 -5
  18. package/dist/src/council/leader.d.ts +0 -17
  19. package/dist/src/council/leader.js +22 -15
  20. package/dist/src/council/planner.js +1 -1
  21. package/dist/src/council/prompts.js +63 -57
  22. package/dist/src/council/types.d.ts +7 -0
  23. package/dist/src/ee/__tests__/ee-onboarding.test.d.ts +1 -0
  24. package/dist/src/ee/__tests__/ee-onboarding.test.js +32 -0
  25. package/dist/src/ee/auth.d.ts +9 -0
  26. package/dist/src/ee/auth.js +19 -0
  27. package/dist/src/ee/ee-onboarding.d.ts +5 -0
  28. package/dist/src/ee/ee-onboarding.js +76 -0
  29. package/dist/src/generated/version.d.ts +1 -1
  30. package/dist/src/generated/version.js +1 -1
  31. package/dist/src/headless/output.js +6 -4
  32. package/dist/src/headless/output.test.js +4 -3
  33. package/dist/src/index.js +20 -1
  34. package/dist/src/mcp/__tests__/auto-setup.test.js +74 -0
  35. package/dist/src/mcp/__tests__/client-pool.spec.d.ts +1 -0
  36. package/dist/src/mcp/__tests__/client-pool.spec.js +98 -0
  37. package/dist/src/mcp/__tests__/parallel-build.spec.d.ts +1 -0
  38. package/dist/src/mcp/__tests__/parallel-build.spec.js +67 -0
  39. package/dist/src/mcp/__tests__/smart-filter.test.js +56 -0
  40. package/dist/src/mcp/auto-setup.js +56 -2
  41. package/dist/src/mcp/client-pool.d.ts +46 -0
  42. package/dist/src/mcp/client-pool.js +212 -0
  43. package/dist/src/mcp/oauth-callback.js +2 -2
  44. package/dist/src/mcp/parse-headers.test.js +14 -14
  45. package/dist/src/mcp/runtime.d.ts +28 -0
  46. package/dist/src/mcp/runtime.js +117 -51
  47. package/dist/src/mcp/self-verify-runner.d.ts +14 -0
  48. package/dist/src/mcp/self-verify-runner.js +38 -0
  49. package/dist/src/mcp/setup-guide-text.d.ts +9 -0
  50. package/dist/src/mcp/setup-guide-text.js +84 -0
  51. package/dist/src/mcp/smart-filter.js +49 -0
  52. package/dist/src/mcp/smoke.test.js +43 -43
  53. package/dist/src/mcp/tools-server.d.ts +7 -0
  54. package/dist/src/mcp/tools-server.js +19 -22
  55. package/dist/src/models/catalog.json +349 -349
  56. package/dist/src/ops/__tests__/doctor-ee-health.test.js +21 -0
  57. package/dist/src/ops/doctor.d.ts +3 -2
  58. package/dist/src/ops/doctor.js +47 -11
  59. package/dist/src/ops/doctor.test.js +4 -3
  60. package/dist/src/orchestrator/__tests__/mcp-capability-block.test.d.ts +1 -0
  61. package/dist/src/orchestrator/__tests__/mcp-capability-block.test.js +39 -0
  62. package/dist/src/orchestrator/__tests__/project-stack.test.d.ts +1 -0
  63. package/dist/src/orchestrator/__tests__/project-stack.test.js +65 -0
  64. package/dist/src/orchestrator/batch-turn-runner.js +7 -11
  65. package/dist/src/orchestrator/message-processor.js +57 -27
  66. package/dist/src/orchestrator/orchestrator.js +26 -0
  67. package/dist/src/orchestrator/prompts.d.ts +51 -0
  68. package/dist/src/orchestrator/prompts.js +257 -134
  69. package/dist/src/orchestrator/scope-ceiling.js +6 -1
  70. package/dist/src/orchestrator/stream-runner.js +20 -15
  71. package/dist/src/orchestrator/text-tool-call-detector.test.js +13 -13
  72. package/dist/src/pil/__tests__/clarity-gate.test.js +24 -215
  73. package/dist/src/pil/__tests__/config.test.js +1 -17
  74. package/dist/src/pil/__tests__/discovery.test.js +144 -11
  75. package/dist/src/pil/__tests__/layer1-intent-trace.test.js +7 -2
  76. package/dist/src/pil/__tests__/layer1-intent.test.js +3 -0
  77. package/dist/src/pil/__tests__/layer16-clarity.test.js +32 -116
  78. package/dist/src/pil/__tests__/layer4-gsd.test.js +37 -0
  79. package/dist/src/pil/__tests__/layer6-output.test.js +137 -18
  80. package/dist/src/pil/__tests__/llm-classify.test.js +49 -2
  81. package/dist/src/pil/agent-operating-contract.d.ts +1 -1
  82. package/dist/src/pil/agent-operating-contract.js +2 -0
  83. package/dist/src/pil/agent-operating-contract.test.js +7 -2
  84. package/dist/src/pil/cheap-model-playbook.js +35 -35
  85. package/dist/src/pil/cheap-model-workbooks.js +16 -13
  86. package/dist/src/pil/clarity-gate.d.ts +21 -19
  87. package/dist/src/pil/clarity-gate.js +26 -153
  88. package/dist/src/pil/config.d.ts +9 -1
  89. package/dist/src/pil/config.js +15 -4
  90. package/dist/src/pil/discovery.js +211 -136
  91. package/dist/src/pil/layer1-intent.d.ts +12 -0
  92. package/dist/src/pil/layer1-intent.js +283 -38
  93. package/dist/src/pil/layer1-intent.test.js +210 -4
  94. package/dist/src/pil/layer16-clarity.d.ts +25 -11
  95. package/dist/src/pil/layer16-clarity.js +19 -306
  96. package/dist/src/pil/layer4-gsd.js +18 -6
  97. package/dist/src/pil/layer6-output.d.ts +2 -0
  98. package/dist/src/pil/layer6-output.js +137 -22
  99. package/dist/src/pil/llm-classify.d.ts +26 -0
  100. package/dist/src/pil/llm-classify.js +34 -5
  101. package/dist/src/pil/native-capabilities-workbook.d.ts +1 -1
  102. package/dist/src/pil/native-capabilities-workbook.js +82 -76
  103. package/dist/src/pil/schema.d.ts +8 -0
  104. package/dist/src/pil/schema.js +12 -1
  105. package/dist/src/pil/task-tier-map.js +4 -0
  106. package/dist/src/pil/types.d.ts +11 -1
  107. package/dist/src/product-loop/done-gate.js +3 -3
  108. package/dist/src/product-loop/loop-driver.js +18 -18
  109. package/dist/src/product-loop/progress-snapshot.js +4 -4
  110. package/dist/src/providers/auth/gemini-oauth.js +6 -15
  111. package/dist/src/providers/auth/grok-oauth.js +6 -15
  112. package/dist/src/providers/auth/openai-oauth.js +6 -15
  113. package/dist/src/providers/mcp-vision-bridge.js +48 -48
  114. package/dist/src/reporter/index.js +1 -1
  115. package/dist/src/scaffold/bb-ecosystem-apply.js +47 -47
  116. package/dist/src/scaffold/bb-quality-gate.js +5 -5
  117. package/dist/src/scaffold/continuation-prompt.js +60 -60
  118. package/dist/src/scaffold/init-new.js +453 -453
  119. package/dist/src/self-qa/__tests__/scenario-planner.test.js +3 -3
  120. package/dist/src/self-qa/agentic-loop.js +24 -19
  121. package/dist/src/self-qa/spec-emitter.js +26 -23
  122. package/dist/src/storage/__tests__/migrations.test.js +2 -2
  123. package/dist/src/storage/interaction-log.js +5 -5
  124. package/dist/src/storage/migrations.js +122 -122
  125. package/dist/src/storage/sessions.js +42 -42
  126. package/dist/src/storage/transcript.js +91 -84
  127. package/dist/src/storage/usage.js +14 -14
  128. package/dist/src/storage/workspaces.js +12 -12
  129. package/dist/src/tools/__tests__/native-tools.test.d.ts +1 -0
  130. package/dist/src/tools/__tests__/native-tools.test.js +53 -0
  131. package/dist/src/tools/git-safety.d.ts +61 -0
  132. package/dist/src/tools/git-safety.js +141 -0
  133. package/dist/src/tools/git-safety.test.d.ts +1 -0
  134. package/dist/src/tools/git-safety.test.js +111 -0
  135. package/dist/src/tools/native-tools.d.ts +31 -0
  136. package/dist/src/tools/native-tools.js +273 -0
  137. package/dist/src/tools/registry-git-safety.test.d.ts +7 -0
  138. package/dist/src/tools/registry-git-safety.test.js +92 -0
  139. package/dist/src/tools/registry.js +39 -4
  140. package/dist/src/ui/__tests__/markdown-render.test.d.ts +1 -0
  141. package/dist/src/ui/__tests__/markdown-render.test.js +48 -0
  142. package/dist/src/ui/app.js +0 -0
  143. package/dist/src/ui/components/message-view.js +4 -1
  144. package/dist/src/ui/components/structured-response-view.js +7 -3
  145. package/dist/src/ui/components/tool-group.js +7 -1
  146. package/dist/src/ui/markdown-render.d.ts +41 -0
  147. package/dist/src/ui/markdown-render.js +223 -0
  148. package/dist/src/ui/markdown.d.ts +10 -0
  149. package/dist/src/ui/markdown.js +12 -35
  150. package/dist/src/ui/slash/council-inspect.js +4 -4
  151. package/dist/src/ui/slash/export.js +4 -4
  152. package/dist/src/ui/utils/text.d.ts +8 -0
  153. package/dist/src/ui/utils/text.js +16 -0
  154. package/dist/src/ui/utils/text.test.d.ts +1 -0
  155. package/dist/src/ui/utils/text.test.js +23 -0
  156. package/dist/src/usage/ledger.js +48 -15
  157. package/dist/src/utils/__tests__/footprint-gitignore.test.d.ts +1 -0
  158. package/dist/src/utils/__tests__/footprint-gitignore.test.js +50 -0
  159. package/dist/src/utils/clipboard-image.js +23 -23
  160. package/dist/src/utils/open-url.d.ts +56 -0
  161. package/dist/src/utils/open-url.js +58 -0
  162. package/dist/src/utils/open-url.test.d.ts +1 -0
  163. package/dist/src/utils/open-url.test.js +86 -0
  164. package/dist/src/utils/settings.d.ts +12 -0
  165. package/dist/src/utils/settings.js +48 -0
  166. package/dist/src/utils/side-question.js +2 -2
  167. package/dist/src/utils/skills.js +3 -3
  168. package/dist/src/verify/__tests__/coverage-parsers.test.js +30 -30
  169. package/dist/src/verify/environment.js +2 -1
  170. package/package.json +1 -1
  171. package/dist/src/pil/layer16-clarity.test.js +0 -31
  172. /package/dist/src/{pil/layer16-clarity.test.d.ts → council/__tests__/clarification-prompt.test.d.ts} +0 -0
@@ -1,3 +1,4 @@
1
+ import * as fs from "node:fs";
1
2
  import { getModelInfo } from "../models/registry.js";
2
3
  import { buildContractSection } from "../pil/agent-operating-contract.js";
3
4
  import { buildNativeCapabilitiesSection } from "../pil/native-capabilities-workbook.js";
@@ -38,6 +39,81 @@ export const COMPUTER_MODEL = "grok-4.20-0309-reasoning";
38
39
  * changes (MUONROI_SHELL override, shell.kind config) are reflected
39
40
  * without a CLI restart.
40
41
  */
42
/**
 * Deterministically summarize the project's stack from the manifest, lockfile,
 * and config file names found directly inside `cwd`.
 *
 * The directory is listed exactly once with a synchronous readdir; no file
 * contents are read and no model/provider identifiers are involved — only
 * ecosystem marker file names. Within each category the first matching rule
 * wins, so rule order encodes priority (e.g. `tsconfig.json` beats
 * `package.json`, `bun.lock` beats `yarn.lock`).
 *
 * Motivation (2026-06-14 dogfood): the ENVIRONMENT block told the model its OS,
 * shell, and cwd but never WHICH project it was in, so the model acted
 * context-blind and asked the user to describe the repo it was already running
 * inside. This line gives the model a concrete self-model of the codebase.
 *
 * @param {string} cwd - Directory to inspect (the workspace root).
 * @returns {string} A one-line summary such as
 *   "TypeScript · pkg: bun · tests: vitest · vcs: git" containing only the
 *   segments that were detected, or "" when nothing recognizable is present
 *   or the directory cannot be read.
 */
export function detectProjectStack(cwd) {
    let entries;
    try {
        entries = fs.readdirSync(cwd);
    }
    catch (err) {
        // Best-effort enrichment: a missing/unreadable cwd simply omits the stack
        // line. Logging is debug-gated so prompt assembly never writes to the
        // terminal unless the user explicitly asked for diagnostics.
        if (process.env.MUONROI_DEBUG === "1") {
            console.error(`[orchestrator/prompts] detectProjectStack failed for ${cwd}: ${err?.message}`);
        }
        return "";
    }
    // Build the lookup once so each marker probe is O(1) instead of a linear scan.
    const names = new Set(entries);
    const has = (...candidates) => candidates.some((name) => names.has(name));
    // Extension checks are case-insensitive (e.g. "App.CSPROJ" still counts).
    const hasExt = (...exts) => entries.some((entry) => {
        const lowered = entry.toLowerCase();
        return exts.some((ext) => lowered.endsWith(ext));
    });
    // Patterns carry no /g or /y flag, so .test() is stateless across entries.
    const matches = (pattern) => entries.some((entry) => pattern.test(entry));
    // Returns the label of the first rule whose probe succeeds, or "" when none do.
    const firstLabel = (rules) => rules.find(([, isPresent]) => isPresent())?.[0] ?? "";
    const lang = firstLabel([
        ["TypeScript", () => has("tsconfig.json")],
        ["JavaScript/Node", () => has("package.json")],
        ["Rust", () => has("Cargo.toml")],
        ["Go", () => has("go.mod")],
        ["Python", () => has("pyproject.toml", "requirements.txt", "setup.py")],
        [".NET/C#", () => hasExt(".csproj", ".sln") || has("Directory.Build.props")],
        ["Java (Maven)", () => has("pom.xml")],
        ["Java/Kotlin (Gradle)", () => has("build.gradle", "build.gradle.kts")],
    ]);
    const pkg = firstLabel([
        ["bun", () => has("bun.lockb", "bun.lock")],
        ["pnpm", () => has("pnpm-lock.yaml")],
        ["yarn", () => has("yarn.lock")],
        // npm-shrinkwrap.json is npm's publishable lockfile; treat it like package-lock.json.
        ["npm", () => has("package-lock.json", "npm-shrinkwrap.json")],
    ]);
    const tests = firstLabel([
        ["vitest", () => matches(/^vitest\.([\w.-]+\.)?config\.(ts|js|mjs|cjs|cts|mts)$/i)],
        ["jest", () => matches(/^jest\.config\./i)],
        // A root conftest.py is pytest-specific, so it counts as a pytest marker
        // alongside the ini files.
        ["pytest", () => has("pytest.ini", "tox.ini", "conftest.py")],
    ]);
    const vcs = has(".git") ? "git" : "";
    // Keep only detected segments, in fixed order: language, pkg, tests, vcs.
    const segs = [];
    if (lang) {
        segs.push(lang);
    }
    if (pkg) {
        segs.push(`pkg: ${pkg}`);
    }
    if (tests) {
        segs.push(`tests: ${tests}`);
    }
    if (vcs) {
        segs.push(`vcs: ${vcs}`);
    }
    return segs.join(" · ");
}
41
117
  function buildEnvironmentBlock() {
42
118
  const platform = process.platform;
43
119
  const osName = platform === "win32" ? "Windows" : platform === "darwin" ? "macOS" : platform === "linux" ? "Linux" : platform;
@@ -74,11 +150,14 @@ function buildEnvironmentBlock() {
74
150
  else if (shell.kind === "cmd") {
75
151
  shellRules.push("- The bash tool runs cmd.exe. Use cmd.exe syntax: dir, type, copy, del, if exist, for %%.", "- DO NOT use POSIX commands (grep, sed, awk, ls) or PowerShell cmdlets — they will fail.", "- For complex shell work, ask the user to enable Git Bash or PowerShell via `--shell` / MUONROI_SHELL env.");
76
152
  }
153
+ const projectStack = cwd === "<unknown>" ? "" : detectProjectStack(cwd);
77
154
  return [
78
155
  "ENVIRONMENT:",
79
156
  `- OS: ${osName} (${platform})`,
80
157
  `- Shell available via bash tool: ${shellKindLabel} (kind=${shell.kind})`,
81
158
  `- Working directory: ${cwd}`,
159
+ ...(projectStack ? [`- Project stack: ${projectStack}`] : []),
160
+ "- You are running INSIDE this repository: read and search it with your own tools instead of asking the user to describe its files, structure, or stack. You can act on what you find here directly.",
82
161
  "",
83
162
  "Terminal rendering:",
84
163
  "- Your text output is rendered in a plain terminal — not a browser, not a rich text editor.",
@@ -95,138 +174,138 @@ function buildEnvironmentBlock() {
95
174
  }
96
175
  const ENVIRONMENT = buildEnvironmentBlock();
97
176
  const MODE_PROMPTS = {
98
- agent: `You are muonroi-cli in Agent mode — a powerful AI coding agent. You execute tasks directly using tools.
99
-
100
- ${ENVIRONMENT}
101
-
102
- TOOLS:
103
- - read_file: Read file contents with start_line/end_line for iterative reading. Use for examining code.
104
- - grep: Fast regex content search across the codebase. Prefer this over bash for finding patterns in files. Supports full regex syntax and file filtering with the include parameter.
105
- - lsp: Experimental semantic code intelligence for definitions, references, hover, symbols, implementations, and call hierarchy when a matching language server is available.
106
- - write_file: Create new files or overwrite existing ones with full content.
107
- - edit_file: Replace a unique string in a file with new content. The old_string must be unique — include enough context lines.
108
- - bash: Execute shell commands. Set background=true for long-running processes (dev servers, watchers, builds). Returns a process ID immediately.
109
- - process_logs: View recent output from a background process by ID.
110
- - process_stop: Stop a background process by ID.
111
- - process_list: List all background processes with status and uptime.
112
- - wallet_info: Check the local wallet address, chain, and current ETH/USDC balances.
113
- - wallet_history: Show recent x402 payment history from the audit log.
114
- - fetch_payment_info: Inspect a URL for x402 payment requirements without paying. Returns payment options and a brin security score. Use only when the user wants to inspect — for actual access, use paid_request directly.
115
- - paid_request: Access an x402-protected URL using the local wallet. Includes a brin security scan — URLs scoring below 25 are automatically blocked. The user will be prompted to approve the payment before it executes. Prefer this over fetch_payment_info when the user wants to access the resource.
116
- - task: Delegate a focused foreground task to a sub-agent. Use general for multi-step execution, explore for fast read-only research, verify for sandbox-aware validation, computer for host desktop screenshot/input workflows, or a configured custom sub-agent name when listed under CUSTOM SUB-AGENTS.
117
- - delegate: Launch a read-only background agent for longer research while you continue working.
118
- - delegation_read: Retrieve a completed background delegation result by ID.
119
- - delegation_list: List running and completed background delegations. Do not poll it repeatedly.
120
- - schedule_create: Create a recurring or one-time scheduled headless run.
121
- - schedule_list: List saved schedules and their status.
122
- - schedule_remove: Remove a saved schedule.
123
- - schedule_read_log: Read recent log output from a schedule.
124
- - schedule_daemon_status: Check whether the schedule daemon is running.
125
- - schedule_daemon_start: Start the schedule daemon in the background.
126
- - schedule_daemon_stop: Stop the schedule daemon.
127
- - search_web: Search the web for current information, documentation, APIs, tutorials, etc.
128
- - search_x: Search X/Twitter for real-time posts, discussions, opinions, and trends.
129
- - generate_image: Generate a new image or edit an existing image. It saves image files locally and returns their paths.
130
- - generate_video: Generate a new video or animate an existing image. It saves video files locally and returns their paths.
131
- - computer_snapshot: Capture an accessibility-tree snapshot with stable refs like @e1 for desktop interaction.
132
- - computer_screenshot: Capture a host desktop screenshot for visual confirmation or fallback inspection.
133
- - computer_click: Click a desktop element by ref, or coordinates as a fallback.
134
- - computer_mouse_move: Hover a desktop element by ref, or coordinates as a fallback.
135
- - computer_type: Type text into a specific desktop element ref.
136
- - computer_press: Press a key or key chord in the focused host application.
137
- - computer_scroll: Scroll a desktop element by ref.
138
- - computer_launch: Launch an application and wait for its window to appear.
139
- - computer_list_windows: List visible windows and their ids.
140
- - computer_focus_window: Bring a target window to the front.
141
- - computer_wait: Wait for time, elements, windows, or text during desktop workflows.
142
- - computer_get: Read a property from a desktop element ref.
143
- - MCP tools: Enabled servers appear as tools named like mcp_<server>__<tool>.
144
-
145
- WORKFLOW:
146
- 1. Understand the request
147
- 2. Decide whether a sub-agent should handle the first investigation pass
148
- 3. Use read_file, grep, lsp, and bash to explore the codebase directly when the task is small or tightly scoped
149
- 4. Use bash with background=true for dev servers, watchers, or any long-running process — then continue working
150
- 5. Use delegate for read-only work that can run in parallel, then continue productive work
151
- 6. Use edit_file for targeted changes, write_file for new files or full rewrites
152
- 7. Verify changes by reading modified files
153
- 8. Run tests or builds with bash to confirm correctness
154
- 9. Use search_web or search_x when you need up-to-date information
155
-
156
- DEFAULT DELEGATION POLICY:
157
- - Prefer the task tool by default for code review, code quality analysis, architecture research, root-cause investigation, bug triage, verification, or any request that likely needs reading multiple files before acting.
158
- - Prefer delegate for longer-running read-only exploration when you can keep making progress without blocking.
159
- - Use the explore sub-agent for read-only investigation, reviews, research, and "how does this work?" tasks.
160
- - Use the general sub-agent for delegated work that may need editing files, running commands, or producing a concrete implementation.
161
- - Use the verify sub-agent for sandbox-aware build, test, app boot, and smoke validation work.
162
- - Use the computer sub-agent for host desktop interaction workflows that need screenshots, clicks, typing, keypresses, or scrolling.
163
- - Use a matching custom sub-agent when the task fits one of the configured specializations.
164
- - Never use delegate for tasks that should edit files or make shell changes.
165
- - When a background delegation is running, do not wait idly and do not spam delegation_list(). Continue useful work.
166
- - Do not wait for the user to explicitly ask for a sub-agent when delegation would clearly help.
167
- - Skip delegation only when the task is trivial, single-file, or you already have the exact answer.
168
-
169
- EXAMPLES:
170
- - "review this change" -> delegate to explore first
171
- - "research how auth works" -> delegate to explore first
172
- - "investigate why this test fails" -> delegate to explore first, then continue with findings
173
- - "refactor this module" -> delegate a focused part to general when helpful
174
- - "verify this feature locally" -> use verify
175
- - "open the host app and click through it" -> use computer
176
- - "generate a logo" -> use generate_image
177
- - "animate this still image" -> use generate_video
178
- - Recurring specialized workflows -> use the matching custom sub-agent via task
179
- - "every weekday at 9am run this check" -> use schedule_create with a cron expression
180
- - "run this once automatically" -> use schedule_create with the right timing
181
- - "make sure scheduled jobs keep running" -> use schedule_daemon_status and schedule_daemon_start
182
-
183
- IMPORTANT:
184
- - Prefer edit_file for surgical changes to existing files — it shows a clean diff.
185
- - Prefer grep over bash for searching file contents. Use bash only for find, ls, git, and other shell commands.
186
- - Prefer lsp over text search when you need exact definitions, references, implementations, or call hierarchy and a server is available.
187
- - Use write_file only for new files or when most of the file is changing. For very large files (>500 lines), split into multiple edit_file calls or write smaller chunks.
188
- - Use read_file instead of cat/head/tail for reading files.
189
- - When the user asks for an automated recurring or one-time run, use the schedule tools instead of only describing the setup.
190
- - After creating a recurring schedule, check the daemon status and start it with \`schedule_daemon_start\` if needed.
191
-
177
+ agent: `You are muonroi-cli in Agent mode — a powerful AI coding agent. You execute tasks directly using tools.
178
+
179
+ ${ENVIRONMENT}
180
+
181
+ TOOLS:
182
+ - read_file: Read file contents with start_line/end_line for iterative reading. Use for examining code.
183
+ - grep: Fast regex content search across the codebase. Prefer this over bash for finding patterns in files. Supports full regex syntax and file filtering with the include parameter.
184
+ - lsp: Experimental semantic code intelligence for definitions, references, hover, symbols, implementations, and call hierarchy when a matching language server is available.
185
+ - write_file: Create new files or overwrite existing ones with full content.
186
+ - edit_file: Replace a unique string in a file with new content. The old_string must be unique — include enough context lines.
187
+ - bash: Execute shell commands. Set background=true for long-running processes (dev servers, watchers, builds). Returns a process ID immediately.
188
+ - process_logs: View recent output from a background process by ID.
189
+ - process_stop: Stop a background process by ID.
190
+ - process_list: List all background processes with status and uptime.
191
+ - wallet_info: Check the local wallet address, chain, and current ETH/USDC balances.
192
+ - wallet_history: Show recent x402 payment history from the audit log.
193
+ - fetch_payment_info: Inspect a URL for x402 payment requirements without paying. Returns payment options and a brin security score. Use only when the user wants to inspect — for actual access, use paid_request directly.
194
+ - paid_request: Access an x402-protected URL using the local wallet. Includes a brin security scan — URLs scoring below 25 are automatically blocked. The user will be prompted to approve the payment before it executes. Prefer this over fetch_payment_info when the user wants to access the resource.
195
+ - task: Delegate a focused foreground task to a sub-agent. Use general for multi-step execution, explore for fast read-only research, verify for sandbox-aware validation, computer for host desktop screenshot/input workflows, or a configured custom sub-agent name when listed under CUSTOM SUB-AGENTS.
196
+ - delegate: Launch a read-only background agent for longer research while you continue working.
197
+ - delegation_read: Retrieve a completed background delegation result by ID.
198
+ - delegation_list: List running and completed background delegations. Do not poll it repeatedly.
199
+ - schedule_create: Create a recurring or one-time scheduled headless run.
200
+ - schedule_list: List saved schedules and their status.
201
+ - schedule_remove: Remove a saved schedule.
202
+ - schedule_read_log: Read recent log output from a schedule.
203
+ - schedule_daemon_status: Check whether the schedule daemon is running.
204
+ - schedule_daemon_start: Start the schedule daemon in the background.
205
+ - schedule_daemon_stop: Stop the schedule daemon.
206
+ - search_web: Search the web for current information, documentation, APIs, tutorials, etc.
207
+ - search_x: Search X/Twitter for real-time posts, discussions, opinions, and trends.
208
+ - generate_image: Generate a new image or edit an existing image. It saves image files locally and returns their paths.
209
+ - generate_video: Generate a new video or animate an existing image. It saves video files locally and returns their paths.
210
+ - computer_snapshot: Capture an accessibility-tree snapshot with stable refs like @e1 for desktop interaction.
211
+ - computer_screenshot: Capture a host desktop screenshot for visual confirmation or fallback inspection.
212
+ - computer_click: Click a desktop element by ref, or coordinates as a fallback.
213
+ - computer_mouse_move: Hover a desktop element by ref, or coordinates as a fallback.
214
+ - computer_type: Type text into a specific desktop element ref.
215
+ - computer_press: Press a key or key chord in the focused host application.
216
+ - computer_scroll: Scroll a desktop element by ref.
217
+ - computer_launch: Launch an application and wait for its window to appear.
218
+ - computer_list_windows: List visible windows and their ids.
219
+ - computer_focus_window: Bring a target window to the front.
220
+ - computer_wait: Wait for time, elements, windows, or text during desktop workflows.
221
+ - computer_get: Read a property from a desktop element ref.
222
+ - MCP tools: connected servers appear as first-class tools named mcp_<server>__<tool>. The exact tools available THIS turn are listed under "CONNECTED MCP TOOLS" near the end of this prompt — call them directly by that name; never shell out to bash/JSON-RPC to reach an MCP server.
223
+
224
+ WORKFLOW:
225
+ 1. Understand the request
226
+ 2. Decide whether a sub-agent should handle the first investigation pass
227
+ 3. Use read_file, grep, lsp, and bash to explore the codebase directly when the task is small or tightly scoped
228
+ 4. Use bash with background=true for dev servers, watchers, or any long-running process — then continue working
229
+ 5. Use delegate for read-only work that can run in parallel, then continue productive work
230
+ 6. Use edit_file for targeted changes, write_file for new files or full rewrites
231
+ 7. Verify changes by reading modified files
232
+ 8. Run tests or builds with bash to confirm correctness
233
+ 9. Use search_web or search_x when you need up-to-date information
234
+
235
+ DEFAULT DELEGATION POLICY:
236
+ - Prefer the task tool by default for code review, code quality analysis, architecture research, root-cause investigation, bug triage, verification, or any request that likely needs reading multiple files before acting.
237
+ - Prefer delegate for longer-running read-only exploration when you can keep making progress without blocking.
238
+ - Use the explore sub-agent for read-only investigation, reviews, research, and "how does this work?" tasks.
239
+ - Use the general sub-agent for delegated work that may need editing files, running commands, or producing a concrete implementation.
240
+ - Use the verify sub-agent for sandbox-aware build, test, app boot, and smoke validation work.
241
+ - Use the computer sub-agent for host desktop interaction workflows that need screenshots, clicks, typing, keypresses, or scrolling.
242
+ - Use a matching custom sub-agent when the task fits one of the configured specializations.
243
+ - Never use delegate for tasks that should edit files or make shell changes.
244
+ - When a background delegation is running, do not wait idly and do not spam delegation_list(). Continue useful work.
245
+ - Do not wait for the user to explicitly ask for a sub-agent when delegation would clearly help.
246
+ - Skip delegation only when the task is trivial, single-file, or you already have the exact answer.
247
+
248
+ EXAMPLES:
249
+ - "review this change" -> delegate to explore first
250
+ - "research how auth works" -> delegate to explore first
251
+ - "investigate why this test fails" -> delegate to explore first, then continue with findings
252
+ - "refactor this module" -> delegate a focused part to general when helpful
253
+ - "verify this feature locally" -> use verify
254
+ - "open the host app and click through it" -> use computer
255
+ - "generate a logo" -> use generate_image
256
+ - "animate this still image" -> use generate_video
257
+ - Recurring specialized workflows -> use the matching custom sub-agent via task
258
+ - "every weekday at 9am run this check" -> use schedule_create with a cron expression
259
+ - "run this once automatically" -> use schedule_create with the right timing
260
+ - "make sure scheduled jobs keep running" -> use schedule_daemon_status and schedule_daemon_start
261
+
262
+ IMPORTANT:
263
+ - Prefer edit_file for surgical changes to existing files — it shows a clean diff.
264
+ - Prefer grep over bash for searching file contents. Use bash only for find, ls, git, and other shell commands.
265
+ - Prefer lsp over text search when you need exact definitions, references, implementations, or call hierarchy and a server is available.
266
+ - Use write_file only for new files or when most of the file is changing. For very large files (>500 lines), split into multiple edit_file calls or write smaller chunks.
267
+ - Use read_file instead of cat/head/tail for reading files.
268
+ - When the user asks for an automated recurring or one-time run, use the schedule tools instead of only describing the setup.
269
+ - After creating a recurring schedule, check the daemon status and start it with \`schedule_daemon_start\` if needed.
270
+
192
271
  Be direct. Execute, don't just describe. Show results, not plans.`,
193
- plan: `You are muonroi-cli in Plan mode — you analyze and plan but DO NOT execute changes.
194
-
195
- ${ENVIRONMENT}
196
-
197
- TOOLS:
198
- - read_file: Read file contents for analysis.
199
- - grep: Fast regex content search across the codebase. Prefer this over bash for finding patterns in files.
200
- - lsp: Experimental semantic code intelligence for read-only planning and research.
201
- - bash: ONLY for searching (find, ls), git inspection — NEVER modify files.
202
- - task: Delegate a focused task to a sub-agent when deeper research or specialized analysis would help.
203
- - generate_plan: ALWAYS use this to present your plan. Creates an interactive UI with steps and questions.
204
-
205
- BEHAVIOR:
206
- - Explore the codebase first using read_file, grep, and bash to understand the current state
207
- - Prefer lsp for exact symbol navigation when a matching server is available
208
- - ALWAYS call generate_plan to present your plan — never just describe it in text
209
- - Include clear, ordered steps with affected file paths
210
- - Include questions when you need user input on approach, trade-offs, or preferences
211
- - Use "select" questions for single-choice decisions, "multiselect" for picking multiple options, and "text" for free-form input
212
- - Highlight potential risks, edge cases, and dependencies in the plan summary
272
+ plan: `You are muonroi-cli in Plan mode — you analyze and plan but DO NOT execute changes.
273
+
274
+ ${ENVIRONMENT}
275
+
276
+ TOOLS:
277
+ - read_file: Read file contents for analysis.
278
+ - grep: Fast regex content search across the codebase. Prefer this over bash for finding patterns in files.
279
+ - lsp: Experimental semantic code intelligence for read-only planning and research.
280
+ - bash: ONLY for searching (find, ls), git inspection — NEVER modify files.
281
+ - task: Delegate a focused task to a sub-agent when deeper research or specialized analysis would help.
282
+ - generate_plan: ALWAYS use this to present your plan. Creates an interactive UI with steps and questions.
283
+
284
+ BEHAVIOR:
285
+ - Explore the codebase first using read_file, grep, and bash to understand the current state
286
+ - Prefer lsp for exact symbol navigation when a matching server is available
287
+ - ALWAYS call generate_plan to present your plan — never just describe it in text
288
+ - Include clear, ordered steps with affected file paths
289
+ - Include questions when you need user input on approach, trade-offs, or preferences
290
+ - Use "select" questions for single-choice decisions, "multiselect" for picking multiple options, and "text" for free-form input
291
+ - Highlight potential risks, edge cases, and dependencies in the plan summary
213
292
  - NEVER create, modify, or delete files — only read and analyze`,
214
- ask: `You are muonroi-cli in Ask mode — you answer questions clearly and thoroughly.
215
-
216
- ${ENVIRONMENT}
217
-
218
- TOOLS:
219
- - read_file: Read file contents for context.
220
- - grep: Fast regex content search across the codebase. Prefer this over bash for finding patterns in files.
221
- - lsp: Experimental semantic code intelligence for definitions, references, hover, and symbols.
222
- - bash: ONLY for searching (find, ls), git inspection — NEVER modify.
223
- - task: Delegate a focused task to a sub-agent when specialized analysis or deeper investigation would help.
224
-
225
- BEHAVIOR:
226
- - Answer the user's question directly and thoroughly
227
- - Use tools to gather context when needed, preferring lsp for exact symbol questions when available
228
- - Provide code examples when helpful
229
- - NEVER create, modify, or delete files
293
+ ask: `You are muonroi-cli in Ask mode — you answer questions clearly and thoroughly.
294
+
295
+ ${ENVIRONMENT}
296
+
297
+ TOOLS:
298
+ - read_file: Read file contents for context.
299
+ - grep: Fast regex content search across the codebase. Prefer this over bash for finding patterns in files.
300
+ - lsp: Experimental semantic code intelligence for definitions, references, hover, and symbols.
301
+ - bash: ONLY for searching (find, ls), git inspection — NEVER modify.
302
+ - task: Delegate a focused task to a sub-agent when specialized analysis or deeper investigation would help.
303
+
304
+ BEHAVIOR:
305
+ - Answer the user's question directly and thoroughly
306
+ - Use tools to gather context when needed, preferring lsp for exact symbol questions when available
307
+ - Provide code examples when helpful
308
+ - NEVER create, modify, or delete files
230
309
  - Focus on explanation, not execution`,
231
310
  };
232
311
  export function findCustomSubagent(agent, subagents = loadValidSubAgents()) {
@@ -242,10 +321,10 @@ export function formatCustomSubagentsPromptSection(subagents) {
242
321
  });
243
322
  return `\n\nCUSTOM SUB-AGENTS:\nUser-defined foreground sub-agents from ~/.muonroi-cli/user-settings.json. When one matches the task, call the task tool with agent set to the exact name.\n\n${lines.join("\n\n")}\n`;
244
323
  }
245
- const NON_ANTHROPIC_TOOL_PREAMBLE = `\n\nIMPORTANT — TOOL CALLING:
246
- You MUST invoke tools ONLY via the structured function calling API provided to you.
247
- NEVER output XML tags like <tool_name>, <bash>, <read_file>, or <delegate> as text.
248
- If you want to call a tool, use the function calling mechanism — do NOT write tool invocations as text in your response.
324
+ const NON_ANTHROPIC_TOOL_PREAMBLE = `\n\nIMPORTANT — TOOL CALLING:
325
+ You MUST invoke tools ONLY via the structured function calling API provided to you.
326
+ NEVER output XML tags like <tool_name>, <bash>, <read_file>, or <delegate> as text.
327
+ If you want to call a tool, use the function calling mechanism — do NOT write tool invocations as text in your response.
249
328
  Any XML-like tool invocation in your text output will be ignored by the system.\n`;
250
329
  /**
251
330
  * Strip the TOOLS: listing section from system prompt.
@@ -255,6 +334,50 @@ Any XML-like tool invocation in your text output will be ignored by the system.\
255
334
  export function stripToolsSection(text) {
256
335
  return text.replace(/\nTOOLS:\n[\s\S]*?\n(?=WORKFLOW:|BEHAVIOR:|IMPORTANT:|DEFAULT DELEGATION|EXAMPLES:|$)/g, "\n");
257
336
  }
337
+ /**
338
+ * Render the LIVE per-turn MCP tool roster as a system-prompt block.
339
+ *
340
+ * The static prompt only states the mcp_<server>__<tool> naming convention; it
341
+ * never names the tools actually connected this turn, and the per-message smart
342
+ * filter can drop whole servers. The model therefore receives connected MCP
343
+ * tools ONLY as raw tool JSON, which it can overlook — live failure
344
+ * (session f6f7881a5fae): asked to call `setup_guide`, the agent said "I don't
345
+ * have a direct call_mcp tool" and drove the muonroi-docs server by hand over
346
+ * bash JSON-RPC, fabricating output. Surfacing the exact callable names in prose
347
+ * closes that gap.
348
+ *
349
+ * `toolNames` should be the keys of the FINAL assembled tool set for the turn
350
+ * (post smart-filter, post fs-dedup). Returns "" when no MCP tool is connected,
351
+ * so non-agent / chitchat / no-client-tools turns add nothing. The block is
352
+ * DYNAMIC (varies per turn) so callers must append it OUTSIDE the cached static
353
+ * prefix.
354
+ */
355
+ export function buildMcpCapabilityBlock(toolNames) {
356
+ const byServer = new Map();
357
+ for (const name of toolNames) {
358
+ if (!name.startsWith("mcp_"))
359
+ continue;
360
+ // mcp_<sanitized-server-id>__<tool>; split on the FIRST "__" (server ids
361
+ // rarely contain "__" — they are sanitized from real ids like "muonroi-docs").
362
+ const m = name.match(/^mcp_(.+?)__(.+)$/);
363
+ if (!m)
364
+ continue;
365
+ const server = m[1];
366
+ const list = byServer.get(server) ?? [];
367
+ list.push(name);
368
+ byServer.set(server, list);
369
+ }
370
+ if (byServer.size === 0)
371
+ return "";
372
+ const lines = [];
373
+ for (const [server, tools] of byServer) {
374
+ lines.push(` • ${server}: ${tools.sort().join(", ")}`);
375
+ }
376
+ return ("\n\nCONNECTED MCP TOOLS (this turn) — these are available to you RIGHT NOW as " +
377
+ "first-class tools. Call them directly by their exact name; do NOT shell out " +
378
+ "to bash or hand-write JSON-RPC to reach an MCP server:\n" +
379
+ lines.join("\n"));
380
+ }
258
381
  export function buildSystemPromptParts(cwd, mode, sandboxMode, planContext, subagents, sandboxSettings, providerId, resumeDigest, options) {
259
382
  const chitchat = options?.chitchat === true;
260
383
  const custom = loadCustomInstructions(cwd);
@@ -46,7 +46,12 @@ const KNOWN_TASK_TYPES = new Set(Object.keys(CEILING_MATRIX));
46
46
  * graceful when PIL emits an out-of-band label or null.
47
47
  */
48
48
  export function resolveCeiling(taskType, size) {
49
- const row = taskType && KNOWN_TASK_TYPES.has(taskType) ? taskType : "general";
49
+ // `build` (greenfield creation, PIL Pass-0) is not a row in the LOCKED matrix.
50
+ // It is the highest-effort task — scaffolding many files — so it borrows the
51
+ // `generate` ceiling (10/18/30) rather than falling back to the tight `general`
52
+ // row (5/10/20), which would force-finalize a greenfield build far too early.
53
+ const normalized = taskType === "build" ? "generate" : taskType;
54
+ const row = normalized && KNOWN_TASK_TYPES.has(normalized) ? normalized : "general";
50
55
  return CEILING_MATRIX[row][size];
51
56
  }
52
57
  /**
@@ -27,7 +27,8 @@
27
27
  // - F1 (sub-agent cumulative cap) — wrapToolSetWithCap
28
28
  // - siliconflow reasoning-strip — taskCaps.sanitizeHistory
29
29
  import { stepCountIs, streamText } from "ai";
30
- import { buildMcpToolSet } from "../mcp/runtime.js";
30
+ import { getDefaultEEClient } from "../ee/intercept.js";
31
+ import { acquireMcpTools } from "../mcp/client-pool.js";
31
32
  import { normalizeModelId } from "../models/registry.js";
32
33
  import { cheapModelShellLine, injectCheapModelPlaybook, injectCheapModelShellDirective, shouldInjectCheapModelPlaybook, } from "../pil/cheap-model-playbook.js";
33
34
  import { injectCheapModelWorkbook, shouldInjectCheapModelWorkbook, subagentTaskType, } from "../pil/cheap-model-workbooks.js";
@@ -38,6 +39,7 @@ import { wireDebug } from "../providers/wire-debug.js";
38
39
  import { BashTool } from "../tools/bash.js";
39
40
  import { createBuiltinTools } from "../tools/registry.js";
40
41
  import { statusBarStore } from "../ui/status-bar/store.js";
42
+ import { openUrl } from "../utils/open-url.js";
41
43
  import { getCurrentShellSettings, getProviderStallTimeoutMs, getSubAgentBudgetChars, getSubAgentCompactKeepLast, getSubAgentCompactThresholdChars, loadMcpServers, loadValidSubAgents, } from "../utils/settings.js";
42
44
  import { resolveShell } from "../utils/shell.js";
43
45
  import { prepareVerifySandbox } from "../verify/entrypoint.js";
@@ -50,7 +52,6 @@ import { repairToolCallHook } from "./repair-tool-call.js";
50
52
  import { classifyStreamError } from "./retry-classifier.js";
51
53
  import { incSessionStep, resolveCeiling } from "./scope-ceiling.js";
52
54
  import { attachReminderToMessages, buildScopeReminder, cadenceForSize, shouldInjectReminder, shouldInjectSoftWarn, } from "./scope-reminder.js";
53
- import { getDefaultEEClient } from "../ee/intercept.js";
54
55
  import { createStallWatchdog, STALL_ERROR_MESSAGE } from "./stall-watchdog.js";
55
56
  import { wrapToolSetWithCap } from "./sub-agent-cap.js";
56
57
  import { compactSubAgentMessages } from "./subagent-compactor.js";
@@ -211,17 +212,12 @@ export class StreamRunner {
211
212
  : childWithPlaybook;
212
213
  onActivity?.(initialDetail);
213
214
  if (childMode === "agent" && taskCaps.supportsClientTools(childRuntime.modelInfo)) {
214
- const mcpBundle = await buildMcpToolSet(loadMcpServers(), {
215
+ const mcpBundle = await acquireMcpTools(loadMcpServers(), {
215
216
  onOAuthRequired: (_serverId, url) => {
216
- const urlStr = url.toString();
217
- import("child_process").then(({ exec }) => {
218
- const cmd = process.platform === "win32"
219
- ? `start "" "${urlStr}"`
220
- : process.platform === "darwin"
221
- ? `open "${urlStr}"`
222
- : `xdg-open "${urlStr}"`;
223
- exec(cmd);
224
- });
217
+ // Server-supplied URL is untrusted — openUrl validates the scheme
218
+ // and spawns via execFile (no shell), closing the command-injection
219
+ // vector the old exec() opener had.
220
+ openUrl(url);
225
221
  },
226
222
  });
227
223
  closeMcp = mcpBundle.close;
@@ -407,7 +403,10 @@ export class StreamRunner {
407
403
  const joined = texts.join(" ");
408
404
  const mKeep = joined.match(/KEEP_TOOL_IDS\s*[:=]\s*([a-z0-9_, -]+)/i);
409
405
  if (mKeep) {
410
- subKeepToolIds = mKeep[1].split(/[,\s]+/).map((s) => s.trim()).filter(Boolean);
406
+ subKeepToolIds = mKeep[1]
407
+ .split(/[,\s]+/)
408
+ .map((s) => s.trim())
409
+ .filter(Boolean);
411
410
  break;
412
411
  }
413
412
  }
@@ -415,10 +414,16 @@ export class StreamRunner {
415
414
  const persistSubArtifact = (toolCallId, toolName, fullContent, reason) => {
416
415
  try {
417
416
  getDefaultEEClient()
418
- .extract({ transcript: fullContent.slice(0, 4000), projectPath: process.cwd(), meta: { source: "tool-artifact", toolCallId, toolName, reason } }, AbortSignal.timeout(600))
417
+ .extract({
418
+ transcript: fullContent.slice(0, 4000),
419
+ projectPath: process.cwd(),
420
+ meta: { source: "tool-artifact", toolCallId, toolName, reason },
421
+ }, AbortSignal.timeout(600))
419
422
  .catch(() => { });
420
423
  }
421
- catch { /* fail-open */ }
424
+ catch {
425
+ /* fail-open */
426
+ }
422
427
  };
423
428
  const compacted = compactSubAgentMessages(stripped, {
424
429
  thresholdChars: compactThreshold,
@@ -6,10 +6,10 @@ describe("detectTextEmittedToolCall", () => {
6
6
  // destructive edit, deepseek emitted this as plain assistant text to
7
7
  // re-read the file — the CLI returned it as the final answer and the turn
8
8
  // was silently wasted with a broken file left behind.
9
- const text = `Let me restore the file properly.
10
-
11
- <read_file>
12
- <path>src/app/screens/story-list/story-list.component.html</path>
9
+ const text = `Let me restore the file properly.
10
+
11
+ <read_file>
12
+ <path>src/app/screens/story-list/story-list.component.html</path>
13
13
  </read_file>`;
14
14
  const r = detectTextEmittedToolCall(text);
15
15
  expect(r.detected).toBe(true);
@@ -43,10 +43,10 @@ describe("detectTextEmittedToolCall", () => {
43
43
  // Live: storyflow_ui explore-A/B, deepseek T3 (session 799f0508e830) emitted
44
44
  // this as text and made no real tool call → empty, silent turn. The generic
45
45
  // <invoke matcher misses it because `<` is followed by the U+FF5C sentinel.
46
- const text = `<||DSML||tool_calls>
47
- <||DSML||invoke name="read_file">
48
- <||DSML||parameter name="file_path" string="true">src/app/foo.html</||DSML||parameter>
49
- </||DSML||invoke>
46
+ const text = `<||DSML||tool_calls>
47
+ <||DSML||invoke name="read_file">
48
+ <||DSML||parameter name="file_path" string="true">src/app/foo.html</||DSML||parameter>
49
+ </||DSML||invoke>
50
50
  </||DSML||tool_calls>`;
51
51
  const r = detectTextEmittedToolCall(text);
52
52
  expect(r.detected).toBe(true);
@@ -60,11 +60,11 @@ describe("detectTextEmittedToolCall", () => {
60
60
  expect(detectTextEmittedToolCall("I edited the file and ran the tests; everything passes.").detected).toBe(false);
61
61
  });
62
62
  it("parseDsmlToolCalls extracts name + args from the DSML block (for targeted re-steer)", () => {
63
- const text = `<||DSML||tool_calls>
64
- <||DSML||invoke name="read_file">
65
- <||DSML||parameter name="file_path" string="true">src/app/foo.html</||DSML||parameter>
66
- <||DSML||parameter name="start_line" string="false">25</||DSML||parameter>
67
- </||DSML||invoke>
63
+ const text = `<||DSML||tool_calls>
64
+ <||DSML||invoke name="read_file">
65
+ <||DSML||parameter name="file_path" string="true">src/app/foo.html</||DSML||parameter>
66
+ <||DSML||parameter name="start_line" string="false">25</||DSML||parameter>
67
+ </||DSML||invoke>
68
68
  </||DSML||tool_calls>`;
69
69
  const calls = parseDsmlToolCalls(text);
70
70
  expect(calls).toHaveLength(1);