@x-code-cli/core 0.1.11 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. package/dist/agent/api-errors.d.ts.map +1 -1
  2. package/dist/agent/api-errors.js +18 -0
  3. package/dist/agent/api-errors.js.map +1 -1
  4. package/dist/agent/diff.d.ts +35 -0
  5. package/dist/agent/diff.d.ts.map +1 -0
  6. package/dist/agent/diff.js +83 -0
  7. package/dist/agent/diff.js.map +1 -0
  8. package/dist/agent/loop-state.d.ts +45 -3
  9. package/dist/agent/loop-state.d.ts.map +1 -1
  10. package/dist/agent/loop-state.js +24 -3
  11. package/dist/agent/loop-state.js.map +1 -1
  12. package/dist/agent/loop.d.ts +10 -6
  13. package/dist/agent/loop.d.ts.map +1 -1
  14. package/dist/agent/loop.js +212 -30
  15. package/dist/agent/loop.js.map +1 -1
  16. package/dist/agent/plan-storage.d.ts +55 -0
  17. package/dist/agent/plan-storage.d.ts.map +1 -0
  18. package/dist/agent/plan-storage.js +156 -0
  19. package/dist/agent/plan-storage.js.map +1 -0
  20. package/dist/agent/session-store.d.ts +114 -0
  21. package/dist/agent/session-store.d.ts.map +1 -0
  22. package/dist/agent/session-store.js +415 -0
  23. package/dist/agent/session-store.js.map +1 -0
  24. package/dist/agent/sub-agents/built-in.d.ts +3 -0
  25. package/dist/agent/sub-agents/built-in.d.ts.map +1 -0
  26. package/dist/agent/sub-agents/built-in.js +98 -0
  27. package/dist/agent/sub-agents/built-in.js.map +1 -0
  28. package/dist/agent/sub-agents/index.d.ts +7 -0
  29. package/dist/agent/sub-agents/index.d.ts.map +1 -0
  30. package/dist/agent/sub-agents/index.js +5 -0
  31. package/dist/agent/sub-agents/index.js.map +1 -0
  32. package/dist/agent/sub-agents/loader.d.ts +5 -0
  33. package/dist/agent/sub-agents/loader.d.ts.map +1 -0
  34. package/dist/agent/sub-agents/loader.js +117 -0
  35. package/dist/agent/sub-agents/loader.js.map +1 -0
  36. package/dist/agent/sub-agents/registry.d.ts +14 -0
  37. package/dist/agent/sub-agents/registry.d.ts.map +1 -0
  38. package/dist/agent/sub-agents/registry.js +37 -0
  39. package/dist/agent/sub-agents/registry.js.map +1 -0
  40. package/dist/agent/sub-agents/runner.d.ts +26 -0
  41. package/dist/agent/sub-agents/runner.d.ts.map +1 -0
  42. package/dist/agent/sub-agents/runner.js +287 -0
  43. package/dist/agent/sub-agents/runner.js.map +1 -0
  44. package/dist/agent/sub-agents/types.d.ts +63 -0
  45. package/dist/agent/sub-agents/types.d.ts.map +1 -0
  46. package/dist/agent/sub-agents/types.js +2 -0
  47. package/dist/agent/sub-agents/types.js.map +1 -0
  48. package/dist/agent/system-prompt.d.ts +15 -0
  49. package/dist/agent/system-prompt.d.ts.map +1 -1
  50. package/dist/agent/system-prompt.js +161 -0
  51. package/dist/agent/system-prompt.js.map +1 -1
  52. package/dist/agent/tool-execution.d.ts +4 -3
  53. package/dist/agent/tool-execution.d.ts.map +1 -1
  54. package/dist/agent/tool-execution.js +324 -14
  55. package/dist/agent/tool-execution.js.map +1 -1
  56. package/dist/agent/tool-result-sanitize.d.ts +12 -0
  57. package/dist/agent/tool-result-sanitize.d.ts.map +1 -1
  58. package/dist/agent/tool-result-sanitize.js +70 -0
  59. package/dist/agent/tool-result-sanitize.js.map +1 -1
  60. package/dist/config/index.d.ts +6 -0
  61. package/dist/config/index.d.ts.map +1 -1
  62. package/dist/config/index.js.map +1 -1
  63. package/dist/index.d.ts +15 -5
  64. package/dist/index.d.ts.map +1 -1
  65. package/dist/index.js +13 -3
  66. package/dist/index.js.map +1 -1
  67. package/dist/knowledge/session.d.ts +4 -7
  68. package/dist/knowledge/session.d.ts.map +1 -1
  69. package/dist/knowledge/session.js +20 -55
  70. package/dist/knowledge/session.js.map +1 -1
  71. package/dist/permissions/index.d.ts +21 -4
  72. package/dist/permissions/index.d.ts.map +1 -1
  73. package/dist/permissions/index.js +37 -3
  74. package/dist/permissions/index.js.map +1 -1
  75. package/dist/permissions/session-store.d.ts +60 -0
  76. package/dist/permissions/session-store.d.ts.map +1 -0
  77. package/dist/permissions/session-store.js +233 -0
  78. package/dist/permissions/session-store.js.map +1 -0
  79. package/dist/tools/ask-user.d.ts.map +1 -1
  80. package/dist/tools/ask-user.js +8 -6
  81. package/dist/tools/ask-user.js.map +1 -1
  82. package/dist/tools/enter-plan-mode.d.ts +25 -0
  83. package/dist/tools/enter-plan-mode.d.ts.map +1 -0
  84. package/dist/tools/enter-plan-mode.js +120 -0
  85. package/dist/tools/enter-plan-mode.js.map +1 -0
  86. package/dist/tools/exit-plan-mode.d.ts +13 -0
  87. package/dist/tools/exit-plan-mode.d.ts.map +1 -0
  88. package/dist/tools/exit-plan-mode.js +22 -0
  89. package/dist/tools/exit-plan-mode.js.map +1 -0
  90. package/dist/tools/grep.d.ts +1 -1
  91. package/dist/tools/index.d.ts +20 -4
  92. package/dist/tools/index.d.ts.map +1 -1
  93. package/dist/tools/index.js +7 -1
  94. package/dist/tools/index.js.map +1 -1
  95. package/dist/tools/save-knowledge.d.ts +2 -2
  96. package/dist/tools/shell-provider.d.ts +4 -0
  97. package/dist/tools/shell-provider.d.ts.map +1 -1
  98. package/dist/tools/shell-provider.js +2 -0
  99. package/dist/tools/shell-provider.js.map +1 -1
  100. package/dist/tools/task.d.ts +14 -0
  101. package/dist/tools/task.d.ts.map +1 -0
  102. package/dist/tools/task.js +95 -0
  103. package/dist/tools/task.js.map +1 -0
  104. package/dist/tools/todo-write.d.ts +21 -0
  105. package/dist/tools/todo-write.d.ts.map +1 -0
  106. package/dist/tools/todo-write.js +117 -0
  107. package/dist/tools/todo-write.js.map +1 -0
  108. package/dist/types/index.d.ts +104 -1
  109. package/dist/types/index.d.ts.map +1 -1
  110. package/dist/types/index.js.map +1 -1
  111. package/package.json +1 -1
  112. package/dist/knowledge/session-usage.d.ts +0 -24
  113. package/dist/knowledge/session-usage.d.ts.map +0 -1
  114. package/dist/knowledge/session-usage.js +0 -86
  115. package/dist/knowledge/session-usage.js.map +0 -1
@@ -20,6 +20,39 @@ You have access to these tools:
20
20
  - webFetch: Fetch and extract content from URLs
21
21
  - askUser: Ask the user clarifying questions with choices
22
22
  - saveKnowledge: Save project/user knowledge facts to persistent memory
23
+ - todoWrite: Track multi-step tasks with a live checklist visible to the user
24
+ - task: Delegate a task to a specialized sub-agent (explore, plan, review, general-purpose)
25
+
26
+ ## Sub-agent Delegation
27
+ Use the task tool to delegate research, exploration, planning, or review tasks to a specialized sub-agent. Sub-agents run in isolated context — they don't see your conversation history and their intermediate tool calls never pollute your context window. Only the final conclusion comes back.
28
+
29
+ When to delegate:
30
+ - Open-ended research or exploration that needs many reads/greps
31
+ - Code review of pending changes
32
+ - Implementation planning that requires reading many files
33
+ - Any multi-step investigation where you only need the conclusion, not the raw tool output
34
+
35
+ When NOT to delegate:
36
+ - Reading a specific file — use readFile directly
37
+ - Searching for a known symbol — use grep directly
38
+ - Simple single-step tasks you can do faster yourself
39
+ - Tasks where your immediate next step is blocked on the raw output
40
+
41
+ Your prompt to the sub-agent must be self-contained: include file paths, function names, what you've already learned, and what you need back. Terse prompts produce shallow results.
42
+
43
+ IMPORTANT — trust sub-agent results. When a sub-agent returns findings (file contents, code snippets, architecture descriptions), do NOT re-read the same files yourself. The sub-agent has already done that work. If the result is missing specific details, ask a follow-up sub-agent with a targeted prompt rather than duplicating the exploration manually.
44
+
45
+ Concurrency: NEVER launch multiple sub-agents that could write to the same files. Parallel sub-agents are fine when their tasks are independent and read-only.
46
+
47
+ ## Task Management
48
+ Break down and manage your work with the todoWrite tool. The user sees a live checklist panel of your progress — it makes long tasks feel structured and gives visibility into your plan.
49
+
50
+ - For any task with 3+ steps, call todoWrite EARLY — ideally on your first implementation turn.
51
+ - Right after exitPlanMode is approved and you have a plan with several phases, translate the plan steps into todos before writing code.
52
+ - Mark each task as in_progress BEFORE starting it and completed IMMEDIATELY after finishing. Do not batch completions at the end.
53
+ - Exactly one item should be in_progress at all times.
54
+ - Do NOT use todoWrite for single-file edits, trivial fixes, pure Q&A, or tasks with 1-2 obvious steps — todos add ceremony with no benefit.
55
+ - When all tasks are done, verify your work (run tests, check for errors) before moving on.
23
56
 
24
57
  ## Response Format
25
58
  - IMPORTANT: You MUST NOT use any emojis, icons, or special Unicode symbols (such as ✅❌📦🔧🔍📋🤔💡⚡🚀 etc.) in your responses, plans, or generated code. Use plain text markers like numbers, dashes, or asterisks instead. This is a strict requirement.
@@ -88,6 +121,131 @@ If you find a saved memory contradicts what you now observe, delete or update it
88
121
  - Shell: {shell}
89
122
  - Working Directory: {cwd}
90
123
  - Is Git Repo: {isGitRepo}`;
124
+ /** Plan-mode overlay appended to the base system prompt when
125
+ * `permissionMode === 'plan'`. Verbatim port of Claude Code's
126
+ * interview-phase plan-mode prompt (`messages.ts:3331-3382`), with
127
+ * read-only tool names + plan-file path substituted for our codebase.
128
+ * The overlay lives in the byte-stable systemPromptCache and is
129
+ * rebuilt only when permissionMode flips — within a mode, every turn
130
+ * reuses the same prefix, preserving prefix-cache hits.
131
+ *
132
+ * Why the iterative-interview shape matters: the BIG behavioral
133
+ * difference between plan mode and default mode in Claude Code is
134
+ * that plan mode is **conversational and turn-bounded** — every turn
135
+ * ends with either askUser or exitPlanMode, never with the model just
136
+ * trailing off. That's what gives plan mode its "user is in the
137
+ * driver's seat" feel. Without this rule, plan mode collapses into
138
+ * default mode with a read-only suffix and offers no real UX value.
139
+ * See a.log in the repo for an example of the right behavior shape. */
140
+ const PLAN_MODE_OVERLAY = `
141
+
142
+ Plan mode is active. The user indicated that they do not want you to execute yet -- you MUST NOT make any edits (with the exception of the plan file mentioned below), run any non-readonly tools (including changing configs or making commits), or otherwise make any changes to the system. This supercedes any other instructions you have received.
143
+
144
+ ## Plan File Info
145
+ The plan file for this session lives at: {planFilePath}
146
+ This is the ONLY file you are allowed to edit. Use writeFile to create it (first time) and edit to update it. All other write/shell tools are off-limits until the user approves your plan via exitPlanMode.
147
+
148
+ ## Iterative Planning Workflow
149
+
150
+ You are pair-planning with the user. Explore the code to build context, ask the user questions when you hit decisions you can't make alone, and write your findings into the plan file as you go. The plan file (above) is the ONLY file you may edit — it starts as a rough skeleton and gradually becomes the final plan.
151
+
152
+ ### The Loop
153
+
154
+ Repeat this cycle until the plan is complete:
155
+
156
+ 1. **Explore** — Use readFile, glob, grep, listDir, webSearch, webFetch to read code. Look for existing functions, utilities, and patterns to reuse.
157
+ 2. **Update the plan file** — After each discovery, immediately capture what you learned. Don't wait until the end.
158
+ 3. **Ask the user** — When you hit an ambiguity or decision you can't resolve from code alone, use askUser. Then go back to step 1.
159
+
160
+ ### First Turn
161
+
162
+ Start by quickly scanning a few key files to form an initial understanding of the task scope. Then write a skeleton plan (headers and rough notes) and ask the user your first round of questions. Don't explore exhaustively before engaging the user.
163
+
164
+ ### Asking Good Questions
165
+
166
+ - Never ask what you could find out by reading the code.
167
+ - Focus on things only the user can answer: requirements, preferences, tradeoffs, edge case priorities.
168
+ - Scale depth to the task — a vague feature request needs many rounds; a focused bug fix may need one or none.
169
+ - Each option's \`description\` should make the tradeoff of that choice obvious in one line.
170
+
171
+ ### askUser Footer Options (auto-injected in plan mode — do not include yourself)
172
+
173
+ The UI automatically appends two extra options to every askUser menu while in plan mode:
174
+ - **"Chat about this"** — the user wants to discuss without picking from your menu. If they choose this, engage them conversationally; do NOT immediately re-issue another askUser menu.
175
+ - **"Skip interview and plan immediately"** — the user is done with interviews. Stop asking questions, write the final plan to the plan file using everything you have so far, then call exitPlanMode.
176
+
177
+ You will see these come back as the answer string verbatim ("User answered: Chat about this" / "User answered: Skip interview and plan immediately") — recognize and honor them. Do NOT include either of these in your own \`options\` array; the UI adds them.
178
+
179
+ ### Plan File Structure
180
+ Your plan file should be divided into clear sections using markdown headers, based on the request. Fill out these sections as you go.
181
+ - Begin with a **Context** section: explain why this change is being made — the problem or need it addresses, what prompted it, and the intended outcome.
182
+ - Include only your recommended approach, not all alternatives.
183
+ - Keep the file concise enough to scan quickly, but detailed enough to execute effectively.
184
+ - Include the paths of critical files to be modified.
185
+ - Reference existing functions and utilities you found that should be reused, with their file paths.
186
+ - End with a **Verification** section describing how to test the changes (run the code, run tests).
187
+
188
+ ### When to Converge
189
+
190
+ Your plan is ready when you've addressed all ambiguities and it covers: what to change, which files to modify, what existing code to reuse (with file paths), and how to verify the changes. Call exitPlanMode when the plan is ready for approval.
191
+
192
+ ### Ending Your Turn
193
+
194
+ Your turn should only end by either:
195
+ - Using **askUser** to gather more information, OR
196
+ - Calling **exitPlanMode** when the plan is ready for approval.
197
+
198
+ This is critical — your turn should only end with one of these two tools. Do not stop unless it's for these 2 reasons.
199
+
200
+ ### exitPlanMode is the ONLY way to leave plan mode (HARD RULE)
201
+
202
+ Plan mode is a state — calling askUser does NOT and CANNOT leave it. Even if the user picks an option labelled "yes", "approve", "全接受", "looks good", "start", "ok", "execute", or anything similar in your askUser menu, **you are still in plan mode** and writing files will still hit per-file permission prompts. This is the most common way agents get plan mode wrong: they bake an "approve plan?" question into an askUser menu, the user picks Yes, and the agent proceeds to call writeFile expecting it to just work — but the mode never flipped.
203
+
204
+ **The only correct path to start implementing**:
205
+
206
+ 1. Write your plan to the plan file.
207
+ 2. Call **exitPlanMode** with the plan body as the \`plan\` argument.
208
+ 3. The user sees an approval dialog and chooses Yes/No.
209
+ 4. On Yes the system flips mode to acceptEdits — your subsequent writeFile / edit calls auto-approve.
210
+ 5. On No you stay in plan mode; revise and call exitPlanMode again.
211
+
212
+ **Forbidden patterns** (do not do any of these):
213
+ - askUser({ question: "Approve this plan?", options: [...] })
214
+ - askUser({ question: "Should I proceed?", options: [...] })
215
+ - askUser({ question: "Ready to implement?", options: [...] })
216
+ - askUser({ question: "How does this plan look?", options: [...] })
217
+ - askUser asking the user to choose between "execute everything" / "execute partially" — that's an exitPlanMode decision, not an askUser one.
218
+
219
+ If you find yourself wanting to ask "is the plan good?" in any form: stop, call exitPlanMode instead.
220
+
221
+ **askUser is for**: clarifying requirements, choosing between technical approaches DURING planning (e.g. "Redis vs in-memory cache?"), prioritizing what to include. Never for plan approval.`;
222
/**
 * Assemble the system prompt handed to a sub-agent.
 *
 * Deliberately smaller than the parent prompt: it carries no plan-mode
 * overlay, no auto-memory guidelines and no response-format section — only
 * the sub-agent's role, a snapshot of the environment, the knowledge context
 * and the output contract.
 *
 * Fields read from `options`:
 *   - `agentPrompt`: role text inserted under "# Your role".
 *   - `isGitRepo`: rendered as `yes` / `no` by truthiness.
 *   - `knowledgeContext`: inserted under "# Knowledge context"; any falsy
 *     value is rendered as `(none)`.
 *
 * Platform and working directory are read from `process` at call time and
 * the shell name comes from the active shell provider's `type`.
 *
 * @param options - Sub-agent prompt inputs (see field list above).
 * @returns The complete prompt string.
 */
export function buildSubAgentSystemPrompt(options) {
    const shellProvider = getShellProvider();
    const roleText = options.agentPrompt;
    const platformName = process.platform;
    const shellName = shellProvider.type;
    const workingDir = process.cwd();
    const gitRepoFlag = options.isGitRepo ? 'yes' : 'no';
    const knowledgeText = options.knowledgeContext || '(none)';
    return `You are a specialized subagent invoked by a parent coding assistant.

# Your role
${roleText}

# Environment
- Platform: ${platformName}
- Shell: ${shellName}
- Working Directory: ${workingDir}
- Is Git Repo: ${gitRepoFlag}

# Knowledge context
${knowledgeText}

# Output contract
- You operate in an isolated context. The parent agent will receive ONLY your final assistant message.
- The parent agent will NOT re-read any files you have read. Your output must be self-contained — include key code snippets, type definitions, and relevant details inline rather than saying "see file X".
- Be thorough in your final answer. Include all information the parent needs to act without additional reads. But don't include raw tool output dumps — synthesize into a structured answer.
- If you cannot complete the task, say so plainly in your final message.
- You CANNOT spawn further subagents.
- IMPORTANT: You MUST NOT use any emojis, icons, or special Unicode symbols in your responses.`;
}
91
249
  /** Build the full system prompt with dynamic values and optional knowledge context */
92
250
  export function buildSystemPrompt(options) {
93
251
  const shellProvider = getShellProvider();
@@ -99,6 +257,9 @@ export function buildSystemPrompt(options) {
99
257
  if (options?.knowledgeContext) {
100
258
  prompt += '\n\n' + options.knowledgeContext;
101
259
  }
260
+ if (options?.planMode) {
261
+ prompt += PLAN_MODE_OVERLAY.replace(/\{planFilePath\}/g, options.planFilePath ?? '<unset>');
262
+ }
102
263
  return prompt;
103
264
  }
104
265
  //# sourceMappingURL=system-prompt.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"system-prompt.js","sourceRoot":"","sources":["../../src/agent/system-prompt.ts"],"names":[],"mappings":"AAAA,8CAA8C;AAC9C,OAAO,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAA;AAE7D,MAAM,kBAAkB,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;2BAuFA,CAAA;AAE3B,sFAAsF;AACtF,MAAM,UAAU,iBAAiB,CAAC,OAIjC;IACC,MAAM,aAAa,GAAG,gBAAgB,EAAE,CAAA;IAExC,IAAI,MAAM,GAAG,kBAAkB,CAAC,OAAO,CAAC,eAAe,EAAE,OAAO,CAAC,QAAQ,CAAC;SACvE,OAAO,CAAC,YAAY,EAAE,aAAa,CAAC,IAAI,CAAC;SACzC,OAAO,CAAC,UAAU,EAAE,OAAO,CAAC,GAAG,EAAE,CAAC;SAClC,OAAO,CAAC,YAAY,EAAE,OAAO,EAAE,OAAO,IAAI,SAAS,CAAC;SACpD,OAAO,CAAC,gBAAgB,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,CAAA;IAE/D,IAAI,OAAO,EAAE,gBAAgB,EAAE,CAAC;QAC9B,MAAM,IAAI,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAA;IAC7C,CAAC;IAED,OAAO,MAAM,CAAA;AACf,CAAC"}
1
+ {"version":3,"file":"system-prompt.js","sourceRoot":"","sources":["../../src/agent/system-prompt.ts"],"names":[],"mappings":"AAAA,8CAA8C;AAC9C,OAAO,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAA;AAE7D,MAAM,kBAAkB,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;2BAwHA,CAAA;AAE3B;;;;;;;;;;;;;;;wEAewE;AACxE,MAAM,iBAAiB,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;8LAiFoK,CAAA;AAE9L;;gFAEgF;AAChF,MAAM,UAAU,yBAAyB,CAAC,OAIzC;IACC,MAAM,aAAa,GAAG,gBAAgB,EAAE,CAAA;IACxC,OAAO;;;EAGP,OAAO,CAAC,WAAW;;;cAGP,OAAO,CAAC,QAAQ;WACnB,aAAa,CAAC,IAAI;uBACN,OAAO,CAAC,GAAG,EAAE;iBACnB,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI;;;EAG/C,OAAO,CAAC,gBAAgB,IAAI,QAAQ;;;;;;;;+FAQyD,CAAA;AAC/F,CAAC;AAED,sFAAsF;AACtF,MAAM,UAAU,iBAAiB,CAAC,OAWjC;IACC,MAAM,aAAa,GAAG,gBAAgB,EAAE,CAAA;IAExC,IAAI,MAAM,GAAG,kBAAkB,CAAC,OAAO,CAAC,eAAe,EAAE,OAAO,CAAC,QAAQ,CAAC;SACvE,OAAO,CAAC,YAAY,EAAE,aAAa,CAAC,IAAI,CAAC;SACzC,OAAO,CAAC,UAAU,EAAE,OAAO,CAAC,GAAG,EAAE,CAAC;SAClC,OAAO,CAAC,YAAY,EAAE,OAAO,EAAE,OAAO,IAAI,SAAS,CAAC;SACpD,OAAO,CAAC,gBAAgB,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,CAAA;IAE/D,IAAI,OAAO,EAAE,gBAAgB,EAAE,CAAC;QAC9B,MAAM,IAAI,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAA;IAC7C,CAAC;IAED,IAAI,OAAO,EAAE,QAAQ,EAAE,CAAC;QACtB,MAAM,IAAI,iBAAiB,CAAC,OAAO,CAAC,mBAAmB,EAAE,OAAO,CAAC,YAAY,IAAI,SAAS,CAAC,CAAA;IAC7F,CAAC;IAED,OAAO,MAAM,CAAA;AACf,CAAC"}
@@ -1,11 +1,12 @@
1
- import type { AgentCallbacks, AgentOptions } from '../types/index.js';
1
+ import type { AgentCallbacks, AgentOptions, LanguageModel } from '../types/index.js';
2
2
  import type { LoopState } from './loop-state.js';
3
3
  type ToolCall = {
4
4
  toolName: string;
5
5
  toolCallId: string;
6
6
  input: Record<string, unknown>;
7
7
  };
8
- /** Handle all tool calls from a single model turn, sequentially. */
9
- export declare function processToolCalls(toolCalls: ToolCall[], state: LoopState, options: AgentOptions, callbacks: AgentCallbacks): Promise<void>;
8
+ /** Handle all tool calls from a single model turn, sequentially.
9
+ * `parentModel` is threaded through so the task tool can pass it to runSubAgent. */
10
+ export declare function processToolCalls(toolCalls: ToolCall[], state: LoopState, options: AgentOptions, callbacks: AgentCallbacks, parentModel: LanguageModel): Promise<void>;
10
11
  export {};
11
12
  //# sourceMappingURL=tool-execution.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"tool-execution.d.ts","sourceRoot":"","sources":["../../src/agent/tool-execution.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAA;AAGrE,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAA;AAsHhD,KAAK,QAAQ,GAAG;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CAAE,CAAA;AA0HxF,oEAAoE;AACpE,wBAAsB,gBAAgB,CACpC,SAAS,EAAE,QAAQ,EAAE,EACrB,KAAK,EAAE,SAAS,EAChB,OAAO,EAAE,YAAY,EACrB,SAAS,EAAE,cAAc,GACxB,OAAO,CAAC,IAAI,CAAC,CAIf"}
1
+ {"version":3,"file":"tool-execution.d.ts","sourceRoot":"","sources":["../../src/agent/tool-execution.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EAAE,cAAc,EAAE,YAAY,EAAE,aAAa,EAAY,MAAM,mBAAmB,CAAA;AAI9F,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAA;AA+LhD,KAAK,QAAQ,GAAG;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CAAE,CAAA;AAqbxF;qFACqF;AACrF,wBAAsB,gBAAgB,CACpC,SAAS,EAAE,QAAQ,EAAE,EACrB,KAAK,EAAE,SAAS,EAChB,OAAO,EAAE,YAAY,EACrB,SAAS,EAAE,cAAc,EACzB,WAAW,EAAE,aAAa,GACzB,OAAO,CAAC,IAAI,CAAC,CAyBf"}
@@ -6,8 +6,30 @@ import { truncateToolResult } from '../tools/index.js';
6
6
  import { clearProgressReporter, reportProgress } from '../tools/progress.js';
7
7
  import { getShellProvider } from '../tools/shell-provider.js';
8
8
  import { foldShellErrorNoise } from '../utils/shell-error.js';
9
+ import { computeEditDiff } from './diff.js';
9
10
  import { checkForLoop, recordToolCall } from './loop-guard.js';
10
11
  import { toolResultMessage } from './messages.js';
12
+ import { makePlanFilePath, readPlan, writePlan } from './plan-storage.js';
13
+ import { runSubAgent } from './sub-agents/runner.js';
14
/**
 * Return the text of the most recent user message that actually carries text.
 *
 * Used as the slug source for the plan filename. Walks `messages` from newest
 * to oldest and looks only at entries whose `role` is `'user'`:
 *   - string content is used as-is;
 *   - array content contributes its `{ type: 'text', text: string }` parts,
 *     joined with single spaces; every other part kind is ignored.
 *
 * A user message that yields no text (an empty string, an array without text
 * parts, or content of any other shape) is skipped and the search continues
 * with older messages, so an attachment-only message does not hide the
 * prompt that preceded it.
 *
 * @param {Array<{role?: string, content?: unknown}>} messages - Conversation history, oldest first.
 * @returns {string} The extracted text, or `''` when no user message has any.
 */
function lastUserMessageText(messages) {
    // Nothing to scan: behave like an empty history instead of throwing.
    if (!Array.isArray(messages))
        return '';
    for (let i = messages.length - 1; i >= 0; i--) {
        const m = messages[i];
        if (!m || m.role !== 'user')
            continue;
        const { content } = m;
        let text = '';
        if (typeof content === 'string') {
            text = content;
        }
        else if (Array.isArray(content)) {
            text = content
                .filter((p) => p?.type === 'text' && typeof p.text === 'string')
                .map((p) => p.text)
                .join(' ');
        }
        // Only stop on a message that produced text; otherwise keep walking
        // back, consistent with how unusable content shapes are skipped.
        if (text)
            return text;
    }
    return '';
}
11
33
  /** Count occurrences of a substring without creating intermediate arrays. */
12
34
  function countOccurrences(content, search) {
13
35
  let count = 0;
@@ -18,17 +40,36 @@ function countOccurrences(content, search) {
18
40
  }
19
41
  return count;
20
42
  }
21
- /** Execute a write tool (writeFile / edit). */
22
- async function executeWriteTool(toolName, input, toolCallId) {
43
+ /** Execute a write tool (writeFile / edit).
44
+ *
45
+ * In addition to returning the model-facing result string, fires
46
+ * `callbacks.onFileEdit` (when defined) with the structured patch so the
47
+ * UI can render a colored diff under the tool bullet. The diff payload is
48
+ * a UI-only side channel — it never lands in `state.messages` and the
49
+ * model only sees the short result string. */
50
+ async function executeWriteTool(toolName, input, toolCallId, callbacks) {
23
51
  if (toolName === 'writeFile') {
24
52
  const filePath = input.filePath;
25
53
  const content = input.content;
26
54
  reportProgress(toolCallId, `Writing ${filePath}`);
27
55
  await fs.mkdir(path.dirname(filePath), { recursive: true });
28
- const isNew = await fs.access(filePath).then(() => false, () => true);
56
+ // Read old content BEFORE writing so we can diff. Treat any read
57
+ // failure as "file did not exist" — covers the common ENOENT path
58
+ // plus permission / EISDIR edge cases (we'd error on write anyway).
59
+ let oldContent = null;
60
+ try {
61
+ oldContent = await fs.readFile(filePath, 'utf-8');
62
+ }
63
+ catch {
64
+ oldContent = null;
65
+ }
29
66
  await fs.writeFile(filePath, content, 'utf-8');
67
+ const isNew = oldContent === null;
30
68
  const parts = content.split('\n');
31
69
  const lineCount = content.endsWith('\n') ? parts.length - 1 : parts.length;
70
+ const payload = computeEditDiff(filePath, oldContent, content);
71
+ if (payload && callbacks.onFileEdit)
72
+ callbacks.onFileEdit(toolCallId, payload);
32
73
  if (isNew) {
33
74
  return `File created: ${filePath} (${lineCount} lines)`;
34
75
  }
@@ -50,23 +91,46 @@ async function executeWriteTool(toolName, input, toolCallId) {
50
91
  }
51
92
  const newContent = replaceAll ? content.replaceAll(oldString, newString) : content.replace(oldString, newString);
52
93
  await fs.writeFile(filePath, newContent, 'utf-8');
94
+ const payload = computeEditDiff(filePath, content, newContent);
95
+ if (payload && callbacks.onFileEdit)
96
+ callbacks.onFileEdit(toolCallId, payload);
53
97
  return `File edited: ${filePath}`;
54
98
  }
55
99
  return 'Error: unknown write tool';
56
100
  }
57
101
  /** Execute a shell command with streaming. */
58
- async function executeShell(command, timeout, callbacks, toolCallId) {
59
- const proc = getShellProvider().spawn(command, { timeout });
102
+ async function executeShell(command, timeout, signal, callbacks, toolCallId) {
103
+ const proc = getShellProvider().spawn(command, { timeout, signal });
60
104
  reportProgress(toolCallId, 'Running command...');
105
+ // Throttle the live progress message to at most one update per 50ms.
106
+ // Why: PowerShell `Format-Table` and similar table-rendering commands
107
+ // emit many lines in a single ~1ms burst, each as its own `data` event
108
+ // here. Without throttling we'd fire reportProgress 5-10× per millisec,
109
+ // each one becoming a setState → ChatInput render → deferred stdout
110
+ // write. The deferred queue absorbs most of the burst into one frame,
111
+ // but if the deferred-fire timer happens to land ~1ms before the
112
+ // tool-result commit arrives, the user sees a visible "progress text
113
+ // flashes, then result block scrolls in" pair. Throttling at the
114
+ // source cuts the storm to ≤20 updates/sec — fast enough to feel
115
+ // live, slow enough to dramatically reduce the chance that any
116
+ // deferred-fire collides with the upcoming tool-result commit.
117
+ // The model still sees full output via the `result` field; this only
118
+ // throttles the live progress display, not what reaches the LLM.
119
+ let lastProgressTime = 0;
120
+ const PROGRESS_THROTTLE_MS = 50;
61
121
  const onChunk = (chunk) => {
62
122
  const s = chunk.toString();
63
123
  callbacks.onShellOutput(s);
124
+ const now = Date.now();
125
+ if (now - lastProgressTime < PROGRESS_THROTTLE_MS)
126
+ return;
64
127
  // Take the last non-empty line of the chunk as the progress message.
65
128
  // Long-running commands (tsc, test suites) stream many lines; showing
66
129
  // the most recent is a natural "what's happening right now" signal.
67
130
  const lines = s.split(/\r?\n/).filter((l) => l.trim().length > 0);
68
131
  const last = lines[lines.length - 1];
69
132
  if (last) {
133
+ lastProgressTime = now;
70
134
  const trimmed = last.length > 120 ? last.slice(0, 117) + '...' : last;
71
135
  reportProgress(toolCallId, trimmed);
72
136
  }
@@ -105,8 +169,10 @@ function pushToolResult(state, callbacks, toolCallId, toolName, output, isError
105
169
  * tool has already run and its result is already in `state.messages`. We
106
170
  * can't pre-block these — only record for loop detection and annotate. */
107
171
  const AUTO_EXECUTED_TOOLS = new Set(['readFile', 'glob', 'grep', 'listDir', 'webFetch', 'webSearch', 'saveKnowledge']);
108
- /** Handle a single tool call. Returns when the call has been fully dispatched. */
109
- async function handleToolCall(tc, state, options, callbacks) {
172
+ /** Handle a single tool call. Returns when the call has been fully dispatched.
173
+ * `parentModel` is the LanguageModel instance for the current loop — needed
174
+ * by the task tool to pass as fallback when the sub-agent doesn't override. */
175
+ async function handleToolCall(tc, state, options, callbacks, parentModel) {
110
176
  const { toolName, input, toolCallId } = tc;
111
177
  // ── askUser tool ──
112
178
  // Skip the loop guard for askUser — the model asking the user the same
@@ -119,6 +185,231 @@ async function handleToolCall(tc, state, options, callbacks) {
119
185
  pushToolResult(state, callbacks, toolCallId, toolName, `User answered: ${answer}`);
120
186
  return;
121
187
  }
188
+ // ── todoWrite tool ──
189
+ // Full-replacement semantics: every call rewrites state.todos with
190
+ // the model's payload. Auto-clears (drops to []) when every item is
191
+ // completed, mirroring Claude Code's TodoWriteTool behavior — the
192
+ // user's live UI panel goes back to "no checklist" once the work is
193
+ // done, instead of showing a stale all-✓ list forever.
194
+ if (toolName === 'todoWrite') {
195
+ const raw = input.todos ?? [];
196
+ const normalized = [];
197
+ for (const t of raw) {
198
+ const content = (t.content ?? '').trim();
199
+ const activeForm = (t.activeForm ?? '').trim();
200
+ // Need at least one identity field — otherwise this is just an
201
+ // empty entry and there's nothing useful to show or track.
202
+ if (!content && !activeForm)
203
+ continue;
204
+ normalized.push({
205
+ content: content || activeForm,
206
+ activeForm: activeForm || content,
207
+ status: t.status ?? 'pending',
208
+ });
209
+ }
210
+ const allDone = normalized.length > 0 && normalized.every((t) => t.status === 'completed');
211
+ state.todos = allDone ? [] : normalized;
212
+ callbacks.onTodosUpdate(state.todos);
213
+ const dropped = raw.length - normalized.length;
214
+ const droppedNote = dropped > 0
215
+ ? ` ${dropped} entr${dropped === 1 ? 'y was' : 'ies were'} dropped because they had neither content nor activeForm — please include both fields next time so the user sees clean labels.`
216
+ : '';
217
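+ // Verification nudge: when completing a 3+ item list and none of
218
+ // them look like a verification step, remind the model to verify.
+ // NOTE(review): VERIFY_RE is anchored with `\b` on both sides, so only the
+ // exact words match ("test", "check", "build") — "verify", "tests" and
+ // "linting" do not, and the `verif` alternative is effectively dead.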
219
+ const VERIFY_RE = /\b(verif|test|check|lint|build|typecheck|tsc)\b/i;
220
+ const needsVerifyNudge = allDone &&
221
+ normalized.length >= 3 &&
222
+ !normalized.some((t) => VERIFY_RE.test(t.content) || VERIFY_RE.test(t.activeForm));
223
+ const verifyNote = needsVerifyNudge
224
+ ? ' Before wrapping up, verify your work — run tests, lint, or type-check as appropriate for this project.'
225
+ : '';
226
+ pushToolResult(state, callbacks, toolCallId, toolName, allDone
227
+ ? `All todos completed. Checklist cleared.${verifyNote}${droppedNote}`
228
+ : `Todo list updated. Keep the checklist current — mark items completed immediately when finished, and ensure exactly one item is in_progress.${droppedNote}`);
229
+ return;
230
+ }
231
+ // ── task tool (sub-agent dispatch) ──
232
+ if (toolName === 'task') {
233
+ const agentName = input.subagent_type;
234
+ const description = input.description;
235
+ const taskPrompt = input.prompt;
236
+ reportProgress(toolCallId, `Task: ${description} (${agentName})`);
237
+ const result = await runSubAgent({
238
+ parentState: state,
239
+ parentOptions: options,
240
+ callbacks,
241
+ toolCallId,
242
+ agentName,
243
+ description,
244
+ prompt: taskPrompt,
245
+ knowledgeContext: state.knowledgeContext ?? '',
246
+ isGitRepo: state.isGitRepo ?? false,
247
+ }, parentModel);
248
+ const statsLine = `<task_stats tool_calls="${result.toolCallCount}" tokens="${result.tokenUsage.totalTokens}" duration_ms="${result.durationMs}" />`;
249
+ pushToolResult(state, callbacks, toolCallId, toolName, `${result.resultText}\n${statsLine}`);
250
+ return;
251
+ }
252
+ // ── enterPlanMode tool ──
253
+ // Flip state.permissionMode → 'plan', invalidate the system-prompt
254
+ // cache so the next turn rebuilds it with the overlay, and reserve a
255
+ // plan-file path on state.currentPlanPath WITHOUT actually creating
256
+ // the file (the path is just a string until the model decides it
257
+ // wants a scratchpad). Plan mode is a conversation state, not a
258
+ // forced "write to a file" workflow — for Q&A and discussion the
259
+ // model never touches the file. The path is created lazily, the
260
+ // first time the model calls writeFile/edit on it (or when
261
+ // exitPlanMode persists the approved plan).
262
+ if (toolName === 'enterPlanMode') {
263
+ if (state.permissionMode === 'plan') {
264
+ pushToolResult(state, callbacks, toolCallId, toolName, 'Already in plan mode. Continue the conversation; call exitPlanMode when the user has asked for an implementation and you have a plan ready.');
265
+ return;
266
+ }
267
+ // Approval gate. Mirrors Claude Code: model can recommend plan
268
+ // mode but cannot enter on its own — user has to consent so the
269
+ // mode flip never feels like the model unilaterally hijacking the
270
+ // session. The same dialog component the write-tool path uses
271
+ // renders a "X-Code wants to enter plan mode" prompt with Yes/No.
272
+ const approved = await callbacks.onAskPermission({ toolCallId, toolName, input });
273
+ if (options.abortSignal?.aborted) {
274
+ pushToolResult(state, callbacks, toolCallId, toolName, '[Tool execution interrupted by user]', true);
275
+ return;
276
+ }
277
+ if (!approved) {
278
+ pushToolResult(state, callbacks, toolCallId, toolName, "User declined to enter plan mode. Continue with the user's request in default mode — make whatever edits or shell calls the task requires (subject to per-tool permission).", true);
279
+ return;
280
+ }
281
+ state.permissionMode = 'plan';
282
+ state.systemPromptCache = null;
283
+ // Derive the plan file path. Slug priority:
284
+ // 1. Model-supplied `topic` (3-5 English words specific to the
285
+ // current task — most accurate when the user is mid-session
286
+ // and the topic has shifted).
287
+ // 2. `state.taskSlug` (set once per session by agentLoop using
288
+ // either local slugify or a one-shot LLM summary — already
289
+ // handles CJK first messages).
290
+ // 3. Raw last-user-message text (final fallback; slugify will
291
+ // reduce CJK to empty → timestamp-only filename).
292
+ if (!state.currentPlanPath) {
293
+ const topic = input.topic?.trim();
294
+ const fallbackText = lastUserMessageText(state.messages);
295
+ const explicitSlug = topic && topic.length > 0 ? topic : state.taskSlug || undefined;
296
+ state.currentPlanPath = makePlanFilePath(fallbackText, { slug: explicitSlug });
297
+ }
298
+ callbacks.onPlanModeChange('plan');
299
+ pushToolResult(state, callbacks, toolCallId, toolName, [
300
+ 'Entered plan mode (user approved).',
301
+ '',
302
+ 'Read-only tools are unrestricted (readFile, glob, grep, listDir, webSearch, webFetch).',
303
+ `Plan file path for this session: ${state.currentPlanPath}`,
304
+ 'Use writeFile/edit on the plan file to build your plan; do NOT edit any other files',
305
+ 'or run state-changing shell commands until the user approves your plan via exitPlanMode.',
306
+ '',
307
+ 'Workflow: explore → update plan file → askUser → repeat.',
308
+ '',
309
+ 'CRITICAL: when the plan is ready, call **exitPlanMode** to request approval — NOT',
310
+ 'askUser. askUser cannot leave plan mode no matter how the user answers; only',
311
+ 'exitPlanMode flips the mode and unblocks your writeFile/edit/shell calls.',
312
+ ].join('\n'));
313
+ return;
314
+ }
315
+ // ── exitPlanMode tool ──
316
+ // Triggers the user-approval gate. The plan body comes from the
317
+ // session's plan file, unless the model passes a non-empty `input.plan`
318
+ // override. An override is written back to the plan file (best-effort;
319
+ // write failures are ignored) before showing the approval dialog, so the
320
+ // on-disk record matches what the user is asked to approve or reject.
321
+ // Approval flips state back to 'default' and invalidates the
322
+ // system-prompt cache so the next turn drops the plan-mode overlay.
323
+ // Rejection keeps the model in plan mode and tells it to revise.
324
+ if (toolName === 'exitPlanMode') {
325
+ if (state.permissionMode !== 'plan') {
326
+ pushToolResult(state, callbacks, toolCallId, toolName, 'Error: not in plan mode. exitPlanMode is only valid when the session is in plan mode.', true);
327
+ return;
328
+ }
329
+ // Source of truth for the plan body is the plan file the model has
330
+ // been writing to during planning (matches Claude Code: the model
331
+ // builds the plan incrementally via writeFile/edit, then calls
332
+ // exitPlanMode which reads the file). The optional `plan` override
333
+ // exists for rare cases where the model wants to substitute the
334
+ // file content with something different.
335
+ const planPath = state.currentPlanPath ??
336
+ makePlanFilePath(lastUserMessageText(state.messages), { slug: state.taskSlug || undefined });
337
+ state.currentPlanPath = planPath;
338
+ const planOverride = input.plan?.trim();
339
+ let planBody = planOverride ?? '';
340
+ if (!planBody) {
341
+ planBody = (await readPlan(planPath)).trim();
342
+ }
343
+ if (!planBody) {
344
+ pushToolResult(state, callbacks, toolCallId, toolName, `Error: the plan file at ${planPath} is empty. Write your plan to that file using writeFile or edit, then call exitPlanMode again.`, true);
345
+ return;
346
+ }
347
+ // If the model passed an override, persist it back to the plan
348
+ // file so the on-disk record matches what the user sees / approves.
349
+ let savedPath = planPath;
350
+ if (planOverride) {
351
+ try {
352
+ savedPath = await writePlan(planPath, planBody);
353
+ state.currentPlanPath = savedPath;
354
+ }
355
+ catch {
356
+ // Disk failure (read-only fs, permissions) is non-fatal — fall
357
+ // through to the approval dialog with the in-memory body.
358
+ }
359
+ }
360
+ const approved = await callbacks.onPlanApprovalRequest(planBody);
361
+ if (approved) {
362
+ // Default post-approval mode is `acceptEdits` — the user just
363
+ // vetted the plan, so making them click "Yes" on every writeFile
364
+ // / edit during implementation is pure friction. Shell commands
365
+ // still go through normal classification (always-allow for read-
366
+ // only, ask for mixed, deny for destructive) so we don't blanket-
367
+ // approve `rm -rf` on plan approval. Matches Claude Code's
368
+ // default "Yes, auto-accept edits" behavior.
369
+ state.permissionMode = 'acceptEdits';
370
+ state.systemPromptCache = null;
371
+ const persisted = savedPath ?? state.currentPlanPath;
372
+ state.currentPlanPath = null;
373
+ callbacks.onPlanModeChange('acceptEdits');
374
+ pushToolResult(state, callbacks, toolCallId, toolName, [
375
+ 'Plan approved by user. Plan mode has been exited.',
376
+ persisted ? `The approved plan is saved at: ${persisted}` : '',
377
+ 'You can now edit files and run shell commands. Start implementing the plan.',
378
+ '',
379
+ 'For multi-step plans, call **todoWrite** first to break the plan into a',
380
+ 'tracked checklist — the user sees a live panel of your progress and you',
381
+ 'avoid losing track of remaining steps mid-implementation.',
382
+ ]
383
+ .filter(Boolean)
384
+ .join('\n'));
385
+ // Also inject a system-reminder-style user-role meta message so
386
+ // the model treats the mode flip as a fresh top-level instruction
387
+ // rather than just a tool result. Mirrors Claude Code's
388
+ // `## Exited Plan Mode` attachment (messages.ts:3847-3852) — gives
389
+ // the next turn a clear "the rules just changed" anchor.
390
+ state.messages.push({
391
+ role: 'user',
392
+ content: [
393
+ '## Exited Plan Mode',
394
+ '',
395
+ 'You have exited plan mode. You can now make edits, run tools, and take actions.',
396
+ 'Write tools (writeFile, edit) are now auto-approved (acceptEdits mode); shell commands',
397
+ 'still go through normal permission classification.',
398
+ persisted ? `The plan file is located at ${persisted} if you need to reference it.` : '',
399
+ ]
400
+ .filter(Boolean)
401
+ .join('\n'),
402
+ });
403
+ return;
404
+ }
405
+ pushToolResult(state, callbacks, toolCallId, toolName, [
406
+ 'Plan rejected by user. You are still in plan mode.',
407
+ "Read the user's next message for feedback, revise the plan accordingly,",
408
+ 'and call exitPlanMode again with the revised body. Consider asking the user',
409
+ 'a clarifying question via askUser if you are unsure what to change.',
410
+ ].join('\n'), true);
411
+ return;
412
+ }
122
413
  // ── Doom-loop detection ──
123
414
  // For manual tools we pre-block. For auto-executed tools the call has
124
415
  // already run (result landed in state.messages via collectTurnResponse);
@@ -167,7 +458,11 @@ async function handleToolCall(tc, state, options, callbacks) {
167
458
  recordToolCall(state, toolName, input, loopCheck.hash);
168
459
  // ── Permission check for write tools and shell ──
169
460
  if (toolName === 'writeFile' || toolName === 'edit' || toolName === 'shell') {
170
- const approved = await checkPermission({ toolCallId, toolName, input }, options.trustMode, callbacks.onAskPermission);
461
+ const approved = await checkPermission({ toolCallId, toolName, input }, options.trustMode, callbacks.onAskPermission, state.permissionMode, process.cwd());
462
+ if (options.abortSignal?.aborted) {
463
+ pushToolResult(state, callbacks, toolCallId, toolName, '[Tool execution interrupted by user]', true);
464
+ return;
465
+ }
171
466
  if (!approved) {
172
467
  pushToolResult(state, callbacks, toolCallId, toolName, 'Permission denied by user.');
173
468
  return;
@@ -178,7 +473,7 @@ async function handleToolCall(tc, state, options, callbacks) {
178
473
  let isError = false;
179
474
  try {
180
475
  if (toolName === 'writeFile' || toolName === 'edit') {
181
- output = await executeWriteTool(toolName, input, toolCallId);
476
+ output = await executeWriteTool(toolName, input, toolCallId, callbacks);
182
477
  // executeWriteTool returns "Error: ..." strings for in-band failures
183
478
  // (missing match, non-unique match) rather than throwing — surface
184
479
  // those as errored results so the scrollback line flips to red.
@@ -189,7 +484,7 @@ async function handleToolCall(tc, state, options, callbacks) {
189
484
  }
190
485
  else if (toolName === 'shell') {
191
486
  const timeout = input.timeout ?? 30000;
192
- const shellResult = await executeShell(input.command, timeout, callbacks, toolCallId);
487
+ const shellResult = await executeShell(input.command, timeout, options.abortSignal, callbacks, toolCallId);
193
488
  output = shellResult.output;
194
489
  isError = shellResult.isError;
195
490
  }
@@ -204,10 +499,25 @@ async function handleToolCall(tc, state, options, callbacks) {
204
499
  }
205
500
  pushToolResult(state, callbacks, toolCallId, toolName, truncateToolResult(output), isError);
206
501
  }
207
/**
 * Dispatch every tool call produced by one model turn, strictly in order.
 *
 * The abort signal on `options` is checked before each call. Once it reports
 * aborted, no further calls are dispatched; every call that has not been
 * dispatched yet gets an errored '[Tool execution interrupted by user]'
 * result instead, so each tool call from the turn still has a matching
 * tool result.
 *
 * @param toolCalls   Tool calls from the current turn; `toolCallId` and
 *                    `toolName` are read from each entry.
 * @param state       Loop state, handed to handleToolCall / pushToolResult.
 * @param options     Loop options; the optional `options.abortSignal` is consulted.
 * @param callbacks   Callbacks, handed to handleToolCall / pushToolResult.
 * @param parentModel Forwarded unchanged to handleToolCall.
 * @returns A promise that settles once every call was dispatched or skipped.
 */
export async function processToolCalls(toolCalls, state, options, callbacks, parentModel) {
    // Index of the first call that has not been dispatched yet.
    let next = 0;
    while (next < toolCalls.length && !options.abortSignal?.aborted) {
        await handleToolCall(toolCalls[next], state, options, callbacks, parentModel);
        next += 1;
    }
    // Empty when the loop ran to completion; otherwise the calls skipped by the abort.
    for (const { toolCallId, toolName } of toolCalls.slice(next)) {
        pushToolResult(state, callbacks, toolCallId, toolName, '[Tool execution interrupted by user]', true);
    }
}
213
523
  //# sourceMappingURL=tool-execution.js.map