npm - @x-code-cli/core - Versions diffs - 0.1.11 → 0.2.1 - Mend

@x-code-cli/core 0.1.11 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (115)

package/dist/agent/api-errors.d.ts.map +1 -1
package/dist/agent/api-errors.js +18 -0
package/dist/agent/api-errors.js.map +1 -1
package/dist/agent/diff.d.ts +35 -0
package/dist/agent/diff.d.ts.map +1 -0
package/dist/agent/diff.js +83 -0
package/dist/agent/diff.js.map +1 -0
package/dist/agent/loop-state.d.ts +45 -3
package/dist/agent/loop-state.d.ts.map +1 -1
package/dist/agent/loop-state.js +24 -3
package/dist/agent/loop-state.js.map +1 -1
package/dist/agent/loop.d.ts +10 -6
package/dist/agent/loop.d.ts.map +1 -1
package/dist/agent/loop.js +212 -30
package/dist/agent/loop.js.map +1 -1
package/dist/agent/plan-storage.d.ts +55 -0
package/dist/agent/plan-storage.d.ts.map +1 -0
package/dist/agent/plan-storage.js +156 -0
package/dist/agent/plan-storage.js.map +1 -0
package/dist/agent/session-store.d.ts +114 -0
package/dist/agent/session-store.d.ts.map +1 -0
package/dist/agent/session-store.js +415 -0
package/dist/agent/session-store.js.map +1 -0
package/dist/agent/sub-agents/built-in.d.ts +3 -0
package/dist/agent/sub-agents/built-in.d.ts.map +1 -0
package/dist/agent/sub-agents/built-in.js +98 -0
package/dist/agent/sub-agents/built-in.js.map +1 -0
package/dist/agent/sub-agents/index.d.ts +7 -0
package/dist/agent/sub-agents/index.d.ts.map +1 -0
package/dist/agent/sub-agents/index.js +5 -0
package/dist/agent/sub-agents/index.js.map +1 -0
package/dist/agent/sub-agents/loader.d.ts +5 -0
package/dist/agent/sub-agents/loader.d.ts.map +1 -0
package/dist/agent/sub-agents/loader.js +117 -0
package/dist/agent/sub-agents/loader.js.map +1 -0
package/dist/agent/sub-agents/registry.d.ts +14 -0
package/dist/agent/sub-agents/registry.d.ts.map +1 -0
package/dist/agent/sub-agents/registry.js +37 -0
package/dist/agent/sub-agents/registry.js.map +1 -0
package/dist/agent/sub-agents/runner.d.ts +26 -0
package/dist/agent/sub-agents/runner.d.ts.map +1 -0
package/dist/agent/sub-agents/runner.js +287 -0
package/dist/agent/sub-agents/runner.js.map +1 -0
package/dist/agent/sub-agents/types.d.ts +63 -0
package/dist/agent/sub-agents/types.d.ts.map +1 -0
package/dist/agent/sub-agents/types.js +2 -0
package/dist/agent/sub-agents/types.js.map +1 -0
package/dist/agent/system-prompt.d.ts +15 -0
package/dist/agent/system-prompt.d.ts.map +1 -1
package/dist/agent/system-prompt.js +161 -0
package/dist/agent/system-prompt.js.map +1 -1
package/dist/agent/tool-execution.d.ts +4 -3
package/dist/agent/tool-execution.d.ts.map +1 -1
package/dist/agent/tool-execution.js +324 -14
package/dist/agent/tool-execution.js.map +1 -1
package/dist/agent/tool-result-sanitize.d.ts +12 -0
package/dist/agent/tool-result-sanitize.d.ts.map +1 -1
package/dist/agent/tool-result-sanitize.js +70 -0
package/dist/agent/tool-result-sanitize.js.map +1 -1
package/dist/config/index.d.ts +6 -0
package/dist/config/index.d.ts.map +1 -1
package/dist/config/index.js.map +1 -1
package/dist/index.d.ts +15 -5
package/dist/index.d.ts.map +1 -1
package/dist/index.js +13 -3
package/dist/index.js.map +1 -1
package/dist/knowledge/session.d.ts +4 -7
package/dist/knowledge/session.d.ts.map +1 -1
package/dist/knowledge/session.js +20 -55
package/dist/knowledge/session.js.map +1 -1
package/dist/permissions/index.d.ts +21 -4
package/dist/permissions/index.d.ts.map +1 -1
package/dist/permissions/index.js +37 -3
package/dist/permissions/index.js.map +1 -1
package/dist/permissions/session-store.d.ts +60 -0
package/dist/permissions/session-store.d.ts.map +1 -0
package/dist/permissions/session-store.js +233 -0
package/dist/permissions/session-store.js.map +1 -0
package/dist/tools/ask-user.d.ts.map +1 -1
package/dist/tools/ask-user.js +8 -6
package/dist/tools/ask-user.js.map +1 -1
package/dist/tools/enter-plan-mode.d.ts +25 -0
package/dist/tools/enter-plan-mode.d.ts.map +1 -0
package/dist/tools/enter-plan-mode.js +120 -0
package/dist/tools/enter-plan-mode.js.map +1 -0
package/dist/tools/exit-plan-mode.d.ts +13 -0
package/dist/tools/exit-plan-mode.d.ts.map +1 -0
package/dist/tools/exit-plan-mode.js +22 -0
package/dist/tools/exit-plan-mode.js.map +1 -0
package/dist/tools/grep.d.ts +1 -1
package/dist/tools/index.d.ts +20 -4
package/dist/tools/index.d.ts.map +1 -1
package/dist/tools/index.js +7 -1
package/dist/tools/index.js.map +1 -1
package/dist/tools/save-knowledge.d.ts +2 -2
package/dist/tools/shell-provider.d.ts +4 -0
package/dist/tools/shell-provider.d.ts.map +1 -1
package/dist/tools/shell-provider.js +2 -0
package/dist/tools/shell-provider.js.map +1 -1
package/dist/tools/task.d.ts +14 -0
package/dist/tools/task.d.ts.map +1 -0
package/dist/tools/task.js +95 -0
package/dist/tools/task.js.map +1 -0
package/dist/tools/todo-write.d.ts +21 -0
package/dist/tools/todo-write.d.ts.map +1 -0
package/dist/tools/todo-write.js +117 -0
package/dist/tools/todo-write.js.map +1 -0
package/dist/types/index.d.ts +104 -1
package/dist/types/index.d.ts.map +1 -1
package/dist/types/index.js.map +1 -1
package/package.json +1 -1
package/dist/knowledge/session-usage.d.ts +0 -24
package/dist/knowledge/session-usage.d.ts.map +0 -1
package/dist/knowledge/session-usage.js +0 -86
package/dist/knowledge/session-usage.js.map +0 -1

package/dist/agent/system-prompt.js CHANGED Viewed

@@ -20,6 +20,39 @@ You have access to these tools:
 - webFetch: Fetch and extract content from URLs
 - askUser: Ask the user clarifying questions with choices
 - saveKnowledge: Save project/user knowledge facts to persistent memory
+- todoWrite: Track multi-step tasks with a live checklist visible to the user
+- task: Delegate a task to a specialized sub-agent (explore, plan, review, general-purpose)
+## Sub-agent Delegation
+Use the task tool to delegate research, exploration, planning, or review tasks to a specialized sub-agent. Sub-agents run in isolated context — they don't see your conversation history and their intermediate tool calls never pollute your context window. Only the final conclusion comes back.
+When to delegate:
+- Open-ended research or exploration that needs many reads/greps
+- Code review of pending changes
+- Implementation planning that requires reading many files
+- Any multi-step investigation where you only need the conclusion, not the raw tool output
+When NOT to delegate:
+- Reading a specific file — use readFile directly
+- Searching for a known symbol — use grep directly
+- Simple single-step tasks you can do faster yourself
+- Tasks where your immediate next step is blocked on the raw output
+Your prompt to the sub-agent must be self-contained: include file paths, function names, what you've already learned, and what you need back. Terse prompts produce shallow results.
+IMPORTANT — trust sub-agent results. When a sub-agent returns findings (file contents, code snippets, architecture descriptions), do NOT re-read the same files yourself. The sub-agent has already done that work. If the result is missing specific details, ask a follow-up sub-agent with a targeted prompt rather than duplicating the exploration manually.
+Concurrency: NEVER launch multiple sub-agents that could write to the same files. Parallel sub-agents are fine when their tasks are independent and read-only.
+## Task Management
+Break down and manage your work with the todoWrite tool. The user sees a live checklist panel of your progress — it makes long tasks feel structured and gives visibility into your plan.
+- For any task with 3+ steps, call todoWrite EARLY — ideally on your first implementation turn.
+- Right after exitPlanMode is approved and you have a plan with several phases, translate the plan steps into todos before writing code.
+- Mark each task as in_progress BEFORE starting it and completed IMMEDIATELY after finishing. Do not batch completions at the end.
+- Exactly one item should be in_progress at all times.
+- Do NOT use todoWrite for single-file edits, trivial fixes, pure Q&A, or tasks with 1-2 obvious steps — todos add ceremony with no benefit.
+- When all tasks are done, verify your work (run tests, check for errors) before moving on.
 ## Response Format
 - IMPORTANT: You MUST NOT use any emojis, icons, or special Unicode symbols (such as ✅❌📦🔧🔍📋🤔💡⚡🚀 etc.) in your responses, plans, or generated code. Use plain text markers like numbers, dashes, or asterisks instead. This is a strict requirement.
@@ -88,6 +121,131 @@ If you find a saved memory contradicts what you now observe, delete or update it
 - Shell: {shell}
 - Working Directory: {cwd}
 - Is Git Repo: {isGitRepo}`;
+/** Plan-mode overlay appended to the base system prompt when
+ *  `permissionMode === 'plan'`. Verbatim port of Claude Code's
+ *  interview-phase plan-mode prompt (`messages.ts:3331-3382`), with
+ *  read-only tool names + plan-file path substituted for our codebase.
+ *  The overlay lives in the byte-stable systemPromptCache and is
+ *  rebuilt only when permissionMode flips — within a mode, every turn
+ *  reuses the same prefix, preserving prefix-cache hits.
+ *
+ *  Why the iterative-interview shape matters: the BIG behavioral
+ *  difference between plan mode and default mode in Claude Code is
+ *  that plan mode is **conversational and turn-bounded** — every turn
+ *  ends with either askUser or exitPlanMode, never with the model just
+ *  trailing off. That's what gives plan mode its "user is in the
+ *  driver's seat" feel. Without this rule, plan mode collapses into
+ *  default mode with a read-only suffix and offers no real UX value.
+ *  See a.log in the repo for an example of the right behavior shape. */
+const PLAN_MODE_OVERLAY = `
+Plan mode is active. The user indicated that they do not want you to execute yet -- you MUST NOT make any edits (with the exception of the plan file mentioned below), run any non-readonly tools (including changing configs or making commits), or otherwise make any changes to the system. This supercedes any other instructions you have received.
+## Plan File Info
+The plan file for this session lives at: {planFilePath}
+This is the ONLY file you are allowed to edit. Use writeFile to create it (first time) and edit to update it. All other write/shell tools are off-limits until the user approves your plan via exitPlanMode.
+## Iterative Planning Workflow
+You are pair-planning with the user. Explore the code to build context, ask the user questions when you hit decisions you can't make alone, and write your findings into the plan file as you go. The plan file (above) is the ONLY file you may edit — it starts as a rough skeleton and gradually becomes the final plan.
+### The Loop
+Repeat this cycle until the plan is complete:
+1. **Explore** — Use readFile, glob, grep, listDir, webSearch, webFetch to read code. Look for existing functions, utilities, and patterns to reuse.
+2. **Update the plan file** — After each discovery, immediately capture what you learned. Don't wait until the end.
+3. **Ask the user** — When you hit an ambiguity or decision you can't resolve from code alone, use askUser. Then go back to step 1.
+### First Turn
+Start by quickly scanning a few key files to form an initial understanding of the task scope. Then write a skeleton plan (headers and rough notes) and ask the user your first round of questions. Don't explore exhaustively before engaging the user.
+### Asking Good Questions
+- Never ask what you could find out by reading the code.
+- Focus on things only the user can answer: requirements, preferences, tradeoffs, edge case priorities.
+- Scale depth to the task — a vague feature request needs many rounds; a focused bug fix may need one or none.
+- Each option's \`description\` should make the tradeoff of that choice obvious in one line.
+### askUser Footer Options (auto-injected in plan mode — do not include yourself)
+The UI automatically appends two extra options to every askUser menu while in plan mode:
+- **"Chat about this"** — the user wants to discuss without picking from your menu. If they choose this, engage them conversationally; do NOT immediately re-issue another askUser menu.
+- **"Skip interview and plan immediately"** — the user is done with interviews. Stop asking questions, write the final plan to the plan file using everything you have so far, then call exitPlanMode.
+You will see these come back as the answer string verbatim ("User answered: Chat about this" / "User answered: Skip interview and plan immediately") — recognize and honor them. Do NOT include either of these in your own \`options\` array; the UI adds them.
+### Plan File Structure
+Your plan file should be divided into clear sections using markdown headers, based on the request. Fill out these sections as you go.
+- Begin with a **Context** section: explain why this change is being made — the problem or need it addresses, what prompted it, and the intended outcome.
+- Include only your recommended approach, not all alternatives.
+- Keep the file concise enough to scan quickly, but detailed enough to execute effectively.
+- Include the paths of critical files to be modified.
+- Reference existing functions and utilities you found that should be reused, with their file paths.
+- End with a **Verification** section describing how to test the changes (run the code, run tests).
+### When to Converge
+Your plan is ready when you've addressed all ambiguities and it covers: what to change, which files to modify, what existing code to reuse (with file paths), and how to verify the changes. Call exitPlanMode when the plan is ready for approval.
+### Ending Your Turn
+Your turn should only end by either:
+- Using **askUser** to gather more information, OR
+- Calling **exitPlanMode** when the plan is ready for approval.
+This is critical — your turn should only end with one of these two tools. Do not stop unless it's for these 2 reasons.
+### exitPlanMode is the ONLY way to leave plan mode (HARD RULE)
+Plan mode is a state — calling askUser does NOT and CANNOT leave it. Even if the user picks an option labelled "yes", "approve", "全接受", "looks good", "start", "ok", "execute", or anything similar in your askUser menu, **you are still in plan mode** and writing files will still hit per-file permission prompts. This is the most common way agents get plan mode wrong: they bake an "approve plan?" question into an askUser menu, the user picks Yes, and the agent proceeds to call writeFile expecting it to just work — but the mode never flipped.
+**The only correct path to start implementing**:
+1. Write your plan to the plan file.
+2. Call **exitPlanMode** with the plan body as the \`plan\` argument.
+3. The user sees an approval dialog and chooses Yes/No.
+4. On Yes the system flips mode to acceptEdits — your subsequent writeFile / edit calls auto-approve.
+5. On No you stay in plan mode; revise and call exitPlanMode again.
+**Forbidden patterns** (do not do any of these):
+- askUser({ question: "Approve this plan?", options: [...] })
+- askUser({ question: "Should I proceed?", options: [...] })
+- askUser({ question: "Ready to implement?", options: [...] })
+- askUser({ question: "How does this plan look?", options: [...] })
+- askUser asking the user to choose between "execute everything" / "execute partially" — that's an exitPlanMode decision, not an askUser one.
+If you find yourself wanting to ask "is the plan good?" in any form: stop, call exitPlanMode instead.
+**askUser is for**: clarifying requirements, choosing between technical approaches DURING planning (e.g. "Redis vs in-memory cache?"), prioritizing what to include. Never for plan approval.`;
+/** Build a focused system prompt for a sub-agent invocation.
+ *  Shorter than the parent prompt — no plan-mode overlay, no auto-memory
+ *  guidelines, no response-format rules. Just role + environment + contract. */
+export function buildSubAgentSystemPrompt(options) {
+    const shellProvider = getShellProvider();
+    return `You are a specialized subagent invoked by a parent coding assistant.
+# Your role
+${options.agentPrompt}
+# Environment
+- Platform: ${process.platform}
+- Shell: ${shellProvider.type}
+- Working Directory: ${process.cwd()}
+- Is Git Repo: ${options.isGitRepo ? 'yes' : 'no'}
+# Knowledge context
+${options.knowledgeContext || '(none)'}
+# Output contract
+- You operate in an isolated context. The parent agent will receive ONLY your final assistant message.
+- The parent agent will NOT re-read any files you have read. Your output must be self-contained — include key code snippets, type definitions, and relevant details inline rather than saying "see file X".
+- Be thorough in your final answer. Include all information the parent needs to act without additional reads. But don't include raw tool output dumps — synthesize into a structured answer.
+- If you cannot complete the task, say so plainly in your final message.
+- You CANNOT spawn further subagents.
+- IMPORTANT: You MUST NOT use any emojis, icons, or special Unicode symbols in your responses.`;
+}
 /** Build the full system prompt with dynamic values and optional knowledge context */
 export function buildSystemPrompt(options) {
     const shellProvider = getShellProvider();
@@ -99,6 +257,9 @@ export function buildSystemPrompt(options) {
     if (options?.knowledgeContext) {
         prompt += '\n\n' + options.knowledgeContext;
     }
+    if (options?.planMode) {
+        prompt += PLAN_MODE_OVERLAY.replace(/\{planFilePath\}/g, options.planFilePath ?? '<unset>');
+    }
     return prompt;
 }
 //# sourceMappingURL=system-prompt.js.map

package/dist/agent/system-prompt.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"system-prompt.js","sourceRoot":"","sources":["../../src/agent/system-prompt.ts"],"names":[],"mappings":"AAAA,8CAA8C;AAC9C,OAAO,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAA;AAE7D,MAAM,kBAAkB,GAAG~~;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;2BAuFA~~,CAAA;AAE3B,sFAAsF;AACtF,MAAM,UAAU,iBAAiB,CAAC,~~OAIjC~~;IACC,MAAM,aAAa,GAAG,gBAAgB,EAAE,CAAA;IAExC,IAAI,MAAM,GAAG,kBAAkB,CAAC,OAAO,CAAC,eAAe,EAAE,OAAO,CAAC,QAAQ,CAAC;SACvE,OAAO,CAAC,YAAY,EAAE,aAAa,CAAC,IAAI,CAAC;SACzC,OAAO,CAAC,UAAU,EAAE,OAAO,CAAC,GAAG,EAAE,CAAC;SAClC,OAAO,CAAC,YAAY,EAAE,OAAO,EAAE,OAAO,IAAI,SAAS,CAAC;SACpD,OAAO,CAAC,gBAAgB,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,CAAA;IAE/D,IAAI,OAAO,EAAE,gBAAgB,EAAE,CAAC;QAC9B,MAAM,IAAI,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAA;IAC7C,CAAC;IAED,OAAO,MAAM,CAAA;AACf,CAAC"}
1	+ {"version":3,"file":"system-prompt.js","sourceRoot":"","sources":["../../src/agent/system-prompt.ts"],"names":[],"mappings":"AAAA,8CAA8C;AAC9C,OAAO,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAA;AAE7D,MAAM,kBAAkB,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;2BAwHA,CAAA;AAE3B;;;;;;;;;;;;;;;wEAewE;AACxE,MAAM,iBAAiB,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;8LAiFoK,CAAA;AAE9L;;gFAEgF;AAChF,MAAM,UAAU,yBAAyB,CAAC,OAIzC;IACC,MAAM,aAAa,GAAG,gBAAgB,EAAE,CAAA;IACxC,OAAO;;;EAGP,OAAO,CAAC,WAAW;;;cAGP,OAAO,CAAC,QAAQ;WACnB,aAAa,CAAC,IAAI;uBACN,OAAO,CAAC,GAAG,EAAE;iBACnB,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI;;;EAG/C,OAAO,CAAC,gBAAgB,IAAI,QAAQ;;;;;;;;+FAQyD,CAAA;AAC/F,CAAC;AAED,sFAAsF;AACtF,MAAM,UAAU,iBAAiB,CAAC,OAWjC;IACC,MAAM,aAAa,GAAG,gBAAgB,EAAE,CAAA;IAExC,IAAI,MAAM,GAAG,kBAAkB,CAAC,OAAO,CAAC,eAAe,EAAE,OAAO,CAAC,QAAQ,CAAC;SACvE,OAAO,CAAC,YAAY,EAAE,aAAa,CAAC,IAAI,CAAC;SACzC,OAAO,CAAC,UAAU,EAAE,OAAO,CAAC,GAAG,EAAE,CAAC;SAClC,OAAO,CAAC,YAAY,EAAE,OAAO,EAAE,OAAO,IAAI,SAAS,CAAC;SACpD,OAAO,CAAC,gBAAgB,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,CAAA;IAE/D,IAAI,OAAO,EAAE,gBAAgB,EAAE,CAAC;QAC9B,MAAM,IAAI,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAA;IAC7C,CAAC;IAED,IAAI,OAAO,EAAE,QAAQ,EAAE,CAAC;QACtB,MAAM,IAAI,iBAAiB,CAAC,OAAO,CAAC,mBAAmB,EAAE,OAAO,CAAC,YAAY,IAAI,SAAS,CAAC,CAAA;IAC7F,CAAC;IAED,OAAO,MAAM,CAAA;AACf,CAAC"}

package/dist/agent/tool-execution.d.ts CHANGED Viewed

@@ -1,11 +1,12 @@
-import type { AgentCallbacks, AgentOptions } from '../types/index.js';
+import type { AgentCallbacks, AgentOptions, LanguageModel } from '../types/index.js';
 import type { LoopState } from './loop-state.js';
 type ToolCall = {
     toolName: string;
     toolCallId: string;
     input: Record<string, unknown>;
 };
-/** Handle all tool calls from a single model turn, sequentially. */
-export declare function processToolCalls(toolCalls: ToolCall[], state: LoopState, options: AgentOptions, callbacks: AgentCallbacks): Promise<void>;
+/** Handle all tool calls from a single model turn, sequentially.
+ *  `parentModel` is threaded through so the task tool can pass it to runSubAgent. */
+export declare function processToolCalls(toolCalls: ToolCall[], state: LoopState, options: AgentOptions, callbacks: AgentCallbacks, parentModel: LanguageModel): Promise<void>;
 export {};
 //# sourceMappingURL=tool-execution.d.ts.map

package/dist/agent/tool-execution.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"tool-execution.d.ts","sourceRoot":"","sources":["../../src/agent/tool-execution.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAA;AAGrE,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAA;AAsHhD,KAAK,QAAQ,GAAG;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CAAE,CAAA;AA0HxF,oEAAoE;AACpE,wBAAsB,gBAAgB,CACpC,SAAS,EAAE,QAAQ,EAAE,EACrB,KAAK,EAAE,SAAS,EAChB,OAAO,EAAE,YAAY,EACrB,SAAS,EAAE,cAAc,GACxB,OAAO,CAAC,IAAI,CAAC,CAIf"}
1	+ {"version":3,"file":"tool-execution.d.ts","sourceRoot":"","sources":["../../src/agent/tool-execution.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EAAE,cAAc,EAAE,YAAY,EAAE,aAAa,EAAY,MAAM,mBAAmB,CAAA;AAI9F,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAA;AA+LhD,KAAK,QAAQ,GAAG;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CAAE,CAAA;AAqbxF;qFACqF;AACrF,wBAAsB,gBAAgB,CACpC,SAAS,EAAE,QAAQ,EAAE,EACrB,KAAK,EAAE,SAAS,EAChB,OAAO,EAAE,YAAY,EACrB,SAAS,EAAE,cAAc,EACzB,WAAW,EAAE,aAAa,GACzB,OAAO,CAAC,IAAI,CAAC,CAyBf"}

package/dist/agent/tool-execution.js CHANGED Viewed

@@ -6,8 +6,30 @@ import { truncateToolResult } from '../tools/index.js';
 import { clearProgressReporter, reportProgress } from '../tools/progress.js';
 import { getShellProvider } from '../tools/shell-provider.js';
 import { foldShellErrorNoise } from '../utils/shell-error.js';
+import { computeEditDiff } from './diff.js';
 import { checkForLoop, recordToolCall } from './loop-guard.js';
 import { toolResultMessage } from './messages.js';
+import { makePlanFilePath, readPlan, writePlan } from './plan-storage.js';
+import { runSubAgent } from './sub-agents/runner.js';
+/** Walk back through state.messages and grab the most recent user
+ *  message's text — used as the slug source for the plan filename. */
+function lastUserMessageText(messages) {
+    for (let i = messages.length - 1; i >= 0; i--) {
+        const m = messages[i];
+        if (m && m.role === 'user') {
+            const content = m.content;
+            if (typeof content === 'string')
+                return content;
+            if (Array.isArray(content)) {
+                return content
+                    .filter((p) => p?.type === 'text' && typeof p.text === 'string')
+                    .map((p) => p.text)
+                    .join(' ');
+            }
+        }
+    }
+    return '';
+}
 /** Count occurrences of a substring without creating intermediate arrays. */
 function countOccurrences(content, search) {
     let count = 0;
@@ -18,17 +40,36 @@ function countOccurrences(content, search) {
     }
     return count;
 }
-/** Execute a write tool (writeFile / edit). */
-async function executeWriteTool(toolName, input, toolCallId) {
+/** Execute a write tool (writeFile / edit).
+ *
+ *  In addition to returning the model-facing result string, fires
+ *  `callbacks.onFileEdit` (when defined) with the structured patch so the
+ *  UI can render a colored diff under the tool bullet. The diff payload is
+ *  a UI-only side channel — it never lands in `state.messages` and the
+ *  model only sees the short result string. */
+async function executeWriteTool(toolName, input, toolCallId, callbacks) {
     if (toolName === 'writeFile') {
         const filePath = input.filePath;
         const content = input.content;
         reportProgress(toolCallId, `Writing ${filePath}`);
         await fs.mkdir(path.dirname(filePath), { recursive: true });
-        const isNew = await fs.access(filePath).then(() => false, () => true);
+        // Read old content BEFORE writing so we can diff. Treat any read
+        // failure as "file did not exist" — covers the common ENOENT path
+        // plus permission / EISDIR edge cases (we'd error on write anyway).
+        let oldContent = null;
+        try {
+            oldContent = await fs.readFile(filePath, 'utf-8');
+        }
+        catch {
+            oldContent = null;
+        }
         await fs.writeFile(filePath, content, 'utf-8');
+        const isNew = oldContent === null;
         const parts = content.split('\n');
         const lineCount = content.endsWith('\n') ? parts.length - 1 : parts.length;
+        const payload = computeEditDiff(filePath, oldContent, content);
+        if (payload && callbacks.onFileEdit)
+            callbacks.onFileEdit(toolCallId, payload);
         if (isNew) {
             return `File created: ${filePath} (${lineCount} lines)`;
         }
@@ -50,23 +91,46 @@ async function executeWriteTool(toolName, input, toolCallId) {
         }
         const newContent = replaceAll ? content.replaceAll(oldString, newString) : content.replace(oldString, newString);
         await fs.writeFile(filePath, newContent, 'utf-8');
+        const payload = computeEditDiff(filePath, content, newContent);
+        if (payload && callbacks.onFileEdit)
+            callbacks.onFileEdit(toolCallId, payload);
         return `File edited: ${filePath}`;
     }
     return 'Error: unknown write tool';
 }
 /** Execute a shell command with streaming. */
-async function executeShell(command, timeout, callbacks, toolCallId) {
-    const proc = getShellProvider().spawn(command, { timeout });
+async function executeShell(command, timeout, signal, callbacks, toolCallId) {
+    const proc = getShellProvider().spawn(command, { timeout, signal });
     reportProgress(toolCallId, 'Running command...');
+    // Throttle the live progress message to at most one update per 50ms.
+    // Why: PowerShell `Format-Table` and similar table-rendering commands
+    // emit many lines in a single ~1ms burst, each as its own `data` event
+    // here. Without throttling we'd fire reportProgress 5-10× per millisec,
+    // each one becoming a setState → ChatInput render → deferred stdout
+    // write. The deferred queue absorbs most of the burst into one frame,
+    // but if the deferred-fire timer happens to land ~1ms before the
+    // tool-result commit arrives, the user sees a visible "progress text
+    // flashes, then result block scrolls in" pair. Throttling at the
+    // source cuts the storm to ≤20 updates/sec — fast enough to feel
+    // live, slow enough to dramatically reduce the chance that any
+    // deferred-fire collides with the upcoming tool-result commit.
+    // The model still sees full output via the `result` field; this only
+    // throttles the live progress display, not what reaches the LLM.
+    let lastProgressTime = 0;
+    const PROGRESS_THROTTLE_MS = 50;
     const onChunk = (chunk) => {
         const s = chunk.toString();
         callbacks.onShellOutput(s);
+        const now = Date.now();
+        if (now - lastProgressTime < PROGRESS_THROTTLE_MS)
+            return;
         // Take the last non-empty line of the chunk as the progress message.
         // Long-running commands (tsc, test suites) stream many lines; showing
         // the most recent is a natural "what's happening right now" signal.
         const lines = s.split(/\r?\n/).filter((l) => l.trim().length > 0);
         const last = lines[lines.length - 1];
         if (last) {
+            lastProgressTime = now;
             const trimmed = last.length > 120 ? last.slice(0, 117) + '...' : last;
             reportProgress(toolCallId, trimmed);
         }
@@ -105,8 +169,10 @@ function pushToolResult(state, callbacks, toolCallId, toolName, output, isError
  *  tool has already run and its result is already in `state.messages`. We
  *  can't pre-block these — only record for loop detection and annotate. */
 const AUTO_EXECUTED_TOOLS = new Set(['readFile', 'glob', 'grep', 'listDir', 'webFetch', 'webSearch', 'saveKnowledge']);
-/** Handle a single tool call. Returns when the call has been fully dispatched. */
-async function handleToolCall(tc, state, options, callbacks) {
+/** Handle a single tool call. Returns when the call has been fully dispatched.
+ *  `parentModel` is the LanguageModel instance for the current loop — needed
+ *  by the task tool to pass as fallback when the sub-agent doesn't override. */
+async function handleToolCall(tc, state, options, callbacks, parentModel) {
     const { toolName, input, toolCallId } = tc;
     // ── askUser tool ──
     // Skip the loop guard for askUser — the model asking the user the same
@@ -119,6 +185,231 @@ async function handleToolCall(tc, state, options, callbacks) {
         pushToolResult(state, callbacks, toolCallId, toolName, `User answered: ${answer}`);
         return;
     }
+    // ── todoWrite tool ──
+    // Full-replacement semantics: every call rewrites state.todos with
+    // the model's payload. Auto-clears (drops to []) when every item is
+    // completed, mirroring Claude Code's TodoWriteTool behavior — the
+    // user's live UI panel goes back to "no checklist" once the work is
+    // done, instead of showing a stale all-✓ list forever.
+    if (toolName === 'todoWrite') {
+        const raw = input.todos ?? [];
+        const normalized = [];
+        for (const t of raw) {
+            const content = (t.content ?? '').trim();
+            const activeForm = (t.activeForm ?? '').trim();
+            // Need at least one identity field — otherwise this is just an
+            // empty entry and there's nothing useful to show or track.
+            if (!content && !activeForm)
+                continue;
+            normalized.push({
+                content: content || activeForm,
+                activeForm: activeForm || content,
+                status: t.status ?? 'pending',
+            });
+        }
+        const allDone = normalized.length > 0 && normalized.every((t) => t.status === 'completed');
+        state.todos = allDone ? [] : normalized;
+        callbacks.onTodosUpdate(state.todos);
+        const dropped = raw.length - normalized.length;
+        const droppedNote = dropped > 0
+            ? ` ${dropped} entr${dropped === 1 ? 'y was' : 'ies were'} dropped because they had neither content nor activeForm — please include both fields next time so the user sees clean labels.`
+            : '';
+        // Verification nudge: when completing a 3+ item list and none of
+        // them look like a verification step, remind the model to verify.
+        const VERIFY_RE = /\b(verif|test|check|lint|build|typecheck|tsc)\b/i;
+        const needsVerifyNudge = allDone &&
+            normalized.length >= 3 &&
+            !normalized.some((t) => VERIFY_RE.test(t.content) || VERIFY_RE.test(t.activeForm));
+        const verifyNote = needsVerifyNudge
+            ? ' Before wrapping up, verify your work — run tests, lint, or type-check as appropriate for this project.'
+            : '';
+        pushToolResult(state, callbacks, toolCallId, toolName, allDone
+            ? `All todos completed. Checklist cleared.${verifyNote}${droppedNote}`
+            : `Todo list updated. Keep the checklist current — mark items completed immediately when finished, and ensure exactly one item is in_progress.${droppedNote}`);
+        return;
+    }
+    // ── task tool (sub-agent dispatch) ──
+    if (toolName === 'task') {
+        const agentName = input.subagent_type;
+        const description = input.description;
+        const taskPrompt = input.prompt;
+        reportProgress(toolCallId, `Task: ${description} (${agentName})`);
+        const result = await runSubAgent({
+            parentState: state,
+            parentOptions: options,
+            callbacks,
+            toolCallId,
+            agentName,
+            description,
+            prompt: taskPrompt,
+            knowledgeContext: state.knowledgeContext ?? '',
+            isGitRepo: state.isGitRepo ?? false,
+        }, parentModel);
+        const statsLine = `<task_stats tool_calls="${result.toolCallCount}" tokens="${result.tokenUsage.totalTokens}" duration_ms="${result.durationMs}" />`;
+        pushToolResult(state, callbacks, toolCallId, toolName, `${result.resultText}\n${statsLine}`);
+        return;
+    }
+    // ── enterPlanMode tool ──
+    // Flip state.permissionMode → 'plan', invalidate the system-prompt
+    // cache so the next turn rebuilds it with the overlay, and reserve a
+    // plan-file path on state.currentPlanPath WITHOUT actually creating
+    // the file (the path is just a string until the model decides it
+    // wants a scratchpad). Plan mode is a conversation state, not a
+    // forced "write to a file" workflow — for Q&A and discussion the
+    // model never touches the file. The path is created lazily, the
+    // first time the model calls writeFile/edit on it (or when
+    // exitPlanMode persists the approved plan).
+    if (toolName === 'enterPlanMode') {
+        if (state.permissionMode === 'plan') {
+            pushToolResult(state, callbacks, toolCallId, toolName, 'Already in plan mode. Continue the conversation; call exitPlanMode when the user has asked for an implementation and you have a plan ready.');
+            return;
+        }
+        // Approval gate. Mirrors Claude Code: model can recommend plan
+        // mode but cannot enter on its own — user has to consent so the
+        // mode flip never feels like the model unilaterally hijacking the
+        // session. The same dialog component the write-tool path uses
+        // renders a "X-Code wants to enter plan mode" prompt with Yes/No.
+        const approved = await callbacks.onAskPermission({ toolCallId, toolName, input });
+        if (options.abortSignal?.aborted) {
+            pushToolResult(state, callbacks, toolCallId, toolName, '[Tool execution interrupted by user]', true);
+            return;
+        }
+        if (!approved) {
+            pushToolResult(state, callbacks, toolCallId, toolName, "User declined to enter plan mode. Continue with the user's request in default mode — make whatever edits or shell calls the task requires (subject to per-tool permission).", true);
+            return;
+        }
+        state.permissionMode = 'plan';
+        state.systemPromptCache = null;
+        // Derive the plan file path. Slug priority:
+        //   1. Model-supplied `topic` (3-5 English words specific to the
+        //      current task — most accurate when the user is mid-session
+        //      and the topic has shifted).
+        //   2. `state.taskSlug` (set once per session by agentLoop using
+        //      either local slugify or a one-shot LLM summary — already
+        //      handles CJK first messages).
+        //   3. Raw last-user-message text (final fallback; slugify will
+        //      reduce CJK to empty → timestamp-only filename).
+        if (!state.currentPlanPath) {
+            const topic = input.topic?.trim();
+            const fallbackText = lastUserMessageText(state.messages);
+            const explicitSlug = topic && topic.length > 0 ? topic : state.taskSlug || undefined;
+            state.currentPlanPath = makePlanFilePath(fallbackText, { slug: explicitSlug });
+        }
+        callbacks.onPlanModeChange('plan');
+        pushToolResult(state, callbacks, toolCallId, toolName, [
+            'Entered plan mode (user approved).',
+            '',
+            'Read-only tools are unrestricted (readFile, glob, grep, listDir, webSearch, webFetch).',
+            `Plan file path for this session: ${state.currentPlanPath}`,
+            'Use writeFile/edit on the plan file to build your plan; do NOT edit any other files',
+            'or run state-changing shell commands until the user approves your plan via exitPlanMode.',
+            '',
+            'Workflow: explore → update plan file → askUser → repeat.',
+            '',
+            'CRITICAL: when the plan is ready, call **exitPlanMode** to request approval — NOT',
+            'askUser. askUser cannot leave plan mode no matter how the user answers; only',
+            'exitPlanMode flips the mode and unblocks your writeFile/edit/shell calls.',
+        ].join('\n'));
+        return;
+    }
+    // ── exitPlanMode tool ──
+    // Triggers the user-approval gate. The plan body is read from the
+    // session's plan file; a non-empty `input.plan` overrides it and is
+    // written back to that file before the approval dialog is shown, so
+    // the on-disk record matches what the user sees. The plan file serves
+    // as the permanent record — that way even rejected plans leave a
+    // trace, and approved plans live alongside the implementation that
+    // follows.
+    // Approval flips state to 'acceptEdits' and invalidates the
+    // system-prompt cache so the next turn drops the plan-mode overlay.
+    // Rejection keeps the model in plan mode and tells it to revise.
+    if (toolName === 'exitPlanMode') {
+        if (state.permissionMode !== 'plan') {
+            pushToolResult(state, callbacks, toolCallId, toolName, 'Error: not in plan mode. exitPlanMode is only valid when the session is in plan mode.', true);
+            return;
+        }
+        // Source of truth for the plan body is the plan file the model has
+        // been writing to during planning (matches Claude Code: the model
+        // builds the plan incrementally via writeFile/edit, then calls
+        // exitPlanMode which reads the file). The optional `plan` override
+        // exists for rare cases where the model wants to substitute the
+        // file content with something different.
+        const planPath = state.currentPlanPath ??
+            makePlanFilePath(lastUserMessageText(state.messages), { slug: state.taskSlug || undefined });
+        state.currentPlanPath = planPath;
+        const planOverride = input.plan?.trim();
+        let planBody = planOverride ?? '';
+        if (!planBody) {
+            planBody = (await readPlan(planPath)).trim();
+        }
+        if (!planBody) {
+            pushToolResult(state, callbacks, toolCallId, toolName, `Error: the plan file at ${planPath} is empty. Write your plan to that file using writeFile or edit, then call exitPlanMode again.`, true);
+            return;
+        }
+        // If the model passed an override, persist it back to the plan
+        // file so the on-disk record matches what the user sees / approves.
+        let savedPath = planPath;
+        if (planOverride) {
+            try {
+                savedPath = await writePlan(planPath, planBody);
+                state.currentPlanPath = savedPath;
+            }
+            catch {
+                // Disk failure (read-only fs, permissions) is non-fatal — fall
+                // through to the approval dialog with the in-memory body.
+            }
+        }
+        const approved = await callbacks.onPlanApprovalRequest(planBody);
+        if (approved) {
+            // Default post-approval mode is `acceptEdits` — the user just
+            // vetted the plan, so making them click "Yes" on every writeFile
+            // / edit during implementation is pure friction. Shell commands
+            // still go through normal classification (always-allow for read-
+            // only, ask for mixed, deny for destructive) so we don't blanket-
+            // approve `rm -rf` on plan approval. Matches Claude Code's
+            // default "Yes, auto-accept edits" behavior.
+            state.permissionMode = 'acceptEdits';
+            state.systemPromptCache = null;
+            const persisted = savedPath ?? state.currentPlanPath;
+            state.currentPlanPath = null;
+            callbacks.onPlanModeChange('acceptEdits');
+            pushToolResult(state, callbacks, toolCallId, toolName, [
+                'Plan approved by user. Plan mode has been exited.',
+                persisted ? `The approved plan is saved at: ${persisted}` : '',
+                'You can now edit files and run shell commands. Start implementing the plan.',
+                '',
+                'For multi-step plans, call **todoWrite** first to break the plan into a',
+                'tracked checklist — the user sees a live panel of your progress and you',
+                'avoid losing track of remaining steps mid-implementation.',
+            ]
+                .filter(Boolean)
+                .join('\n'));
+            // Also inject a system-reminder-style user-role meta message so
+            // the model treats the mode flip as a fresh top-level instruction
+            // rather than just a tool result. Mirrors Claude Code's
+            // `## Exited Plan Mode` attachment (messages.ts:3847-3852) — gives
+            // the next turn a clear "the rules just changed" anchor.
+            state.messages.push({
+                role: 'user',
+                content: [
+                    '## Exited Plan Mode',
+                    '',
+                    'You have exited plan mode. You can now make edits, run tools, and take actions.',
+                    'Write tools (writeFile, edit) are now auto-approved (acceptEdits mode); shell commands',
+                    'still go through normal permission classification.',
+                    persisted ? `The plan file is located at ${persisted} if you need to reference it.` : '',
+                ]
+                    .filter(Boolean)
+                    .join('\n'),
+            });
+            return;
+        }
+        pushToolResult(state, callbacks, toolCallId, toolName, [
+            'Plan rejected by user. You are still in plan mode.',
+            "Read the user's next message for feedback, revise the plan accordingly,",
+            'and call exitPlanMode again with the revised body. Consider asking the user',
+            'a clarifying question via askUser if you are unsure what to change.',
+        ].join('\n'), true);
+        return;
+    }
     // ── Doom-loop detection ──
     // For manual tools we pre-block. For auto-executed tools the call has
     // already run (result landed in state.messages via collectTurnResponse);
@@ -167,7 +458,11 @@ async function handleToolCall(tc, state, options, callbacks) {
     recordToolCall(state, toolName, input, loopCheck.hash);
     // ── Permission check for write tools and shell ──
     if (toolName === 'writeFile' || toolName === 'edit' || toolName === 'shell') {
-        const approved = await checkPermission({ toolCallId, toolName, input }, options.trustMode, callbacks.onAskPermission);
+        const approved = await checkPermission({ toolCallId, toolName, input }, options.trustMode, callbacks.onAskPermission, state.permissionMode, process.cwd());
+        if (options.abortSignal?.aborted) {
+            pushToolResult(state, callbacks, toolCallId, toolName, '[Tool execution interrupted by user]', true);
+            return;
+        }
         if (!approved) {
             pushToolResult(state, callbacks, toolCallId, toolName, 'Permission denied by user.');
             return;
@@ -178,7 +473,7 @@ async function handleToolCall(tc, state, options, callbacks) {
     let isError = false;
     try {
         if (toolName === 'writeFile' || toolName === 'edit') {
-            output = await executeWriteTool(toolName, input, toolCallId);
+            output = await executeWriteTool(toolName, input, toolCallId, callbacks);
             // executeWriteTool returns "Error: ..." strings for in-band failures
             // (missing match, non-unique match) rather than throwing — surface
             // those as errored results so the scrollback line flips to red.
@@ -189,7 +484,7 @@ async function handleToolCall(tc, state, options, callbacks) {
         }
         else if (toolName === 'shell') {
             const timeout = input.timeout ?? 30000;
-            const shellResult = await executeShell(input.command, timeout, callbacks, toolCallId);
+            const shellResult = await executeShell(input.command, timeout, options.abortSignal, callbacks, toolCallId);
             output = shellResult.output;
             isError = shellResult.isError;
         }
@@ -204,10 +499,25 @@ async function handleToolCall(tc, state, options, callbacks) {
     }
     pushToolResult(state, callbacks, toolCallId, toolName, truncateToolResult(output), isError);
 }
-/** Handle all tool calls from a single model turn, sequentially. */
-export async function processToolCalls(toolCalls, state, options, callbacks) {
-    for (const tc of toolCalls) {
-        await handleToolCall(tc, state, options, callbacks);
+/** Handle all tool calls from a single model turn, sequentially.
+ *  `parentModel` is threaded through so the task tool can pass it to runSubAgent. */
+export async function processToolCalls(toolCalls, state, options, callbacks, parentModel) {
+    for (let i = 0; i < toolCalls.length; i++) {
+        const tc = toolCalls[i];
+        // User pressed Esc / Ctrl+C. The currently running tool (if any) has
+        // already been SIGKILL'd via the shell provider's cancelSignal. For
+        // every remaining tool_call from this turn we still need to push a
+        // synthetic tool_result — orphan tool_calls without a matching result
+        // would make the next API request fail with "tool_use without
+        // tool_result" the moment the user types another prompt.
+        if (options.abortSignal?.aborted) {
+            for (let j = i; j < toolCalls.length; j++) {
+                const skipped = toolCalls[j];
+                pushToolResult(state, callbacks, skipped.toolCallId, skipped.toolName, '[Tool execution interrupted by user]', true);
+            }
+            return;
+        }
+        await handleToolCall(tc, state, options, callbacks, parentModel);
     }
 }
 //# sourceMappingURL=tool-execution.js.map