npm - @serjm/deepseek-code - Versions diffs - 0.4.3 → 0.4.5 - Mend

@serjm/deepseek-code 0.4.3 → 0.4.5

This diff shows the differences between publicly available versions of the package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (68)

package/CHANGELOG.md +61 -0
package/README.md +72 -109
package/README.ru.md +73 -109
package/dist/api/index.d.ts +9 -0
package/dist/api/index.d.ts.map +1 -1
package/dist/api/index.js +65 -2
package/dist/api/index.js.map +1 -1
package/dist/cli/index.d.ts +1 -0
package/dist/cli/index.d.ts.map +1 -1
package/dist/cli/index.js +15 -8
package/dist/cli/index.js.map +1 -1
package/dist/cli/interactive.d.ts.map +1 -1
package/dist/cli/interactive.js +65 -3
package/dist/cli/interactive.js.map +1 -1
package/dist/commands/index.d.ts.map +1 -1
package/dist/commands/index.js +26 -21
package/dist/commands/index.js.map +1 -1
package/dist/config/defaults.js +7 -7
package/dist/config/defaults.js.map +1 -1
package/dist/core/agent-loop.d.ts +44 -2
package/dist/core/agent-loop.d.ts.map +1 -1
package/dist/core/agent-loop.js +317 -102
package/dist/core/agent-loop.js.map +1 -1
package/dist/core/i18n.d.ts +3 -0
package/dist/core/i18n.d.ts.map +1 -1
package/dist/core/i18n.js +9 -0
package/dist/core/i18n.js.map +1 -1
package/dist/core/metrics.d.ts +3 -1
package/dist/core/metrics.d.ts.map +1 -1
package/dist/core/metrics.js +34 -5
package/dist/core/metrics.js.map +1 -1
package/dist/tools/bash.d.ts.map +1 -1
package/dist/tools/bash.js +299 -20
package/dist/tools/bash.js.map +1 -1
package/dist/tools/glob.d.ts.map +1 -1
package/dist/tools/glob.js +40 -3
package/dist/tools/glob.js.map +1 -1
package/dist/tools/grep.d.ts.map +1 -1
package/dist/tools/grep.js +69 -13
package/dist/tools/grep.js.map +1 -1
package/dist/tools/read.d.ts.map +1 -1
package/dist/tools/read.js +91 -0
package/dist/tools/read.js.map +1 -1
package/dist/tools/types.d.ts +21 -1
package/dist/tools/types.d.ts.map +1 -1
package/dist/tools/types.js +34 -0
package/dist/tools/types.js.map +1 -1
package/dist/ui/app.d.ts.map +1 -1
package/dist/ui/app.js +229 -162
package/dist/ui/app.js.map +1 -1
package/dist/ui/chat-view.d.ts +24 -3
package/dist/ui/chat-view.d.ts.map +1 -1
package/dist/ui/chat-view.js +116 -58
package/dist/ui/chat-view.js.map +1 -1
package/dist/ui/input-bar.d.ts.map +1 -1
package/dist/ui/input-bar.js +38 -4
package/dist/ui/input-bar.js.map +1 -1
package/dist/ui/setup-wizard.js +1 -1
package/dist/ui/setup-wizard.js.map +1 -1
package/dist/ui/status-bar.d.ts +5 -1
package/dist/ui/status-bar.d.ts.map +1 -1
package/dist/ui/status-bar.js +10 -4
package/dist/ui/status-bar.js.map +1 -1
package/dist/utils/logger.d.ts +15 -0
package/dist/utils/logger.d.ts.map +1 -1
package/dist/utils/logger.js +47 -0
package/dist/utils/logger.js.map +1 -1
package/package.json +3 -2

package/dist/core/agent-loop.js (changed)

@@ -8,10 +8,17 @@ import { join } from 'node:path';
 import { platform, release, type } from 'node:os';
 import { MetricsCollector } from './metrics.js';
 import { hooksManager } from './hooks.js';
+const DEFAULT_MAX_ITERATIONS = 200;
+const DEFAULT_AUTO_COMPACT = {
+    enabled: true,
+    thresholdPercent: 70,
+    keepRecentMessages: 8,
+    minMessages: 18,
+};
 /**
  * Build a dynamic system prompt with project context.
  */
-function buildSystemPrompt(cwd, approvalMode) {
+export function buildSystemPrompt(cwd, approvalMode) {
     const osInfo = `${type()} ${release()} (${platform()})`;
     let projectInfo = '';
     if (cwd) {
@@ -81,86 +88,113 @@ function buildSystemPrompt(cwd, approvalMode) {
     if (locale === 'zh')
         responseLanguage = 'Chinese';
     const languageSection = `\n## Language\n- Respond in ${responseLanguage} unless the user explicitly asks otherwise.`;
-    return `You are DeepSeek Code, an AI-powered CLI agent for software development.
-You have access to a set of tools that allow you to read, write, and edit files, run shell commands, search code, and use a real browser when rendered UI or web behavior matters.${projectInfo}${capabilitiesSection}${languageSection}
-## Guidelines
-1. **Plan first** — Before making changes, explore the codebase to understand the context.
-2. **Use the right tool** — Choose the most appropriate tool for each task.
-3. **Be precise** — When editing files, provide exact text matches.
-4. **Verify** — After changes, run tests or linting to ensure correctness.
-5. **Explain** — After completing a task, summarize what was done.
-## Tool Usage
-- Read files with \`read_file\` before editing them
-- Search with \`grep_search\` or \`glob\` to find relevant code
-- Use \`run_shell_command\` to run build/test commands
-- Create or overwrite files with \`write_file\`
-- Make targeted edits with \`edit\` (prefer over write_file for small changes)
-- Use \`chrome\` proactively for UI flows, localhost app validation, rendered DOM state, screenshots, console logs, and network inspection
-When you need to run multiple tools, call them one at a time and wait for results before deciding the next step.
-## Important
-- ALWAYS use absolute paths when referring to files. The project root is \`${cwd || 'the current working directory'}\`.
-- When asked to audit or explore the project, start with \`glob\`, \`grep_search\`, and targeted reads to discover structure.
-- If the task implies a browser or rendered UI check, do not wait for the user to explicitly say "open browser" before using \`chrome\`.
-- Do NOT guess file paths — use \`glob\` or \`grep_search\` to discover them first.
-- When asked about your capabilities, answer based on the tools listed in the "Current Mode" section above. Do NOT claim you lack tools that are listed there but blocked by mode — instead explain that the current mode restricts them.
-- If the user asks "what tools do you have" or "what are your capabilities", refer to this prompt's tool list. If write_file or edit are listed as blocked, explain that they exist but are restricted in the current mode.
-- **CRITICAL: Never claim an action was performed without an actual tool call.** Do not say "opening browser", "running eval", "taking screenshot", "passing captcha", "navigating to page", or any other action unless you have actually called the corresponding tool and received a result. If a tool call was not made, state honestly that it was not executed. If a tool is blocked by the current mode, do not promise to use it — explain that it is unavailable in this mode. If a captcha or site protection is encountered, do not claim to bypass it — stop and report the issue honestly.
-- **CRITICAL: No post-factum reports without tool calls.** If Tool uses is 0 in the current response, do not claim "I checked the log", "I reviewed the previous run", "step X was successful", or any other retrospective analysis. You may only say: "I did not perform a check right now. Based on visible context I can assume..." Always separate findings into: **Verified** (confirmed by actual tool calls this turn), **Assumption** (inferred from visible context), **Not checked** (not examined this turn). Do not write "successful" for a step that was not actually executed or has no saved result. Use the \`/last-browser-test\` command to retrieve the last saved browser test report — do not reconstruct it from memory.
-## Honest Reporting
-- Do not claim files were changed unless tool results include changed=true or files=\`<list>\`.
-- Do not claim a change was verified unless tool results include verified=true.
-- Do not claim tests/checks passed unless you actually ran the command and saw success.
-- If no files changed, say "No files changed".
-- Final report must match tool results and Execution Summary.
-## Failed Tool Calls Policy
-- If any tool/shell command failed during the run, mention it in the final report.
-- Explain whether each failure was **critical** (blocked the task goal) or **non-critical** (retried successfully, fallback worked, or unrelated to the task).
-- Do not write "all checks passed" or "everything succeeded" if there were failed tool calls, unless you clearly separate successful required checks from non-critical failed attempts.
-- If a failed command was retried successfully, say so explicitly (e.g., "first attempt failed, retry succeeded").
-- If a failed command produced a temporary file or other side effect, clean it up or mention it in the report.
-## Execution Policy
-1. **Minimal reading**: for a small task, first locate the target with as few reads as possible. Usually 1-2 read_file calls and 1 edit is enough. Do not run a broad grep/glob if you already know the file.
-2. **Do not repeat identical tool calls**: do not call read_file/grep_search/glob with the same arguments twice unless you have reason to believe the file changed.
-3. **Checks**: run lint/typecheck/build/test only after making changes. Do not run the same check multiple times without a new edit. If you did not run a check, do not claim it passed.
-4. **Temporary files**: do not create lint_out.txt, test_out.txt, temp/debug files unnecessarily. If you created a temporary file, remove it before the final report. Do not leave garbage in the working tree.
-5. **Report**: the final report must match the real tool results. Only mention what you actually read, changed, or verified. If no files were changed, explicitly say "No files changed". If there were errors, report them — do not hide them.
-6. **Stop**: when the goal is achieved and checks are done — stop. Do not continue looking for extra issues without the user asking. Do not refactor beyond the task scope.
-## Source of Truth Policy
-1. **Do not invent** versions, release notes, dates, features, links, metrics, prices, or user/project facts.
-2. **Source files/data** provided by the user are the source of truth.
-3. **For release/version info**, use package.json, CHANGELOG.md, Git tags, npm, or GitHub Releases only if actually read/checked.
-4. **Unchecked facts** must be labeled as assumption or not verified.
-5. **Generated demo projects**: placeholder content is allowed only if explicitly requested.
-6. **Do not present** invented content as real project history.
-7. **If data is missing**, ask for it or write "Not verified" — never guess.
-## Project Acceptance Policy
-1. **For web projects**, build success alone is not enough. Verify that:
-   - install/build succeeds;
-   - dev server starts successfully;
-   - the main page opens in a browser;
-   - no framework error overlay (Nuxt/Vite/Next/etc.);
-   - browser console has no critical errors;
-   - git status has no junk files (.idea/, node_modules/, .nuxt/, .output/, dist/, temp files).
-2. **For container-first projects**:
-   - provide Containerfile/Dockerfile and compose.yaml;
-   - run through podman/docker compose;
-   - verify build inside the container;
-   - expose the correct host/port;
-   - add .dockerignore.
-3. **If browser or container verification was not performed**, do not claim the project is fully verified.
-4. **In the final report**, separate:
-   - Verified
-   - Not checked
+    return `You are DeepSeek Code, an AI-powered CLI agent for software development.
+You have access to a set of tools that allow you to read, write, and edit files, run shell commands, search code, and use a real browser when rendered UI or web behavior matters.${projectInfo}${capabilitiesSection}${languageSection}
+## Guidelines
+1. **Plan first** — Before making changes, explore the codebase to understand the context.
+2. **Use the right tool** — Choose the most appropriate tool for each task.
+3. **Be precise** — When editing files, provide exact text matches.
+4. **Verify** — After changes, run tests or linting to ensure correctness.
+5. **Explain** — After completing a task, summarize what was done.
+## Tool Usage
+- Read files with \`read_file\` before editing them
+- Search with \`grep_search\` or \`glob\` to find relevant code
+- Use \`run_shell_command\` to run build/test commands
+- Create or overwrite files with \`write_file\`
+- Make targeted edits with \`edit\` (prefer over write_file for small changes)
+- Use \`chrome\` proactively for UI flows, localhost app validation, rendered DOM state, screenshots, console logs, and network inspection
+When you need to run multiple tools, call them one at a time and wait for results before deciding the next step.
+## Workspace Boundary Policy
+- The current working directory is the active project workspace. Do not silently switch to another project path inside shell commands.
+- If \`write_file\`, \`edit\`, or \`read_file\` says a path is outside the workspace, stop and report the mismatch. Do not bypass the restriction by using shell redirection, PowerShell here-strings, Python scripts, or temporary generator scripts.
+- If the user intended a different folder, ask them to restart/open the CLI in that folder or confirm the correct workspace.
+- Avoid generating project files through ad-hoc scripts such as \`gen_helper.py\`, \`diag.py\`, or \`fix_pkg.py\`. Use the file tools for file content and remove any temporary helper before the final report.
+## Windows Shell Policy
+- The OS is listed in Project Context. If it is Windows or \`win32\`, write shell commands for PowerShell/cmd compatibility.
+- On Windows, do not assume Unix tools exist. Avoid \`sed\`, \`head\`, \`tail\`, \`cat\`, \`grep\`, \`find\`, \`xargs\`, \`rm\`, \`touch\`, or Bash-specific syntax unless you first verified the command exists.
+- On Windows, never use \`mkdir -p\`; it can create a literal \`-p\` directory. Use \`New-Item -ItemType Directory -Force <path>\` or \`mkdir <path>\` without \`-p\`.
+- Prefer built-in tools over shell for repository inspection: use \`read_file\` for file content, \`grep_search\` for text search, and \`glob\` for file discovery.
+- For Windows shell reads, prefer PowerShell commands such as \`Get-Content\`, \`Select-String\`, \`Get-ChildItem\`, \`Test-Path\`, \`Remove-Item\`, and \`New-Item\`.
+- The shell tool automatically runs recognized PowerShell cmdlets through PowerShell on Windows. Plain commands such as \`npm\`, \`node\`, \`git\`, and \`npx\` run normally.
+- Do not mix Bash/cmd chaining syntax with PowerShell cmdlets in the same command. Avoid \`cd path && Remove-Item ...\`; use separate tool calls or PowerShell-compatible \`Set-Location path; Remove-Item ...\` with explicit error checks.
+- If a command fails because of shell incompatibility, retry with an OS-compatible command and report the failed attempt honestly.
+- Never use broad process-kill commands such as \`taskkill /F /IM node.exe\`, \`Stop-Process -Name node\`, \`pkill node\`, or \`killall node\`. They can terminate the agent, the user's IDE terminal, and unrelated dev servers. Stop only a specific process you started and can identify by PID.
+## Important
+- ALWAYS use absolute paths when referring to files. The project root is \`${cwd || 'the current working directory'}\`.
+- When asked to audit or explore the project, start with \`glob\`, \`grep_search\`, and targeted reads to discover structure.
+- If the task implies a browser or rendered UI check, do not wait for the user to explicitly say "open browser" before using \`chrome\`.
+- Do NOT guess file paths — use \`glob\` or \`grep_search\` to discover them first.
+- When asked about your capabilities, answer based on the tools listed in the "Current Mode" section above. Do NOT claim you lack tools that are listed there but blocked by mode — instead explain that the current mode restricts them.
+- If the user asks "what tools do you have" or "what are your capabilities", refer to this prompt's tool list. If write_file or edit are listed as blocked, explain that they exist but are restricted in the current mode.
+- **CRITICAL: Never claim an action was performed without an actual tool call.** Do not say "opening browser", "running eval", "taking screenshot", "passing captcha", "navigating to page", or any other action unless you have actually called the corresponding tool and received a result. If a tool call was not made, state honestly that it was not executed. If a tool is blocked by the current mode, do not promise to use it — explain that it is unavailable in this mode. If a captcha or site protection is encountered, do not claim to bypass it — stop and report the issue honestly.
+- **CRITICAL: No post-factum reports without tool calls.** If Tool uses is 0 in the current response, do not claim "I checked the log", "I reviewed the previous run", "step X was successful", or any other retrospective analysis. You may only say: "I did not perform a check right now. Based on visible context I can assume..." Always separate findings into: **Verified** (confirmed by actual tool calls this turn), **Assumption** (inferred from visible context), **Not checked** (not examined this turn). Do not write "successful" for a step that was not actually executed or has no saved result. Use the \`/last-browser-test\` command to retrieve the last saved browser test report — do not reconstruct it from memory.
+## Honest Reporting
+- Do not claim files were changed unless tool results include changed=true or files=\`<list>\`.
+- Do not claim a change was verified unless tool results include verified=true.
+- Do not claim tests/checks passed unless you actually ran the command and saw success.
+- If no files changed, say "No files changed".
+- Final report must match tool results and Execution Summary.
+- Final report must start with a quality verdict: **Passed**, **Partial**, or **Failed**.
+- If there were failed tool calls, failed browser/chrome calls, a budget/iteration stop, or skipped required acceptance checks, the verdict cannot be **Passed** unless every failure is explicitly classified as non-critical and the required check later succeeded.
+- For web/UI projects, include a **Browser proof** block with the URL tested, page title, console error count, screenshot/rendered-state verdict, and whether Chrome/browser calls passed or failed. If browser proof was not performed, put it under **Not checked** and do not call the UI production-ready.
+- For UI/product-design tasks, visual acceptance is required. If the rendered screenshot is blank, sparse, sidebar-only, broken, or clearly below the requested quality, say **Partial** or **Failed** and list the next visual iteration instead of claiming the project is complete.
+## Failed Tool Calls Policy
+- If any tool/shell command failed during the run, mention it in the final report.
+- Explain whether each failure was **critical** (blocked the task goal) or **non-critical** (retried successfully, fallback worked, or unrelated to the task).
+- Do not write "all checks passed" or "everything succeeded" if there were failed tool calls, unless you clearly separate successful required checks from non-critical failed attempts.
+- If a failed command was retried successfully, say so explicitly (e.g., "first attempt failed, retry succeeded").
+- If a failed command produced a temporary file or other side effect, clean it up or mention it in the report.
+## Execution Policy
+1. **Minimal reading**: for a small task, first locate the target with as few reads as possible. Usually 1-2 read_file calls and 1 edit is enough. Do not run a broad grep/glob if you already know the file.
+2. **Do not repeat identical tool calls**: do not call read_file/grep_search/glob with the same arguments twice unless you have reason to believe the file changed.
+3. **Checks**: run lint/typecheck/build/test only after making changes. Do not run the same check multiple times without a new edit. If you did not run a check, do not claim it passed.
+4. **Temporary files**: do not create lint_out.txt, test_out.txt, err.txt, temp/debug scripts, one-off files like "1", or scratch files unnecessarily. Prefer command output in the tool result over redirected files. If you created a temporary file, remove it before the final report. Before the final report, check the working tree or otherwise verify no junk temp files remain. If cleanup failed or was not checked, say so explicitly.
+5. **Report**: the final report must match the real tool results. Only mention what you actually read, changed, or verified. If no files were changed, explicitly say "No files changed". If there were errors, report them — do not hide them.
+6. **Stop**: when the goal is achieved and checks are done — stop. Do not continue looking for extra issues without the user asking. Do not refactor beyond the task scope.
+## Source of Truth Policy
+1. **Do not invent** versions, release notes, dates, features, links, metrics, prices, or user/project facts.
+2. **Source files/data** provided by the user are the source of truth.
+3. **For release/version info**, use package.json, CHANGELOG.md, Git tags, npm, or GitHub Releases only if actually read/checked.
+4. **Unchecked facts** must be labeled as assumption or not verified.
+5. **Generated demo projects**: placeholder content is allowed only if explicitly requested.
+6. **Do not present** invented content as real project history.
+7. **If data is missing**, ask for it or write "Not verified" — never guess.
+## Project Acceptance Policy
+1. **For web projects**, build success alone is not enough. Verify that:
+   - install/build succeeds;
+   - dev server starts successfully;
+   - the main page opens in a browser;
+   - no framework error overlay (Nuxt/Vite/Next/etc.);
+   - browser console has no critical errors;
+   - the repository has an appropriate .gitignore for the stack;
+   - git status has no junk files (.idea/, node_modules/, .nuxt/, .output/, dist/, temp files, screenshots, logs).
+2. **Runtime/container verification is adaptive**, not Podman-only:
+   - first inspect available tooling and project files before choosing a path;
+   - if Docker Compose is available, use docker compose;
+   - if Podman/Podman Compose is available, use podman compose or podman-compose;
+   - if no container runtime is available, use the native package manager/dev server and report container verification as Not checked;
+   - do not spend many repeated attempts on one runtime. After two similar runtime failures, switch strategy or report the blocker.
+3. **For container-first projects**:
+   - keep one clear container entrypoint path (Dockerfile or Containerfile) and ensure compose references it correctly;
+   - verify build inside the container;
+   - expose the correct host/port;
+   - add .dockerignore or .containerignore as appropriate.
+4. **If browser, git-hygiene, or container verification was not performed**, do not claim the project is fully verified.
+5. **In the final report**, separate:
+   - Verified
+   - Not checked
    - Known issues`;
 }
 /**
@@ -178,13 +212,15 @@ export class AgentLoop extends EventEmitter {
     toolCallHistory = new Map();
     metrics = new MetricsCollector();
     iterationCount = 0;
+    followUpSeq = 0;
+    lastCompactedAtMessageCount = 0;
     constructor(config, options = {}) {
         super();
         this.api = new DeepSeekAPI(config);
         this.model = config.model;
         const defaultSystemPrompt = buildSystemPrompt(options.cwd || process.cwd(), options.approvalMode);
         this.options = {
-            maxIterations: 100,
+            maxIterations: DEFAULT_MAX_ITERATIONS,
             toolTimeout: 30000,
             approvalMode: 'default',
             cwd: process.cwd(),
@@ -194,9 +230,13 @@ export class AgentLoop extends EventEmitter {
             onReasoningChunk: () => { },
             onResponse: () => { },
             onError: () => { },
+            onCompactStart: () => { },
+            onCompactProgress: () => { },
+            onCompactEnd: () => { },
             onApprovalRequest: async () => true,
             systemPrompt: defaultSystemPrompt,
             signal: undefined,
+            autoCompact: DEFAULT_AUTO_COMPACT,
             ...options,
         };
         this.tools = getToolsForMode(this.options.approvalMode);
@@ -217,6 +257,21 @@ export class AgentLoop extends EventEmitter {
     getMetrics() {
         return this.metrics;
     }
+    /**
+     * Add a user follow-up message during an active agent loop.
+     * The message will be picked up on the next API iteration.
+     * Does NOT start a new loop or reset state.
+     */
+    addUserFollowUp(content) {
+        const trimmed = content?.trim();
+        if (!trimmed)
+            return;
+        this.followUpSeq++;
+        this.messages.push({
+            role: 'user',
+            content: `User follow-up while task was running:\n${trimmed}`,
+        });
+    }
     /**
      * Set approval mode — updates which tools are available and rebuilds system prompt.
      */
@@ -267,37 +322,36 @@ export class AgentLoop extends EventEmitter {
             projectDir: this.options.cwd,
             messageCount: this.messages.length,
         }).catch(() => { });
-        while (this.iterationCount < Math.min(this.options.maxIterations, this.options.budget?.maxIterations ?? this.options.maxIterations)) {
+        while (this.iterationCount < this.getIterationLimit()) {
             this.iterationCount++;
             // Budget: check maxToolCalls at top of each iteration
             if (this.checkBudgetHalt()) {
                 return this.buildBudgetHaltMessage();
             }
             try {
+                await this.maybeAutoCompact();
                 // Use streaming chat to get real-time output
                 // Budget: check maxApiCalls before API call
                 if (this.options.budget?.maxApiCalls && this.metrics.apiCalls >= this.options.budget.maxApiCalls) {
                     return this.buildBudgetHaltMessage();
                 }
+                // Cancelled before we even start the request — nothing to drain.
+                if (this.options.signal?.aborted) {
+                    return this.finishCancelled();
+                }
+                const followUpSeqAtRequestStart = this.followUpSeq;
                 const stream = this.api.streamChat(this.messages, openAITools);
                 let responseContent = '';
                 let toolCalls = [];
-                // Check for cancellation
-                if (this.options.signal?.aborted) {
-                    const cancelledMsg = i18n.t('agentCancelled');
-                    this.messages.push({ role: 'assistant', content: cancelledMsg });
-                    this.options.onResponse(cancelledMsg);
-                    this.finalizeSession();
-                    return cancelledMsg;
-                }
+                // Cooperative cancellation: once aborted we stop acting on chunks but keep
+                // draining the stream to its natural end. Breaking out early would tear
+                // down the streaming socket mid-flight, which hard-crashed the process on
+                // Windows. The UI already shows the paused state immediately.
+                let cancelledDuringStream = false;
                 for await (const chunk of stream) {
-                    // Check for cancellation during streaming
                     if (this.options.signal?.aborted) {
-                        const cancelledMsg = i18n.t('agentCancelled');
-                        this.messages.push({ role: 'assistant', content: cancelledMsg });
-                        this.options.onResponse(cancelledMsg);
-                        this.finalizeSession();
-                        return cancelledMsg;
+                        cancelledDuringStream = true;
+                        continue;
                     }
                     if (chunk.type === 'usage' && chunk.usage) {
                         this.metrics.recordUsage(chunk.usage);
@@ -323,6 +377,10 @@ export class AgentLoop extends EventEmitter {
                         }
                     }
                 }
+                // Stream drained — if the user cancelled mid-stream, stop here cleanly.
+                if (cancelledDuringStream || this.options.signal?.aborted) {
+                    return this.finishCancelled();
+                }
                 // Budget: catch limits reached during streaming usage accounting.
                 if (this.checkBudgetHalt()) {
                     return this.buildBudgetHaltMessage();
@@ -438,7 +496,8 @@ export class AgentLoop extends EventEmitter {
                         try {
                             const toolResult = await this.executeTool(tc.function.name, args);
                             const duration = Date.now() - startTime;
-                            this.metrics.recordToolCallEnd(tc.function.name, toolResult.success);
+                            const toolLabel = this.buildToolCallLabel(tc.function.name, args);
+                            this.metrics.recordToolCallEnd(tc.function.name, toolResult.success, toolLabel, toolResult.success ? undefined : toolResult.error);
                             toolCallEvent.status = toolResult.success ? 'completed' : 'failed';
                             toolCallEvent.result = toolResult.output;
                             toolCallEvent.error = toolResult.error;
@@ -468,7 +527,8 @@ export class AgentLoop extends EventEmitter {
                         catch (err) {
                             const duration = Date.now() - startTime;
                             const errorMsg = err.message;
-                            this.metrics.recordToolCallEnd(tc.function.name, false);
+                            const toolLabel = this.buildToolCallLabel(tc.function.name, args);
+                            this.metrics.recordToolCallEnd(tc.function.name, false, toolLabel, errorMsg);
                             toolCallEvent.status = 'failed';
                             toolCallEvent.error = errorMsg;
                             toolCallEvent.durationMs = duration;
@@ -497,6 +557,11 @@ export class AgentLoop extends EventEmitter {
                     const fallback = 'I have completed the requested actions. What else would you like me to do?';
                     this.messages.push({ role: 'assistant', content: fallback });
                     this.options.onResponse(fallback);
+                    // Check if a follow-up arrived while the API request was streaming
+                    if (this.followUpSeq > followUpSeqAtRequestStart) {
+                        // Follow-up received during this request — continue loop instead of finishing
+                        continue;
+                    }
                     this.finalizeSession();
                     const summary = this.metrics.getSummary(this.model);
                     this.options.onStreamChunk(summary);
@@ -504,6 +569,11 @@ export class AgentLoop extends EventEmitter {
                 }
                 this.messages.push({ role: 'assistant', content: responseContent });
                 this.options.onResponse(responseContent);
+                // Check if a follow-up arrived while this API request was streaming
+                if (this.followUpSeq > followUpSeqAtRequestStart) {
+                    // Follow-up received during the stream — continue loop, skip finalization
+                    continue;
+                }
                 // Output execution summary
                 this.finalizeSession();
                 const summary = this.metrics.getSummary(this.model);
@@ -512,20 +582,165 @@ export class AgentLoop extends EventEmitter {
             }
             catch (err) {
                 const error = err;
+                // If the user cancelled, treat any resulting error as a clean stop.
+                if (this.options.signal?.aborted) {
+                    return this.finishCancelled();
+                }
                 this.options.onError(error);
                 throw error;
             }
         }
         // Max iterations reached
-        const timeoutMsg = `Агент достиг максимального числа итераций (${this.options.maxIterations}). Задача может быть не завершена.`;
+        const timeoutMsg = `Агент достиг максимального числа итераций (${this.getIterationLimit()}). Задача может быть не завершена.`;
         this.messages.push({ role: 'assistant', content: timeoutMsg });
         this.options.onResponse(timeoutMsg);
         this.finalizeSession();
+        const summary = this.metrics.getSummary(this.model);
+        this.options.onStreamChunk(summary);
         return timeoutMsg;
     }
+    /** Record a clean user-cancellation result and finalize the session. */
+    finishCancelled() {
+        const cancelledMsg = i18n.t('agentCancelled');
+        this.messages.push({ role: 'assistant', content: cancelledMsg });
+        this.options.onResponse(cancelledMsg);
+        this.finalizeSession();
+        return cancelledMsg;
+    }
+    getIterationLimit() {
+        const budgetLimit = this.options.budget?.maxIterations;
+        if (budgetLimit && budgetLimit > 0) {
+            return Math.min(this.options.maxIterations, budgetLimit);
+        }
+        return this.options.maxIterations;
+    }
+    getAutoCompactOptions() {
+        return {
+            ...DEFAULT_AUTO_COMPACT,
+            ...(this.options.autoCompact ?? {}),
+        };
+    }
+    async maybeAutoCompact() {
+        const compact = this.getAutoCompactOptions();
+        if (!compact.enabled)
+            return;
+        const contextPercent = this.metrics.getCurrentWindowPercent();
+        const beforeMessages = this.messages.length;
+        if (contextPercent < compact.thresholdPercent)
+            return;
+        if (beforeMessages < compact.minMessages)
+            return;
+        if (beforeMessages <= this.lastCompactedAtMessageCount + compact.keepRecentMessages)
+            return;
+        const startEvent = {
+            phase: 'start',
+            progress: 5,
+            contextPercent,
+            beforeMessages,
+        };
+        this.options.onCompactStart(startEvent);
+        this.options.onCompactProgress({ ...startEvent, phase: 'summarizing', progress: 35 });
+        try {
+            const result = await this.api.chat([
+                {
+                    role: 'system',
+                    content: 'Compress the conversation for continuation. Preserve concrete user goals, decisions, file paths, commands, failures, verification results, pending work, and constraints. Do not invent facts. Return concise bullet points.',
+                },
+                {
+                    role: 'user',
+                    content: this.buildCompactTranscript(),
+                },
+            ]);
+            if (result.usage) {
+                this.metrics.recordUsage(result.usage);
+            }
+            const summary = result.content.trim() || 'Auto-compaction completed, but the summarizer returned an empty summary.';
+            this.options.onCompactProgress({
+                phase: 'replacing',
+                progress: 80,
+                contextPercent,
+                beforeMessages,
+            });
+            const systemMsg = this.messages.find(m => m.role === 'system');
+            this.messages = [
+                ...(systemMsg ? [systemMsg] : []),
+                {
+                    role: 'assistant',
+                    content: `**Context Auto-Compacted**\n\nOriginal messages: ${beforeMessages}\nPrevious context: ${contextPercent}% of window\n\n${summary}`,
+                },
+            ];
+            this.lastCompactedAtMessageCount = this.messages.length;
+            this.options.onCompactEnd({
+                phase: 'done',
+                progress: 100,
+                contextPercent,
+                beforeMessages,
+                afterMessages: this.messages.length,
+            });
+        }
+        catch (err) {
+            this.options.onCompactEnd({
+                phase: 'failed',
+                progress: 100,
+                contextPercent,
+                beforeMessages,
+                error: err.message,
+            });
+            throw err;
+        }
+    }
+    buildCompactTranscript() {
+        return this.messages
+            .filter(message => message.role !== 'system')
+            .map((message, index) => {
+            const content = typeof message.content === 'string'
+                ? message.content
+                : JSON.stringify(message.content);
+            const toolCalls = message.tool_calls?.length ? ` tool_calls=${message.tool_calls.map(tc => tc.function.name).join(',')}` : '';
+            return `#${index + 1} ${message.role}${toolCalls}\n${content.slice(0, 8000)}`;
+        })
+            .join('\n\n---\n\n');
+    }
     /**
-     * Parse tool arguments from JSON string.
+     * Build a short human-readable label for a tool call.
+     * Used in Execution Summary to identify which files/commands failed.
      */
+    buildToolCallLabel(toolName, args) {
+        try {
+            switch (toolName) {
+                case 'run_shell_command': {
+                    const cmd = args.command ?? args.cmd ?? '';
+                    if (typeof cmd === 'string' && cmd.length > 0) {
+                        return cmd.length > 120 ? cmd.slice(0, 117) + '...' : cmd;
+                    }
+                    break;
+                }
+                case 'read_file':
+                case 'edit':
+                case 'write_file': {
+                    const path = args.path ?? args.file_path ?? args.file ?? '';
+                    if (typeof path === 'string' && path.length > 0) {
+                        return path.length > 120 ? path.slice(0, 117) + '...' : path;
+                    }
+                    break;
+                }
+                case 'grep_search':
+                case 'glob': {
+                    const pattern = args.pattern ?? '';
+                    if (typeof pattern === 'string' && pattern.length > 0) {
+                        return pattern.length > 120 ? pattern.slice(0, 117) + '...' : pattern;
+                    }
+                    break;
+                }
+            }
+            // Fallback: serialize first meaningful string value
+            const fallback = JSON.stringify(args);
+            return fallback.length > 120 ? fallback.slice(0, 117) + '...' : fallback;
+        }
+        catch {
+            return String(args);
+        }
+    }
     /**
      * Check if any budget limit has been exceeded (called at top of each iteration).
      * Returns the field name that exceeded or null if all good.
@@ -633,7 +848,7 @@ export class AgentLoop extends EventEmitter {
             };
         }
         try {
-            const result = await def.tool.execute(args);
+            const result = await def.tool.execute(args, this.options.signal);
             return {
                 success: result.success,
                 output: result.output,