npm - @serjm/deepseek-code - Versions diffs - 0.4.3 → 0.4.6 - Mend

@serjm/deepseek-code 0.4.3 → 0.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (88)

package/CHANGELOG.md +85 -0
package/README.md +72 -109
package/README.ru.md +73 -109
package/dist/api/index.d.ts +5 -0
package/dist/api/index.d.ts.map +1 -1
package/dist/api/index.js +42 -4
package/dist/api/index.js.map +1 -1
package/dist/cli/index.d.ts +1 -0
package/dist/cli/index.d.ts.map +1 -1
package/dist/cli/index.js +15 -8
package/dist/cli/index.js.map +1 -1
package/dist/cli/interactive.d.ts.map +1 -1
package/dist/cli/interactive.js +65 -3
package/dist/cli/interactive.js.map +1 -1
package/dist/commands/index.d.ts.map +1 -1
package/dist/commands/index.js +26 -21
package/dist/commands/index.js.map +1 -1
package/dist/config/defaults.d.ts +9 -0
package/dist/config/defaults.d.ts.map +1 -1
package/dist/config/defaults.js +25 -7
package/dist/config/defaults.js.map +1 -1
package/dist/core/agent-loop.d.ts +56 -3
package/dist/core/agent-loop.d.ts.map +1 -1
package/dist/core/agent-loop.js +458 -104
package/dist/core/agent-loop.js.map +1 -1
package/dist/core/i18n.d.ts +3 -0
package/dist/core/i18n.d.ts.map +1 -1
package/dist/core/i18n.js +9 -0
package/dist/core/i18n.js.map +1 -1
package/dist/core/mcp-tools.d.ts +15 -0
package/dist/core/mcp-tools.d.ts.map +1 -0
package/dist/core/mcp-tools.js +94 -0
package/dist/core/mcp-tools.js.map +1 -0
package/dist/core/metrics.d.ts +9 -2
package/dist/core/metrics.d.ts.map +1 -1
package/dist/core/metrics.js +51 -9
package/dist/core/metrics.js.map +1 -1
package/dist/tools/bash.d.ts.map +1 -1
package/dist/tools/bash.js +317 -23
package/dist/tools/bash.js.map +1 -1
package/dist/tools/chrome-manager.d.ts.map +1 -1
package/dist/tools/chrome-manager.js +5 -2
package/dist/tools/chrome-manager.js.map +1 -1
package/dist/tools/chrome.d.ts.map +1 -1
package/dist/tools/chrome.js +8 -3
package/dist/tools/chrome.js.map +1 -1
package/dist/tools/glob.d.ts.map +1 -1
package/dist/tools/glob.js +40 -3
package/dist/tools/glob.js.map +1 -1
package/dist/tools/grep.d.ts.map +1 -1
package/dist/tools/grep.js +69 -13
package/dist/tools/grep.js.map +1 -1
package/dist/tools/process-manager.d.ts +17 -0
package/dist/tools/process-manager.d.ts.map +1 -0
package/dist/tools/process-manager.js +94 -0
package/dist/tools/process-manager.js.map +1 -0
package/dist/tools/read.d.ts.map +1 -1
package/dist/tools/read.js +94 -0
package/dist/tools/read.js.map +1 -1
package/dist/tools/shell.d.ts +20 -0
package/dist/tools/shell.d.ts.map +1 -0
package/dist/tools/shell.js +100 -0
package/dist/tools/shell.js.map +1 -0
package/dist/tools/types.d.ts +27 -1
package/dist/tools/types.d.ts.map +1 -1
package/dist/tools/types.js +43 -1
package/dist/tools/types.js.map +1 -1
package/dist/ui/app.d.ts.map +1 -1
package/dist/ui/app.js +219 -178
package/dist/ui/app.js.map +1 -1
package/dist/ui/chat-view.d.ts +24 -3
package/dist/ui/chat-view.d.ts.map +1 -1
package/dist/ui/chat-view.js +116 -58
package/dist/ui/chat-view.js.map +1 -1
package/dist/ui/input-bar.d.ts.map +1 -1
package/dist/ui/input-bar.js +38 -4
package/dist/ui/input-bar.js.map +1 -1
package/dist/ui/setup-wizard.js +1 -1
package/dist/ui/setup-wizard.js.map +1 -1
package/dist/ui/status-bar.d.ts +5 -1
package/dist/ui/status-bar.d.ts.map +1 -1
package/dist/ui/status-bar.js +10 -4
package/dist/ui/status-bar.js.map +1 -1
package/dist/utils/logger.d.ts +15 -0
package/dist/utils/logger.d.ts.map +1 -1
package/dist/utils/logger.js +47 -0
package/dist/utils/logger.js.map +1 -1
package/package.json +3 -2

package/dist/core/agent-loop.js CHANGED

@@ -1,6 +1,9 @@
 import { DeepSeekAPI } from '../api/index.js';
 import { toOpenAITools, sanitizeArgs } from '../tools/types.js';
 import { getDefaultTools, getToolsForMode } from '../tools/registry.js';
+import { getMcpToolDefinitions, projectIdFromCwd } from './mcp-tools.js';
+import { resolveWindowsShell } from '../tools/shell.js';
+import { contextWindowFor } from '../config/defaults.js';
 import { EventEmitter } from 'node:events';
 import { i18n } from './i18n.js';
 import { readFileSync, readdirSync, existsSync } from 'node:fs';
@@ -8,10 +11,53 @@ import { join } from 'node:path';
 import { platform, release, type } from 'node:os';
 import { MetricsCollector } from './metrics.js';
 import { hooksManager } from './hooks.js';
+const DEFAULT_MAX_ITERATIONS = 200;
+const DEFAULT_AUTO_COMPACT = {
+    enabled: true,
+    thresholdPercent: 70,
+    keepRecentMessages: 8,
+    minMessages: 18,
+};
 /**
  * Build a dynamic system prompt with project context.
  */
-function buildSystemPrompt(cwd, approvalMode) {
+const NO_BRUTE_FORCE = '- **Do NOT brute-force command variants.** If a command fails, read the actual error and fix the real cause, then issue ONE corrected command. Never spray permutations (cmd /c …, chcp, node.exe vs npx, env-prefix variants) hoping one works — that just fills the log with failures.';
+const NO_BROAD_KILL = "- Never use broad process-kill commands such as `taskkill /F /IM node.exe`, `Stop-Process -Name node`, `pkill node`, or `killall node`. They can terminate the agent, the user's IDE terminal, and unrelated dev servers. Stop only a specific process you started and can identify by PID.";
+/**
+ * Shell guidance that matches the shell `run_shell_command` actually uses, so the
+ * model writes the correct dialect instead of guessing (and brute-forcing).
+ */
+function buildShellPolicySection() {
+    if (platform() !== 'win32') {
+        return `## Shell Policy
+- \`run_shell_command\` runs through your system shell (\`/bin/sh\`). Standard POSIX syntax works: \`&&\`, \`||\`, \`$VAR\`, pipes, redirects.
+- Prefer the built-in tools over shell for inspection: \`read_file\`, \`grep_search\`, \`glob\`.
+${NO_BRUTE_FORCE}
+${NO_BROAD_KILL}`;
+    }
+    if (resolveWindowsShell() === 'cmd') {
+        return `## Windows Shell Policy
+- On this machine \`run_shell_command\` runs through **cmd.exe** (PowerShell was unavailable or \`DEEPSEEK_CODE_SHELL=cmd\`). Write cmd syntax — NOT PowerShell.
+- Chain with \`&&\` (next only on success) or \`&\` (always). Environment variables: \`%VAR%\`; set inline with \`set VAR=value&& <command>\`.
+- Do NOT use PowerShell syntax here (\`$env:\`, \`;\` as a separator, \`> $null\`, cmdlets) — it fails under cmd and can create junk files like a literal \`$null\`.
+- Unix tools do NOT exist (\`sed\`, \`head\`, \`tail\`, \`grep\`, \`cat\`, \`ls\`, \`rm\`, \`touch\`, \`xargs\`). Use \`findstr\`, \`type\`, \`dir\`, \`del\`, or the \`read_file\`/\`grep_search\`/\`glob\` tools.
+- Never use \`mkdir -p\` (creates a literal \`-p\` directory); use \`mkdir <path>\`.
+- Prefer the built-in tools over shell for inspection: \`read_file\`, \`grep_search\`, \`glob\`.
+${NO_BRUTE_FORCE}
+${NO_BROAD_KILL}`;
+    }
+    return `## Windows Shell Policy
+- On Windows, \`run_shell_command\` runs through **Windows PowerShell 5.1** (a single, predictable shell — never cmd.exe). Write every command in PowerShell syntax. \`npm\`, \`node\`, \`git\`, \`npx\` run normally inside it.
+- **No \`&&\` or \`||\`** — PowerShell 5.1 does not support them (it is a parse error). Run sequentially with \`;\`. To run B only if A succeeded: \`A; if ($?) { B }\`. Example: \`npm run build; if ($?) { npm test }\` — NOT \`npm run build && npm test\`.
+- **Environment variables**: read with \`$env:NAME\`, set inline with \`$env:NAME='value'; <command>\`. There is no \`VAR=value cmd\` prefix and no \`set VAR=...\`.
+- **Redirects work as PowerShell**: \`> $null\`, \`2>$null\`, \`*> out.txt\` are valid here.
+- These are PowerShell aliases and work fine: \`cat\`, \`ls\`, \`rm\`, \`cp\`, \`mv\`, \`echo\`, \`pwd\`. These do NOT exist (use the noted replacement): \`sed\`, \`head\` (→ \`Get-Content -TotalCount n\`), \`tail\` (→ \`Get-Content -Tail n\`), \`grep\` (→ \`grep_search\` tool or \`Select-String\`), \`xargs\`, \`touch\` (→ \`New-Item\`).
+- Never use \`mkdir -p\` (the \`-p\` is not a PowerShell parameter). Use \`New-Item -ItemType Directory -Force <path>\`.
+- Prefer the built-in tools over shell for inspection: \`read_file\` for file content, \`grep_search\` for text search, \`glob\` for file discovery.
+${NO_BRUTE_FORCE}
+${NO_BROAD_KILL}`;
+}
+export function buildSystemPrompt(cwd, approvalMode, model) {
     const osInfo = `${type()} ${release()} (${platform()})`;
     let projectInfo = '';
     if (cwd) {
@@ -75,92 +121,143 @@ function buildSystemPrompt(cwd, approvalMode) {
         '',
         ...toolListLines,
     ].join('\n');
+    // Shared Context Hub / external MCP tools. Plan mode is read-only and does not
+    // expose them, so only advertise when they are actually active.
+    let mcpSection = '';
+    if (mode !== 'plan') {
+        const mcpTools = getMcpToolDefinitions();
+        if (mcpTools.length > 0) {
+            const mcpToolLines = mcpTools.map(def => `  - \`${def.tool.name}\` — ${def.tool.description}`);
+            const hubProjectId = cwd ? projectIdFromCwd(cwd) : '';
+            const orientation = mcpTools.some(def => def.tool.name === 'workspace_resume')
+                ? `\n- If a \`workspace_resume\` tool is available, call it ONCE at the start of a new task to load project memory, open/handed-off tasks, and recent sessions in a single token-budgeted call — prefer it over reading many files just to get oriented.${hubProjectId ? ` This project's hub \`project_id\` is \`${hubProjectId}\` — pass it as the \`project_id\` argument (same id for memory/task tools).` : ''}`
+                : '';
+            const identity = `provider="deepseek", client="dsc"${model ? `, model="${model}"` : ''}`;
+            mcpSection = [
+                '\n## Shared Context Hub (MCP)',
+                'External MCP tools are connected — shared memory and a task queue used by other agents working on the same projects.' + orientation,
+                '- Use `task_list`/`task_claim` to pick up work handed off by another agent, and `session_log`/`memory_write` to record durable outcomes for the next agent.',
+                `- Identify yourself when writing to the hub so the shared history shows who did the work: call \`session_log\` with ${identity}; set \`surface="dsc"\` on \`memory_write\`.`,
+                '- These tools are real and callable like any other; do not claim you used them without an actual tool call.',
+                'Connected MCP tools:',
+                ...mcpToolLines,
+            ].join('\n');
+        }
+    }
     let responseLanguage = 'English';
     if (locale === 'ru')
         responseLanguage = 'Russian';
     if (locale === 'zh')
         responseLanguage = 'Chinese';
     const languageSection = `\n## Language\n- Respond in ${responseLanguage} unless the user explicitly asks otherwise.`;
-    return `You are DeepSeek Code, an AI-powered CLI agent for software development.
-You have access to a set of tools that allow you to read, write, and edit files, run shell commands, search code, and use a real browser when rendered UI or web behavior matters.${projectInfo}${capabilitiesSection}${languageSection}
-## Guidelines
-1. **Plan first** — Before making changes, explore the codebase to understand the context.
-2. **Use the right tool** — Choose the most appropriate tool for each task.
-3. **Be precise** — When editing files, provide exact text matches.
-4. **Verify** — After changes, run tests or linting to ensure correctness.
-5. **Explain** — After completing a task, summarize what was done.
-## Tool Usage
-- Read files with \`read_file\` before editing them
-- Search with \`grep_search\` or \`glob\` to find relevant code
-- Use \`run_shell_command\` to run build/test commands
-- Create or overwrite files with \`write_file\`
-- Make targeted edits with \`edit\` (prefer over write_file for small changes)
-- Use \`chrome\` proactively for UI flows, localhost app validation, rendered DOM state, screenshots, console logs, and network inspection
-When you need to run multiple tools, call them one at a time and wait for results before deciding the next step.
-## Important
-- ALWAYS use absolute paths when referring to files. The project root is \`${cwd || 'the current working directory'}\`.
-- When asked to audit or explore the project, start with \`glob\`, \`grep_search\`, and targeted reads to discover structure.
-- If the task implies a browser or rendered UI check, do not wait for the user to explicitly say "open browser" before using \`chrome\`.
-- Do NOT guess file paths — use \`glob\` or \`grep_search\` to discover them first.
-- When asked about your capabilities, answer based on the tools listed in the "Current Mode" section above. Do NOT claim you lack tools that are listed there but blocked by mode — instead explain that the current mode restricts them.
-- If the user asks "what tools do you have" or "what are your capabilities", refer to this prompt's tool list. If write_file or edit are listed as blocked, explain that they exist but are restricted in the current mode.
-- **CRITICAL: Never claim an action was performed without an actual tool call.** Do not say "opening browser", "running eval", "taking screenshot", "passing captcha", "navigating to page", or any other action unless you have actually called the corresponding tool and received a result. If a tool call was not made, state honestly that it was not executed. If a tool is blocked by the current mode, do not promise to use it — explain that it is unavailable in this mode. If a captcha or site protection is encountered, do not claim to bypass it — stop and report the issue honestly.
-- **CRITICAL: No post-factum reports without tool calls.** If Tool uses is 0 in the current response, do not claim "I checked the log", "I reviewed the previous run", "step X was successful", or any other retrospective analysis. You may only say: "I did not perform a check right now. Based on visible context I can assume..." Always separate findings into: **Verified** (confirmed by actual tool calls this turn), **Assumption** (inferred from visible context), **Not checked** (not examined this turn). Do not write "successful" for a step that was not actually executed or has no saved result. Use the \`/last-browser-test\` command to retrieve the last saved browser test report — do not reconstruct it from memory.
-## Honest Reporting
-- Do not claim files were changed unless tool results include changed=true or files=\`<list>\`.
-- Do not claim a change was verified unless tool results include verified=true.
-- Do not claim tests/checks passed unless you actually ran the command and saw success.
-- If no files changed, say "No files changed".
-- Final report must match tool results and Execution Summary.
-## Failed Tool Calls Policy
-- If any tool/shell command failed during the run, mention it in the final report.
-- Explain whether each failure was **critical** (blocked the task goal) or **non-critical** (retried successfully, fallback worked, or unrelated to the task).
-- Do not write "all checks passed" or "everything succeeded" if there were failed tool calls, unless you clearly separate successful required checks from non-critical failed attempts.
-- If a failed command was retried successfully, say so explicitly (e.g., "first attempt failed, retry succeeded").
-- If a failed command produced a temporary file or other side effect, clean it up or mention it in the report.
-## Execution Policy
-1. **Minimal reading**: for a small task, first locate the target with as few reads as possible. Usually 1-2 read_file calls and 1 edit is enough. Do not run a broad grep/glob if you already know the file.
-2. **Do not repeat identical tool calls**: do not call read_file/grep_search/glob with the same arguments twice unless you have reason to believe the file changed.
-3. **Checks**: run lint/typecheck/build/test only after making changes. Do not run the same check multiple times without a new edit. If you did not run a check, do not claim it passed.
-4. **Temporary files**: do not create lint_out.txt, test_out.txt, temp/debug files unnecessarily. If you created a temporary file, remove it before the final report. Do not leave garbage in the working tree.
-5. **Report**: the final report must match the real tool results. Only mention what you actually read, changed, or verified. If no files were changed, explicitly say "No files changed". If there were errors, report them — do not hide them.
-6. **Stop**: when the goal is achieved and checks are done — stop. Do not continue looking for extra issues without the user asking. Do not refactor beyond the task scope.
-## Source of Truth Policy
-1. **Do not invent** versions, release notes, dates, features, links, metrics, prices, or user/project facts.
-2. **Source files/data** provided by the user are the source of truth.
-3. **For release/version info**, use package.json, CHANGELOG.md, Git tags, npm, or GitHub Releases only if actually read/checked.
-4. **Unchecked facts** must be labeled as assumption or not verified.
-5. **Generated demo projects**: placeholder content is allowed only if explicitly requested.
-6. **Do not present** invented content as real project history.
-7. **If data is missing**, ask for it or write "Not verified" — never guess.
-## Project Acceptance Policy
-1. **For web projects**, build success alone is not enough. Verify that:
-   - install/build succeeds;
-   - dev server starts successfully;
-   - the main page opens in a browser;
-   - no framework error overlay (Nuxt/Vite/Next/etc.);
-   - browser console has no critical errors;
-   - git status has no junk files (.idea/, node_modules/, .nuxt/, .output/, dist/, temp files).
-2. **For container-first projects**:
-   - provide Containerfile/Dockerfile and compose.yaml;
-   - run through podman/docker compose;
-   - verify build inside the container;
-   - expose the correct host/port;
-   - add .dockerignore.
-3. **If browser or container verification was not performed**, do not claim the project is fully verified.
-4. **In the final report**, separate:
-   - Verified
-   - Not checked
+    const shellPolicySection = buildShellPolicySection();
+    return `You are DeepSeek Code, an AI-powered CLI agent for software development.
+You have access to a set of tools that allow you to read, write, and edit files, run shell commands, search code, and use a real browser when rendered UI or web behavior matters.${projectInfo}${capabilitiesSection}${mcpSection}${languageSection}
+## Guidelines
+1. **Plan first** — Before making changes, explore the codebase to understand the context.
+2. **Use the right tool** — Choose the most appropriate tool for each task.
+3. **Be precise** — When editing files, provide exact text matches.
+4. **Verify** — After changes, run tests or linting to ensure correctness.
+5. **Explain** — After completing a task, summarize what was done.
+## Tool Usage
+- Read files with \`read_file\` before editing them
+- Search with \`grep_search\` or \`glob\` to find relevant code
+- Use \`run_shell_command\` to run build/test commands
+- Create or overwrite files with \`write_file\`
+- Make targeted edits with \`edit\` (prefer over write_file for small changes)
+- Use \`chrome\` proactively for UI flows, localhost app validation, rendered DOM state, screenshots, console logs, and network inspection
+When you need to run multiple tools, call them one at a time and wait for results before deciding the next step.
+## Workspace Boundary Policy
+- The current working directory is the active project workspace. Do not silently switch to another project path inside shell commands.
+- If \`write_file\`, \`edit\`, or \`read_file\` says a path is outside the workspace, stop and report the mismatch. Do not bypass the restriction by using shell redirection, PowerShell here-strings, Python scripts, or temporary generator scripts.
+- If the user intended a different folder, ask them to restart/open the CLI in that folder or confirm the correct workspace.
+- Avoid generating project files through ad-hoc scripts such as \`gen_helper.py\`, \`diag.py\`, or \`fix_pkg.py\`. Use the file tools for file content and remove any temporary helper before the final report.
+${shellPolicySection}
+## Long-Running Processes (dev/preview servers, watchers)
+- A dev/preview server (e.g. \`npm run dev\`, \`nuxt dev\`, \`vite\`) does NOT exit — never run it as a normal blocking command, it will hang and hit the timeout.
+- Start it with \`run_shell_command\` using \`background: true\`. To then verify the app, pass \`wait_for_port: <port>\` in the same call — it returns once the port is accepting connections (or reports it never became ready). Do NOT open the browser before the port is ready (that causes ERR_CONNECTION_REFUSED).
+- Correct flow for a browser check: (1) \`run_shell_command\` with \`background:true, wait_for_port:3000\`; (2) use the \`chrome\` tool to open \`http://localhost:3000\` and inspect; (3) \`run_shell_command\` with \`stop_pid:<pid>\` to stop the server. Always stop a background process you started.
+- If \`wait_for_port\` reports the port never opened, the server failed to start — read the returned output, fix the cause, and report it; do not pretend the page rendered.
+## Important
+- ALWAYS use absolute paths when referring to files. The project root is \`${cwd || 'the current working directory'}\`.
+- When asked to audit or explore the project, start with \`glob\`, \`grep_search\`, and targeted reads to discover structure.
+- If the task implies a browser or rendered UI check, do not wait for the user to explicitly say "open browser" before using \`chrome\`.
+- Do NOT guess file paths — use \`glob\` or \`grep_search\` to discover them first.
+- When asked about your capabilities, answer based on the tools listed in the "Current Mode" section above. Do NOT claim you lack tools that are listed there but blocked by mode — instead explain that the current mode restricts them.
+- If the user asks "what tools do you have" or "what are your capabilities", refer to this prompt's tool list. If write_file or edit are listed as blocked, explain that they exist but are restricted in the current mode.
+- **CRITICAL: Never claim an action was performed without an actual tool call.** Do not say "opening browser", "running eval", "taking screenshot", "passing captcha", "navigating to page", or any other action unless you have actually called the corresponding tool and received a result. If a tool call was not made, state honestly that it was not executed. If a tool is blocked by the current mode, do not promise to use it — explain that it is unavailable in this mode. If a captcha or site protection is encountered, do not claim to bypass it — stop and report the issue honestly.
+- **CRITICAL: No post-factum reports without tool calls.** If Tool uses is 0 in the current response, do not claim "I checked the log", "I reviewed the previous run", "step X was successful", or any other retrospective analysis. You may only say: "I did not perform a check right now. Based on visible context I can assume..." Always separate findings into: **Verified** (confirmed by actual tool calls this turn), **Assumption** (inferred from visible context), **Not checked** (not examined this turn). Do not write "successful" for a step that was not actually executed or has no saved result. Use the \`/last-browser-test\` command to retrieve the last saved browser test report — do not reconstruct it from memory.
+## Honest Reporting
+- **An iteration/step counts as done ONLY if THIS run contains the tool calls that did it.** Narrating "Iteration N complete, files created, committed" in a turn with no corresponding write_file/edit/shell calls is fabrication. If you only planned or described work, say "planned, not yet executed". When a \`[verified-state]\` ledger appears in context, treat it as the single source of truth about what this run has actually done.
+- Do not claim files were changed unless tool results include changed=true or files=\`<list>\`.
+- Do not claim a change was verified unless tool results include verified=true.
+- Do not claim tests/checks passed unless you actually ran the command and saw success.
+- If no files changed, say "No files changed".
+- Final report must match tool results and Execution Summary.
+- Final report must start with a quality verdict: **Passed**, **Partial**, or **Failed**.
+- If there were failed tool calls, failed browser/chrome calls, a budget/iteration stop, or skipped required acceptance checks, the verdict cannot be **Passed** unless every failure is explicitly classified as non-critical and the required check later succeeded.
+- For web/UI projects, include a **Browser proof** block with the URL tested, page title, console error count, screenshot/rendered-state verdict, and whether Chrome/browser calls passed or failed. If browser proof was not performed, put it under **Not checked** and do not call the UI production-ready.
+- For UI/product-design tasks, visual acceptance is required. If the rendered screenshot is blank, sparse, sidebar-only, broken, or clearly below the requested quality, say **Partial** or **Failed** and list the next visual iteration instead of claiming the project is complete.
+## Failed Tool Calls Policy
+- If any tool/shell command failed during the run, mention it in the final report.
+- Explain whether each failure was **critical** (blocked the task goal) or **non-critical** (retried successfully, fallback worked, or unrelated to the task).
+- Do not write "all checks passed" or "everything succeeded" if there were failed tool calls, unless you clearly separate successful required checks from non-critical failed attempts.
+- If a failed command was retried successfully, say so explicitly (e.g., "first attempt failed, retry succeeded").
+- **Separate the FINAL result from the attempts to reach it.** A clean final result (e.g. "lint: 0 errors") describes only the last run. If earlier commands/attempts failed, do NOT present the whole run as clean — say e.g. "lint passed on the 2nd attempt; 1st failed: <reason>". Writing "0 errors" while hiding several failed attempts before it is dishonest. With failed attempts present, the verdict is at best **Partial**.
+- **"Could not run" is NOT "broken".** If you could not execute a check in your environment (command/tool unavailable, permission denied, a shell error on your side), report it as **Not checked — could not run \`<X>\`: <reason>**. Do NOT report your own inability to run a tool as a defect or failure of the project being worked on.
+- If a failed command produced a temporary file or other side effect, clean it up or mention it in the report.
+## Execution Policy
+1. **Minimal reading**: for a small task, first locate the target with as few reads as possible. Usually 1-2 read_file calls and 1 edit is enough. Do not run a broad grep/glob if you already know the file.
+2. **Do not repeat identical tool calls**: do not call read_file/grep_search/glob with the same arguments twice unless you have reason to believe the file changed.
+3. **Checks**: run lint/typecheck/build/test only after making changes. Do not run the same check multiple times without a new edit. If you did not run a check, do not claim it passed.
+4. **Temporary files**: do not create lint_out.txt, test_out.txt, err.txt, temp/debug scripts, one-off files like "1", or scratch files unnecessarily. **Never redirect a command's output to a file just to read it back** (e.g. \`eslint . > lint-output.txt\`) — the tool already returns stdout/stderr to you, so the file is pure junk. If you created a temporary file, remove it before the final report. Before the final report, check the working tree or otherwise verify no junk temp files remain. If cleanup failed or was not checked, say so explicitly.
+5. **Report**: the final report must match the real tool results. Only mention what you actually read, changed, or verified. If no files were changed, explicitly say "No files changed". If there were errors, report them — do not hide them.
+6. **Stop**: when the goal is achieved and checks are done — stop. Do not continue looking for extra issues without the user asking. Do not refactor beyond the task scope.
+## Source of Truth Policy
+1. **Do not invent** versions, release notes, dates, features, links, metrics, prices, or user/project facts.
+2. **Source files/data** provided by the user are the source of truth.
+3. **For release/version info**, use package.json, CHANGELOG.md, Git tags, npm, or GitHub Releases only if actually read/checked.
+4. **Unchecked facts** must be labeled as assumption or not verified.
+5. **Generated demo projects**: placeholder content is allowed only if explicitly requested.
+6. **Do not present** invented content as real project history.
+7. **If data is missing**, ask for it or write "Not verified" — never guess.
+## Project Acceptance Policy
+1. **For web projects**, build success alone is not enough. Verify that:
+   - install/build succeeds;
+   - dev server starts successfully;
+   - the main page opens in a browser;
+   - no framework error overlay (Nuxt/Vite/Next/etc.);
+   - browser console has no critical errors;
+   - the repository has an appropriate .gitignore for the stack;
+   - git status has no junk files (.idea/, node_modules/, .nuxt/, .output/, dist/, temp files, screenshots, logs).
+2. **Runtime/container verification is adaptive**, not Podman-only:
+   - first inspect available tooling and project files before choosing a path;
+   - if Docker Compose is available, use docker compose;
+   - if Podman/Podman Compose is available, use podman compose or podman-compose;
+   - if no container runtime is available, use the native package manager/dev server and report container verification as Not checked;
+   - do not spend many repeated attempts on one runtime. After two similar runtime failures, switch strategy or report the blocker.
+3. **For container-first projects**:
+   - keep one clear container entrypoint path (Dockerfile or Containerfile) and ensure compose references it correctly;
+   - verify build inside the container;
+   - expose the correct host/port;
+   - add .dockerignore or .containerignore as appropriate.
+4. **If browser, git-hygiene, or container verification was not performed**, do not claim the project is fully verified.
+5. **In the final report**, separate:
+   - Verified
+   - Not checked
    - Known issues`;
 }
 /**
@@ -178,13 +275,16 @@ export class AgentLoop extends EventEmitter {
     toolCallHistory = new Map();
     metrics = new MetricsCollector();
     iterationCount = 0;
+    followUpSeq = 0;
+    lastCompactedAtMessageCount = 0;
     constructor(config, options = {}) {
         super();
         this.api = new DeepSeekAPI(config);
         this.model = config.model;
-        const defaultSystemPrompt = buildSystemPrompt(options.cwd || process.cwd(), options.approvalMode);
+        this.metrics.setContextWindow(contextWindowFor(this.model));
+        const defaultSystemPrompt = buildSystemPrompt(options.cwd || process.cwd(), options.approvalMode, this.model);
         this.options = {
-            maxIterations: 100,
+            maxIterations: DEFAULT_MAX_ITERATIONS,
             toolTimeout: 30000,
             approvalMode: 'default',
             cwd: process.cwd(),
@@ -194,9 +294,13 @@ export class AgentLoop extends EventEmitter {
             onReasoningChunk: () => { },
             onResponse: () => { },
             onError: () => { },
+            onCompactStart: () => { },
+            onCompactProgress: () => { },
+            onCompactEnd: () => { },
             onApprovalRequest: async () => true,
             systemPrompt: defaultSystemPrompt,
             signal: undefined,
+            autoCompact: DEFAULT_AUTO_COMPACT,
             ...options,
         };
         this.tools = getToolsForMode(this.options.approvalMode);
@@ -217,6 +321,46 @@ export class AgentLoop extends EventEmitter {
     getMetrics() {
         return this.metrics; // hands back the collector held in the `metrics` field itself, not a copy
     }
+    /**
+     * Add a user follow-up message during an active agent loop.
+     * The message will be picked up on the next API iteration.
+     * Does NOT start a new loop or reset state.
+     */
+    addUserFollowUp(content) {
+        const text = content?.trim();
+        // Missing, empty, or whitespace-only follow-ups are ignored entirely.
+        if (!text) {
+            return;
+        }
+        // Bump the sequence number so a request that is already streaming can
+        // detect that a follow-up arrived after it started.
+        this.followUpSeq += 1;
+        // The verified-state ledger rides along with the follow-up to keep the
+        // model anchored to what the tools actually did (on very long runs it
+        // tends to describe planned iterations as if they were finished).
+        const ledger = this.buildVerifiedLedger();
+        const followUp = {
+            role: 'user',
+            content: `User follow-up while task was running:\n${text}\n\n${ledger}`,
+        };
+        this.messages.push(followUp);
+    }
+    /**
+     * Compact, tool-derived summary of what was REALLY done in this run: files
+     * actually written/edited (verified by the tools) and tool-call counts.
+     * Injected at grounding points (after auto-compaction, with follow-ups) so the
+     * model cannot drift into claiming work that has no tool calls behind it.
+     */
+    buildVerifiedLedger() {
+        const touched = new Set();
+        let okCount = 0;
+        let failCount = 0;
+        // One pass over the recorded tool calls: collect changed files and tally outcomes.
+        for (const entry of this.toolCallHistory.values()) {
+            if (entry.changed && entry.changedFiles) {
+                for (const file of entry.changedFiles) {
+                    touched.add(file);
+                }
+            }
+            if (entry.status === 'completed') {
+                okCount += 1;
+            }
+            else if (entry.status === 'failed' || entry.status === 'rejected') {
+                // Rejected calls are reported together with failed ones.
+                failCount += 1;
+            }
+        }
+        // List at most 40 paths; anything beyond that is reported as a count.
+        let filesList = '(none)';
+        if (touched.size > 0) {
+            const shown = [...touched].slice(0, 40).join(', ');
+            const overflow = touched.size > 40 ? ` … +${touched.size - 40} more` : '';
+            filesList = shown + overflow;
+        }
+        return `[verified-state] Tool calls so far in THIS run: ${okCount} ok, ${failCount} failed. Files actually changed (tool-verified): ${filesList}. Anything not listed here was NOT done in this run — do not claim it as completed; verify with git/glob before claiming prior work.`;
+    }
     /**
      * Set approval mode — updates which tools are available and rebuilds system prompt.
      */
@@ -224,13 +368,27 @@ export class AgentLoop extends EventEmitter {
         this.options.approvalMode = mode;
         this.tools = getToolsForMode(mode);
         // Rebuild system prompt with updated mode info
-        this.options.systemPrompt = buildSystemPrompt(this.options.cwd, mode);
+        this.options.systemPrompt = buildSystemPrompt(this.options.cwd, mode, this.model);
         // Update the system message if it exists
         const sysIdx = this.messages.findIndex(m => m.role === 'system');
         if (sysIdx !== -1) {
             this.messages[sysIdx] = { role: 'system', content: this.options.systemPrompt };
         }
     }
+    /**
+     * Built-in tools for the current mode plus any connected MCP tools.
+     * MCP servers connect asynchronously at startup, so this is recomputed at the
+     * start of each loop. Plan mode stays read-only and excludes MCP tools; a
+     * name clash resolves in favor of the built-in tool.
+     */
+    buildActiveTools() {
+        const builtIn = getToolsForMode(this.options.approvalMode);
+        // Plan mode gets the built-in set only; MCP tools are never added.
+        if (this.options.approvalMode === 'plan') {
+            return builtIn;
+        }
+        // Names already used by built-ins; an MCP tool with the same name is dropped.
+        const reservedNames = new Set();
+        for (const def of builtIn) {
+            reservedNames.add(def.tool.name);
+        }
+        const extras = [];
+        for (const def of getMcpToolDefinitions()) {
+            if (!reservedNames.has(def.tool.name)) {
+                extras.push(def);
+            }
+        }
+        return [...builtIn, ...extras];
+    }
     /**
      * Run the agent loop with a user prompt.
      * Returns the final assistant response text.
@@ -258,6 +416,14 @@ export class AgentLoop extends EventEmitter {
      * Uses streaming for real-time text output via onStreamChunk callback.
      */
     async executeLoop() {
+        // Refresh tools and system prompt so MCP servers that finished connecting
+        // after this loop was constructed are reflected in both.
+        this.tools = this.buildActiveTools();
+        const sysIdx = this.messages.findIndex(m => m.role === 'system');
+        if (sysIdx !== -1) {
+            this.options.systemPrompt = buildSystemPrompt(this.options.cwd, this.options.approvalMode, this.model);
+            this.messages[sysIdx] = { role: 'system', content: this.options.systemPrompt };
+        }
         const openAITools = toOpenAITools(this.tools);
         // Capture git baseline before session starts
         this.metrics.captureGitBaseline(this.options.cwd);
@@ -267,37 +433,36 @@ export class AgentLoop extends EventEmitter {
             projectDir: this.options.cwd,
             messageCount: this.messages.length,
         }).catch(() => { });
-        while (this.iterationCount < Math.min(this.options.maxIterations, this.options.budget?.maxIterations ?? this.options.maxIterations)) {
+        while (this.iterationCount < this.getIterationLimit()) {
             this.iterationCount++;
             // Budget: check maxToolCalls at top of each iteration
             if (this.checkBudgetHalt()) {
                 return this.buildBudgetHaltMessage();
             }
             try {
+                await this.maybeAutoCompact();
                 // Use streaming chat to get real-time output
                 // Budget: check maxApiCalls before API call
                 if (this.options.budget?.maxApiCalls && this.metrics.apiCalls >= this.options.budget.maxApiCalls) {
                     return this.buildBudgetHaltMessage();
                 }
+                // Cancelled before we even start the request — nothing to drain.
+                if (this.options.signal?.aborted) {
+                    return this.finishCancelled();
+                }
+                const followUpSeqAtRequestStart = this.followUpSeq;
                 const stream = this.api.streamChat(this.messages, openAITools);
                 let responseContent = '';
                 let toolCalls = [];
-                // Check for cancellation
-                if (this.options.signal?.aborted) {
-                    const cancelledMsg = i18n.t('agentCancelled');
-                    this.messages.push({ role: 'assistant', content: cancelledMsg });
-                    this.options.onResponse(cancelledMsg);
-                    this.finalizeSession();
-                    return cancelledMsg;
-                }
+                // Cooperative cancellation: once aborted we stop acting on chunks but keep
+                // draining the stream to its natural end. Breaking out early would tear
+                // down the streaming socket mid-flight, which hard-crashed the process on
+                // Windows. The UI already shows the paused state immediately.
+                let cancelledDuringStream = false;
                 for await (const chunk of stream) {
-                    // Check for cancellation during streaming
                     if (this.options.signal?.aborted) {
-                        const cancelledMsg = i18n.t('agentCancelled');
-                        this.messages.push({ role: 'assistant', content: cancelledMsg });
-                        this.options.onResponse(cancelledMsg);
-                        this.finalizeSession();
-                        return cancelledMsg;
+                        cancelledDuringStream = true;
+                        continue;
                     }
                     if (chunk.type === 'usage' && chunk.usage) {
                         this.metrics.recordUsage(chunk.usage);
@@ -323,6 +488,10 @@ export class AgentLoop extends EventEmitter {
                         }
                     }
                 }
+                // Stream drained — if the user cancelled mid-stream, stop here cleanly.
+                if (cancelledDuringStream || this.options.signal?.aborted) {
+                    return this.finishCancelled();
+                }
                 // Budget: catch limits reached during streaming usage accounting.
                 if (this.checkBudgetHalt()) {
                     return this.buildBudgetHaltMessage();
@@ -438,7 +607,8 @@ export class AgentLoop extends EventEmitter {
                         try {
                             const toolResult = await this.executeTool(tc.function.name, args);
                             const duration = Date.now() - startTime;
-                            this.metrics.recordToolCallEnd(tc.function.name, toolResult.success);
+                            const toolLabel = this.buildToolCallLabel(tc.function.name, args);
+                            this.metrics.recordToolCallEnd(tc.function.name, toolResult.success, toolLabel, toolResult.success ? undefined : toolResult.error);
                             toolCallEvent.status = toolResult.success ? 'completed' : 'failed';
                             toolCallEvent.result = toolResult.output;
                             toolCallEvent.error = toolResult.error;
@@ -468,7 +638,8 @@ export class AgentLoop extends EventEmitter {
                         catch (err) {
                             const duration = Date.now() - startTime;
                             const errorMsg = err.message;
-                            this.metrics.recordToolCallEnd(tc.function.name, false);
+                            const toolLabel = this.buildToolCallLabel(tc.function.name, args);
+                            this.metrics.recordToolCallEnd(tc.function.name, false, toolLabel, errorMsg);
                             toolCallEvent.status = 'failed';
                             toolCallEvent.error = errorMsg;
                             toolCallEvent.durationMs = duration;
@@ -497,6 +668,11 @@ export class AgentLoop extends EventEmitter {
                     const fallback = 'I have completed the requested actions. What else would you like me to do?';
                     this.messages.push({ role: 'assistant', content: fallback });
                     this.options.onResponse(fallback);
+                    // Check if a follow-up arrived while the API request was streaming
+                    if (this.followUpSeq > followUpSeqAtRequestStart) {
+                        // Follow-up received during this request — continue loop instead of finishing
+                        continue;
+                    }
                     this.finalizeSession();
                     const summary = this.metrics.getSummary(this.model);
                     this.options.onStreamChunk(summary);
@@ -504,6 +680,11 @@ export class AgentLoop extends EventEmitter {
                 }
                 this.messages.push({ role: 'assistant', content: responseContent });
                 this.options.onResponse(responseContent);
+                // Check if a follow-up arrived while this API request was streaming
+                if (this.followUpSeq > followUpSeqAtRequestStart) {
+                    // Follow-up received during the stream — continue loop, skip finalization
+                    continue;
+                }
                 // Output execution summary
                 this.finalizeSession();
                 const summary = this.metrics.getSummary(this.model);
@@ -512,20 +693,193 @@ export class AgentLoop extends EventEmitter {
             }
             catch (err) {
                 const error = err;
+                // If the user cancelled, treat any resulting error as a clean stop.
+                if (this.options.signal?.aborted) {
+                    return this.finishCancelled();
+                }
                 this.options.onError(error);
                 throw error;
             }
         }
         // Max iterations reached
-        const timeoutMsg = `Агент достиг максимального числа итераций (${this.options.maxIterations}). Задача может быть не завершена.`;
+        const timeoutMsg = `Агент достиг максимального числа итераций (${this.getIterationLimit()}). Задача может быть не завершена.`;
         this.messages.push({ role: 'assistant', content: timeoutMsg });
         this.options.onResponse(timeoutMsg);
         this.finalizeSession();
+        const summary = this.metrics.getSummary(this.model);
+        this.options.onStreamChunk(summary);
         return timeoutMsg;
     }
+    /**
+     * Record a clean user-cancellation result and finalize the session.
+     *
+     * Appends the localized 'agentCancelled' text to the conversation as an
+     * assistant message, reports it through the onResponse callback, and then
+     * calls finalizeSession().
+     *
+     * @returns The cancellation message produced by i18n.t('agentCancelled').
+     */
+    finishCancelled() {
+        const cancelledMsg = i18n.t('agentCancelled');
+        this.messages.push({ role: 'assistant', content: cancelledMsg }); // keep the cancellation in the history
+        this.options.onResponse(cancelledMsg); // notify the caller before finalizing
+        this.finalizeSession();
+        return cancelledMsg;
+    }
+    getIterationLimit() {
+        const { maxIterations } = this.options;
+        const budgetCap = this.options.budget?.maxIterations;
+        // A missing, zero, or negative budget cap means "no extra cap": fall back
+        // to the configured maxIterations. Otherwise the smaller of the two wins.
+        const hasBudgetCap = budgetCap && budgetCap > 0;
+        return hasBudgetCap ? Math.min(maxIterations, budgetCap) : maxIterations;
+    }
+    getAutoCompactOptions() {
+        // Caller-supplied settings (if any) are layered over the defaults, so a
+        // partial autoCompact object only overrides the keys it actually sets.
+        const overrides = this.options.autoCompact ?? {};
+        return { ...DEFAULT_AUTO_COMPACT, ...overrides };
+    }
+    /**
+     * Shrink the conversation once it occupies too much of the context window.
+     *
+     * Does nothing unless auto-compaction is enabled, the current window usage
+     * has reached `thresholdPercent`, at least `minMessages` messages exist, and
+     * the history has grown by more than `keepRecentMessages` since the previous
+     * compaction. Otherwise it asks the API for a summary of the transcript and
+     * replaces `this.messages` with: the system prompt, the original task
+     * (verbatim), the summary plus the tool-verified ledger, and the most recent
+     * messages. Progress is reported through onCompactStart, onCompactProgress
+     * and onCompactEnd.
+     *
+     * @returns {Promise<void>}
+     * @throws Rethrows any error raised while compacting, after reporting it
+     *   through onCompactEnd with phase 'failed'.
+     */
+    async maybeAutoCompact() {
+        const compact = this.getAutoCompactOptions();
+        if (!compact.enabled)
+            return;
+        const contextPercent = this.metrics.getCurrentWindowPercent();
+        const beforeMessages = this.messages.length;
+        if (contextPercent < compact.thresholdPercent)
+            return;
+        if (beforeMessages < compact.minMessages)
+            return;
+        // Avoid re-compacting a history that has barely grown since the last pass.
+        if (beforeMessages <= this.lastCompactedAtMessageCount + compact.keepRecentMessages)
+            return;
+        const startEvent = {
+            phase: 'start',
+            progress: 5,
+            contextPercent,
+            beforeMessages,
+        };
+        this.options.onCompactStart(startEvent);
+        this.options.onCompactProgress({ ...startEvent, phase: 'summarizing', progress: 35 });
+        try {
+            const result = await this.api.chat([
+                {
+                    role: 'system',
+                    content: 'Compress the conversation for continuation. Preserve concrete user goals, decisions, file paths, commands, failures, verification results, pending work, and constraints. Do not invent facts. Return concise bullet points.',
+                },
+                {
+                    role: 'user',
+                    content: this.buildCompactTranscript(),
+                },
+            ]);
+            if (result.usage) {
+                this.metrics.recordUsage(result.usage);
+            }
+            // A summarizer reply without content must not crash the compaction.
+            const summary = (result.content ?? '').trim() || 'Auto-compaction completed, but the summarizer returned an empty summary.';
+            this.options.onCompactProgress({
+                phase: 'replacing',
+                progress: 80,
+                contextPercent,
+                beforeMessages,
+            });
+            const systemMsg = this.messages.find(m => m.role === 'system');
+            // Replacing the whole history with [system, summary] would drop both the
+            // original task text and the recent messages that keepRecentMessages
+            // promises to keep; continuing from a lossy summary alone is how the
+            // model drifts into claiming un-done work. Keep the original user task
+            // verbatim plus the recent tail.
+            const taskHeader = '**Original task (verbatim, pre-compaction):**\n';
+            const firstUserMsg = this.messages.find(m => m.role === 'user');
+            let taskText = '';
+            if (firstUserMsg) {
+                if (typeof firstUserMsg.content === 'string') {
+                    taskText = firstUserMsg.content;
+                }
+                else if (Array.isArray(firstUserMsg.content)) {
+                    taskText = firstUserMsg.content.filter(b => b.type === 'text').map(b => b.text).join('\n');
+                }
+            }
+            // After an earlier compaction the first user message is already the
+            // wrapped task; strip the header so it is not nested a second time.
+            if (taskText.startsWith(taskHeader)) {
+                taskText = taskText.slice(taskHeader.length);
+            }
+            const taskMsg = taskText
+                ? {
+                    role: 'user',
+                    content: `${taskHeader}${taskText.length > 6000 ? taskText.slice(0, 6000) + '\n…[truncated]' : taskText}`,
+                }
+                : null;
+            // slice(-0) is slice(0) and would keep the ENTIRE history, so a zero
+            // (or invalid) keepRecentMessages must yield an empty tail explicitly.
+            const keepRecent = Math.max(0, Math.floor(compact.keepRecentMessages) || 0);
+            const recent = keepRecent > 0 ? this.messages.slice(-keepRecent) : [];
+            // Drop the system prompt and the first user message from the tail: both
+            // are re-added above and must not appear twice.
+            const tail = recent.filter(m => m.role !== 'system' && m !== firstUserMsg);
+            // The tail must not start with an orphan tool message (a tool result
+            // has to follow its assistant tool_calls message).
+            while (tail.length > 0 && tail[0].role === 'tool')
+                tail.shift();
+            this.messages = [
+                ...(systemMsg ? [systemMsg] : []),
+                ...(taskMsg ? [taskMsg] : []),
+                {
+                    role: 'assistant',
+                    // The summary is lossy — after compaction the model is prone to
+                    // "remembering" planned work as done. Pin the tool-verified ledger
+                    // right next to it so reality stays in context.
+                    content: `**Context Auto-Compacted**\n\nOriginal messages: ${beforeMessages}\nPrevious context: ${contextPercent}% of window\n\n${summary}\n\n${this.buildVerifiedLedger()}`,
+                },
+                ...tail,
+            ];
+            this.lastCompactedAtMessageCount = this.messages.length;
+            this.options.onCompactEnd({
+                phase: 'done',
+                progress: 100,
+                contextPercent,
+                beforeMessages,
+                afterMessages: this.messages.length,
+            });
+        }
+        catch (err) {
+            this.options.onCompactEnd({
+                phase: 'failed',
+                progress: 100,
+                contextPercent,
+                beforeMessages,
+                // Non-Error throwables have no usable .message; stringify them instead.
+                error: err instanceof Error ? err.message : String(err),
+            });
+            throw err;
+        }
+    }
+    /**
+     * Render the conversation (system messages excluded) as plain text for the
+     * summarizer. Each message becomes "#<n> <role>[ tool_calls=a,b]" followed
+     * by its content, capped at 8000 characters; messages are separated by
+     * "---" lines.
+     *
+     * @returns {string} The transcript text.
+     */
+    buildCompactTranscript() {
+        const maxCharsPerMessage = 8000;
+        return this.messages
+            .filter(message => message.role !== 'system')
+            .map((message, index) => {
+            let content;
+            if (typeof message.content === 'string') {
+                content = message.content;
+            }
+            else if (message.content == null) {
+                // JSON.stringify(undefined) returns undefined, which would make
+                // the slice below throw; a message without content contributes
+                // an empty body instead.
+                content = '';
+            }
+            else {
+                content = JSON.stringify(message.content) ?? '';
+            }
+            const toolCalls = message.tool_calls?.length ? ` tool_calls=${message.tool_calls.map(tc => tc.function.name).join(',')}` : '';
+            return `#${index + 1} ${message.role}${toolCalls}\n${content.slice(0, maxCharsPerMessage)}`;
+        })
+            .join('\n\n---\n\n');
+    }
     /**
-     * Parse tool arguments from JSON string.
+     * Build a short human-readable label for a tool call.
+     * Used in Execution Summary to identify which files/commands failed.
      */
+    buildToolCallLabel(toolName, args) {
+        // Labels are capped at 120 characters (117 + "...").
+        const clip = (text) => (text.length > 120 ? text.slice(0, 117) + '...' : text);
+        try {
+            // Pick the argument that best identifies the call for known tools.
+            let candidate;
+            switch (toolName) {
+                case 'run_shell_command':
+                    candidate = args.command ?? args.cmd ?? '';
+                    break;
+                case 'read_file':
+                case 'edit':
+                case 'write_file':
+                    candidate = args.path ?? args.file_path ?? args.file ?? '';
+                    break;
+                case 'grep_search':
+                case 'glob':
+                    candidate = args.pattern ?? '';
+                    break;
+            }
+            // Unknown tool, or the identifying argument is missing / not a
+            // non-empty string: fall back to the JSON form of all arguments.
+            const usable = typeof candidate === 'string' && candidate.length > 0;
+            return clip(usable ? candidate : JSON.stringify(args));
+        }
+        catch {
+            // Anything that throws above (e.g. null args, unserializable values)
+            // ends up as the plain string form of the arguments.
+            return String(args);
+        }
+    }
     /**
      * Check if any budget limit has been exceeded (called at top of each iteration).
      * Returns the field name that exceeded or null if all good.
@@ -633,7 +987,7 @@ export class AgentLoop extends EventEmitter {
             };
         }
         try {
-            const result = await def.tool.execute(args);
+            const result = await def.tool.execute(args, this.options.signal);
             return {
                 success: result.success,
                 output: result.output,