@serjm/deepseek-code 0.4.3 → 0.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. package/CHANGELOG.md +85 -0
  2. package/README.md +72 -109
  3. package/README.ru.md +73 -109
  4. package/dist/api/index.d.ts +5 -0
  5. package/dist/api/index.d.ts.map +1 -1
  6. package/dist/api/index.js +42 -4
  7. package/dist/api/index.js.map +1 -1
  8. package/dist/cli/index.d.ts +1 -0
  9. package/dist/cli/index.d.ts.map +1 -1
  10. package/dist/cli/index.js +15 -8
  11. package/dist/cli/index.js.map +1 -1
  12. package/dist/cli/interactive.d.ts.map +1 -1
  13. package/dist/cli/interactive.js +65 -3
  14. package/dist/cli/interactive.js.map +1 -1
  15. package/dist/commands/index.d.ts.map +1 -1
  16. package/dist/commands/index.js +26 -21
  17. package/dist/commands/index.js.map +1 -1
  18. package/dist/config/defaults.d.ts +9 -0
  19. package/dist/config/defaults.d.ts.map +1 -1
  20. package/dist/config/defaults.js +25 -7
  21. package/dist/config/defaults.js.map +1 -1
  22. package/dist/core/agent-loop.d.ts +56 -3
  23. package/dist/core/agent-loop.d.ts.map +1 -1
  24. package/dist/core/agent-loop.js +458 -104
  25. package/dist/core/agent-loop.js.map +1 -1
  26. package/dist/core/i18n.d.ts +3 -0
  27. package/dist/core/i18n.d.ts.map +1 -1
  28. package/dist/core/i18n.js +9 -0
  29. package/dist/core/i18n.js.map +1 -1
  30. package/dist/core/mcp-tools.d.ts +15 -0
  31. package/dist/core/mcp-tools.d.ts.map +1 -0
  32. package/dist/core/mcp-tools.js +94 -0
  33. package/dist/core/mcp-tools.js.map +1 -0
  34. package/dist/core/metrics.d.ts +9 -2
  35. package/dist/core/metrics.d.ts.map +1 -1
  36. package/dist/core/metrics.js +51 -9
  37. package/dist/core/metrics.js.map +1 -1
  38. package/dist/tools/bash.d.ts.map +1 -1
  39. package/dist/tools/bash.js +317 -23
  40. package/dist/tools/bash.js.map +1 -1
  41. package/dist/tools/chrome-manager.d.ts.map +1 -1
  42. package/dist/tools/chrome-manager.js +5 -2
  43. package/dist/tools/chrome-manager.js.map +1 -1
  44. package/dist/tools/chrome.d.ts.map +1 -1
  45. package/dist/tools/chrome.js +8 -3
  46. package/dist/tools/chrome.js.map +1 -1
  47. package/dist/tools/glob.d.ts.map +1 -1
  48. package/dist/tools/glob.js +40 -3
  49. package/dist/tools/glob.js.map +1 -1
  50. package/dist/tools/grep.d.ts.map +1 -1
  51. package/dist/tools/grep.js +69 -13
  52. package/dist/tools/grep.js.map +1 -1
  53. package/dist/tools/process-manager.d.ts +17 -0
  54. package/dist/tools/process-manager.d.ts.map +1 -0
  55. package/dist/tools/process-manager.js +94 -0
  56. package/dist/tools/process-manager.js.map +1 -0
  57. package/dist/tools/read.d.ts.map +1 -1
  58. package/dist/tools/read.js +94 -0
  59. package/dist/tools/read.js.map +1 -1
  60. package/dist/tools/shell.d.ts +20 -0
  61. package/dist/tools/shell.d.ts.map +1 -0
  62. package/dist/tools/shell.js +100 -0
  63. package/dist/tools/shell.js.map +1 -0
  64. package/dist/tools/types.d.ts +27 -1
  65. package/dist/tools/types.d.ts.map +1 -1
  66. package/dist/tools/types.js +43 -1
  67. package/dist/tools/types.js.map +1 -1
  68. package/dist/ui/app.d.ts.map +1 -1
  69. package/dist/ui/app.js +219 -178
  70. package/dist/ui/app.js.map +1 -1
  71. package/dist/ui/chat-view.d.ts +24 -3
  72. package/dist/ui/chat-view.d.ts.map +1 -1
  73. package/dist/ui/chat-view.js +116 -58
  74. package/dist/ui/chat-view.js.map +1 -1
  75. package/dist/ui/input-bar.d.ts.map +1 -1
  76. package/dist/ui/input-bar.js +38 -4
  77. package/dist/ui/input-bar.js.map +1 -1
  78. package/dist/ui/setup-wizard.js +1 -1
  79. package/dist/ui/setup-wizard.js.map +1 -1
  80. package/dist/ui/status-bar.d.ts +5 -1
  81. package/dist/ui/status-bar.d.ts.map +1 -1
  82. package/dist/ui/status-bar.js +10 -4
  83. package/dist/ui/status-bar.js.map +1 -1
  84. package/dist/utils/logger.d.ts +15 -0
  85. package/dist/utils/logger.d.ts.map +1 -1
  86. package/dist/utils/logger.js +47 -0
  87. package/dist/utils/logger.js.map +1 -1
  88. package/package.json +3 -2
@@ -1,6 +1,9 @@
1
1
  import { DeepSeekAPI } from '../api/index.js';
2
2
  import { toOpenAITools, sanitizeArgs } from '../tools/types.js';
3
3
  import { getDefaultTools, getToolsForMode } from '../tools/registry.js';
4
+ import { getMcpToolDefinitions, projectIdFromCwd } from './mcp-tools.js';
5
+ import { resolveWindowsShell } from '../tools/shell.js';
6
+ import { contextWindowFor } from '../config/defaults.js';
4
7
  import { EventEmitter } from 'node:events';
5
8
  import { i18n } from './i18n.js';
6
9
  import { readFileSync, readdirSync, existsSync } from 'node:fs';
@@ -8,10 +11,53 @@ import { join } from 'node:path';
8
11
  import { platform, release, type } from 'node:os';
9
12
  import { MetricsCollector } from './metrics.js';
10
13
  import { hooksManager } from './hooks.js';
/**
 * Iteration cap placed in the AgentLoop option defaults as `maxIterations`
 * when the caller does not supply one.
 */
const DEFAULT_MAX_ITERATIONS = 200;

/**
 * Default value of the AgentLoop `autoCompact` option.
 *
 * NOTE(review): the code that consumes these fields is not visible here.
 * Presumably `thresholdPercent` is a percentage of the model context window
 * and `keepRecentMessages` / `minMessages` are message counts — confirm
 * against the compaction logic before relying on that reading.
 */
const DEFAULT_AUTO_COMPACT = {
    enabled: true,
    thresholdPercent: 70,
    keepRecentMessages: 8,
    minMessages: 18,
};
11
21
  /**
12
22
  * Build a dynamic system prompt with project context (JSDoc for `buildSystemPrompt`, which is declared further down, after the shell-policy helpers).
13
23
  */
14
- function buildSystemPrompt(cwd, approvalMode) {
/** Prompt rule appended to every shell-policy variant: fix the real cause instead of spraying command permutations. */
const NO_BRUTE_FORCE = '- **Do NOT brute-force command variants.** If a command fails, read the actual error and fix the real cause, then issue ONE corrected command. Never spray permutations (cmd /c …, chcp, node.exe vs npx, env-prefix variants) hoping one works — that just fills the log with failures.';
/** Prompt rule appended to every shell-policy variant: never kill processes by broad name match. */
const NO_BROAD_KILL = "- Never use broad process-kill commands such as `taskkill /F /IM node.exe`, `Stop-Process -Name node`, `pkill node`, or `killall node`. They can terminate the agent, the user's IDE terminal, and unrelated dev servers. Stop only a specific process you started and can identify by PID.";
/**
 * Build the shell-policy section of the system prompt.
 *
 * The text is chosen to match the shell `run_shell_command` actually uses, so
 * the model writes the correct dialect instead of guessing (and brute-forcing):
 *   - any platform other than `win32`: POSIX guidance;
 *   - `win32` where `resolveWindowsShell()` returns `'cmd'`: cmd.exe guidance;
 *   - `win32` otherwise: Windows PowerShell 5.1 guidance.
 *
 * `resolveWindowsShell()` is only consulted on Windows.
 *
 * @returns {string} Markdown section; every variant ends with the
 *   `NO_BRUTE_FORCE` and `NO_BROAD_KILL` rules, in that order.
 */
function buildShellPolicySection() {
    // The two closing rules are identical for every shell, so compose them
    // once instead of repeating them in each template.
    const sharedRules = `${NO_BRUTE_FORCE}\n${NO_BROAD_KILL}`;
    if (platform() !== 'win32') {
        return `## Shell Policy
- \`run_shell_command\` runs through your system shell (\`/bin/sh\`). Standard POSIX syntax works: \`&&\`, \`||\`, \`$VAR\`, pipes, redirects.
- Prefer the built-in tools over shell for inspection: \`read_file\`, \`grep_search\`, \`glob\`.
${sharedRules}`;
    }
    if (resolveWindowsShell() === 'cmd') {
        return `## Windows Shell Policy
- On this machine \`run_shell_command\` runs through **cmd.exe** (PowerShell was unavailable or \`DEEPSEEK_CODE_SHELL=cmd\`). Write cmd syntax — NOT PowerShell.
- Chain with \`&&\` (next only on success) or \`&\` (always). Environment variables: \`%VAR%\`; set inline with \`set VAR=value&& <command>\`.
- Do NOT use PowerShell syntax here (\`$env:\`, \`;\` as a separator, \`> $null\`, cmdlets) — it fails under cmd and can create junk files like a literal \`$null\`.
- Unix tools do NOT exist (\`sed\`, \`head\`, \`tail\`, \`grep\`, \`cat\`, \`ls\`, \`rm\`, \`touch\`, \`xargs\`). Use \`findstr\`, \`type\`, \`dir\`, \`del\`, or the \`read_file\`/\`grep_search\`/\`glob\` tools.
- Never use \`mkdir -p\` (creates a literal \`-p\` directory); use \`mkdir <path>\`.
- Prefer the built-in tools over shell for inspection: \`read_file\`, \`grep_search\`, \`glob\`.
${sharedRules}`;
    }
    return `## Windows Shell Policy
- On Windows, \`run_shell_command\` runs through **Windows PowerShell 5.1** (a single, predictable shell — never cmd.exe). Write every command in PowerShell syntax. \`npm\`, \`node\`, \`git\`, \`npx\` run normally inside it.
- **No \`&&\` or \`||\`** — PowerShell 5.1 does not support them (it is a parse error). Run sequentially with \`;\`. To run B only if A succeeded: \`A; if ($?) { B }\`. Example: \`npm run build; if ($?) { npm test }\` — NOT \`npm run build && npm test\`.
- **Environment variables**: read with \`$env:NAME\`, set inline with \`$env:NAME='value'; <command>\`. There is no \`VAR=value cmd\` prefix and no \`set VAR=...\`.
- **Redirects work as PowerShell**: \`> $null\`, \`2>$null\`, \`*> out.txt\` are valid here.
- These are PowerShell aliases and work fine: \`cat\`, \`ls\`, \`rm\`, \`cp\`, \`mv\`, \`echo\`, \`pwd\`. These do NOT exist (use the noted replacement): \`sed\`, \`head\` (→ \`Get-Content -TotalCount n\`), \`tail\` (→ \`Get-Content -Tail n\`), \`grep\` (→ \`grep_search\` tool or \`Select-String\`), \`xargs\`, \`touch\` (→ \`New-Item\`).
- Never use \`mkdir -p\` (the \`-p\` is not a PowerShell parameter). Use \`New-Item -ItemType Directory -Force <path>\`.
- Prefer the built-in tools over shell for inspection: \`read_file\` for file content, \`grep_search\` for text search, \`glob\` for file discovery.
${sharedRules}`;
}
60
+ export function buildSystemPrompt(cwd, approvalMode, model) {
15
61
  const osInfo = `${type()} ${release()} (${platform()})`;
16
62
  let projectInfo = '';
17
63
  if (cwd) {
@@ -75,92 +121,143 @@ function buildSystemPrompt(cwd, approvalMode) {
75
121
  '',
76
122
  ...toolListLines,
77
123
  ].join('\n');
124
+ // Shared Context Hub / external MCP tools. Plan mode is read-only and does not
125
+ // expose them, so only advertise when they are actually active.
126
+ let mcpSection = '';
127
+ if (mode !== 'plan') {
128
+ const mcpTools = getMcpToolDefinitions();
129
+ if (mcpTools.length > 0) {
130
+ const mcpToolLines = mcpTools.map(def => ` - \`${def.tool.name}\` — ${def.tool.description}`);
131
+ const hubProjectId = cwd ? projectIdFromCwd(cwd) : '';
132
+ const orientation = mcpTools.some(def => def.tool.name === 'workspace_resume')
133
+ ? `\n- If a \`workspace_resume\` tool is available, call it ONCE at the start of a new task to load project memory, open/handed-off tasks, and recent sessions in a single token-budgeted call — prefer it over reading many files just to get oriented.${hubProjectId ? ` This project's hub \`project_id\` is \`${hubProjectId}\` — pass it as the \`project_id\` argument (same id for memory/task tools).` : ''}`
134
+ : '';
135
+ const identity = `provider="deepseek", client="dsc"${model ? `, model="${model}"` : ''}`;
136
+ mcpSection = [
137
+ '\n## Shared Context Hub (MCP)',
138
+ 'External MCP tools are connected — shared memory and a task queue used by other agents working on the same projects.' + orientation,
139
+ '- Use `task_list`/`task_claim` to pick up work handed off by another agent, and `session_log`/`memory_write` to record durable outcomes for the next agent.',
140
+ `- Identify yourself when writing to the hub so the shared history shows who did the work: call \`session_log\` with ${identity}; set \`surface="dsc"\` on \`memory_write\`.`,
141
+ '- These tools are real and callable like any other; do not claim you used them without an actual tool call.',
142
+ 'Connected MCP tools:',
143
+ ...mcpToolLines,
144
+ ].join('\n');
145
+ }
146
+ }
78
147
  let responseLanguage = 'English';
79
148
  if (locale === 'ru')
80
149
  responseLanguage = 'Russian';
81
150
  if (locale === 'zh')
82
151
  responseLanguage = 'Chinese';
83
152
  const languageSection = `\n## Language\n- Respond in ${responseLanguage} unless the user explicitly asks otherwise.`;
84
- return `You are DeepSeek Code, an AI-powered CLI agent for software development.
85
-
86
- You have access to a set of tools that allow you to read, write, and edit files, run shell commands, search code, and use a real browser when rendered UI or web behavior matters.${projectInfo}${capabilitiesSection}${languageSection}
87
-
88
- ## Guidelines
89
- 1. **Plan first** — Before making changes, explore the codebase to understand the context.
90
- 2. **Use the right tool** — Choose the most appropriate tool for each task.
91
- 3. **Be precise** — When editing files, provide exact text matches.
92
- 4. **Verify** — After changes, run tests or linting to ensure correctness.
93
- 5. **Explain** — After completing a task, summarize what was done.
94
-
95
- ## Tool Usage
96
- - Read files with \`read_file\` before editing them
97
- - Search with \`grep_search\` or \`glob\` to find relevant code
98
- - Use \`run_shell_command\` to run build/test commands
99
- - Create or overwrite files with \`write_file\`
100
- - Make targeted edits with \`edit\` (prefer over write_file for small changes)
101
- - Use \`chrome\` proactively for UI flows, localhost app validation, rendered DOM state, screenshots, console logs, and network inspection
102
-
103
- When you need to run multiple tools, call them one at a time and wait for results before deciding the next step.
104
-
105
- ## Important
106
- - ALWAYS use absolute paths when referring to files. The project root is \`${cwd || 'the current working directory'}\`.
107
- - When asked to audit or explore the project, start with \`glob\`, \`grep_search\`, and targeted reads to discover structure.
108
- - If the task implies a browser or rendered UI check, do not wait for the user to explicitly say "open browser" before using \`chrome\`.
109
- - Do NOT guess file paths use \`glob\` or \`grep_search\` to discover them first.
110
- - When asked about your capabilities, answer based on the tools listed in the "Current Mode" section above. Do NOT claim you lack tools that are listed there but blocked by mode instead explain that the current mode restricts them.
111
- - If the user asks "what tools do you have" or "what are your capabilities", refer to this prompt's tool list. If write_file or edit are listed as blocked, explain that they exist but are restricted in the current mode.
112
- - **CRITICAL: Never claim an action was performed without an actual tool call.** Do not say "opening browser", "running eval", "taking screenshot", "passing captcha", "navigating to page", or any other action unless you have actually called the corresponding tool and received a result. If a tool call was not made, state honestly that it was not executed. If a tool is blocked by the current mode, do not promise to use it — explain that it is unavailable in this mode. If a captcha or site protection is encountered, do not claim to bypass it — stop and report the issue honestly.
113
- - **CRITICAL: No post-factum reports without tool calls.** If Tool uses is 0 in the current response, do not claim "I checked the log", "I reviewed the previous run", "step X was successful", or any other retrospective analysis. You may only say: "I did not perform a check right now. Based on visible context I can assume..." Always separate findings into: **Verified** (confirmed by actual tool calls this turn), **Assumption** (inferred from visible context), **Not checked** (not examined this turn). Do not write "successful" for a step that was not actually executed or has no saved result. Use the \`/last-browser-test\` command to retrieve the last saved browser test report — do not reconstruct it from memory.
114
-
115
- ## Honest Reporting
116
- - Do not claim files were changed unless tool results include changed=true or files=\`<list>\`.
117
- - Do not claim a change was verified unless tool results include verified=true.
118
- - Do not claim tests/checks passed unless you actually ran the command and saw success.
119
- - If no files changed, say "No files changed".
120
- - Final report must match tool results and Execution Summary.
121
-
122
- ## Failed Tool Calls Policy
123
- - If any tool/shell command failed during the run, mention it in the final report.
124
- - Explain whether each failure was **critical** (blocked the task goal) or **non-critical** (retried successfully, fallback worked, or unrelated to the task).
125
- - Do not write "all checks passed" or "everything succeeded" if there were failed tool calls, unless you clearly separate successful required checks from non-critical failed attempts.
126
- - If a failed command was retried successfully, say so explicitly (e.g., "first attempt failed, retry succeeded").
127
- - If a failed command produced a temporary file or other side effect, clean it up or mention it in the report.
128
-
129
- ## Execution Policy
130
- 1. **Minimal reading**: for a small task, first locate the target with as few reads as possible. Usually 1-2 read_file calls and 1 edit is enough. Do not run a broad grep/glob if you already know the file.
131
- 2. **Do not repeat identical tool calls**: do not call read_file/grep_search/glob with the same arguments twice unless you have reason to believe the file changed.
132
- 3. **Checks**: run lint/typecheck/build/test only after making changes. Do not run the same check multiple times without a new edit. If you did not run a check, do not claim it passed.
133
- 4. **Temporary files**: do not create lint_out.txt, test_out.txt, temp/debug files unnecessarily. If you created a temporary file, remove it before the final report. Do not leave garbage in the working tree.
134
- 5. **Report**: the final report must match the real tool results. Only mention what you actually read, changed, or verified. If no files were changed, explicitly say "No files changed". If there were errors, report them — do not hide them.
135
- 6. **Stop**: when the goal is achieved and checks are done — stop. Do not continue looking for extra issues without the user asking. Do not refactor beyond the task scope.
136
-
137
- ## Source of Truth Policy
138
- 1. **Do not invent** versions, release notes, dates, features, links, metrics, prices, or user/project facts.
139
- 2. **Source files/data** provided by the user are the source of truth.
140
- 3. **For release/version info**, use package.json, CHANGELOG.md, Git tags, npm, or GitHub Releases only if actually read/checked.
141
- 4. **Unchecked facts** must be labeled as assumption or not verified.
142
- 5. **Generated demo projects**: placeholder content is allowed only if explicitly requested.
143
- 6. **Do not present** invented content as real project history.
144
- 7. **If data is missing**, ask for it or write "Not verified" never guess.
145
-
146
- ## Project Acceptance Policy
147
- 1. **For web projects**, build success alone is not enough. Verify that:
148
- - install/build succeeds;
149
- - dev server starts successfully;
150
- - the main page opens in a browser;
151
- - no framework error overlay (Nuxt/Vite/Next/etc.);
152
- - browser console has no critical errors;
153
- - git status has no junk files (.idea/, node_modules/, .nuxt/, .output/, dist/, temp files).
154
- 2. **For container-first projects**:
155
- - provide Containerfile/Dockerfile and compose.yaml;
156
- - run through podman/docker compose;
157
- - verify build inside the container;
158
- - expose the correct host/port;
159
- - add .dockerignore.
160
- 3. **If browser or container verification was not performed**, do not claim the project is fully verified.
161
- 4. **In the final report**, separate:
162
- - Verified
163
- - Not checked
153
+ const shellPolicySection = buildShellPolicySection();
154
+ return `You are DeepSeek Code, an AI-powered CLI agent for software development.
155
+
156
+ You have access to a set of tools that allow you to read, write, and edit files, run shell commands, search code, and use a real browser when rendered UI or web behavior matters.${projectInfo}${capabilitiesSection}${mcpSection}${languageSection}
157
+
158
+ ## Guidelines
159
+ 1. **Plan first** — Before making changes, explore the codebase to understand the context.
160
+ 2. **Use the right tool** — Choose the most appropriate tool for each task.
161
+ 3. **Be precise** — When editing files, provide exact text matches.
162
+ 4. **Verify** — After changes, run tests or linting to ensure correctness.
163
+ 5. **Explain** — After completing a task, summarize what was done.
164
+
165
+ ## Tool Usage
166
+ - Read files with \`read_file\` before editing them
167
+ - Search with \`grep_search\` or \`glob\` to find relevant code
168
+ - Use \`run_shell_command\` to run build/test commands
169
+ - Create or overwrite files with \`write_file\`
170
+ - Make targeted edits with \`edit\` (prefer over write_file for small changes)
171
+ - Use \`chrome\` proactively for UI flows, localhost app validation, rendered DOM state, screenshots, console logs, and network inspection
172
+
173
+ When you need to run multiple tools, call them one at a time and wait for results before deciding the next step.
174
+
175
+ ## Workspace Boundary Policy
176
+ - The current working directory is the active project workspace. Do not silently switch to another project path inside shell commands.
177
+ - If \`write_file\`, \`edit\`, or \`read_file\` says a path is outside the workspace, stop and report the mismatch. Do not bypass the restriction by using shell redirection, PowerShell here-strings, Python scripts, or temporary generator scripts.
178
+ - If the user intended a different folder, ask them to restart/open the CLI in that folder or confirm the correct workspace.
179
+ - Avoid generating project files through ad-hoc scripts such as \`gen_helper.py\`, \`diag.py\`, or \`fix_pkg.py\`. Use the file tools for file content and remove any temporary helper before the final report.
180
+
181
+ ${shellPolicySection}
182
+
183
+ ## Long-Running Processes (dev/preview servers, watchers)
184
+ - A dev/preview server (e.g. \`npm run dev\`, \`nuxt dev\`, \`vite\`) does NOT exit — never run it as a normal blocking command, it will hang and hit the timeout.
185
+ - Start it with \`run_shell_command\` using \`background: true\`. To then verify the app, pass \`wait_for_port: <port>\` in the same call — it returns once the port is accepting connections (or reports it never became ready). Do NOT open the browser before the port is ready (that causes ERR_CONNECTION_REFUSED).
186
+ - Correct flow for a browser check: (1) \`run_shell_command\` with \`background:true, wait_for_port:3000\`; (2) use the \`chrome\` tool to open \`http://localhost:3000\` and inspect; (3) \`run_shell_command\` with \`stop_pid:<pid>\` to stop the server. Always stop a background process you started.
187
+ - If \`wait_for_port\` reports the port never opened, the server failed to start — read the returned output, fix the cause, and report it; do not pretend the page rendered.
188
+
189
+ ## Important
190
+ - ALWAYS use absolute paths when referring to files. The project root is \`${cwd || 'the current working directory'}\`.
191
+ - When asked to audit or explore the project, start with \`glob\`, \`grep_search\`, and targeted reads to discover structure.
192
+ - If the task implies a browser or rendered UI check, do not wait for the user to explicitly say "open browser" before using \`chrome\`.
193
+ - Do NOT guess file paths use \`glob\` or \`grep_search\` to discover them first.
194
+ - When asked about your capabilities, answer based on the tools listed in the "Current Mode" section above. Do NOT claim you lack tools that are listed there but blocked by mode — instead explain that the current mode restricts them.
195
+ - If the user asks "what tools do you have" or "what are your capabilities", refer to this prompt's tool list. If write_file or edit are listed as blocked, explain that they exist but are restricted in the current mode.
196
+ - **CRITICAL: Never claim an action was performed without an actual tool call.** Do not say "opening browser", "running eval", "taking screenshot", "passing captcha", "navigating to page", or any other action unless you have actually called the corresponding tool and received a result. If a tool call was not made, state honestly that it was not executed. If a tool is blocked by the current mode, do not promise to use it explain that it is unavailable in this mode. If a captcha or site protection is encountered, do not claim to bypass it — stop and report the issue honestly.
197
+ - **CRITICAL: No post-factum reports without tool calls.** If Tool uses is 0 in the current response, do not claim "I checked the log", "I reviewed the previous run", "step X was successful", or any other retrospective analysis. You may only say: "I did not perform a check right now. Based on visible context I can assume..." Always separate findings into: **Verified** (confirmed by actual tool calls this turn), **Assumption** (inferred from visible context), **Not checked** (not examined this turn). Do not write "successful" for a step that was not actually executed or has no saved result. Use the \`/last-browser-test\` command to retrieve the last saved browser test report — do not reconstruct it from memory.
198
+
199
+ ## Honest Reporting
200
+ - **An iteration/step counts as done ONLY if THIS run contains the tool calls that did it.** Narrating "Iteration N complete, files created, committed" in a turn with no corresponding write_file/edit/shell calls is fabrication. If you only planned or described work, say "planned, not yet executed". When a \`[verified-state]\` ledger appears in context, treat it as the single source of truth about what this run has actually done.
201
+ - Do not claim files were changed unless tool results include changed=true or files=\`<list>\`.
202
+ - Do not claim a change was verified unless tool results include verified=true.
203
+ - Do not claim tests/checks passed unless you actually ran the command and saw success.
204
+ - If no files changed, say "No files changed".
205
+ - Final report must match tool results and Execution Summary.
206
+ - Final report must start with a quality verdict: **Passed**, **Partial**, or **Failed**.
207
+ - If there were failed tool calls, failed browser/chrome calls, a budget/iteration stop, or skipped required acceptance checks, the verdict cannot be **Passed** unless every failure is explicitly classified as non-critical and the required check later succeeded.
208
+ - For web/UI projects, include a **Browser proof** block with the URL tested, page title, console error count, screenshot/rendered-state verdict, and whether Chrome/browser calls passed or failed. If browser proof was not performed, put it under **Not checked** and do not call the UI production-ready.
209
+ - For UI/product-design tasks, visual acceptance is required. If the rendered screenshot is blank, sparse, sidebar-only, broken, or clearly below the requested quality, say **Partial** or **Failed** and list the next visual iteration instead of claiming the project is complete.
210
+
211
+ ## Failed Tool Calls Policy
212
+ - If any tool/shell command failed during the run, mention it in the final report.
213
+ - Explain whether each failure was **critical** (blocked the task goal) or **non-critical** (retried successfully, fallback worked, or unrelated to the task).
214
+ - Do not write "all checks passed" or "everything succeeded" if there were failed tool calls, unless you clearly separate successful required checks from non-critical failed attempts.
215
+ - If a failed command was retried successfully, say so explicitly (e.g., "first attempt failed, retry succeeded").
216
+ - **Separate the FINAL result from the attempts to reach it.** A clean final result (e.g. "lint: 0 errors") describes only the last run. If earlier commands/attempts failed, do NOT present the whole run as clean — say e.g. "lint passed on the 2nd attempt; 1st failed: <reason>". Writing "0 errors" while hiding several failed attempts before it is dishonest. With failed attempts present, the verdict is at best **Partial**.
217
+ - **"Could not run" is NOT "broken".** If you could not execute a check in your environment (command/tool unavailable, permission denied, a shell error on your side), report it as **Not checked — could not run \`<X>\`: <reason>**. Do NOT report your own inability to run a tool as a defect or failure of the project being worked on.
218
+ - If a failed command produced a temporary file or other side effect, clean it up or mention it in the report.
219
+
220
+ ## Execution Policy
221
+ 1. **Minimal reading**: for a small task, first locate the target with as few reads as possible. Usually 1-2 read_file calls and 1 edit is enough. Do not run a broad grep/glob if you already know the file.
222
+ 2. **Do not repeat identical tool calls**: do not call read_file/grep_search/glob with the same arguments twice unless you have reason to believe the file changed.
223
+ 3. **Checks**: run lint/typecheck/build/test only after making changes. Do not run the same check multiple times without a new edit. If you did not run a check, do not claim it passed.
224
+ 4. **Temporary files**: do not create lint_out.txt, test_out.txt, err.txt, temp/debug scripts, one-off files like "1", or scratch files unnecessarily. **Never redirect a command's output to a file just to read it back** (e.g. \`eslint . > lint-output.txt\`) — the tool already returns stdout/stderr to you, so the file is pure junk. If you created a temporary file, remove it before the final report. Before the final report, check the working tree or otherwise verify no junk temp files remain. If cleanup failed or was not checked, say so explicitly.
225
+ 5. **Report**: the final report must match the real tool results. Only mention what you actually read, changed, or verified. If no files were changed, explicitly say "No files changed". If there were errors, report them — do not hide them.
226
+ 6. **Stop**: when the goal is achieved and checks are done — stop. Do not continue looking for extra issues without the user asking. Do not refactor beyond the task scope.
227
+
228
+ ## Source of Truth Policy
229
+ 1. **Do not invent** versions, release notes, dates, features, links, metrics, prices, or user/project facts.
230
+ 2. **Source files/data** provided by the user are the source of truth.
231
+ 3. **For release/version info**, use package.json, CHANGELOG.md, Git tags, npm, or GitHub Releases only if actually read/checked.
232
+ 4. **Unchecked facts** must be labeled as assumption or not verified.
233
+ 5. **Generated demo projects**: placeholder content is allowed only if explicitly requested.
234
+ 6. **Do not present** invented content as real project history.
235
+ 7. **If data is missing**, ask for it or write "Not verified" — never guess.
236
+
237
+ ## Project Acceptance Policy
238
+ 1. **For web projects**, build success alone is not enough. Verify that:
239
+ - install/build succeeds;
240
+ - dev server starts successfully;
241
+ - the main page opens in a browser;
242
+ - no framework error overlay (Nuxt/Vite/Next/etc.);
243
+ - browser console has no critical errors;
244
+ - the repository has an appropriate .gitignore for the stack;
245
+ - git status has no junk files (.idea/, node_modules/, .nuxt/, .output/, dist/, temp files, screenshots, logs).
246
+ 2. **Runtime/container verification is adaptive**, not Podman-only:
247
+ - first inspect available tooling and project files before choosing a path;
248
+ - if Docker Compose is available, use docker compose;
249
+ - if Podman/Podman Compose is available, use podman compose or podman-compose;
250
+ - if no container runtime is available, use the native package manager/dev server and report container verification as Not checked;
251
+ - do not spend many repeated attempts on one runtime. After two similar runtime failures, switch strategy or report the blocker.
252
+ 3. **For container-first projects**:
253
+ - keep one clear container entrypoint path (Dockerfile or Containerfile) and ensure compose references it correctly;
254
+ - verify build inside the container;
255
+ - expose the correct host/port;
256
+ - add .dockerignore or .containerignore as appropriate.
257
+ 4. **If browser, git-hygiene, or container verification was not performed**, do not claim the project is fully verified.
258
+ 5. **In the final report**, separate:
259
+ - Verified
260
+ - Not checked
164
261
  - Known issues`;
165
262
  }
166
263
  /**
@@ -178,13 +275,16 @@ export class AgentLoop extends EventEmitter {
178
275
  toolCallHistory = new Map();
179
276
  metrics = new MetricsCollector();
180
277
  iterationCount = 0;
278
+ followUpSeq = 0;
279
+ lastCompactedAtMessageCount = 0;
181
280
  constructor(config, options = {}) {
182
281
  super();
183
282
  this.api = new DeepSeekAPI(config);
184
283
  this.model = config.model;
185
- const defaultSystemPrompt = buildSystemPrompt(options.cwd || process.cwd(), options.approvalMode);
284
+ this.metrics.setContextWindow(contextWindowFor(this.model));
285
+ const defaultSystemPrompt = buildSystemPrompt(options.cwd || process.cwd(), options.approvalMode, this.model);
186
286
  this.options = {
187
- maxIterations: 100,
287
+ maxIterations: DEFAULT_MAX_ITERATIONS,
188
288
  toolTimeout: 30000,
189
289
  approvalMode: 'default',
190
290
  cwd: process.cwd(),
@@ -194,9 +294,13 @@ export class AgentLoop extends EventEmitter {
194
294
  onReasoningChunk: () => { },
195
295
  onResponse: () => { },
196
296
  onError: () => { },
297
+ onCompactStart: () => { },
298
+ onCompactProgress: () => { },
299
+ onCompactEnd: () => { },
197
300
  onApprovalRequest: async () => true,
198
301
  systemPrompt: defaultSystemPrompt,
199
302
  signal: undefined,
303
+ autoCompact: DEFAULT_AUTO_COMPACT,
200
304
  ...options,
201
305
  };
202
306
  this.tools = getToolsForMode(this.options.approvalMode);
@@ -217,6 +321,46 @@ export class AgentLoop extends EventEmitter {
217
321
  getMetrics() {
218
322
  return this.metrics;
219
323
  }
324
+ /**
325
+ * Add a user follow-up message during an active agent loop.
326
+ * The message will be picked up on the next API iteration.
327
+ * Does NOT start a new loop or reset state.
328
+ */
329
+ addUserFollowUp(content) {
330
+ const trimmed = content?.trim();
331
+ if (!trimmed)
332
+ return;
333
+ this.followUpSeq++;
334
+ // Attach the verified-state ledger so the model stays grounded in what was
335
+ // ACTUALLY done (it tends to narrate planned iterations as completed during
336
+ // very long runs).
337
+ this.messages.push({
338
+ role: 'user',
339
+ content: `User follow-up while task was running:\n${trimmed}\n\n${this.buildVerifiedLedger()}`,
340
+ });
341
+ }
342
+ /**
343
+ * Compact, tool-derived summary of what was REALLY done in this run: files
344
+ * actually written/edited (verified by the tools) and tool-call counts.
345
+ * Injected at grounding points (after auto-compaction, with follow-ups) so the
346
+ * model cannot drift into claiming work that has no tool calls behind it.
347
+ */
348
+ buildVerifiedLedger() {
349
+ const calls = [...this.toolCallHistory.values()];
350
+ const changedFiles = new Set();
351
+ for (const call of calls) {
352
+ if (call.changed && call.changedFiles) {
353
+ for (const file of call.changedFiles)
354
+ changedFiles.add(file);
355
+ }
356
+ }
357
+ const completed = calls.filter(c => c.status === 'completed').length;
358
+ const failed = calls.filter(c => c.status === 'failed' || c.status === 'rejected').length;
359
+ const filesList = changedFiles.size > 0
360
+ ? [...changedFiles].slice(0, 40).join(', ') + (changedFiles.size > 40 ? ` … +${changedFiles.size - 40} more` : '')
361
+ : '(none)';
362
+ return `[verified-state] Tool calls so far in THIS run: ${completed} ok, ${failed} failed. Files actually changed (tool-verified): ${filesList}. Anything not listed here was NOT done in this run — do not claim it as completed; verify with git/glob before claiming prior work.`;
363
+ }
220
364
  /**
221
365
  * Set approval mode — updates which tools are available and rebuilds system prompt.
222
366
  */
@@ -224,13 +368,27 @@ export class AgentLoop extends EventEmitter {
224
368
  this.options.approvalMode = mode;
225
369
  this.tools = getToolsForMode(mode);
226
370
  // Rebuild system prompt with updated mode info
227
- this.options.systemPrompt = buildSystemPrompt(this.options.cwd, mode);
371
+ this.options.systemPrompt = buildSystemPrompt(this.options.cwd, mode, this.model);
228
372
  // Update the system message if it exists
229
373
  const sysIdx = this.messages.findIndex(m => m.role === 'system');
230
374
  if (sysIdx !== -1) {
231
375
  this.messages[sysIdx] = { role: 'system', content: this.options.systemPrompt };
232
376
  }
233
377
  }
378
+ /**
379
+ * Built-in tools for the current mode plus any connected MCP tools.
380
+ * MCP servers connect asynchronously at startup, so this is recomputed at the
381
+ * start of each loop. Plan mode stays read-only and excludes MCP tools; a
382
+ * name clash resolves in favor of the built-in tool.
383
+ */
384
+ buildActiveTools() {
385
+ const base = getToolsForMode(this.options.approvalMode);
386
+ if (this.options.approvalMode === 'plan')
387
+ return base;
388
+ const taken = new Set(base.map(t => t.tool.name));
389
+ const mcp = getMcpToolDefinitions().filter(t => !taken.has(t.tool.name));
390
+ return [...base, ...mcp];
391
+ }
234
392
  /**
235
393
  * Run the agent loop with a user prompt.
236
394
  * Returns the final assistant response text.
@@ -258,6 +416,14 @@ export class AgentLoop extends EventEmitter {
258
416
  * Uses streaming for real-time text output via onStreamChunk callback.
259
417
  */
260
418
  async executeLoop() {
419
+ // Refresh tools and system prompt so MCP servers that finished connecting
420
+ // after this loop was constructed are reflected in both.
421
+ this.tools = this.buildActiveTools();
422
+ const sysIdx = this.messages.findIndex(m => m.role === 'system');
423
+ if (sysIdx !== -1) {
424
+ this.options.systemPrompt = buildSystemPrompt(this.options.cwd, this.options.approvalMode, this.model);
425
+ this.messages[sysIdx] = { role: 'system', content: this.options.systemPrompt };
426
+ }
261
427
  const openAITools = toOpenAITools(this.tools);
262
428
  // Capture git baseline before session starts
263
429
  this.metrics.captureGitBaseline(this.options.cwd);
@@ -267,37 +433,36 @@ export class AgentLoop extends EventEmitter {
267
433
  projectDir: this.options.cwd,
268
434
  messageCount: this.messages.length,
269
435
  }).catch(() => { });
270
- while (this.iterationCount < Math.min(this.options.maxIterations, this.options.budget?.maxIterations ?? this.options.maxIterations)) {
436
+ while (this.iterationCount < this.getIterationLimit()) {
271
437
  this.iterationCount++;
272
438
  // Budget: check maxToolCalls at top of each iteration
273
439
  if (this.checkBudgetHalt()) {
274
440
  return this.buildBudgetHaltMessage();
275
441
  }
276
442
  try {
443
+ await this.maybeAutoCompact();
277
444
  // Use streaming chat to get real-time output
278
445
  // Budget: check maxApiCalls before API call
279
446
  if (this.options.budget?.maxApiCalls && this.metrics.apiCalls >= this.options.budget.maxApiCalls) {
280
447
  return this.buildBudgetHaltMessage();
281
448
  }
449
+ // Cancelled before we even start the request — nothing to drain.
450
+ if (this.options.signal?.aborted) {
451
+ return this.finishCancelled();
452
+ }
453
+ const followUpSeqAtRequestStart = this.followUpSeq;
282
454
  const stream = this.api.streamChat(this.messages, openAITools);
283
455
  let responseContent = '';
284
456
  let toolCalls = [];
285
- // Check for cancellation
286
- if (this.options.signal?.aborted) {
287
- const cancelledMsg = i18n.t('agentCancelled');
288
- this.messages.push({ role: 'assistant', content: cancelledMsg });
289
- this.options.onResponse(cancelledMsg);
290
- this.finalizeSession();
291
- return cancelledMsg;
292
- }
457
+ // Cooperative cancellation: once aborted we stop acting on chunks but keep
458
+ // draining the stream to its natural end. Breaking out early would tear
459
+ // down the streaming socket mid-flight, which hard-crashed the process on
460
+ // Windows. The UI already shows the paused state immediately.
461
+ let cancelledDuringStream = false;
293
462
  for await (const chunk of stream) {
294
- // Check for cancellation during streaming
295
463
  if (this.options.signal?.aborted) {
296
- const cancelledMsg = i18n.t('agentCancelled');
297
- this.messages.push({ role: 'assistant', content: cancelledMsg });
298
- this.options.onResponse(cancelledMsg);
299
- this.finalizeSession();
300
- return cancelledMsg;
464
+ cancelledDuringStream = true;
465
+ continue;
301
466
  }
302
467
  if (chunk.type === 'usage' && chunk.usage) {
303
468
  this.metrics.recordUsage(chunk.usage);
@@ -323,6 +488,10 @@ export class AgentLoop extends EventEmitter {
323
488
  }
324
489
  }
325
490
  }
491
+ // Stream drained — if the user cancelled mid-stream, stop here cleanly.
492
+ if (cancelledDuringStream || this.options.signal?.aborted) {
493
+ return this.finishCancelled();
494
+ }
326
495
  // Budget: catch limits reached during streaming usage accounting.
327
496
  if (this.checkBudgetHalt()) {
328
497
  return this.buildBudgetHaltMessage();
@@ -438,7 +607,8 @@ export class AgentLoop extends EventEmitter {
438
607
  try {
439
608
  const toolResult = await this.executeTool(tc.function.name, args);
440
609
  const duration = Date.now() - startTime;
441
- this.metrics.recordToolCallEnd(tc.function.name, toolResult.success);
610
+ const toolLabel = this.buildToolCallLabel(tc.function.name, args);
611
+ this.metrics.recordToolCallEnd(tc.function.name, toolResult.success, toolLabel, toolResult.success ? undefined : toolResult.error);
442
612
  toolCallEvent.status = toolResult.success ? 'completed' : 'failed';
443
613
  toolCallEvent.result = toolResult.output;
444
614
  toolCallEvent.error = toolResult.error;
@@ -468,7 +638,8 @@ export class AgentLoop extends EventEmitter {
468
638
  catch (err) {
469
639
  const duration = Date.now() - startTime;
470
640
  const errorMsg = err.message;
471
- this.metrics.recordToolCallEnd(tc.function.name, false);
641
+ const toolLabel = this.buildToolCallLabel(tc.function.name, args);
642
+ this.metrics.recordToolCallEnd(tc.function.name, false, toolLabel, errorMsg);
472
643
  toolCallEvent.status = 'failed';
473
644
  toolCallEvent.error = errorMsg;
474
645
  toolCallEvent.durationMs = duration;
@@ -497,6 +668,11 @@ export class AgentLoop extends EventEmitter {
497
668
  const fallback = 'I have completed the requested actions. What else would you like me to do?';
498
669
  this.messages.push({ role: 'assistant', content: fallback });
499
670
  this.options.onResponse(fallback);
671
+ // Check if a follow-up arrived while the API request was streaming
672
+ if (this.followUpSeq > followUpSeqAtRequestStart) {
673
+ // Follow-up received during this request — continue loop instead of finishing
674
+ continue;
675
+ }
500
676
  this.finalizeSession();
501
677
  const summary = this.metrics.getSummary(this.model);
502
678
  this.options.onStreamChunk(summary);
@@ -504,6 +680,11 @@ export class AgentLoop extends EventEmitter {
504
680
  }
505
681
  this.messages.push({ role: 'assistant', content: responseContent });
506
682
  this.options.onResponse(responseContent);
683
+ // Check if a follow-up arrived while this API request was streaming
684
+ if (this.followUpSeq > followUpSeqAtRequestStart) {
685
+ // Follow-up received during the stream — continue loop, skip finalization
686
+ continue;
687
+ }
507
688
  // Output execution summary
508
689
  this.finalizeSession();
509
690
  const summary = this.metrics.getSummary(this.model);
@@ -512,20 +693,193 @@ export class AgentLoop extends EventEmitter {
512
693
  }
513
694
  catch (err) {
514
695
  const error = err;
696
+ // If the user cancelled, treat any resulting error as a clean stop.
697
+ if (this.options.signal?.aborted) {
698
+ return this.finishCancelled();
699
+ }
515
700
  this.options.onError(error);
516
701
  throw error;
517
702
  }
518
703
  }
519
704
  // Max iterations reached
520
- const timeoutMsg = `Агент достиг максимального числа итераций (${this.options.maxIterations}). Задача может быть не завершена.`;
705
+ const timeoutMsg = `Агент достиг максимального числа итераций (${this.getIterationLimit()}). Задача может быть не завершена.`;
521
706
  this.messages.push({ role: 'assistant', content: timeoutMsg });
522
707
  this.options.onResponse(timeoutMsg);
523
708
  this.finalizeSession();
709
+ const summary = this.metrics.getSummary(this.model);
710
+ this.options.onStreamChunk(summary);
524
711
  return timeoutMsg;
525
712
  }
713
+ /** Record a clean user-cancellation result and finalize the session. */
714
+ finishCancelled() {
715
+ const cancelledMsg = i18n.t('agentCancelled');
716
+ this.messages.push({ role: 'assistant', content: cancelledMsg });
717
+ this.options.onResponse(cancelledMsg);
718
+ this.finalizeSession();
719
+ return cancelledMsg;
720
+ }
721
+ getIterationLimit() {
722
+ const budgetLimit = this.options.budget?.maxIterations;
723
+ if (budgetLimit && budgetLimit > 0) {
724
+ return Math.min(this.options.maxIterations, budgetLimit);
725
+ }
726
+ return this.options.maxIterations;
727
+ }
728
+ getAutoCompactOptions() {
729
+ return {
730
+ ...DEFAULT_AUTO_COMPACT,
731
+ ...(this.options.autoCompact ?? {}),
732
+ };
733
+ }
734
+ async maybeAutoCompact() {
735
+ const compact = this.getAutoCompactOptions();
736
+ if (!compact.enabled)
737
+ return;
738
+ const contextPercent = this.metrics.getCurrentWindowPercent();
739
+ const beforeMessages = this.messages.length;
740
+ if (contextPercent < compact.thresholdPercent)
741
+ return;
742
+ if (beforeMessages < compact.minMessages)
743
+ return;
744
+ if (beforeMessages <= this.lastCompactedAtMessageCount + compact.keepRecentMessages)
745
+ return;
746
+ const startEvent = {
747
+ phase: 'start',
748
+ progress: 5,
749
+ contextPercent,
750
+ beforeMessages,
751
+ };
752
+ this.options.onCompactStart(startEvent);
753
+ this.options.onCompactProgress({ ...startEvent, phase: 'summarizing', progress: 35 });
754
+ try {
755
+ const result = await this.api.chat([
756
+ {
757
+ role: 'system',
758
+ content: 'Compress the conversation for continuation. Preserve concrete user goals, decisions, file paths, commands, failures, verification results, pending work, and constraints. Do not invent facts. Return concise bullet points.',
759
+ },
760
+ {
761
+ role: 'user',
762
+ content: this.buildCompactTranscript(),
763
+ },
764
+ ]);
765
+ if (result.usage) {
766
+ this.metrics.recordUsage(result.usage);
767
+ }
768
+ const summary = result.content.trim() || 'Auto-compaction completed, but the summarizer returned an empty summary.';
769
+ this.options.onCompactProgress({
770
+ phase: 'replacing',
771
+ progress: 80,
772
+ contextPercent,
773
+ beforeMessages,
774
+ });
775
+ const systemMsg = this.messages.find(m => m.role === 'system');
776
+ // BUG FIX: previously the whole history was replaced by [system, summary],
777
+ // silently dropping both the original task text and the recent messages
778
+ // that keepRecentMessages promised to keep. Continuing from a lossy
779
+ // summary alone is how the model drifts into claiming un-done work.
780
+ // Keep: the original user task verbatim + the recent tail.
781
+ const firstUserMsg = this.messages.find(m => m.role === 'user');
782
+ let taskText = '';
783
+ if (firstUserMsg) {
784
+ taskText = typeof firstUserMsg.content === 'string'
785
+ ? firstUserMsg.content
786
+ : firstUserMsg.content.filter(b => b.type === 'text').map(b => b.text).join('\n');
787
+ }
788
+ const taskMsg = taskText
789
+ ? {
790
+ role: 'user',
791
+ content: `**Original task (verbatim, pre-compaction):**\n${taskText.length > 6000 ? taskText.slice(0, 6000) + '\n…[truncated]' : taskText}`,
792
+ }
793
+ : null;
794
+ // Recent tail, trimmed so it does not start with an orphan tool message
795
+ // (a tool result must follow its assistant tool_calls message).
796
+ const tail = this.messages.slice(-compact.keepRecentMessages).filter(m => m.role !== 'system');
797
+ while (tail.length > 0 && tail[0].role === 'tool')
798
+ tail.shift();
799
+ this.messages = [
800
+ ...(systemMsg ? [systemMsg] : []),
801
+ ...(taskMsg ? [taskMsg] : []),
802
+ {
803
+ role: 'assistant',
804
+ // The summary is lossy — after compaction the model is prone to
805
+ // "remembering" planned work as done. Pin the tool-verified ledger
806
+ // right next to it so reality stays in context.
807
+ content: `**Context Auto-Compacted**\n\nOriginal messages: ${beforeMessages}\nPrevious context: ${contextPercent}% of window\n\n${summary}\n\n${this.buildVerifiedLedger()}`,
808
+ },
809
+ ...tail,
810
+ ];
811
+ this.lastCompactedAtMessageCount = this.messages.length;
812
+ this.options.onCompactEnd({
813
+ phase: 'done',
814
+ progress: 100,
815
+ contextPercent,
816
+ beforeMessages,
817
+ afterMessages: this.messages.length,
818
+ });
819
+ }
820
+ catch (err) {
821
+ this.options.onCompactEnd({
822
+ phase: 'failed',
823
+ progress: 100,
824
+ contextPercent,
825
+ beforeMessages,
826
+ error: err.message,
827
+ });
828
+ throw err;
829
+ }
830
+ }
831
+ buildCompactTranscript() {
832
+ return this.messages
833
+ .filter(message => message.role !== 'system')
834
+ .map((message, index) => {
835
+ const content = typeof message.content === 'string'
836
+ ? message.content
837
+ : JSON.stringify(message.content);
838
+ const toolCalls = message.tool_calls?.length ? ` tool_calls=${message.tool_calls.map(tc => tc.function.name).join(',')}` : '';
839
+ return `#${index + 1} ${message.role}${toolCalls}\n${content.slice(0, 8000)}`;
840
+ })
841
+ .join('\n\n---\n\n');
842
+ }
526
843
  /**
527
- * Parse tool arguments from JSON string.
844
+ * Build a short human-readable label for a tool call.
845
+ * Used in Execution Summary to identify which files/commands failed.
528
846
  */
847
+ buildToolCallLabel(toolName, args) {
848
+ try {
849
+ switch (toolName) {
850
+ case 'run_shell_command': {
851
+ const cmd = args.command ?? args.cmd ?? '';
852
+ if (typeof cmd === 'string' && cmd.length > 0) {
853
+ return cmd.length > 120 ? cmd.slice(0, 117) + '...' : cmd;
854
+ }
855
+ break;
856
+ }
857
+ case 'read_file':
858
+ case 'edit':
859
+ case 'write_file': {
860
+ const path = args.path ?? args.file_path ?? args.file ?? '';
861
+ if (typeof path === 'string' && path.length > 0) {
862
+ return path.length > 120 ? path.slice(0, 117) + '...' : path;
863
+ }
864
+ break;
865
+ }
866
+ case 'grep_search':
867
+ case 'glob': {
868
+ const pattern = args.pattern ?? '';
869
+ if (typeof pattern === 'string' && pattern.length > 0) {
870
+ return pattern.length > 120 ? pattern.slice(0, 117) + '...' : pattern;
871
+ }
872
+ break;
873
+ }
874
+ }
875
+ // Fallback: serialize first meaningful string value
876
+ const fallback = JSON.stringify(args);
877
+ return fallback.length > 120 ? fallback.slice(0, 117) + '...' : fallback;
878
+ }
879
+ catch {
880
+ return String(args);
881
+ }
882
+ }
529
883
  /**
530
884
  * Check if any budget limit has been exceeded (called at top of each iteration).
531
885
  * Returns the field name that exceeded or null if all good.
@@ -633,7 +987,7 @@ export class AgentLoop extends EventEmitter {
633
987
  };
634
988
  }
635
989
  try {
636
- const result = await def.tool.execute(args);
990
+ const result = await def.tool.execute(args, this.options.signal);
637
991
  return {
638
992
  success: result.success,
639
993
  output: result.output,