@serjm/deepseek-code 0.4.3 → 0.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/CHANGELOG.md +61 -0
  2. package/README.md +72 -109
  3. package/README.ru.md +73 -109
  4. package/dist/api/index.d.ts +9 -0
  5. package/dist/api/index.d.ts.map +1 -1
  6. package/dist/api/index.js +65 -2
  7. package/dist/api/index.js.map +1 -1
  8. package/dist/cli/index.d.ts +1 -0
  9. package/dist/cli/index.d.ts.map +1 -1
  10. package/dist/cli/index.js +15 -8
  11. package/dist/cli/index.js.map +1 -1
  12. package/dist/cli/interactive.d.ts.map +1 -1
  13. package/dist/cli/interactive.js +65 -3
  14. package/dist/cli/interactive.js.map +1 -1
  15. package/dist/commands/index.d.ts.map +1 -1
  16. package/dist/commands/index.js +26 -21
  17. package/dist/commands/index.js.map +1 -1
  18. package/dist/config/defaults.js +7 -7
  19. package/dist/config/defaults.js.map +1 -1
  20. package/dist/core/agent-loop.d.ts +44 -2
  21. package/dist/core/agent-loop.d.ts.map +1 -1
  22. package/dist/core/agent-loop.js +317 -102
  23. package/dist/core/agent-loop.js.map +1 -1
  24. package/dist/core/i18n.d.ts +3 -0
  25. package/dist/core/i18n.d.ts.map +1 -1
  26. package/dist/core/i18n.js +9 -0
  27. package/dist/core/i18n.js.map +1 -1
  28. package/dist/core/metrics.d.ts +3 -1
  29. package/dist/core/metrics.d.ts.map +1 -1
  30. package/dist/core/metrics.js +34 -5
  31. package/dist/core/metrics.js.map +1 -1
  32. package/dist/tools/bash.d.ts.map +1 -1
  33. package/dist/tools/bash.js +299 -20
  34. package/dist/tools/bash.js.map +1 -1
  35. package/dist/tools/glob.d.ts.map +1 -1
  36. package/dist/tools/glob.js +40 -3
  37. package/dist/tools/glob.js.map +1 -1
  38. package/dist/tools/grep.d.ts.map +1 -1
  39. package/dist/tools/grep.js +69 -13
  40. package/dist/tools/grep.js.map +1 -1
  41. package/dist/tools/read.d.ts.map +1 -1
  42. package/dist/tools/read.js +91 -0
  43. package/dist/tools/read.js.map +1 -1
  44. package/dist/tools/types.d.ts +21 -1
  45. package/dist/tools/types.d.ts.map +1 -1
  46. package/dist/tools/types.js +34 -0
  47. package/dist/tools/types.js.map +1 -1
  48. package/dist/ui/app.d.ts.map +1 -1
  49. package/dist/ui/app.js +229 -162
  50. package/dist/ui/app.js.map +1 -1
  51. package/dist/ui/chat-view.d.ts +24 -3
  52. package/dist/ui/chat-view.d.ts.map +1 -1
  53. package/dist/ui/chat-view.js +116 -58
  54. package/dist/ui/chat-view.js.map +1 -1
  55. package/dist/ui/input-bar.d.ts.map +1 -1
  56. package/dist/ui/input-bar.js +38 -4
  57. package/dist/ui/input-bar.js.map +1 -1
  58. package/dist/ui/setup-wizard.js +1 -1
  59. package/dist/ui/setup-wizard.js.map +1 -1
  60. package/dist/ui/status-bar.d.ts +5 -1
  61. package/dist/ui/status-bar.d.ts.map +1 -1
  62. package/dist/ui/status-bar.js +10 -4
  63. package/dist/ui/status-bar.js.map +1 -1
  64. package/dist/utils/logger.d.ts +15 -0
  65. package/dist/utils/logger.d.ts.map +1 -1
  66. package/dist/utils/logger.js +47 -0
  67. package/dist/utils/logger.js.map +1 -1
  68. package/package.json +3 -2
@@ -8,10 +8,17 @@ import { join } from 'node:path';
8
8
  import { platform, release, type } from 'node:os';
9
9
  import { MetricsCollector } from './metrics.js';
10
10
  import { hooksManager } from './hooks.js';
11
/**
 * Iteration cap applied to the agent loop when the caller does not supply
 * its own `maxIterations` option.
 */
const DEFAULT_MAX_ITERATIONS = 200;

/**
 * Baseline auto-compaction settings. Used as the default `autoCompact`
 * option and as the base layer that caller overrides are merged onto.
 *
 * - `enabled`: auto-compaction is skipped entirely when this is false.
 * - `thresholdPercent`: compared against the current context-window usage
 *   percentage reported by the metrics collector.
 * - `keepRecentMessages` / `minMessages`: consumed by the compaction routine,
 *   which is not visible here — presumably the number of trailing messages
 *   preserved and the minimum history size before compacting; TODO confirm.
 */
const DEFAULT_AUTO_COMPACT = {
    enabled: true,
    thresholdPercent: 70,
    keepRecentMessages: 8,
    minMessages: 18,
};
11
18
  /**
12
19
  * Build a dynamic system prompt with project context.
13
20
  */
14
- function buildSystemPrompt(cwd, approvalMode) {
21
+ export function buildSystemPrompt(cwd, approvalMode) {
15
22
  const osInfo = `${type()} ${release()} (${platform()})`;
16
23
  let projectInfo = '';
17
24
  if (cwd) {
@@ -81,86 +88,113 @@ function buildSystemPrompt(cwd, approvalMode) {
81
88
  if (locale === 'zh')
82
89
  responseLanguage = 'Chinese';
83
90
  const languageSection = `\n## Language\n- Respond in ${responseLanguage} unless the user explicitly asks otherwise.`;
84
- return `You are DeepSeek Code, an AI-powered CLI agent for software development.
85
-
86
- You have access to a set of tools that allow you to read, write, and edit files, run shell commands, search code, and use a real browser when rendered UI or web behavior matters.${projectInfo}${capabilitiesSection}${languageSection}
87
-
88
- ## Guidelines
89
- 1. **Plan first** — Before making changes, explore the codebase to understand the context.
90
- 2. **Use the right tool** — Choose the most appropriate tool for each task.
91
- 3. **Be precise** — When editing files, provide exact text matches.
92
- 4. **Verify** — After changes, run tests or linting to ensure correctness.
93
- 5. **Explain** — After completing a task, summarize what was done.
94
-
95
- ## Tool Usage
96
- - Read files with \`read_file\` before editing them
97
- - Search with \`grep_search\` or \`glob\` to find relevant code
98
- - Use \`run_shell_command\` to run build/test commands
99
- - Create or overwrite files with \`write_file\`
100
- - Make targeted edits with \`edit\` (prefer over write_file for small changes)
101
- - Use \`chrome\` proactively for UI flows, localhost app validation, rendered DOM state, screenshots, console logs, and network inspection
102
-
103
- When you need to run multiple tools, call them one at a time and wait for results before deciding the next step.
104
-
105
- ## Important
106
- - ALWAYS use absolute paths when referring to files. The project root is \`${cwd || 'the current working directory'}\`.
107
- - When asked to audit or explore the project, start with \`glob\`, \`grep_search\`, and targeted reads to discover structure.
108
- - If the task implies a browser or rendered UI check, do not wait for the user to explicitly say "open browser" before using \`chrome\`.
109
- - Do NOT guess file paths use \`glob\` or \`grep_search\` to discover them first.
110
- - When asked about your capabilities, answer based on the tools listed in the "Current Mode" section above. Do NOT claim you lack tools that are listed there but blocked by mode — instead explain that the current mode restricts them.
111
- - If the user asks "what tools do you have" or "what are your capabilities", refer to this prompt's tool list. If write_file or edit are listed as blocked, explain that they exist but are restricted in the current mode.
112
- - **CRITICAL: Never claim an action was performed without an actual tool call.** Do not say "opening browser", "running eval", "taking screenshot", "passing captcha", "navigating to page", or any other action unless you have actually called the corresponding tool and received a result. If a tool call was not made, state honestly that it was not executed. If a tool is blocked by the current mode, do not promise to use it — explain that it is unavailable in this mode. If a captcha or site protection is encountered, do not claim to bypass it — stop and report the issue honestly.
113
- - **CRITICAL: No post-factum reports without tool calls.** If Tool uses is 0 in the current response, do not claim "I checked the log", "I reviewed the previous run", "step X was successful", or any other retrospective analysis. You may only say: "I did not perform a check right now. Based on visible context I can assume..." Always separate findings into: **Verified** (confirmed by actual tool calls this turn), **Assumption** (inferred from visible context), **Not checked** (not examined this turn). Do not write "successful" for a step that was not actually executed or has no saved result. Use the \`/last-browser-test\` command to retrieve the last saved browser test report — do not reconstruct it from memory.
114
-
115
- ## Honest Reporting
116
- - Do not claim files were changed unless tool results include changed=true or files=\`<list>\`.
117
- - Do not claim a change was verified unless tool results include verified=true.
118
- - Do not claim tests/checks passed unless you actually ran the command and saw success.
119
- - If no files changed, say "No files changed".
120
- - Final report must match tool results and Execution Summary.
121
-
122
- ## Failed Tool Calls Policy
123
- - If any tool/shell command failed during the run, mention it in the final report.
124
- - Explain whether each failure was **critical** (blocked the task goal) or **non-critical** (retried successfully, fallback worked, or unrelated to the task).
125
- - Do not write "all checks passed" or "everything succeeded" if there were failed tool calls, unless you clearly separate successful required checks from non-critical failed attempts.
126
- - If a failed command was retried successfully, say so explicitly (e.g., "first attempt failed, retry succeeded").
127
- - If a failed command produced a temporary file or other side effect, clean it up or mention it in the report.
128
-
129
- ## Execution Policy
130
- 1. **Minimal reading**: for a small task, first locate the target with as few reads as possible. Usually 1-2 read_file calls and 1 edit is enough. Do not run a broad grep/glob if you already know the file.
131
- 2. **Do not repeat identical tool calls**: do not call read_file/grep_search/glob with the same arguments twice unless you have reason to believe the file changed.
132
- 3. **Checks**: run lint/typecheck/build/test only after making changes. Do not run the same check multiple times without a new edit. If you did not run a check, do not claim it passed.
133
- 4. **Temporary files**: do not create lint_out.txt, test_out.txt, temp/debug files unnecessarily. If you created a temporary file, remove it before the final report. Do not leave garbage in the working tree.
134
- 5. **Report**: the final report must match the real tool results. Only mention what you actually read, changed, or verified. If no files were changed, explicitly say "No files changed". If there were errors, report them — do not hide them.
135
- 6. **Stop**: when the goal is achieved and checks are done stop. Do not continue looking for extra issues without the user asking. Do not refactor beyond the task scope.
136
-
137
- ## Source of Truth Policy
138
- 1. **Do not invent** versions, release notes, dates, features, links, metrics, prices, or user/project facts.
139
- 2. **Source files/data** provided by the user are the source of truth.
140
- 3. **For release/version info**, use package.json, CHANGELOG.md, Git tags, npm, or GitHub Releases only if actually read/checked.
141
- 4. **Unchecked facts** must be labeled as assumption or not verified.
142
- 5. **Generated demo projects**: placeholder content is allowed only if explicitly requested.
143
- 6. **Do not present** invented content as real project history.
144
- 7. **If data is missing**, ask for it or write "Not verified" never guess.
145
-
146
- ## Project Acceptance Policy
147
- 1. **For web projects**, build success alone is not enough. Verify that:
148
- - install/build succeeds;
149
- - dev server starts successfully;
150
- - the main page opens in a browser;
151
- - no framework error overlay (Nuxt/Vite/Next/etc.);
152
- - browser console has no critical errors;
153
- - git status has no junk files (.idea/, node_modules/, .nuxt/, .output/, dist/, temp files).
154
- 2. **For container-first projects**:
155
- - provide Containerfile/Dockerfile and compose.yaml;
156
- - run through podman/docker compose;
157
- - verify build inside the container;
158
- - expose the correct host/port;
159
- - add .dockerignore.
160
- 3. **If browser or container verification was not performed**, do not claim the project is fully verified.
161
- 4. **In the final report**, separate:
162
- - Verified
163
- - Not checked
91
+ return `You are DeepSeek Code, an AI-powered CLI agent for software development.
92
+
93
+ You have access to a set of tools that allow you to read, write, and edit files, run shell commands, search code, and use a real browser when rendered UI or web behavior matters.${projectInfo}${capabilitiesSection}${languageSection}
94
+
95
+ ## Guidelines
96
+ 1. **Plan first** — Before making changes, explore the codebase to understand the context.
97
+ 2. **Use the right tool** — Choose the most appropriate tool for each task.
98
+ 3. **Be precise** — When editing files, provide exact text matches.
99
+ 4. **Verify** — After changes, run tests or linting to ensure correctness.
100
+ 5. **Explain** — After completing a task, summarize what was done.
101
+
102
+ ## Tool Usage
103
+ - Read files with \`read_file\` before editing them
104
+ - Search with \`grep_search\` or \`glob\` to find relevant code
105
+ - Use \`run_shell_command\` to run build/test commands
106
+ - Create or overwrite files with \`write_file\`
107
+ - Make targeted edits with \`edit\` (prefer over write_file for small changes)
108
+ - Use \`chrome\` proactively for UI flows, localhost app validation, rendered DOM state, screenshots, console logs, and network inspection
109
+
110
+ When you need to run multiple tools, call them one at a time and wait for results before deciding the next step.
111
+
112
+ ## Workspace Boundary Policy
113
+ - The current working directory is the active project workspace. Do not silently switch to another project path inside shell commands.
114
+ - If \`write_file\`, \`edit\`, or \`read_file\` says a path is outside the workspace, stop and report the mismatch. Do not bypass the restriction by using shell redirection, PowerShell here-strings, Python scripts, or temporary generator scripts.
115
+ - If the user intended a different folder, ask them to restart/open the CLI in that folder or confirm the correct workspace.
116
+ - Avoid generating project files through ad-hoc scripts such as \`gen_helper.py\`, \`diag.py\`, or \`fix_pkg.py\`. Use the file tools for file content and remove any temporary helper before the final report.
117
+
118
+ ## Windows Shell Policy
119
+ - The OS is listed in Project Context. If it is Windows or \`win32\`, write shell commands for PowerShell/cmd compatibility.
120
+ - On Windows, do not assume Unix tools exist. Avoid \`sed\`, \`head\`, \`tail\`, \`cat\`, \`grep\`, \`find\`, \`xargs\`, \`rm\`, \`touch\`, or Bash-specific syntax unless you first verified the command exists.
121
+ - On Windows, never use \`mkdir -p\`; it can create a literal \`-p\` directory. Use \`New-Item -ItemType Directory -Force <path>\` or \`mkdir <path>\` without \`-p\`.
122
+ - Prefer built-in tools over shell for repository inspection: use \`read_file\` for file content, \`grep_search\` for text search, and \`glob\` for file discovery.
123
+ - For Windows shell reads, prefer PowerShell commands such as \`Get-Content\`, \`Select-String\`, \`Get-ChildItem\`, \`Test-Path\`, \`Remove-Item\`, and \`New-Item\`.
124
+ - The shell tool automatically runs recognized PowerShell cmdlets through PowerShell on Windows. Plain commands such as \`npm\`, \`node\`, \`git\`, and \`npx\` run normally.
125
+ - Do not mix Bash/cmd chaining syntax with PowerShell cmdlets in the same command. Avoid \`cd path && Remove-Item ...\`; use separate tool calls or PowerShell-compatible \`Set-Location path; Remove-Item ...\` with explicit error checks.
126
+ - If a command fails because of shell incompatibility, retry with an OS-compatible command and report the failed attempt honestly.
127
+ - Never use broad process-kill commands such as \`taskkill /F /IM node.exe\`, \`Stop-Process -Name node\`, \`pkill node\`, or \`killall node\`. They can terminate the agent, the user's IDE terminal, and unrelated dev servers. Stop only a specific process you started and can identify by PID.
128
+
129
+ ## Important
130
+ - ALWAYS use absolute paths when referring to files. The project root is \`${cwd || 'the current working directory'}\`.
131
+ - When asked to audit or explore the project, start with \`glob\`, \`grep_search\`, and targeted reads to discover structure.
132
+ - If the task implies a browser or rendered UI check, do not wait for the user to explicitly say "open browser" before using \`chrome\`.
133
+ - Do NOT guess file paths use \`glob\` or \`grep_search\` to discover them first.
134
+ - When asked about your capabilities, answer based on the tools listed in the "Current Mode" section above. Do NOT claim you lack tools that are listed there but blocked by mode — instead explain that the current mode restricts them.
135
+ - If the user asks "what tools do you have" or "what are your capabilities", refer to this prompt's tool list. If write_file or edit are listed as blocked, explain that they exist but are restricted in the current mode.
136
+ - **CRITICAL: Never claim an action was performed without an actual tool call.** Do not say "opening browser", "running eval", "taking screenshot", "passing captcha", "navigating to page", or any other action unless you have actually called the corresponding tool and received a result. If a tool call was not made, state honestly that it was not executed. If a tool is blocked by the current mode, do not promise to use it — explain that it is unavailable in this mode. If a captcha or site protection is encountered, do not claim to bypass it — stop and report the issue honestly.
137
+ - **CRITICAL: No post-factum reports without tool calls.** If Tool uses is 0 in the current response, do not claim "I checked the log", "I reviewed the previous run", "step X was successful", or any other retrospective analysis. You may only say: "I did not perform a check right now. Based on visible context I can assume..." Always separate findings into: **Verified** (confirmed by actual tool calls this turn), **Assumption** (inferred from visible context), **Not checked** (not examined this turn). Do not write "successful" for a step that was not actually executed or has no saved result. Use the \`/last-browser-test\` command to retrieve the last saved browser test report — do not reconstruct it from memory.
138
+
139
+ ## Honest Reporting
140
+ - Do not claim files were changed unless tool results include changed=true or files=\`<list>\`.
141
+ - Do not claim a change was verified unless tool results include verified=true.
142
+ - Do not claim tests/checks passed unless you actually ran the command and saw success.
143
+ - If no files changed, say "No files changed".
144
+ - Final report must match tool results and Execution Summary.
145
+ - Final report must start with a quality verdict: **Passed**, **Partial**, or **Failed**.
146
+ - If there were failed tool calls, failed browser/chrome calls, a budget/iteration stop, or skipped required acceptance checks, the verdict cannot be **Passed** unless every failure is explicitly classified as non-critical and the required check later succeeded.
147
+ - For web/UI projects, include a **Browser proof** block with the URL tested, page title, console error count, screenshot/rendered-state verdict, and whether Chrome/browser calls passed or failed. If browser proof was not performed, put it under **Not checked** and do not call the UI production-ready.
148
+ - For UI/product-design tasks, visual acceptance is required. If the rendered screenshot is blank, sparse, sidebar-only, broken, or clearly below the requested quality, say **Partial** or **Failed** and list the next visual iteration instead of claiming the project is complete.
149
+
150
+ ## Failed Tool Calls Policy
151
+ - If any tool/shell command failed during the run, mention it in the final report.
152
+ - Explain whether each failure was **critical** (blocked the task goal) or **non-critical** (retried successfully, fallback worked, or unrelated to the task).
153
+ - Do not write "all checks passed" or "everything succeeded" if there were failed tool calls, unless you clearly separate successful required checks from non-critical failed attempts.
154
+ - If a failed command was retried successfully, say so explicitly (e.g., "first attempt failed, retry succeeded").
155
+ - If a failed command produced a temporary file or other side effect, clean it up or mention it in the report.
156
+
157
+ ## Execution Policy
158
+ 1. **Minimal reading**: for a small task, first locate the target with as few reads as possible. Usually 1-2 read_file calls and 1 edit is enough. Do not run a broad grep/glob if you already know the file.
159
+ 2. **Do not repeat identical tool calls**: do not call read_file/grep_search/glob with the same arguments twice unless you have reason to believe the file changed.
160
+ 3. **Checks**: run lint/typecheck/build/test only after making changes. Do not run the same check multiple times without a new edit. If you did not run a check, do not claim it passed.
161
+ 4. **Temporary files**: do not create lint_out.txt, test_out.txt, err.txt, temp/debug scripts, one-off files like "1", or scratch files unnecessarily. Prefer command output in the tool result over redirected files. If you created a temporary file, remove it before the final report. Before the final report, check the working tree or otherwise verify no junk temp files remain. If cleanup failed or was not checked, say so explicitly.
162
+ 5. **Report**: the final report must match the real tool results. Only mention what you actually read, changed, or verified. If no files were changed, explicitly say "No files changed". If there were errors, report them — do not hide them.
163
+ 6. **Stop**: when the goal is achieved and checks are done — stop. Do not continue looking for extra issues without the user asking. Do not refactor beyond the task scope.
164
+
165
+ ## Source of Truth Policy
166
+ 1. **Do not invent** versions, release notes, dates, features, links, metrics, prices, or user/project facts.
167
+ 2. **Source files/data** provided by the user are the source of truth.
168
+ 3. **For release/version info**, use package.json, CHANGELOG.md, Git tags, npm, or GitHub Releases only if actually read/checked.
169
+ 4. **Unchecked facts** must be labeled as assumption or not verified.
170
+ 5. **Generated demo projects**: placeholder content is allowed only if explicitly requested.
171
+ 6. **Do not present** invented content as real project history.
172
+ 7. **If data is missing**, ask for it or write "Not verified" — never guess.
173
+
174
+ ## Project Acceptance Policy
175
+ 1. **For web projects**, build success alone is not enough. Verify that:
176
+ - install/build succeeds;
177
+ - dev server starts successfully;
178
+ - the main page opens in a browser;
179
+ - no framework error overlay (Nuxt/Vite/Next/etc.);
180
+ - browser console has no critical errors;
181
+ - the repository has an appropriate .gitignore for the stack;
182
+ - git status has no junk files (.idea/, node_modules/, .nuxt/, .output/, dist/, temp files, screenshots, logs).
183
+ 2. **Runtime/container verification is adaptive**, not Podman-only:
184
+ - first inspect available tooling and project files before choosing a path;
185
+ - if Docker Compose is available, use docker compose;
186
+ - if Podman/Podman Compose is available, use podman compose or podman-compose;
187
+ - if no container runtime is available, use the native package manager/dev server and report container verification as Not checked;
188
+ - do not spend many repeated attempts on one runtime. After two similar runtime failures, switch strategy or report the blocker.
189
+ 3. **For container-first projects**:
190
+ - keep one clear container entrypoint path (Dockerfile or Containerfile) and ensure compose references it correctly;
191
+ - verify build inside the container;
192
+ - expose the correct host/port;
193
+ - add .dockerignore or .containerignore as appropriate.
194
+ 4. **If browser, git-hygiene, or container verification was not performed**, do not claim the project is fully verified.
195
+ 5. **In the final report**, separate:
196
+ - Verified
197
+ - Not checked
164
198
  - Known issues`;
165
199
  }
166
200
  /**
@@ -178,13 +212,15 @@ export class AgentLoop extends EventEmitter {
178
212
  toolCallHistory = new Map();
179
213
  metrics = new MetricsCollector();
180
214
  iterationCount = 0;
215
+ followUpSeq = 0;
216
+ lastCompactedAtMessageCount = 0;
181
217
  constructor(config, options = {}) {
182
218
  super();
183
219
  this.api = new DeepSeekAPI(config);
184
220
  this.model = config.model;
185
221
  const defaultSystemPrompt = buildSystemPrompt(options.cwd || process.cwd(), options.approvalMode);
186
222
  this.options = {
187
- maxIterations: 100,
223
+ maxIterations: DEFAULT_MAX_ITERATIONS,
188
224
  toolTimeout: 30000,
189
225
  approvalMode: 'default',
190
226
  cwd: process.cwd(),
@@ -194,9 +230,13 @@ export class AgentLoop extends EventEmitter {
194
230
  onReasoningChunk: () => { },
195
231
  onResponse: () => { },
196
232
  onError: () => { },
233
+ onCompactStart: () => { },
234
+ onCompactProgress: () => { },
235
+ onCompactEnd: () => { },
197
236
  onApprovalRequest: async () => true,
198
237
  systemPrompt: defaultSystemPrompt,
199
238
  signal: undefined,
239
+ autoCompact: DEFAULT_AUTO_COMPACT,
200
240
  ...options,
201
241
  };
202
242
  this.tools = getToolsForMode(this.options.approvalMode);
@@ -217,6 +257,21 @@ export class AgentLoop extends EventEmitter {
217
257
  getMetrics() {
218
258
  return this.metrics;
219
259
  }
260
+ /**
261
+ * Add a user follow-up message during an active agent loop.
262
+ * The message will be picked up on the next API iteration.
263
+ * Does NOT start a new loop or reset state.
264
+ */
265
+ addUserFollowUp(content) {
266
+ const trimmed = content?.trim();
267
+ if (!trimmed)
268
+ return;
269
+ this.followUpSeq++;
270
+ this.messages.push({
271
+ role: 'user',
272
+ content: `User follow-up while task was running:\n${trimmed}`,
273
+ });
274
+ }
220
275
  /**
221
276
  * Set approval mode — updates which tools are available and rebuilds system prompt.
222
277
  */
@@ -267,37 +322,36 @@ export class AgentLoop extends EventEmitter {
267
322
  projectDir: this.options.cwd,
268
323
  messageCount: this.messages.length,
269
324
  }).catch(() => { });
270
- while (this.iterationCount < Math.min(this.options.maxIterations, this.options.budget?.maxIterations ?? this.options.maxIterations)) {
325
+ while (this.iterationCount < this.getIterationLimit()) {
271
326
  this.iterationCount++;
272
327
  // Budget: check maxToolCalls at top of each iteration
273
328
  if (this.checkBudgetHalt()) {
274
329
  return this.buildBudgetHaltMessage();
275
330
  }
276
331
  try {
332
+ await this.maybeAutoCompact();
277
333
  // Use streaming chat to get real-time output
278
334
  // Budget: check maxApiCalls before API call
279
335
  if (this.options.budget?.maxApiCalls && this.metrics.apiCalls >= this.options.budget.maxApiCalls) {
280
336
  return this.buildBudgetHaltMessage();
281
337
  }
338
+ // Cancelled before we even start the request — nothing to drain.
339
+ if (this.options.signal?.aborted) {
340
+ return this.finishCancelled();
341
+ }
342
+ const followUpSeqAtRequestStart = this.followUpSeq;
282
343
  const stream = this.api.streamChat(this.messages, openAITools);
283
344
  let responseContent = '';
284
345
  let toolCalls = [];
285
- // Check for cancellation
286
- if (this.options.signal?.aborted) {
287
- const cancelledMsg = i18n.t('agentCancelled');
288
- this.messages.push({ role: 'assistant', content: cancelledMsg });
289
- this.options.onResponse(cancelledMsg);
290
- this.finalizeSession();
291
- return cancelledMsg;
292
- }
346
+ // Cooperative cancellation: once aborted we stop acting on chunks but keep
347
+ // draining the stream to its natural end. Breaking out early would tear
348
+ // down the streaming socket mid-flight, which hard-crashed the process on
349
+ // Windows. The UI already shows the paused state immediately.
350
+ let cancelledDuringStream = false;
293
351
  for await (const chunk of stream) {
294
- // Check for cancellation during streaming
295
352
  if (this.options.signal?.aborted) {
296
- const cancelledMsg = i18n.t('agentCancelled');
297
- this.messages.push({ role: 'assistant', content: cancelledMsg });
298
- this.options.onResponse(cancelledMsg);
299
- this.finalizeSession();
300
- return cancelledMsg;
353
+ cancelledDuringStream = true;
354
+ continue;
301
355
  }
302
356
  if (chunk.type === 'usage' && chunk.usage) {
303
357
  this.metrics.recordUsage(chunk.usage);
@@ -323,6 +377,10 @@ export class AgentLoop extends EventEmitter {
323
377
  }
324
378
  }
325
379
  }
380
+ // Stream drained — if the user cancelled mid-stream, stop here cleanly.
381
+ if (cancelledDuringStream || this.options.signal?.aborted) {
382
+ return this.finishCancelled();
383
+ }
326
384
  // Budget: catch limits reached during streaming usage accounting.
327
385
  if (this.checkBudgetHalt()) {
328
386
  return this.buildBudgetHaltMessage();
@@ -438,7 +496,8 @@ export class AgentLoop extends EventEmitter {
438
496
  try {
439
497
  const toolResult = await this.executeTool(tc.function.name, args);
440
498
  const duration = Date.now() - startTime;
441
- this.metrics.recordToolCallEnd(tc.function.name, toolResult.success);
499
+ const toolLabel = this.buildToolCallLabel(tc.function.name, args);
500
+ this.metrics.recordToolCallEnd(tc.function.name, toolResult.success, toolLabel, toolResult.success ? undefined : toolResult.error);
442
501
  toolCallEvent.status = toolResult.success ? 'completed' : 'failed';
443
502
  toolCallEvent.result = toolResult.output;
444
503
  toolCallEvent.error = toolResult.error;
@@ -468,7 +527,8 @@ export class AgentLoop extends EventEmitter {
468
527
  catch (err) {
469
528
  const duration = Date.now() - startTime;
470
529
  const errorMsg = err.message;
471
- this.metrics.recordToolCallEnd(tc.function.name, false);
530
+ const toolLabel = this.buildToolCallLabel(tc.function.name, args);
531
+ this.metrics.recordToolCallEnd(tc.function.name, false, toolLabel, errorMsg);
472
532
  toolCallEvent.status = 'failed';
473
533
  toolCallEvent.error = errorMsg;
474
534
  toolCallEvent.durationMs = duration;
@@ -497,6 +557,11 @@ export class AgentLoop extends EventEmitter {
497
557
  const fallback = 'I have completed the requested actions. What else would you like me to do?';
498
558
  this.messages.push({ role: 'assistant', content: fallback });
499
559
  this.options.onResponse(fallback);
560
+ // Check if a follow-up arrived while the API request was streaming
561
+ if (this.followUpSeq > followUpSeqAtRequestStart) {
562
+ // Follow-up received during this request — continue loop instead of finishing
563
+ continue;
564
+ }
500
565
  this.finalizeSession();
501
566
  const summary = this.metrics.getSummary(this.model);
502
567
  this.options.onStreamChunk(summary);
@@ -504,6 +569,11 @@ export class AgentLoop extends EventEmitter {
504
569
  }
505
570
  this.messages.push({ role: 'assistant', content: responseContent });
506
571
  this.options.onResponse(responseContent);
572
+ // Check if a follow-up arrived while this API request was streaming
573
+ if (this.followUpSeq > followUpSeqAtRequestStart) {
574
+ // Follow-up received during the stream — continue loop, skip finalization
575
+ continue;
576
+ }
507
577
  // Output execution summary
508
578
  this.finalizeSession();
509
579
  const summary = this.metrics.getSummary(this.model);
@@ -512,20 +582,165 @@ export class AgentLoop extends EventEmitter {
512
582
  }
513
583
  catch (err) {
514
584
  const error = err;
585
+ // If the user cancelled, treat any resulting error as a clean stop.
586
+ if (this.options.signal?.aborted) {
587
+ return this.finishCancelled();
588
+ }
515
589
  this.options.onError(error);
516
590
  throw error;
517
591
  }
518
592
  }
519
593
  // Max iterations reached
520
- const timeoutMsg = `Агент достиг максимального числа итераций (${this.options.maxIterations}). Задача может быть не завершена.`;
594
+ const timeoutMsg = `Агент достиг максимального числа итераций (${this.getIterationLimit()}). Задача может быть не завершена.`;
521
595
  this.messages.push({ role: 'assistant', content: timeoutMsg });
522
596
  this.options.onResponse(timeoutMsg);
523
597
  this.finalizeSession();
598
+ const summary = this.metrics.getSummary(this.model);
599
+ this.options.onStreamChunk(summary);
524
600
  return timeoutMsg;
525
601
  }
602
+ /** Record a clean user-cancellation result and finalize the session. */
603
+ finishCancelled() {
604
+ const cancelledMsg = i18n.t('agentCancelled');
605
+ this.messages.push({ role: 'assistant', content: cancelledMsg });
606
+ this.options.onResponse(cancelledMsg);
607
+ this.finalizeSession();
608
+ return cancelledMsg;
609
+ }
610
+ getIterationLimit() {
611
+ const budgetLimit = this.options.budget?.maxIterations;
612
+ if (budgetLimit && budgetLimit > 0) {
613
+ return Math.min(this.options.maxIterations, budgetLimit);
614
+ }
615
+ return this.options.maxIterations;
616
+ }
617
+ getAutoCompactOptions() {
618
+ return {
619
+ ...DEFAULT_AUTO_COMPACT,
620
+ ...(this.options.autoCompact ?? {}),
621
+ };
622
+ }
623
+ async maybeAutoCompact() {
624
+ const compact = this.getAutoCompactOptions();
625
+ if (!compact.enabled)
626
+ return;
627
+ const contextPercent = this.metrics.getCurrentWindowPercent();
628
+ const beforeMessages = this.messages.length;
629
+ if (contextPercent < compact.thresholdPercent)
630
+ return;
631
+ if (beforeMessages < compact.minMessages)
632
+ return;
633
+ if (beforeMessages <= this.lastCompactedAtMessageCount + compact.keepRecentMessages)
634
+ return;
635
+ const startEvent = {
636
+ phase: 'start',
637
+ progress: 5,
638
+ contextPercent,
639
+ beforeMessages,
640
+ };
641
+ this.options.onCompactStart(startEvent);
642
+ this.options.onCompactProgress({ ...startEvent, phase: 'summarizing', progress: 35 });
643
+ try {
644
+ const result = await this.api.chat([
645
+ {
646
+ role: 'system',
647
+ content: 'Compress the conversation for continuation. Preserve concrete user goals, decisions, file paths, commands, failures, verification results, pending work, and constraints. Do not invent facts. Return concise bullet points.',
648
+ },
649
+ {
650
+ role: 'user',
651
+ content: this.buildCompactTranscript(),
652
+ },
653
+ ]);
654
+ if (result.usage) {
655
+ this.metrics.recordUsage(result.usage);
656
+ }
657
+ const summary = result.content.trim() || 'Auto-compaction completed, but the summarizer returned an empty summary.';
658
+ this.options.onCompactProgress({
659
+ phase: 'replacing',
660
+ progress: 80,
661
+ contextPercent,
662
+ beforeMessages,
663
+ });
664
+ const systemMsg = this.messages.find(m => m.role === 'system');
665
+ this.messages = [
666
+ ...(systemMsg ? [systemMsg] : []),
667
+ {
668
+ role: 'assistant',
669
+ content: `**Context Auto-Compacted**\n\nOriginal messages: ${beforeMessages}\nPrevious context: ${contextPercent}% of window\n\n${summary}`,
670
+ },
671
+ ];
672
+ this.lastCompactedAtMessageCount = this.messages.length;
673
+ this.options.onCompactEnd({
674
+ phase: 'done',
675
+ progress: 100,
676
+ contextPercent,
677
+ beforeMessages,
678
+ afterMessages: this.messages.length,
679
+ });
680
+ }
681
+ catch (err) {
682
+ this.options.onCompactEnd({
683
+ phase: 'failed',
684
+ progress: 100,
685
+ contextPercent,
686
+ beforeMessages,
687
+ error: err.message,
688
+ });
689
+ throw err;
690
+ }
691
+ }
692
+ buildCompactTranscript() {
693
+ return this.messages
694
+ .filter(message => message.role !== 'system')
695
+ .map((message, index) => {
696
+ const content = typeof message.content === 'string'
697
+ ? message.content
698
+ : JSON.stringify(message.content);
699
+ const toolCalls = message.tool_calls?.length ? ` tool_calls=${message.tool_calls.map(tc => tc.function.name).join(',')}` : '';
700
+ return `#${index + 1} ${message.role}${toolCalls}\n${content.slice(0, 8000)}`;
701
+ })
702
+ .join('\n\n---\n\n');
703
+ }
526
704
  /**
527
- * Parse tool arguments from JSON string.
705
+ * Build a short human-readable label for a tool call.
706
+ * Used in Execution Summary to identify which files/commands failed.
528
707
  */
708
+ buildToolCallLabel(toolName, args) {
709
+ try {
710
+ switch (toolName) {
711
+ case 'run_shell_command': {
712
+ const cmd = args.command ?? args.cmd ?? '';
713
+ if (typeof cmd === 'string' && cmd.length > 0) {
714
+ return cmd.length > 120 ? cmd.slice(0, 117) + '...' : cmd;
715
+ }
716
+ break;
717
+ }
718
+ case 'read_file':
719
+ case 'edit':
720
+ case 'write_file': {
721
+ const path = args.path ?? args.file_path ?? args.file ?? '';
722
+ if (typeof path === 'string' && path.length > 0) {
723
+ return path.length > 120 ? path.slice(0, 117) + '...' : path;
724
+ }
725
+ break;
726
+ }
727
+ case 'grep_search':
728
+ case 'glob': {
729
+ const pattern = args.pattern ?? '';
730
+ if (typeof pattern === 'string' && pattern.length > 0) {
731
+ return pattern.length > 120 ? pattern.slice(0, 117) + '...' : pattern;
732
+ }
733
+ break;
734
+ }
735
+ }
736
+ // Fallback: serialize first meaningful string value
737
+ const fallback = JSON.stringify(args);
738
+ return fallback.length > 120 ? fallback.slice(0, 117) + '...' : fallback;
739
+ }
740
+ catch {
741
+ return String(args);
742
+ }
743
+ }
529
744
  /**
530
745
  * Check if any budget limit has been exceeded (called at top of each iteration).
531
746
  * Returns the field name that exceeded or null if all good.
@@ -633,7 +848,7 @@ export class AgentLoop extends EventEmitter {
633
848
  };
634
849
  }
635
850
  try {
636
- const result = await def.tool.execute(args);
851
+ const result = await def.tool.execute(args, this.options.signal);
637
852
  return {
638
853
  success: result.success,
639
854
  output: result.output,