@serjm/deepseek-code 0.4.3 → 0.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +61 -0
- package/README.md +72 -109
- package/README.ru.md +73 -109
- package/dist/api/index.d.ts +9 -0
- package/dist/api/index.d.ts.map +1 -1
- package/dist/api/index.js +65 -2
- package/dist/api/index.js.map +1 -1
- package/dist/cli/index.d.ts +1 -0
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/index.js +15 -8
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/interactive.d.ts.map +1 -1
- package/dist/cli/interactive.js +65 -3
- package/dist/cli/interactive.js.map +1 -1
- package/dist/commands/index.d.ts.map +1 -1
- package/dist/commands/index.js +26 -21
- package/dist/commands/index.js.map +1 -1
- package/dist/config/defaults.js +7 -7
- package/dist/config/defaults.js.map +1 -1
- package/dist/core/agent-loop.d.ts +44 -2
- package/dist/core/agent-loop.d.ts.map +1 -1
- package/dist/core/agent-loop.js +317 -102
- package/dist/core/agent-loop.js.map +1 -1
- package/dist/core/i18n.d.ts +3 -0
- package/dist/core/i18n.d.ts.map +1 -1
- package/dist/core/i18n.js +9 -0
- package/dist/core/i18n.js.map +1 -1
- package/dist/core/metrics.d.ts +3 -1
- package/dist/core/metrics.d.ts.map +1 -1
- package/dist/core/metrics.js +34 -5
- package/dist/core/metrics.js.map +1 -1
- package/dist/tools/bash.d.ts.map +1 -1
- package/dist/tools/bash.js +299 -20
- package/dist/tools/bash.js.map +1 -1
- package/dist/tools/glob.d.ts.map +1 -1
- package/dist/tools/glob.js +40 -3
- package/dist/tools/glob.js.map +1 -1
- package/dist/tools/grep.d.ts.map +1 -1
- package/dist/tools/grep.js +69 -13
- package/dist/tools/grep.js.map +1 -1
- package/dist/tools/read.d.ts.map +1 -1
- package/dist/tools/read.js +91 -0
- package/dist/tools/read.js.map +1 -1
- package/dist/tools/types.d.ts +21 -1
- package/dist/tools/types.d.ts.map +1 -1
- package/dist/tools/types.js +34 -0
- package/dist/tools/types.js.map +1 -1
- package/dist/ui/app.d.ts.map +1 -1
- package/dist/ui/app.js +229 -162
- package/dist/ui/app.js.map +1 -1
- package/dist/ui/chat-view.d.ts +24 -3
- package/dist/ui/chat-view.d.ts.map +1 -1
- package/dist/ui/chat-view.js +116 -58
- package/dist/ui/chat-view.js.map +1 -1
- package/dist/ui/input-bar.d.ts.map +1 -1
- package/dist/ui/input-bar.js +38 -4
- package/dist/ui/input-bar.js.map +1 -1
- package/dist/ui/setup-wizard.js +1 -1
- package/dist/ui/setup-wizard.js.map +1 -1
- package/dist/ui/status-bar.d.ts +5 -1
- package/dist/ui/status-bar.d.ts.map +1 -1
- package/dist/ui/status-bar.js +10 -4
- package/dist/ui/status-bar.js.map +1 -1
- package/dist/utils/logger.d.ts +15 -0
- package/dist/utils/logger.d.ts.map +1 -1
- package/dist/utils/logger.js +47 -0
- package/dist/utils/logger.js.map +1 -1
- package/package.json +3 -2
package/dist/core/agent-loop.js
CHANGED
|
@@ -8,10 +8,17 @@ import { join } from 'node:path';
|
|
|
8
8
|
import { platform, release, type } from 'node:os';
|
|
9
9
|
import { MetricsCollector } from './metrics.js';
|
|
10
10
|
import { hooksManager } from './hooks.js';
|
|
11
|
+
/** Iteration cap used when the caller does not supply `maxIterations`. */
const DEFAULT_MAX_ITERATIONS = 200;
/**
 * Baseline auto-compaction settings; per-instance `autoCompact` options are
 * merged over these.
 *
 * Frozen because this same object is installed as the default `autoCompact`
 * option, so an accidental write through one instance would otherwise change
 * the defaults seen by every other instance.
 */
const DEFAULT_AUTO_COMPACT = Object.freeze({
    enabled: true,
    // Context-window usage (percent) at or above which compaction is attempted.
    thresholdPercent: 70,
    // Number of messages that must be exceeded since the previous compaction
    // before another compaction is attempted.
    keepRecentMessages: 8,
    // Conversations with fewer messages than this are never compacted.
    minMessages: 18,
});
|
|
11
18
|
/**
|
|
12
19
|
* Build a dynamic system prompt with project context.
|
|
13
20
|
*/
|
|
14
|
-
function buildSystemPrompt(cwd, approvalMode) {
|
|
21
|
+
export function buildSystemPrompt(cwd, approvalMode) {
|
|
15
22
|
const osInfo = `${type()} ${release()} (${platform()})`;
|
|
16
23
|
let projectInfo = '';
|
|
17
24
|
if (cwd) {
|
|
@@ -81,86 +88,113 @@ function buildSystemPrompt(cwd, approvalMode) {
|
|
|
81
88
|
if (locale === 'zh')
|
|
82
89
|
responseLanguage = 'Chinese';
|
|
83
90
|
const languageSection = `\n## Language\n- Respond in ${responseLanguage} unless the user explicitly asks otherwise.`;
|
|
84
|
-
return `You are DeepSeek Code, an AI-powered CLI agent for software development.
|
|
85
|
-
|
|
86
|
-
You have access to a set of tools that allow you to read, write, and edit files, run shell commands, search code, and use a real browser when rendered UI or web behavior matters.${projectInfo}${capabilitiesSection}${languageSection}
|
|
87
|
-
|
|
88
|
-
## Guidelines
|
|
89
|
-
1. **Plan first** — Before making changes, explore the codebase to understand the context.
|
|
90
|
-
2. **Use the right tool** — Choose the most appropriate tool for each task.
|
|
91
|
-
3. **Be precise** — When editing files, provide exact text matches.
|
|
92
|
-
4. **Verify** — After changes, run tests or linting to ensure correctness.
|
|
93
|
-
5. **Explain** — After completing a task, summarize what was done.
|
|
94
|
-
|
|
95
|
-
## Tool Usage
|
|
96
|
-
- Read files with \`read_file\` before editing them
|
|
97
|
-
- Search with \`grep_search\` or \`glob\` to find relevant code
|
|
98
|
-
- Use \`run_shell_command\` to run build/test commands
|
|
99
|
-
- Create or overwrite files with \`write_file\`
|
|
100
|
-
- Make targeted edits with \`edit\` (prefer over write_file for small changes)
|
|
101
|
-
- Use \`chrome\` proactively for UI flows, localhost app validation, rendered DOM state, screenshots, console logs, and network inspection
|
|
102
|
-
|
|
103
|
-
When you need to run multiple tools, call them one at a time and wait for results before deciding the next step.
|
|
104
|
-
|
|
105
|
-
##
|
|
106
|
-
-
|
|
107
|
-
-
|
|
108
|
-
- If the
|
|
109
|
-
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
-
|
|
113
|
-
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
-
|
|
117
|
-
-
|
|
118
|
-
- Do not
|
|
119
|
-
- If
|
|
120
|
-
-
|
|
121
|
-
|
|
122
|
-
##
|
|
123
|
-
-
|
|
124
|
-
-
|
|
125
|
-
-
|
|
126
|
-
-
|
|
127
|
-
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
91
|
+
return `You are DeepSeek Code, an AI-powered CLI agent for software development.
|
|
92
|
+
|
|
93
|
+
You have access to a set of tools that allow you to read, write, and edit files, run shell commands, search code, and use a real browser when rendered UI or web behavior matters.${projectInfo}${capabilitiesSection}${languageSection}
|
|
94
|
+
|
|
95
|
+
## Guidelines
|
|
96
|
+
1. **Plan first** — Before making changes, explore the codebase to understand the context.
|
|
97
|
+
2. **Use the right tool** — Choose the most appropriate tool for each task.
|
|
98
|
+
3. **Be precise** — When editing files, provide exact text matches.
|
|
99
|
+
4. **Verify** — After changes, run tests or linting to ensure correctness.
|
|
100
|
+
5. **Explain** — After completing a task, summarize what was done.
|
|
101
|
+
|
|
102
|
+
## Tool Usage
|
|
103
|
+
- Read files with \`read_file\` before editing them
|
|
104
|
+
- Search with \`grep_search\` or \`glob\` to find relevant code
|
|
105
|
+
- Use \`run_shell_command\` to run build/test commands
|
|
106
|
+
- Create or overwrite files with \`write_file\`
|
|
107
|
+
- Make targeted edits with \`edit\` (prefer over write_file for small changes)
|
|
108
|
+
- Use \`chrome\` proactively for UI flows, localhost app validation, rendered DOM state, screenshots, console logs, and network inspection
|
|
109
|
+
|
|
110
|
+
When you need to run multiple tools, call them one at a time and wait for results before deciding the next step.
|
|
111
|
+
|
|
112
|
+
## Workspace Boundary Policy
|
|
113
|
+
- The current working directory is the active project workspace. Do not silently switch to another project path inside shell commands.
|
|
114
|
+
- If \`write_file\`, \`edit\`, or \`read_file\` says a path is outside the workspace, stop and report the mismatch. Do not bypass the restriction by using shell redirection, PowerShell here-strings, Python scripts, or temporary generator scripts.
|
|
115
|
+
- If the user intended a different folder, ask them to restart/open the CLI in that folder or confirm the correct workspace.
|
|
116
|
+
- Avoid generating project files through ad-hoc scripts such as \`gen_helper.py\`, \`diag.py\`, or \`fix_pkg.py\`. Use the file tools for file content and remove any temporary helper before the final report.
|
|
117
|
+
|
|
118
|
+
## Windows Shell Policy
|
|
119
|
+
- The OS is listed in Project Context. If it is Windows or \`win32\`, write shell commands for PowerShell/cmd compatibility.
|
|
120
|
+
- On Windows, do not assume Unix tools exist. Avoid \`sed\`, \`head\`, \`tail\`, \`cat\`, \`grep\`, \`find\`, \`xargs\`, \`rm\`, \`touch\`, or Bash-specific syntax unless you first verified the command exists.
|
|
121
|
+
- On Windows, never use \`mkdir -p\`; it can create a literal \`-p\` directory. Use \`New-Item -ItemType Directory -Force <path>\` or \`mkdir <path>\` without \`-p\`.
|
|
122
|
+
- Prefer built-in tools over shell for repository inspection: use \`read_file\` for file content, \`grep_search\` for text search, and \`glob\` for file discovery.
|
|
123
|
+
- For Windows shell reads, prefer PowerShell commands such as \`Get-Content\`, \`Select-String\`, \`Get-ChildItem\`, \`Test-Path\`, \`Remove-Item\`, and \`New-Item\`.
|
|
124
|
+
- The shell tool automatically runs recognized PowerShell cmdlets through PowerShell on Windows. Plain commands such as \`npm\`, \`node\`, \`git\`, and \`npx\` run normally.
|
|
125
|
+
- Do not mix Bash/cmd chaining syntax with PowerShell cmdlets in the same command. Avoid \`cd path && Remove-Item ...\`; use separate tool calls or PowerShell-compatible \`Set-Location path; Remove-Item ...\` with explicit error checks.
|
|
126
|
+
- If a command fails because of shell incompatibility, retry with an OS-compatible command and report the failed attempt honestly.
|
|
127
|
+
- Never use broad process-kill commands such as \`taskkill /F /IM node.exe\`, \`Stop-Process -Name node\`, \`pkill node\`, or \`killall node\`. They can terminate the agent, the user's IDE terminal, and unrelated dev servers. Stop only a specific process you started and can identify by PID.
|
|
128
|
+
|
|
129
|
+
## Important
|
|
130
|
+
- ALWAYS use absolute paths when referring to files. The project root is \`${cwd || 'the current working directory'}\`.
|
|
131
|
+
- When asked to audit or explore the project, start with \`glob\`, \`grep_search\`, and targeted reads to discover structure.
|
|
132
|
+
- If the task implies a browser or rendered UI check, do not wait for the user to explicitly say "open browser" before using \`chrome\`.
|
|
133
|
+
- Do NOT guess file paths — use \`glob\` or \`grep_search\` to discover them first.
|
|
134
|
+
- When asked about your capabilities, answer based on the tools listed in the "Current Mode" section above. Do NOT claim you lack tools that are listed there but blocked by mode — instead explain that the current mode restricts them.
|
|
135
|
+
- If the user asks "what tools do you have" or "what are your capabilities", refer to this prompt's tool list. If write_file or edit are listed as blocked, explain that they exist but are restricted in the current mode.
|
|
136
|
+
- **CRITICAL: Never claim an action was performed without an actual tool call.** Do not say "opening browser", "running eval", "taking screenshot", "passing captcha", "navigating to page", or any other action unless you have actually called the corresponding tool and received a result. If a tool call was not made, state honestly that it was not executed. If a tool is blocked by the current mode, do not promise to use it — explain that it is unavailable in this mode. If a captcha or site protection is encountered, do not claim to bypass it — stop and report the issue honestly.
|
|
137
|
+
- **CRITICAL: No post-factum reports without tool calls.** If Tool uses is 0 in the current response, do not claim "I checked the log", "I reviewed the previous run", "step X was successful", or any other retrospective analysis. You may only say: "I did not perform a check right now. Based on visible context I can assume..." Always separate findings into: **Verified** (confirmed by actual tool calls this turn), **Assumption** (inferred from visible context), **Not checked** (not examined this turn). Do not write "successful" for a step that was not actually executed or has no saved result. Use the \`/last-browser-test\` command to retrieve the last saved browser test report — do not reconstruct it from memory.
|
|
138
|
+
|
|
139
|
+
## Honest Reporting
|
|
140
|
+
- Do not claim files were changed unless tool results include changed=true or files=\`<list>\`.
|
|
141
|
+
- Do not claim a change was verified unless tool results include verified=true.
|
|
142
|
+
- Do not claim tests/checks passed unless you actually ran the command and saw success.
|
|
143
|
+
- If no files changed, say "No files changed".
|
|
144
|
+
- Final report must match tool results and Execution Summary.
|
|
145
|
+
- Final report must start with a quality verdict: **Passed**, **Partial**, or **Failed**.
|
|
146
|
+
- If there were failed tool calls, failed browser/chrome calls, a budget/iteration stop, or skipped required acceptance checks, the verdict cannot be **Passed** unless every failure is explicitly classified as non-critical and the required check later succeeded.
|
|
147
|
+
- For web/UI projects, include a **Browser proof** block with the URL tested, page title, console error count, screenshot/rendered-state verdict, and whether Chrome/browser calls passed or failed. If browser proof was not performed, put it under **Not checked** and do not call the UI production-ready.
|
|
148
|
+
- For UI/product-design tasks, visual acceptance is required. If the rendered screenshot is blank, sparse, sidebar-only, broken, or clearly below the requested quality, say **Partial** or **Failed** and list the next visual iteration instead of claiming the project is complete.
|
|
149
|
+
|
|
150
|
+
## Failed Tool Calls Policy
|
|
151
|
+
- If any tool/shell command failed during the run, mention it in the final report.
|
|
152
|
+
- Explain whether each failure was **critical** (blocked the task goal) or **non-critical** (retried successfully, fallback worked, or unrelated to the task).
|
|
153
|
+
- Do not write "all checks passed" or "everything succeeded" if there were failed tool calls, unless you clearly separate successful required checks from non-critical failed attempts.
|
|
154
|
+
- If a failed command was retried successfully, say so explicitly (e.g., "first attempt failed, retry succeeded").
|
|
155
|
+
- If a failed command produced a temporary file or other side effect, clean it up or mention it in the report.
|
|
156
|
+
|
|
157
|
+
## Execution Policy
|
|
158
|
+
1. **Minimal reading**: for a small task, first locate the target with as few reads as possible. Usually 1-2 read_file calls and 1 edit is enough. Do not run a broad grep/glob if you already know the file.
|
|
159
|
+
2. **Do not repeat identical tool calls**: do not call read_file/grep_search/glob with the same arguments twice unless you have reason to believe the file changed.
|
|
160
|
+
3. **Checks**: run lint/typecheck/build/test only after making changes. Do not run the same check multiple times without a new edit. If you did not run a check, do not claim it passed.
|
|
161
|
+
4. **Temporary files**: do not create lint_out.txt, test_out.txt, err.txt, temp/debug scripts, one-off files like "1", or scratch files unnecessarily. Prefer command output in the tool result over redirected files. If you created a temporary file, remove it before the final report. Before the final report, check the working tree or otherwise verify no junk temp files remain. If cleanup failed or was not checked, say so explicitly.
|
|
162
|
+
5. **Report**: the final report must match the real tool results. Only mention what you actually read, changed, or verified. If no files were changed, explicitly say "No files changed". If there were errors, report them — do not hide them.
|
|
163
|
+
6. **Stop**: when the goal is achieved and checks are done — stop. Do not continue looking for extra issues without the user asking. Do not refactor beyond the task scope.
|
|
164
|
+
|
|
165
|
+
## Source of Truth Policy
|
|
166
|
+
1. **Do not invent** versions, release notes, dates, features, links, metrics, prices, or user/project facts.
|
|
167
|
+
2. **Source files/data** provided by the user are the source of truth.
|
|
168
|
+
3. **For release/version info**, use package.json, CHANGELOG.md, Git tags, npm, or GitHub Releases only if actually read/checked.
|
|
169
|
+
4. **Unchecked facts** must be labeled as assumption or not verified.
|
|
170
|
+
5. **Generated demo projects**: placeholder content is allowed only if explicitly requested.
|
|
171
|
+
6. **Do not present** invented content as real project history.
|
|
172
|
+
7. **If data is missing**, ask for it or write "Not verified" — never guess.
|
|
173
|
+
|
|
174
|
+
## Project Acceptance Policy
|
|
175
|
+
1. **For web projects**, build success alone is not enough. Verify that:
|
|
176
|
+
- install/build succeeds;
|
|
177
|
+
- dev server starts successfully;
|
|
178
|
+
- the main page opens in a browser;
|
|
179
|
+
- no framework error overlay (Nuxt/Vite/Next/etc.);
|
|
180
|
+
- browser console has no critical errors;
|
|
181
|
+
- the repository has an appropriate .gitignore for the stack;
|
|
182
|
+
- git status has no junk files (.idea/, node_modules/, .nuxt/, .output/, dist/, temp files, screenshots, logs).
|
|
183
|
+
2. **Runtime/container verification is adaptive**, not Podman-only:
|
|
184
|
+
- first inspect available tooling and project files before choosing a path;
|
|
185
|
+
- if Docker Compose is available, use docker compose;
|
|
186
|
+
- if Podman/Podman Compose is available, use podman compose or podman-compose;
|
|
187
|
+
- if no container runtime is available, use the native package manager/dev server and report container verification as Not checked;
|
|
188
|
+
- do not spend many repeated attempts on one runtime. After two similar runtime failures, switch strategy or report the blocker.
|
|
189
|
+
3. **For container-first projects**:
|
|
190
|
+
- keep one clear container entrypoint path (Dockerfile or Containerfile) and ensure compose references it correctly;
|
|
191
|
+
- verify build inside the container;
|
|
192
|
+
- expose the correct host/port;
|
|
193
|
+
- add .dockerignore or .containerignore as appropriate.
|
|
194
|
+
4. **If browser, git-hygiene, or container verification was not performed**, do not claim the project is fully verified.
|
|
195
|
+
5. **In the final report**, separate:
|
|
196
|
+
- Verified
|
|
197
|
+
- Not checked
|
|
164
198
|
- Known issues`;
|
|
165
199
|
}
|
|
166
200
|
/**
|
|
@@ -178,13 +212,15 @@ export class AgentLoop extends EventEmitter {
|
|
|
178
212
|
toolCallHistory = new Map();
|
|
179
213
|
metrics = new MetricsCollector();
|
|
180
214
|
iterationCount = 0;
|
|
215
|
+
followUpSeq = 0;
|
|
216
|
+
lastCompactedAtMessageCount = 0;
|
|
181
217
|
constructor(config, options = {}) {
|
|
182
218
|
super();
|
|
183
219
|
this.api = new DeepSeekAPI(config);
|
|
184
220
|
this.model = config.model;
|
|
185
221
|
const defaultSystemPrompt = buildSystemPrompt(options.cwd || process.cwd(), options.approvalMode);
|
|
186
222
|
this.options = {
|
|
187
|
-
maxIterations:
|
|
223
|
+
maxIterations: DEFAULT_MAX_ITERATIONS,
|
|
188
224
|
toolTimeout: 30000,
|
|
189
225
|
approvalMode: 'default',
|
|
190
226
|
cwd: process.cwd(),
|
|
@@ -194,9 +230,13 @@ export class AgentLoop extends EventEmitter {
|
|
|
194
230
|
onReasoningChunk: () => { },
|
|
195
231
|
onResponse: () => { },
|
|
196
232
|
onError: () => { },
|
|
233
|
+
onCompactStart: () => { },
|
|
234
|
+
onCompactProgress: () => { },
|
|
235
|
+
onCompactEnd: () => { },
|
|
197
236
|
onApprovalRequest: async () => true,
|
|
198
237
|
systemPrompt: defaultSystemPrompt,
|
|
199
238
|
signal: undefined,
|
|
239
|
+
autoCompact: DEFAULT_AUTO_COMPACT,
|
|
200
240
|
...options,
|
|
201
241
|
};
|
|
202
242
|
this.tools = getToolsForMode(this.options.approvalMode);
|
|
@@ -217,6 +257,21 @@ export class AgentLoop extends EventEmitter {
|
|
|
217
257
|
getMetrics() {
|
|
218
258
|
return this.metrics;
|
|
219
259
|
}
|
|
260
|
+
/**
|
|
261
|
+
* Add a user follow-up message during an active agent loop.
|
|
262
|
+
* The message will be picked up on the next API iteration.
|
|
263
|
+
* Does NOT start a new loop or reset state.
|
|
264
|
+
*/
|
|
265
|
+
addUserFollowUp(content) {
|
|
266
|
+
const trimmed = content?.trim();
|
|
267
|
+
if (!trimmed)
|
|
268
|
+
return;
|
|
269
|
+
this.followUpSeq++;
|
|
270
|
+
this.messages.push({
|
|
271
|
+
role: 'user',
|
|
272
|
+
content: `User follow-up while task was running:\n${trimmed}`,
|
|
273
|
+
});
|
|
274
|
+
}
|
|
220
275
|
/**
|
|
221
276
|
* Set approval mode — updates which tools are available and rebuilds system prompt.
|
|
222
277
|
*/
|
|
@@ -267,37 +322,36 @@ export class AgentLoop extends EventEmitter {
|
|
|
267
322
|
projectDir: this.options.cwd,
|
|
268
323
|
messageCount: this.messages.length,
|
|
269
324
|
}).catch(() => { });
|
|
270
|
-
while (this.iterationCount <
|
|
325
|
+
while (this.iterationCount < this.getIterationLimit()) {
|
|
271
326
|
this.iterationCount++;
|
|
272
327
|
// Budget: check maxToolCalls at top of each iteration
|
|
273
328
|
if (this.checkBudgetHalt()) {
|
|
274
329
|
return this.buildBudgetHaltMessage();
|
|
275
330
|
}
|
|
276
331
|
try {
|
|
332
|
+
await this.maybeAutoCompact();
|
|
277
333
|
// Use streaming chat to get real-time output
|
|
278
334
|
// Budget: check maxApiCalls before API call
|
|
279
335
|
if (this.options.budget?.maxApiCalls && this.metrics.apiCalls >= this.options.budget.maxApiCalls) {
|
|
280
336
|
return this.buildBudgetHaltMessage();
|
|
281
337
|
}
|
|
338
|
+
// Cancelled before we even start the request — nothing to drain.
|
|
339
|
+
if (this.options.signal?.aborted) {
|
|
340
|
+
return this.finishCancelled();
|
|
341
|
+
}
|
|
342
|
+
const followUpSeqAtRequestStart = this.followUpSeq;
|
|
282
343
|
const stream = this.api.streamChat(this.messages, openAITools);
|
|
283
344
|
let responseContent = '';
|
|
284
345
|
let toolCalls = [];
|
|
285
|
-
//
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
this.finalizeSession();
|
|
291
|
-
return cancelledMsg;
|
|
292
|
-
}
|
|
346
|
+
// Cooperative cancellation: once aborted we stop acting on chunks but keep
|
|
347
|
+
// draining the stream to its natural end. Breaking out early would tear
|
|
348
|
+
// down the streaming socket mid-flight, which hard-crashed the process on
|
|
349
|
+
// Windows. The UI already shows the paused state immediately.
|
|
350
|
+
let cancelledDuringStream = false;
|
|
293
351
|
for await (const chunk of stream) {
|
|
294
|
-
// Check for cancellation during streaming
|
|
295
352
|
if (this.options.signal?.aborted) {
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
this.options.onResponse(cancelledMsg);
|
|
299
|
-
this.finalizeSession();
|
|
300
|
-
return cancelledMsg;
|
|
353
|
+
cancelledDuringStream = true;
|
|
354
|
+
continue;
|
|
301
355
|
}
|
|
302
356
|
if (chunk.type === 'usage' && chunk.usage) {
|
|
303
357
|
this.metrics.recordUsage(chunk.usage);
|
|
@@ -323,6 +377,10 @@ export class AgentLoop extends EventEmitter {
|
|
|
323
377
|
}
|
|
324
378
|
}
|
|
325
379
|
}
|
|
380
|
+
// Stream drained — if the user cancelled mid-stream, stop here cleanly.
|
|
381
|
+
if (cancelledDuringStream || this.options.signal?.aborted) {
|
|
382
|
+
return this.finishCancelled();
|
|
383
|
+
}
|
|
326
384
|
// Budget: catch limits reached during streaming usage accounting.
|
|
327
385
|
if (this.checkBudgetHalt()) {
|
|
328
386
|
return this.buildBudgetHaltMessage();
|
|
@@ -438,7 +496,8 @@ export class AgentLoop extends EventEmitter {
|
|
|
438
496
|
try {
|
|
439
497
|
const toolResult = await this.executeTool(tc.function.name, args);
|
|
440
498
|
const duration = Date.now() - startTime;
|
|
441
|
-
this.
|
|
499
|
+
const toolLabel = this.buildToolCallLabel(tc.function.name, args);
|
|
500
|
+
this.metrics.recordToolCallEnd(tc.function.name, toolResult.success, toolLabel, toolResult.success ? undefined : toolResult.error);
|
|
442
501
|
toolCallEvent.status = toolResult.success ? 'completed' : 'failed';
|
|
443
502
|
toolCallEvent.result = toolResult.output;
|
|
444
503
|
toolCallEvent.error = toolResult.error;
|
|
@@ -468,7 +527,8 @@ export class AgentLoop extends EventEmitter {
|
|
|
468
527
|
catch (err) {
|
|
469
528
|
const duration = Date.now() - startTime;
|
|
470
529
|
const errorMsg = err.message;
|
|
471
|
-
this.
|
|
530
|
+
const toolLabel = this.buildToolCallLabel(tc.function.name, args);
|
|
531
|
+
this.metrics.recordToolCallEnd(tc.function.name, false, toolLabel, errorMsg);
|
|
472
532
|
toolCallEvent.status = 'failed';
|
|
473
533
|
toolCallEvent.error = errorMsg;
|
|
474
534
|
toolCallEvent.durationMs = duration;
|
|
@@ -497,6 +557,11 @@ export class AgentLoop extends EventEmitter {
|
|
|
497
557
|
const fallback = 'I have completed the requested actions. What else would you like me to do?';
|
|
498
558
|
this.messages.push({ role: 'assistant', content: fallback });
|
|
499
559
|
this.options.onResponse(fallback);
|
|
560
|
+
// Check if a follow-up arrived while the API request was streaming
|
|
561
|
+
if (this.followUpSeq > followUpSeqAtRequestStart) {
|
|
562
|
+
// Follow-up received during this request — continue loop instead of finishing
|
|
563
|
+
continue;
|
|
564
|
+
}
|
|
500
565
|
this.finalizeSession();
|
|
501
566
|
const summary = this.metrics.getSummary(this.model);
|
|
502
567
|
this.options.onStreamChunk(summary);
|
|
@@ -504,6 +569,11 @@ export class AgentLoop extends EventEmitter {
|
|
|
504
569
|
}
|
|
505
570
|
this.messages.push({ role: 'assistant', content: responseContent });
|
|
506
571
|
this.options.onResponse(responseContent);
|
|
572
|
+
// Check if a follow-up arrived while this API request was streaming
|
|
573
|
+
if (this.followUpSeq > followUpSeqAtRequestStart) {
|
|
574
|
+
// Follow-up received during the stream — continue loop, skip finalization
|
|
575
|
+
continue;
|
|
576
|
+
}
|
|
507
577
|
// Output execution summary
|
|
508
578
|
this.finalizeSession();
|
|
509
579
|
const summary = this.metrics.getSummary(this.model);
|
|
@@ -512,20 +582,165 @@ export class AgentLoop extends EventEmitter {
|
|
|
512
582
|
}
|
|
513
583
|
catch (err) {
|
|
514
584
|
const error = err;
|
|
585
|
+
// If the user cancelled, treat any resulting error as a clean stop.
|
|
586
|
+
if (this.options.signal?.aborted) {
|
|
587
|
+
return this.finishCancelled();
|
|
588
|
+
}
|
|
515
589
|
this.options.onError(error);
|
|
516
590
|
throw error;
|
|
517
591
|
}
|
|
518
592
|
}
|
|
519
593
|
// Max iterations reached
|
|
520
|
-
const timeoutMsg = `Агент достиг максимального числа итераций (${this.
|
|
594
|
+
const timeoutMsg = `Агент достиг максимального числа итераций (${this.getIterationLimit()}). Задача может быть не завершена.`;
|
|
521
595
|
this.messages.push({ role: 'assistant', content: timeoutMsg });
|
|
522
596
|
this.options.onResponse(timeoutMsg);
|
|
523
597
|
this.finalizeSession();
|
|
598
|
+
const summary = this.metrics.getSummary(this.model);
|
|
599
|
+
this.options.onStreamChunk(summary);
|
|
524
600
|
return timeoutMsg;
|
|
525
601
|
}
|
|
602
|
+
/** Record a clean user-cancellation result and finalize the session. */
|
|
603
|
+
finishCancelled() {
|
|
604
|
+
const cancelledMsg = i18n.t('agentCancelled');
|
|
605
|
+
this.messages.push({ role: 'assistant', content: cancelledMsg });
|
|
606
|
+
this.options.onResponse(cancelledMsg);
|
|
607
|
+
this.finalizeSession();
|
|
608
|
+
return cancelledMsg;
|
|
609
|
+
}
|
|
610
|
+
getIterationLimit() {
|
|
611
|
+
const budgetLimit = this.options.budget?.maxIterations;
|
|
612
|
+
if (budgetLimit && budgetLimit > 0) {
|
|
613
|
+
return Math.min(this.options.maxIterations, budgetLimit);
|
|
614
|
+
}
|
|
615
|
+
return this.options.maxIterations;
|
|
616
|
+
}
|
|
617
|
+
getAutoCompactOptions() {
|
|
618
|
+
return {
|
|
619
|
+
...DEFAULT_AUTO_COMPACT,
|
|
620
|
+
...(this.options.autoCompact ?? {}),
|
|
621
|
+
};
|
|
622
|
+
}
|
|
623
|
+
async maybeAutoCompact() {
|
|
624
|
+
const compact = this.getAutoCompactOptions();
|
|
625
|
+
if (!compact.enabled)
|
|
626
|
+
return;
|
|
627
|
+
const contextPercent = this.metrics.getCurrentWindowPercent();
|
|
628
|
+
const beforeMessages = this.messages.length;
|
|
629
|
+
if (contextPercent < compact.thresholdPercent)
|
|
630
|
+
return;
|
|
631
|
+
if (beforeMessages < compact.minMessages)
|
|
632
|
+
return;
|
|
633
|
+
if (beforeMessages <= this.lastCompactedAtMessageCount + compact.keepRecentMessages)
|
|
634
|
+
return;
|
|
635
|
+
const startEvent = {
|
|
636
|
+
phase: 'start',
|
|
637
|
+
progress: 5,
|
|
638
|
+
contextPercent,
|
|
639
|
+
beforeMessages,
|
|
640
|
+
};
|
|
641
|
+
this.options.onCompactStart(startEvent);
|
|
642
|
+
this.options.onCompactProgress({ ...startEvent, phase: 'summarizing', progress: 35 });
|
|
643
|
+
try {
|
|
644
|
+
const result = await this.api.chat([
|
|
645
|
+
{
|
|
646
|
+
role: 'system',
|
|
647
|
+
content: 'Compress the conversation for continuation. Preserve concrete user goals, decisions, file paths, commands, failures, verification results, pending work, and constraints. Do not invent facts. Return concise bullet points.',
|
|
648
|
+
},
|
|
649
|
+
{
|
|
650
|
+
role: 'user',
|
|
651
|
+
content: this.buildCompactTranscript(),
|
|
652
|
+
},
|
|
653
|
+
]);
|
|
654
|
+
if (result.usage) {
|
|
655
|
+
this.metrics.recordUsage(result.usage);
|
|
656
|
+
}
|
|
657
|
+
const summary = result.content.trim() || 'Auto-compaction completed, but the summarizer returned an empty summary.';
|
|
658
|
+
this.options.onCompactProgress({
|
|
659
|
+
phase: 'replacing',
|
|
660
|
+
progress: 80,
|
|
661
|
+
contextPercent,
|
|
662
|
+
beforeMessages,
|
|
663
|
+
});
|
|
664
|
+
const systemMsg = this.messages.find(m => m.role === 'system');
|
|
665
|
+
this.messages = [
|
|
666
|
+
...(systemMsg ? [systemMsg] : []),
|
|
667
|
+
{
|
|
668
|
+
role: 'assistant',
|
|
669
|
+
content: `**Context Auto-Compacted**\n\nOriginal messages: ${beforeMessages}\nPrevious context: ${contextPercent}% of window\n\n${summary}`,
|
|
670
|
+
},
|
|
671
|
+
];
|
|
672
|
+
this.lastCompactedAtMessageCount = this.messages.length;
|
|
673
|
+
this.options.onCompactEnd({
|
|
674
|
+
phase: 'done',
|
|
675
|
+
progress: 100,
|
|
676
|
+
contextPercent,
|
|
677
|
+
beforeMessages,
|
|
678
|
+
afterMessages: this.messages.length,
|
|
679
|
+
});
|
|
680
|
+
}
|
|
681
|
+
catch (err) {
|
|
682
|
+
this.options.onCompactEnd({
|
|
683
|
+
phase: 'failed',
|
|
684
|
+
progress: 100,
|
|
685
|
+
contextPercent,
|
|
686
|
+
beforeMessages,
|
|
687
|
+
error: err.message,
|
|
688
|
+
});
|
|
689
|
+
throw err;
|
|
690
|
+
}
|
|
691
|
+
}
|
|
692
|
+
buildCompactTranscript() {
|
|
693
|
+
return this.messages
|
|
694
|
+
.filter(message => message.role !== 'system')
|
|
695
|
+
.map((message, index) => {
|
|
696
|
+
const content = typeof message.content === 'string'
|
|
697
|
+
? message.content
|
|
698
|
+
: JSON.stringify(message.content);
|
|
699
|
+
const toolCalls = message.tool_calls?.length ? ` tool_calls=${message.tool_calls.map(tc => tc.function.name).join(',')}` : '';
|
|
700
|
+
return `#${index + 1} ${message.role}${toolCalls}\n${content.slice(0, 8000)}`;
|
|
701
|
+
})
|
|
702
|
+
.join('\n\n---\n\n');
|
|
703
|
+
}
|
|
526
704
|
/**
|
|
527
|
-
*
|
|
705
|
+
* Build a short human-readable label for a tool call.
|
|
706
|
+
* Used in Execution Summary to identify which files/commands failed.
|
|
528
707
|
*/
|
|
708
|
+
buildToolCallLabel(toolName, args) {
|
|
709
|
+
try {
|
|
710
|
+
switch (toolName) {
|
|
711
|
+
case 'run_shell_command': {
|
|
712
|
+
const cmd = args.command ?? args.cmd ?? '';
|
|
713
|
+
if (typeof cmd === 'string' && cmd.length > 0) {
|
|
714
|
+
return cmd.length > 120 ? cmd.slice(0, 117) + '...' : cmd;
|
|
715
|
+
}
|
|
716
|
+
break;
|
|
717
|
+
}
|
|
718
|
+
case 'read_file':
|
|
719
|
+
case 'edit':
|
|
720
|
+
case 'write_file': {
|
|
721
|
+
const path = args.path ?? args.file_path ?? args.file ?? '';
|
|
722
|
+
if (typeof path === 'string' && path.length > 0) {
|
|
723
|
+
return path.length > 120 ? path.slice(0, 117) + '...' : path;
|
|
724
|
+
}
|
|
725
|
+
break;
|
|
726
|
+
}
|
|
727
|
+
case 'grep_search':
|
|
728
|
+
case 'glob': {
|
|
729
|
+
const pattern = args.pattern ?? '';
|
|
730
|
+
if (typeof pattern === 'string' && pattern.length > 0) {
|
|
731
|
+
return pattern.length > 120 ? pattern.slice(0, 117) + '...' : pattern;
|
|
732
|
+
}
|
|
733
|
+
break;
|
|
734
|
+
}
|
|
735
|
+
}
|
|
736
|
+
// Fallback: serialize first meaningful string value
|
|
737
|
+
const fallback = JSON.stringify(args);
|
|
738
|
+
return fallback.length > 120 ? fallback.slice(0, 117) + '...' : fallback;
|
|
739
|
+
}
|
|
740
|
+
catch {
|
|
741
|
+
return String(args);
|
|
742
|
+
}
|
|
743
|
+
}
|
|
529
744
|
/**
|
|
530
745
|
* Check if any budget limit has been exceeded (called at top of each iteration).
|
|
531
746
|
* Returns the field name that exceeded or null if all good.
|
|
@@ -633,7 +848,7 @@ export class AgentLoop extends EventEmitter {
|
|
|
633
848
|
};
|
|
634
849
|
}
|
|
635
850
|
try {
|
|
636
|
-
const result = await def.tool.execute(args);
|
|
851
|
+
const result = await def.tool.execute(args, this.options.signal);
|
|
637
852
|
return {
|
|
638
853
|
success: result.success,
|
|
639
854
|
output: result.output,
|