@semalt-ai/code 1.8.1 → 1.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +14 -1
- package/CLAUDE.md +2 -1
- package/index.js +15 -1
- package/lib/agent.js +582 -121
- package/lib/api.js +182 -25
- package/lib/commands.js +57 -80
- package/lib/config.js +32 -4
- package/lib/constants.js +51 -1
- package/lib/metrics.js +16 -3
- package/lib/permissions.js +66 -67
- package/lib/prompts.js +93 -86
- package/lib/tool_specs.js +499 -0
- package/lib/tools.js +405 -192
- package/lib/ui/ansi.js +13 -1
- package/lib/ui/chat-history.js +201 -61
- package/lib/ui/create-ui.js +116 -373
- package/lib/ui/diff.js +87 -75
- package/lib/ui/input-field.js +75 -57
- package/lib/ui/status-bar.js +53 -23
- package/lib/ui/terminal.js +58 -0
- package/lib/ui/theme.js +78 -0
- package/lib/ui/utils.js +63 -1
- package/lib/ui/writer.js +255 -0
- package/lib/ui.js +5 -0
- package/package.json +1 -1
package/lib/agent.js
CHANGED
|
@@ -2,8 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
const { logToolCall } = require('./audit');
|
|
4
4
|
const { Metrics } = require('./metrics');
|
|
5
|
-
const {
|
|
5
|
+
const { getSystemPrompt } = require('./prompts');
|
|
6
6
|
const { TAG_REGISTRY } = require('./constants');
|
|
7
|
+
const { mapInvokeToCall } = require('./tools');
|
|
8
|
+
const { UI_THEME } = require('./ui/theme');
|
|
9
|
+
const { RST } = require('./ui/ansi');
|
|
10
|
+
const { getCols: _getCols, repeatToWidth } = require('./ui/utils');
|
|
7
11
|
|
|
8
12
|
class StreamParser {
|
|
9
13
|
constructor(onToken, onTagOpen, onTagContent, onTagClose) {
|
|
@@ -40,14 +44,18 @@ class StreamParser {
|
|
|
40
44
|
const tagRaw = this.buffer.slice(1, gtIdx).trim();
|
|
41
45
|
const selfClose = tagRaw.endsWith('/');
|
|
42
46
|
const tagBody = selfClose ? tagRaw.slice(0, -1).trim() : tagRaw;
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
const
|
|
47
|
+
// Split on whitespace OR `=` so both MiniMax-style `<parameter name="x">`
|
|
48
|
+
// and Qwen3-Coder `<parameter=x>` resolve to the same tagName.
|
|
49
|
+
const delimIdx = tagBody.search(/[\s=]/);
|
|
50
|
+
const tagName = (delimIdx === -1 ? tagBody : tagBody.slice(0, delimIdx)).toLowerCase();
|
|
51
|
+
const attrStr = delimIdx === -1 ? '' : tagBody.slice(delimIdx + 1);
|
|
46
52
|
|
|
47
53
|
const attrs = {};
|
|
48
|
-
const
|
|
54
|
+
const attrReDouble = /(\w+)="([^"]*)"/g;
|
|
55
|
+
const attrReSingle = /(\w+)='([^']*)'/g;
|
|
49
56
|
let m;
|
|
50
|
-
while ((m =
|
|
57
|
+
while ((m = attrReDouble.exec(attrStr)) !== null) attrs[m[1]] = m[2];
|
|
58
|
+
while ((m = attrReSingle.exec(attrStr)) !== null) attrs[m[1]] = m[2];
|
|
51
59
|
|
|
52
60
|
this.buffer = this.buffer.slice(gtIdx + 1);
|
|
53
61
|
|
|
@@ -70,11 +78,37 @@ class StreamParser {
|
|
|
70
78
|
} else {
|
|
71
79
|
const closing = '</' + this.insideTag + '>';
|
|
72
80
|
const closeIdx = this.buffer.toLowerCase().indexOf(closing);
|
|
81
|
+
const entry = TAG_REGISTRY[this.insideTag];
|
|
82
|
+
const streamInner = entry && entry.type === 'final';
|
|
73
83
|
if (closeIdx === -1) {
|
|
74
|
-
|
|
75
|
-
|
|
84
|
+
if (streamInner) {
|
|
85
|
+
// Emit content live through onToken, but hold back any trailing
|
|
86
|
+
// substring that could be a prefix of the closing tag (chunk
|
|
87
|
+
// boundary splitting `</final_answer>` into e.g. `</fin` + `al…`).
|
|
88
|
+
const lowBuf = this.buffer.toLowerCase();
|
|
89
|
+
const lowClose = closing;
|
|
90
|
+
let safeUpTo = this.buffer.length;
|
|
91
|
+
const ltIdx = lowBuf.lastIndexOf('<');
|
|
92
|
+
if (ltIdx !== -1) {
|
|
93
|
+
const tail = lowBuf.slice(ltIdx);
|
|
94
|
+
if (lowClose.startsWith(tail)) safeUpTo = ltIdx;
|
|
95
|
+
}
|
|
96
|
+
if (safeUpTo > 0) {
|
|
97
|
+
const emit = this.buffer.slice(0, safeUpTo);
|
|
98
|
+
this.onToken(emit);
|
|
99
|
+
this.tagContent += emit;
|
|
100
|
+
this.buffer = this.buffer.slice(safeUpTo);
|
|
101
|
+
}
|
|
102
|
+
} else {
|
|
103
|
+
this.tagContent += this.buffer;
|
|
104
|
+
this.buffer = '';
|
|
105
|
+
}
|
|
76
106
|
break;
|
|
77
107
|
}
|
|
108
|
+
if (streamInner) {
|
|
109
|
+
const emit = this.buffer.slice(0, closeIdx);
|
|
110
|
+
if (emit) this.onToken(emit);
|
|
111
|
+
}
|
|
78
112
|
this.tagContent += this.buffer.slice(0, closeIdx);
|
|
79
113
|
this.buffer = this.buffer.slice(closeIdx + closing.length);
|
|
80
114
|
this.onTagContent(this.insideTag, this.tagContent);
|
|
@@ -99,7 +133,7 @@ function cleanAssistantContent(raw) {
|
|
|
99
133
|
}
|
|
100
134
|
|
|
101
135
|
for (const [tag, entry] of Object.entries(TAG_REGISTRY)) {
|
|
102
|
-
if (entry.type === 'strip') {
|
|
136
|
+
if (entry.type === 'strip' || entry.type === 'final') {
|
|
103
137
|
// Strip only the wrapper tags; keep the inner content
|
|
104
138
|
text = text.replace(new RegExp(`<${tag}[^>]*>`, 'gi'), '');
|
|
105
139
|
text = text.replace(new RegExp(`<\\/${tag}>`, 'gi'), '');
|
|
@@ -112,13 +146,142 @@ function cleanAssistantContent(raw) {
|
|
|
112
146
|
}
|
|
113
147
|
}
|
|
114
148
|
|
|
115
|
-
text = text.replace(/<\/?[a-zA-Z_][a-zA-Z0-9_]*(\s[^>]*)?>/g, '');
|
|
116
|
-
text = text.replace(/\n{2,}/g, '\n');
|
|
117
|
-
|
|
118
149
|
return text.trim();
|
|
119
150
|
}
|
|
120
151
|
|
|
121
|
-
function
|
|
152
|
+
/**
 * Rough token estimate: one token per four characters, rounded down.
 *
 * Callers in this file pass message `content` values straight through, so the
 * input is not guaranteed to be a string. Non-string values are serialized
 * before measuring instead of having `.length` read off them directly (which
 * yields NaN for numbers and an element count for arrays).
 *
 * @param {*} text - Text to measure; falsy values count as empty.
 * @returns {number} Estimated token count (integer, never NaN).
 */
function estimateTokens(text) {
  if (!text) return 0;
  if (typeof text === 'string') return Math.floor(text.length / 4);

  let serialized;
  try {
    serialized = JSON.stringify(text);
  } catch {
    // Cyclic structures / BigInt: fall through to the String() fallback.
    serialized = undefined;
  }
  // JSON.stringify returns undefined for functions and symbols.
  if (typeof serialized !== 'string') serialized = String(text);
  return Math.floor(serialized.length / 4);
}
|
|
155
|
+
|
|
156
|
+
/**
 * Classify a model reply for the debug report.
 *
 * @param {string} reply - Raw reply text from the model.
 * @param {Array} [toolCalls] - Tool calls already parsed for this reply.
 * @returns {'empty'|'tool_call'|'command'|'text'} `empty` for a missing or
 *   whitespace-only reply, `tool_call` when a tool-call wrapper tag opens
 *   anywhere in the text, `command` when parsed calls exist, else `text`.
 */
function detectFormat(reply, toolCalls) {
  const isBlank = !reply || !reply.trim();
  if (isBlank) {
    return 'empty';
  }

  // Wrapper tags take precedence over parsed calls.
  const wrapperTag = /<(minimax:tool_call|qwen:tool_call|tool_call|function_call)\b/i;
  if (wrapperTag.test(reply)) {
    return 'tool_call';
  }

  const parsedCount = toolCalls ? toolCalls.length : 0;
  return parsedCount > 0 ? 'command' : 'text';
}
|
|
162
|
+
|
|
163
|
+
/**
 * Spot known-tag names that are present in the reply but are missing the
 * attribute they need to be usable. Common culprits are attribute-required
 * tags (create_file, write_file, append_file) emitted without the `path`
 * attribute, usually because the model put nonsense like `<attrs: path=...>`
 * inside the body. Returning a specific hint lets the agent loop push a
 * corrective user message and keep going instead of silently stalling.
 *
 * Tag names are matched case-insensitively, in line with the other tag
 * regexes in this file.
 *
 * @param {string} text - Raw reply text to scan.
 * @returns {Array<{tag: string, hint: string}>} At most one issue per tag
 *   name, so the model is not spammed with the same hint per occurrence.
 *   Empty when `text` is not a non-empty string.
 */
function detectMalformedTags(text) {
  if (typeof text !== 'string' || text === '') return [];

  const PATH_REQUIRED = ['create_file', 'write_file', 'append_file'];
  const issues = [];
  for (const tag of PATH_REQUIRED) {
    const re = new RegExp(`<${tag}\\b([^>]*)>`, 'gi');
    for (const m of text.matchAll(re)) {
      const attrs = m[1] || '';
      if (/\bpath\s*=\s*['"]/.test(attrs)) continue;
      issues.push({
        tag,
        hint: `Use <${tag} path="/absolute/path">FILE CONTENT HERE</${tag}>. Put the path as a quoted attribute on the opening tag, and the actual file contents between the tags — not a nested pseudo-tag.`,
      });
      // One hint per tag is enough; stop at the first offending occurrence.
      break;
    }
  }
  return issues;
}
|
|
188
|
+
|
|
189
|
+
/**
 * Build a one-line preview of a parsed tool call for the debug report.
 *
 * @param {Array} call - Tool call tuple; index 0 is the tag name, index 1 the
 *   first argument.
 * @returns {string} `<tag> argument` with whitespace runs collapsed and the
 *   argument capped at 80 characters, just `<tag>` when there is no argument,
 *   or a fixed stall notice when `call` is missing.
 */
function previewCommand(call) {
  if (!call) {
    return 'NONE — CLIENT WILL STALL';
  }

  const label = `<${call[0] || 'unknown'}>`;
  const flattened = String(call[1] || '').replace(/\s+/g, ' ').trim();
  if (!flattened) {
    return label;
  }

  // Keep 77 characters plus the ellipsis so the preview never exceeds 80.
  const shown = flattened.length > 80 ? flattened.slice(0, 77) + '...' : flattened;
  return `${label} ${shown}`;
}
|
|
197
|
+
|
|
198
|
+
/**
 * Render a framed, colorized debug report as a single multi-line string.
 *
 * Fields read from `sections`:
 *   - title      optional header label, defaults to 'DEBUG'
 *   - iteration  value shown in the `[iteration N]` tag
 *   - blocks     array of `[sectionTitle, rows]`, rows being `[key, value]`
 *                pairs; treated as empty when absent
 *   - raw        optional raw response text (an empty string renders as
 *                "(empty)"); `rawFooter` rows are only rendered when `raw`
 *                is defined
 *   - entries    optional array of `{ title, rows?, body? }`
 *   - warnings   optional array of warning strings
 *
 * @param {object} sections - Report description, see above.
 * @returns {string} The rendered block, lines joined with '\n'.
 */
function formatDebugBlock(sections) {
  // The debug block is rendered as a tool-output message in the TUI. Chat
  // history indents output by 5 cols; account for that so the frame still
  // reaches the visible right edge instead of wrapping.
  const totalW = Math.max(40, _getCols());
  const frameW = Math.max(20, totalW - 7);
  const H = UI_THEME.muted;     // frame glyphs
  const L = UI_THEME.info;      // block label ("DEBUG TOOL RESULTS")
  const S = UI_THEME.subtle;    // iteration tag, secondary text
  const K = UI_THEME.accent;    // section-header bullets ("▸ SUMMARY")
  const W = UI_THEME.warning;   // warning markers

  const header = sections.title || 'DEBUG';
  const iter = `[iteration ${sections.iteration}]`;

  const out = [];
  // Top frame: " LABEL [iteration N] ═════… " — fills to width.
  const headPrefix = `${H}══ ${RST}${L}${header}${RST} ${S}${iter}${RST} `;
  // "══ " + label + " " + iter + " "
  // NOTE(review): confirm the leading 4 against the visible width of the
  // frame prefix; color escapes are excluded from this count.
  const headVisible = 4 + header.length + 1 + iter.length + 1;
  out.push(`${headPrefix}${H}${repeatToWidth('═', frameW, headVisible)}${RST}`);

  const pushSection = (title) => out.push(`${K}▸ ${RST}${L}${title}${RST}`);

  // Shared renderer for key/value rows: keys are padded to the longest key in
  // the group so values line up; null/undefined values render as a dash.
  const pushRows = (rows) => {
    const width = Math.max(...rows.map((r) => r[0].length));
    for (const [k, v] of rows) {
      const val = (v === undefined || v === null) ? '—' : String(v);
      out.push(` ${S}${k.padEnd(width + 2)}${RST}${val}`);
    }
  };

  for (const [title, rows] of sections.blocks || []) {
    pushSection(title);
    pushRows(rows);
    out.push('');
  }

  if (sections.raw !== undefined) {
    pushSection('RAW RESPONSE');
    out.push(sections.raw ? sections.raw : `${S}(empty)${RST}`);
    out.push('');
    if (sections.rawFooter && sections.rawFooter.length) {
      pushSection('STREAM FOOTER');
      pushRows(sections.rawFooter);
      out.push('');
    }
  }

  if (sections.entries && sections.entries.length) {
    for (const entry of sections.entries) {
      pushSection(entry.title);
      if (entry.rows) pushRows(entry.rows);
      if (entry.body !== undefined) {
        const body = entry.body === '' ? `${S}(empty)${RST}` : entry.body;
        for (const line of String(body).split('\n')) out.push(' ' + line);
      }
      out.push('');
    }
  }

  if (sections.warnings && sections.warnings.length) {
    pushSection('WARNINGS');
    for (const w of sections.warnings) out.push(` ${W}⚠ ${w}${RST}`);
    out.push('');
  }

  // Bottom frame: plain full-width rule in muted.
  out.push(`${H}${repeatToWidth('═', frameW)}${RST}`);
  return out.join('\n');
}
|
|
270
|
+
|
|
271
|
+
/**
 * Cap a piece of text for display in a debug block.
 *
 * The text is first limited to `maxChars` characters, then to `maxLines`
 * lines. A single trailing notice reports what was dropped. The notice is
 * appended only after both cuts, so it is never counted as a content line and
 * is never itself cut off.
 *
 * @param {*} text - Value to display; null/undefined yield ''.
 * @param {number} [maxLines=40] - Maximum number of content lines kept.
 * @param {number} [maxChars=2000] - Maximum number of characters kept.
 * @returns {string} The original text when within both limits, otherwise the
 *   kept prefix followed by a `… [truncated, …]` line.
 */
function truncateForDebug(text, maxLines = 40, maxChars = 2000) {
  if (text === undefined || text === null) return '';
  const full = String(text);

  const charCut = full.length > maxChars;
  let kept = charCut ? full.slice(0, maxChars) : full;

  const keptLines = kept.split('\n');
  const lineCut = keptLines.length > maxLines;
  if (lineCut) kept = keptLines.slice(0, maxLines).join('\n');

  if (!charCut && !lineCut) return full;

  // Counts are always measured against the original text.
  const hiddenLines = full.split('\n').length - maxLines;
  if (charCut && lineCut) {
    return `${kept}\n… [truncated, ${hiddenLines} more lines, ${full.length - kept.length} more chars]`;
  }
  if (lineCut) {
    return `${kept}\n… [truncated, ${hiddenLines} more lines]`;
  }
  return `${kept}\n… [truncated, ${full.length - maxChars} more chars]`;
}
|
|
283
|
+
|
|
284
|
+
function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agentExecFile, ui, getConfig }) {
|
|
122
285
|
const { BOLD, FG_DARK, FG_GRAY, FG_TEAL, FG_YELLOW, RST, THEME, getCols } = ui;
|
|
123
286
|
|
|
124
287
|
function formatFileResult(call, result) {
|
|
@@ -140,20 +303,8 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
|
|
|
140
303
|
case 'file_stat':
|
|
141
304
|
return `Stat ${result.path}: size=${result.size_kb} KB, mtime=${result.mtime}, type=${result.type}, mode=${result.mode}`;
|
|
142
305
|
case 'http_get': {
|
|
143
|
-
if (result.chunked) {
|
|
144
|
-
return `HTTP GET ${args[0]} (${result.status_code}) [Part 1/${result.total_parts}]:\n${result.body}\n\n[Response is large and was split into ${result.total_parts} parts. Use <http_get_next key="${args[0]}"/> to retrieve the next part.]`;
|
|
145
|
-
}
|
|
146
306
|
return `HTTP GET ${args[0]} (${result.status_code}):\n${result.body}`;
|
|
147
307
|
}
|
|
148
|
-
case 'http_get_next': {
|
|
149
|
-
if (result.done && !result.body) {
|
|
150
|
-
return `http_get_next "${args[0]}": No more content available.`;
|
|
151
|
-
}
|
|
152
|
-
const more = result.done
|
|
153
|
-
? ' [Final part]'
|
|
154
|
-
: `\n\n[Use <http_get_next key="${args[0]}"/> to retrieve part ${result.part + 1}/${result.total_parts}.]`;
|
|
155
|
-
return `HTTP content "${args[0]}" [Part ${result.part}/${result.total_parts}]:\n${result.body}${more}`;
|
|
156
|
-
}
|
|
157
308
|
case 'ask_user':
|
|
158
309
|
return `User answered "${result.question}": ${result.answer}`;
|
|
159
310
|
case 'store_memory':
|
|
@@ -257,10 +408,6 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
|
|
|
257
408
|
const raw = attrs.raw || '';
|
|
258
409
|
return formatFileResult(['http_get', url, raw], await agentExecFile('http_get', url, raw));
|
|
259
410
|
}
|
|
260
|
-
case 'http_get_next': {
|
|
261
|
-
const key = attrs.key || content;
|
|
262
|
-
return formatFileResult(['http_get_next', key], await agentExecFile('http_get_next', key));
|
|
263
|
-
}
|
|
264
411
|
case 'ask_user': {
|
|
265
412
|
const q = attrs.question || content;
|
|
266
413
|
return formatFileResult(['ask_user', q], await agentExecFile('ask_user', q));
|
|
@@ -295,9 +442,9 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
|
|
|
295
442
|
return;
|
|
296
443
|
}
|
|
297
444
|
|
|
298
|
-
if (entry.type === 'strip') return;
|
|
445
|
+
if (entry.type === 'strip' || entry.type === 'final') return;
|
|
299
446
|
|
|
300
|
-
// Tool execution happens in the toolCalls loop after streaming; handleTag only handles visual/strip.
|
|
447
|
+
// Tool execution happens in the toolCalls loop after streaming; handleTag only handles visual/strip/final.
|
|
301
448
|
}
|
|
302
449
|
|
|
303
450
|
async function runAgentLoop(messages, model, maxIterations = Infinity, tokenLimit = null, opts = {}) {
|
|
@@ -312,9 +459,32 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
|
|
|
312
459
|
const isAborted = getAbortFlag || (() => false);
|
|
313
460
|
const cb = callbacks;
|
|
314
461
|
const metrics = new Metrics(tokenLimit);
|
|
315
|
-
const activeSystemPrompt = overrideSystemPrompt !== null ? overrideSystemPrompt : SYSTEM_PROMPT;
|
|
316
462
|
const mode = overrideMode || 'system_role';
|
|
317
463
|
|
|
464
|
+
// Route debug blocks to the UI callback when present (interactive TUI mode
|
|
465
|
+
// overwrites stderr with redraws, losing the output). Fall back to stderr
|
|
466
|
+
// for one-shot/non-TTY flows where there's no UI to host the block.
|
|
467
|
+
const emitDebug = (block) => {
|
|
468
|
+
if (typeof cb.onDebug === 'function') cb.onDebug(block);
|
|
469
|
+
else process.stderr.write('\n' + block + '\n');
|
|
470
|
+
};
|
|
471
|
+
|
|
472
|
+
// Resolve native_tools from the active profile (matched by api_base+model).
|
|
473
|
+
// Fallback to true if no matching profile — mirrors config-normalization default.
|
|
474
|
+
const _cfg = typeof getConfig === 'function' ? getConfig() : {};
|
|
475
|
+
const _profile = Array.isArray(_cfg.models)
|
|
476
|
+
? _cfg.models.find((p) => p && p.api_base === _cfg.api_base && p.model === model)
|
|
477
|
+
: null;
|
|
478
|
+
const nativeTools = _profile && _profile.native_tools === false ? false : true;
|
|
479
|
+
|
|
480
|
+
const activeSystemPrompt = overrideSystemPrompt !== null ? overrideSystemPrompt : getSystemPrompt(nativeTools);
|
|
481
|
+
|
|
482
|
+
// Response contract: every model response must end with a tool call or
|
|
483
|
+
// <final_answer>...</final_answer>. Anything else is degraded — push a
|
|
484
|
+
// synthetic nudge and retry, capped to prevent runaway loops.
|
|
485
|
+
const MAX_DEGRADED_RETRIES = 2;
|
|
486
|
+
let degradedRetries = 0;
|
|
487
|
+
|
|
318
488
|
for (let iteration = 0; iteration < maxIterations; iteration++) {
|
|
319
489
|
if (isAborted()) break;
|
|
320
490
|
const linePrefix = `${FG_TEAL}${BOLD}◆ ${RST}`;
|
|
@@ -366,49 +536,96 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
|
|
|
366
536
|
}
|
|
367
537
|
: null;
|
|
368
538
|
|
|
369
|
-
if (debug) {
|
|
370
|
-
const header = `\n───── messages sent to agent (iteration ${iteration + 1}) ─────\n`;
|
|
371
|
-
const footer = `\n───── end messages ─────\n`;
|
|
372
|
-
process.stderr.write(header + JSON.stringify(messagesWithSystem, null, 2) + footer);
|
|
373
|
-
}
|
|
374
|
-
|
|
375
539
|
const MAX_RETRIES = 3;
|
|
540
|
+
const RETRYABLE_STATUS = new Set([408, 425, 429, 500, 502, 503, 504]);
|
|
541
|
+
const NON_RETRYABLE_STATUS = new Set([400, 401, 403, 404, 413, 422]);
|
|
376
542
|
let result = null;
|
|
377
543
|
let lastApiErr = null;
|
|
378
544
|
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
545
|
+
// AbortController per iteration: watcher polls isAborted() every 50ms
|
|
546
|
+
// and flips controller.abort() as soon as the flag flips.
|
|
547
|
+
const controller = new AbortController();
|
|
548
|
+
const abortWatcher = setInterval(() => {
|
|
549
|
+
if (isAborted() && !controller.signal.aborted) controller.abort();
|
|
550
|
+
}, 50);
|
|
551
|
+
|
|
552
|
+
try {
|
|
553
|
+
for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
|
|
554
|
+
if (attempt === 1) {
|
|
555
|
+
callbacks.onRequestSent?.();
|
|
556
|
+
}
|
|
557
|
+
try {
|
|
558
|
+
result = await chatStream(messagesWithSystem, {
|
|
559
|
+
model,
|
|
560
|
+
nativeTools,
|
|
561
|
+
linePrefix: wrappedOnToken ? '' : linePrefix,
|
|
562
|
+
showThink,
|
|
563
|
+
onToken: wrappedOnToken,
|
|
564
|
+
silent: !!wrappedOnToken,
|
|
565
|
+
signal: controller.signal,
|
|
566
|
+
onTrim: (info) => {
|
|
567
|
+
// Setter (rather than re-reading config per iteration) keeps
|
|
568
|
+
// the Metrics instance authoritative: a 400-overflow discovery
|
|
569
|
+
// mid-loop immediately updates 85%-warning thresholds and the
|
|
570
|
+
// status bar without threading config access into agent.js.
|
|
571
|
+
if (info && info.reason === 'overflow-400' && typeof info.limit === 'number' && info.limit > 0) {
|
|
572
|
+
metrics.setModelTokenLimit(info.limit);
|
|
573
|
+
}
|
|
574
|
+
if (cb.onError) {
|
|
575
|
+
cb.onError({
|
|
576
|
+
message: `Context trimmed (${info.reason}): ${info.dropped} message(s) dropped, kept ~${info.keptTokens} tokens (limit ${info.limit}).`,
|
|
577
|
+
isWarning: true,
|
|
578
|
+
});
|
|
579
|
+
}
|
|
580
|
+
},
|
|
581
|
+
});
|
|
582
|
+
lastApiErr = null;
|
|
583
|
+
break;
|
|
584
|
+
} catch (err) {
|
|
585
|
+
lastApiErr = err;
|
|
586
|
+
if (debug) {
|
|
587
|
+
const status = err.statusCode ? `HTTP ${err.statusCode}` : 'network error';
|
|
588
|
+
const body = err.rawBody !== undefined ? err.rawBody : (err.stack || err.message || String(err));
|
|
589
|
+
const block = formatDebugBlock({
|
|
590
|
+
iteration: iteration + 1,
|
|
591
|
+
blocks: [
|
|
592
|
+
['REQUEST', [
|
|
593
|
+
['model:', model],
|
|
594
|
+
['endpoint:', err.endpoint || '(unknown)'],
|
|
595
|
+
['timestamp:', new Date().toISOString()],
|
|
596
|
+
['native_tools:', nativeTools],
|
|
597
|
+
['attempt:', `${attempt}/${MAX_RETRIES}`],
|
|
598
|
+
]],
|
|
599
|
+
['RESPONSE', [
|
|
600
|
+
['status:', status],
|
|
601
|
+
['detail:', err.detail || ''],
|
|
602
|
+
]],
|
|
603
|
+
],
|
|
604
|
+
raw: body || '(empty body)',
|
|
605
|
+
warnings: [`HTTP error on attempt ${attempt}/${MAX_RETRIES}: ${err.message}`],
|
|
606
|
+
});
|
|
607
|
+
emitDebug(block);
|
|
608
|
+
}
|
|
609
|
+
const sc = err.statusCode;
|
|
610
|
+
const retryable = !sc || RETRYABLE_STATUS.has(sc);
|
|
611
|
+
if (!retryable || NON_RETRYABLE_STATUS.has(sc)) break;
|
|
612
|
+
if (attempt >= MAX_RETRIES) break;
|
|
613
|
+
// Backoff: base 1000ms doubling (1s, 2s, 4s). For 429, honor
|
|
614
|
+
// Retry-After header when it's a plausible seconds value.
|
|
615
|
+
let delayMs = 1000 * Math.pow(2, attempt - 1);
|
|
616
|
+
if (sc === 429) {
|
|
617
|
+
const ra = err.responseHeaders && err.responseHeaders['retry-after'];
|
|
618
|
+
const raNum = ra !== undefined ? Number(ra) : NaN;
|
|
619
|
+
if (Number.isFinite(raNum) && raNum >= 0 && raNum <= 30) {
|
|
620
|
+
delayMs = Math.round(raNum * 1000);
|
|
621
|
+
}
|
|
622
|
+
}
|
|
623
|
+
cb.onRetry?.(attempt + 1, MAX_RETRIES);
|
|
624
|
+
await new Promise((r) => setTimeout(r, delayMs));
|
|
410
625
|
}
|
|
411
626
|
}
|
|
627
|
+
} finally {
|
|
628
|
+
clearInterval(abortWatcher);
|
|
412
629
|
}
|
|
413
630
|
|
|
414
631
|
if (lastApiErr) {
|
|
@@ -420,26 +637,25 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
|
|
|
420
637
|
const usage = result ? result.usage : null;
|
|
421
638
|
metrics.endTurn(usage, model);
|
|
422
639
|
|
|
423
|
-
if (debug) {
|
|
424
|
-
const header = `\n───── raw ai response (iteration ${iteration + 1}) ─────\n`;
|
|
425
|
-
const footer = `\n───── end raw response ─────\n`;
|
|
426
|
-
process.stderr.write(header + (reply || '(empty)') + footer);
|
|
427
|
-
}
|
|
428
|
-
|
|
429
640
|
if (cb.onMetricsUpdate) {
|
|
430
641
|
cb.onMetricsUpdate({
|
|
431
642
|
totalTokens: metrics.totalTokens(),
|
|
432
643
|
contextTokens: metrics.contextTokens(),
|
|
433
644
|
turns: metrics.turns.length,
|
|
645
|
+
tokenLimit: metrics.tokenLimitStatus(),
|
|
434
646
|
});
|
|
435
647
|
}
|
|
436
648
|
|
|
437
649
|
const limitStatus = metrics.tokenLimitStatus();
|
|
438
|
-
if (limitStatus !== null && limitStatus.pct >= 85) {
|
|
650
|
+
if (limitStatus !== null && limitStatus.pct !== null && limitStatus.pct >= 85) {
|
|
439
651
|
const warnMsg = `Context at ${limitStatus.pct}% of limit (${limitStatus.used}/${limitStatus.limit} tokens). Consider /compact.`;
|
|
440
652
|
if (cb.onError) {
|
|
441
653
|
cb.onError({ message: warnMsg, isWarning: true });
|
|
442
654
|
} else {
|
|
655
|
+
// Non-TUI fallback (cb.onError is unset only for one-shot CLI
|
|
656
|
+
// commands like `cmdCode`, which don't run the shared live-region
|
|
657
|
+
// writer). Direct stdout write is safe here: no status-bar timer
|
|
658
|
+
// or bubble renderer is competing for stdout.
|
|
443
659
|
process.stdout.write(
|
|
444
660
|
`\n ${THEME.warn}⚠ ${warnMsg}${THEME.reset}\n`
|
|
445
661
|
);
|
|
@@ -447,6 +663,32 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
|
|
|
447
663
|
}
|
|
448
664
|
|
|
449
665
|
if (!reply) {
|
|
666
|
+
if (debug && result) {
|
|
667
|
+
const block = formatDebugBlock({
|
|
668
|
+
iteration: iteration + 1,
|
|
669
|
+
blocks: [
|
|
670
|
+
['REQUEST', [
|
|
671
|
+
['model:', result.request?.model || model],
|
|
672
|
+
['endpoint:', result.endpoint || '(unknown)'],
|
|
673
|
+
['timestamp:', new Date().toISOString()],
|
|
674
|
+
['native_tools:', result.request?.native_tools ?? nativeTools],
|
|
675
|
+
]],
|
|
676
|
+
['RESPONSE', [
|
|
677
|
+
['finish_reason:', result.finish_reason || '(unknown)'],
|
|
678
|
+
['completion_tokens:', usage?.completion_tokens ?? 0],
|
|
679
|
+
['latency_ms:', result.elapsed_ms ?? '?'],
|
|
680
|
+
]],
|
|
681
|
+
['PARSED', [
|
|
682
|
+
['detected_format:', 'empty'],
|
|
683
|
+
['commands_found:', 0],
|
|
684
|
+
['first_command:', 'NONE — CLIENT WILL STALL'],
|
|
685
|
+
]],
|
|
686
|
+
],
|
|
687
|
+
raw: '',
|
|
688
|
+
warnings: ['Agent returned an empty response — connection to model may have dropped'],
|
|
689
|
+
});
|
|
690
|
+
emitDebug(block);
|
|
691
|
+
}
|
|
450
692
|
// Empty reply from the model — stream resolved with no content and no
|
|
451
693
|
// tool_calls. Most common causes: server-side disconnect mid-stream,
|
|
452
694
|
// context-window overflow that slipped past the 400/413 handler, or a
|
|
@@ -459,68 +701,208 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
|
|
|
459
701
|
break;
|
|
460
702
|
}
|
|
461
703
|
|
|
462
|
-
|
|
704
|
+
// Native function-calling path: the model returned structured tool_calls.
|
|
705
|
+
// Convert them to the internal [action, ...args] tuple shape so downstream
|
|
706
|
+
// dispatch is unchanged, and remember the OpenAI id per call so results
|
|
707
|
+
// can be rooted back to their tool_call on the next turn.
|
|
708
|
+
const nativeToolCalls = Array.isArray(result?.toolCalls) ? result.toolCalls : [];
|
|
709
|
+
let toolCalls;
|
|
710
|
+
let nativeToolCallIds = [];
|
|
711
|
+
if (nativeToolCalls.length > 0) {
|
|
712
|
+
toolCalls = [];
|
|
713
|
+
for (const tc of nativeToolCalls) {
|
|
714
|
+
let args;
|
|
715
|
+
try {
|
|
716
|
+
args = tc.function?.arguments ? JSON.parse(tc.function.arguments) : {};
|
|
717
|
+
} catch (err) {
|
|
718
|
+
if (cb.onError) cb.onError({ message: `Failed to parse tool_call arguments for ${tc.function?.name || '(unknown)'}: ${err.message}`, isWarning: true });
|
|
719
|
+
continue;
|
|
720
|
+
}
|
|
721
|
+
const call = mapInvokeToCall(tc.function?.name, args);
|
|
722
|
+
if (call) {
|
|
723
|
+
toolCalls.push(call);
|
|
724
|
+
nativeToolCallIds.push(tc.id);
|
|
725
|
+
}
|
|
726
|
+
}
|
|
727
|
+
} else {
|
|
728
|
+
toolCalls = extractToolCalls(reply, { model });
|
|
729
|
+
}
|
|
730
|
+
const isNativeCall = nativeToolCalls.length > 0;
|
|
463
731
|
const cleanedReply = cleanAssistantContent(reply);
|
|
732
|
+
// Protocol contract: a valid response ends with a tool call OR a
|
|
733
|
+
// <final_answer>...</final_answer> block. Anything else is degraded.
|
|
734
|
+
const hasFinal = /<final_answer\b[\s\S]*?<\/final_answer>/i.test(reply);
|
|
735
|
+
|
|
736
|
+
if (debug && result) {
|
|
737
|
+
const lastUserMsg = [...messagesWithSystem].reverse().find((m) => m.role === 'user');
|
|
738
|
+
const sysMsg = messagesWithSystem.find((m) => m.role === 'system');
|
|
739
|
+
const systemPromptTok = estimateTokens(sysMsg ? sysMsg.content : activeSystemPrompt);
|
|
740
|
+
const currentInputTok = estimateTokens(lastUserMsg ? lastUserMsg.content : '');
|
|
741
|
+
const historyTok = messagesWithSystem.reduce((sum, m) => {
|
|
742
|
+
if (m === sysMsg || m === lastUserMsg) return sum;
|
|
743
|
+
return sum + estimateTokens(m.content || '');
|
|
744
|
+
}, 0);
|
|
745
|
+
const promptTokens = usage && usage.prompt_tokens != null
|
|
746
|
+
? usage.prompt_tokens
|
|
747
|
+
: systemPromptTok + historyTok + currentInputTok;
|
|
748
|
+
const completionTokens = usage && usage.completion_tokens != null
|
|
749
|
+
? usage.completion_tokens
|
|
750
|
+
: estimateTokens(reply);
|
|
751
|
+
const thinkingTokens = result.reasoning ? estimateTokens(result.reasoning) : 0;
|
|
752
|
+
const visibleTokens = Math.max(completionTokens - thinkingTokens, 0);
|
|
753
|
+
const contextLimit = tokenLimit || null;
|
|
754
|
+
const ctxPct = contextLimit ? Math.round((promptTokens / contextLimit) * 100) : null;
|
|
755
|
+
const detected = detectFormat(reply, toolCalls);
|
|
756
|
+
const firstCmd = toolCalls.length > 0 ? previewCommand(toolCalls[0]) : previewCommand(null);
|
|
757
|
+
const toolTags = Object.entries(TAG_REGISTRY)
|
|
758
|
+
.filter(([, e]) => e.type === 'tool')
|
|
759
|
+
.map(([t]) => t);
|
|
464
760
|
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
// unclosed tag + its trailing content, so cleanedReply looks
|
|
470
|
-
// legitimate (just the planning preamble) and extractToolCalls finds
|
|
471
|
-
// zero calls — the loop would break silently and the user sees the
|
|
472
|
-
// planning text followed by nothing. Surface it so the user can retry,
|
|
473
|
-
// shorten the request, or bump max_tokens.
|
|
474
|
-
let truncatedTag = null;
|
|
475
|
-
for (const [tag, entry] of Object.entries(TAG_REGISTRY)) {
|
|
476
|
-
if (entry.type !== 'tool') continue;
|
|
477
|
-
let opens = 0;
|
|
478
|
-
for (const m of reply.matchAll(new RegExp(`<${tag}([^>]*)>`, 'gi'))) {
|
|
479
|
-
// Skip self-closing (`<tag .../>`) — they don't need a matching close.
|
|
480
|
-
if (!m[1].trimEnd().endsWith('/')) opens++;
|
|
761
|
+
const warnings = [];
|
|
762
|
+
if (result.finish_reason === 'length') warnings.push('finish_reason=length → response truncated, increase max_tokens');
|
|
763
|
+
if (detected === 'tool_call' && toolCalls.length === 0) {
|
|
764
|
+
warnings.push('commands_found=0 → agent emitted no command, client will stall');
|
|
481
765
|
}
|
|
482
|
-
if (
|
|
483
|
-
|
|
484
|
-
|
|
766
|
+
if (ctxPct !== null && ctxPct > 80) warnings.push(`context_used=${ctxPct}% → approaching context limit`);
|
|
767
|
+
|
|
768
|
+
const block = formatDebugBlock({
|
|
769
|
+
iteration: iteration + 1,
|
|
770
|
+
blocks: [
|
|
771
|
+
['REQUEST', [
|
|
772
|
+
['model:', result.request?.model || model],
|
|
773
|
+
['endpoint:', result.endpoint || '(unknown)'],
|
|
774
|
+
['timestamp:', new Date().toISOString()],
|
|
775
|
+
['native_tools:', result.request?.native_tools ?? nativeTools],
|
|
776
|
+
]],
|
|
777
|
+
['CONTEXT', [
|
|
778
|
+
['total_messages:', messagesWithSystem.length],
|
|
779
|
+
['system_prompt_tok:', systemPromptTok],
|
|
780
|
+
['history_tok:', historyTok],
|
|
781
|
+
['current_input_tok:', currentInputTok],
|
|
782
|
+
['context_used:', contextLimit
|
|
783
|
+
? `${promptTokens} / ${contextLimit} (${ctxPct}%)`
|
|
784
|
+
: `${promptTokens} / unknown`],
|
|
785
|
+
]],
|
|
786
|
+
['PARAMETERS', [
|
|
787
|
+
['max_tokens:', result.request?.max_tokens ?? '(default)'],
|
|
788
|
+
['temperature:', result.request?.temperature ?? '(default)'],
|
|
789
|
+
['stop_sequences:', JSON.stringify(result.request?.stop || [])],
|
|
790
|
+
['reasoning_effort:', '(n/a)'],
|
|
791
|
+
['tools_enabled:', `${toolTags.length} XML tags (via system prompt)`],
|
|
792
|
+
]],
|
|
793
|
+
['RESPONSE', [
|
|
794
|
+
['finish_reason:', result.finish_reason || '(unknown)'],
|
|
795
|
+
['completion_tokens:', completionTokens],
|
|
796
|
+
['thinking_tokens:', thinkingTokens],
|
|
797
|
+
['visible_tokens:', visibleTokens],
|
|
798
|
+
['latency_ms:', result.elapsed_ms ?? '?'],
|
|
799
|
+
]],
|
|
800
|
+
['PARSED', [
|
|
801
|
+
['detected_format:', detected],
|
|
802
|
+
['commands_found:', toolCalls.length],
|
|
803
|
+
['first_command:', firstCmd],
|
|
804
|
+
]],
|
|
805
|
+
],
|
|
806
|
+
raw: reply || '',
|
|
807
|
+
rawFooter: [
|
|
808
|
+
['finish_reason:', result.finish_reason || '(unknown)'],
|
|
809
|
+
['total_tokens:', result.usage_from_provider && result.usage
|
|
810
|
+
? (result.usage.prompt_tokens || 0) + (result.usage.completion_tokens || 0)
|
|
811
|
+
: '(no usage)'],
|
|
812
|
+
['content_chars:', (result.content || '').length],
|
|
813
|
+
['reasoning_chars:', (result.reasoning_details || '').length],
|
|
814
|
+
['tool_calls_seen:', result.tool_calls_count > 0 ? `yes (${result.tool_calls_count})` : 'no'],
|
|
815
|
+
['native_mode:', isNativeCall ? `yes (${nativeToolCalls.length} call${nativeToolCalls.length === 1 ? '' : 's'})` : 'no'],
|
|
816
|
+
],
|
|
817
|
+
warnings,
|
|
818
|
+
});
|
|
819
|
+
emitDebug(block);
|
|
485
820
|
}
|
|
821
|
+
|
|
822
|
+
// Detect mid-tag truncation: StreamParser tracks `insideTag` for the
|
|
823
|
+
// currently-unclosed tag. If it's still set after the stream settles,
|
|
824
|
+
// the response was cut off while inside a tool tag (hit max_tokens or
|
|
825
|
+
// a server-side cutoff). cleanAssistantContent strips the unclosed
|
|
826
|
+
// tag + trailing content, so extractToolCalls would find nothing and
|
|
827
|
+
// the loop would break silently. Surface it.
|
|
828
|
+
//
|
|
829
|
+
// When `cb.onToken` is unset (non-streaming UI), the parser was never
|
|
830
|
+
// fed — push the final reply through it once so `insideTag` reflects
|
|
831
|
+
// the terminal state.
|
|
832
|
+
if (!wrappedOnToken && reply) parser.push(reply);
|
|
833
|
+
const truncatedTag = parser.insideTag && TAG_REGISTRY[parser.insideTag]?.type === 'tool'
|
|
834
|
+
? parser.insideTag
|
|
835
|
+
: null;
|
|
486
836
|
if (truncatedTag && cb.onError) {
|
|
487
837
|
cb.onError({ message: `Response truncated mid-<${truncatedTag}> tag — likely hit max_tokens or a server-side cutoff. Try again, shorten the request, or raise the model's max_tokens.`, isWarning: true });
|
|
488
838
|
}
|
|
489
839
|
|
|
490
|
-
|
|
840
|
+
const assistantMsg = { role: 'assistant', content: cleanedReply };
|
|
841
|
+
if (isNativeCall) assistantMsg.tool_calls = nativeToolCalls;
|
|
842
|
+
messages.push(assistantMsg);
|
|
491
843
|
// When showThink is off and the turn has tool calls, suppress the text bubble —
|
|
492
844
|
// pre-tool reasoning is noise, tool result bubbles already convey what happened.
|
|
493
845
|
const displayReply = (!showThink && toolCalls.length > 0) ? '' : cleanedReply;
|
|
494
846
|
if (cb.onAssistantMessage) cb.onAssistantMessage(displayReply);
|
|
495
847
|
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
848
|
+
if (toolCalls.length === 0) {
|
|
849
|
+
// Detect malformed known-tag syntax (e.g. <create_file> with no path
|
|
850
|
+
// attribute, usually paired with nonsense like <attrs: path=...> inside
|
|
851
|
+
// the body). Push a corrective feedback message and keep looping so
|
|
852
|
+
// the model self-corrects instead of silently stalling on the user.
|
|
853
|
+
const malformed = detectMalformedTags(reply);
|
|
854
|
+
if (malformed.length > 0) {
|
|
855
|
+
const hintBlock = malformed.map((m) => `- <${m.tag}>: ${m.hint}`).join('\n');
|
|
856
|
+
const summary = malformed.map((m) => `<${m.tag}>`).join(', ');
|
|
857
|
+
if (cb.onError) {
|
|
858
|
+
cb.onError({ message: `Detected malformed tool tag(s): ${summary}. Asking the model to retry with correct syntax.`, isWarning: true });
|
|
859
|
+
}
|
|
860
|
+
messages.push({
|
|
861
|
+
role: 'user',
|
|
862
|
+
content: `Your last response contained malformed tool tags that the parser could not execute:\n\n${hintBlock}\n\nRe-emit the tool calls using the exact syntax above. Do not nest pseudo-tags like <attrs: ...> inside the body.`,
|
|
863
|
+
});
|
|
864
|
+
continue;
|
|
502
865
|
}
|
|
503
|
-
break;
|
|
504
|
-
}
|
|
505
866
|
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
867
|
+
if (hasFinal) {
|
|
868
|
+
// Model declared it is done — honor the protocol and terminate.
|
|
869
|
+
// An empty <final_answer></final_answer> is the model's choice;
|
|
870
|
+
// we don't police content.
|
|
871
|
+
degradedRetries = 0;
|
|
872
|
+
break;
|
|
873
|
+
}
|
|
874
|
+
|
|
875
|
+
// Protocol violation: neither a tool call nor a <final_answer>. Nudge
|
|
876
|
+
// the model to restate in-protocol, capped to prevent runaway loops.
|
|
877
|
+
if (degradedRetries >= MAX_DEGRADED_RETRIES) {
|
|
511
878
|
if (cb.onError) {
|
|
512
|
-
cb.onError({ message:
|
|
879
|
+
cb.onError({ message: `Agent violated the response contract after ${MAX_DEGRADED_RETRIES} retries — no tool call or <final_answer> block emitted. Stopping.`, isWarning: false });
|
|
513
880
|
}
|
|
881
|
+
break;
|
|
514
882
|
}
|
|
515
|
-
|
|
883
|
+
degradedRetries++;
|
|
884
|
+
if (cb.onError) {
|
|
885
|
+
cb.onError({ message: 'Response missing tool call or <final_answer> — nudging model to retry in-protocol.', isWarning: true });
|
|
886
|
+
}
|
|
887
|
+
messages.push({
|
|
888
|
+
role: 'user',
|
|
889
|
+
content: 'Your previous response contained neither a tool call nor a <final_answer> block, which violates the response contract. If you need to perform an action, emit the appropriate tool tag now. If you are done, wrap your reply in <final_answer>...</final_answer>. Do not describe intended actions in prose.',
|
|
890
|
+
});
|
|
891
|
+
continue;
|
|
516
892
|
}
|
|
893
|
+
// Non-degraded response (has tool calls) — reset the retry counter.
|
|
894
|
+
degradedRetries = 0;
|
|
517
895
|
if (isAborted()) break;
|
|
518
896
|
|
|
519
897
|
if (!cb.onToolStart) {
|
|
898
|
+
// Non-TUI fallback: only one-shot CLI commands leave cb.onToolStart
|
|
899
|
+
// unset. The shared live-region writer isn't running, so a direct
|
|
900
|
+
// write here can't interleave with a bubble/status redraw.
|
|
520
901
|
process.stdout.write(`\n ${FG_TEAL}◆${RST} ${FG_GRAY}Found ${toolCalls.length} action(s) to execute${RST}\n`);
|
|
521
902
|
}
|
|
522
903
|
|
|
523
904
|
const results = [];
|
|
905
|
+
const debugEntries = debug ? [] : null;
|
|
524
906
|
let aborted = false;
|
|
525
907
|
|
|
526
908
|
for (const call of toolCalls) {
|
|
@@ -540,6 +922,7 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
|
|
|
540
922
|
const resultStr = `Command \`${arg}\`: Permission denied by user.`;
|
|
541
923
|
if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms);
|
|
542
924
|
results.push(resultStr);
|
|
925
|
+
if (debugEntries) debugEntries.push({ tag, call, ms, status: 'denied', exitCode: null, result: resultStr });
|
|
543
926
|
aborted = true;
|
|
544
927
|
break;
|
|
545
928
|
} else {
|
|
@@ -548,6 +931,14 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
|
|
|
548
931
|
const resultStr = `Command \`${arg}\`:\nExit code: ${shellResult.exit_code}\n${out}`;
|
|
549
932
|
if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms);
|
|
550
933
|
results.push(resultStr);
|
|
934
|
+
if (debugEntries) debugEntries.push({
|
|
935
|
+
tag,
|
|
936
|
+
call,
|
|
937
|
+
ms,
|
|
938
|
+
status: shellResult.exit_code === 0 ? 'ok' : 'nonzero_exit',
|
|
939
|
+
exitCode: shellResult.exit_code,
|
|
940
|
+
result: resultStr,
|
|
941
|
+
});
|
|
551
942
|
}
|
|
552
943
|
continue;
|
|
553
944
|
}
|
|
@@ -559,12 +950,21 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
|
|
|
559
950
|
const resultStr = `${tag} ${call[1] || ''}: Permission denied by user.`;
|
|
560
951
|
if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms);
|
|
561
952
|
results.push(resultStr);
|
|
953
|
+
if (debugEntries) debugEntries.push({ tag, call, ms, status: 'denied', exitCode: null, result: resultStr });
|
|
562
954
|
aborted = true;
|
|
563
955
|
break;
|
|
564
956
|
} else {
|
|
565
957
|
const resultStr = formatFileResult(call, fileResult);
|
|
566
958
|
if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms);
|
|
567
959
|
results.push(resultStr);
|
|
960
|
+
if (debugEntries) debugEntries.push({
|
|
961
|
+
tag,
|
|
962
|
+
call,
|
|
963
|
+
ms,
|
|
964
|
+
status: fileResult.error ? 'error' : 'ok',
|
|
965
|
+
exitCode: null,
|
|
966
|
+
result: resultStr,
|
|
967
|
+
});
|
|
568
968
|
}
|
|
569
969
|
} catch (err) {
|
|
570
970
|
const ms = Date.now() - toolStart;
|
|
@@ -572,13 +972,60 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
|
|
|
572
972
|
if (cb.onError) {
|
|
573
973
|
cb.onError({ message: `Tool error (${tag}): ${err.message}`, isWarning: true });
|
|
574
974
|
} else {
|
|
975
|
+
// Non-TUI fallback — see comment on the onToolStart branch above.
|
|
575
976
|
process.stdout.write(`\n ${THEME.warn}⚠ Tool error (${tag}): ${err.message}${THEME.reset}\n`);
|
|
576
977
|
}
|
|
577
978
|
logToolCall(tag, { args: call.slice(1) }, false, 'error');
|
|
578
979
|
results.push(`${tag}: Error — ${err.message}`);
|
|
980
|
+
if (debugEntries) debugEntries.push({ tag, call, ms, status: 'exception', exitCode: null, result: `Error — ${err.message}` });
|
|
579
981
|
}
|
|
580
982
|
}
|
|
581
983
|
|
|
984
|
+
if (debug && debugEntries && debugEntries.length > 0) {
|
|
985
|
+
const totalMs = debugEntries.reduce((s, e) => s + (e.ms || 0), 0);
|
|
986
|
+
const statusCounts = debugEntries.reduce((acc, e) => {
|
|
987
|
+
acc[e.status] = (acc[e.status] || 0) + 1;
|
|
988
|
+
return acc;
|
|
989
|
+
}, {});
|
|
990
|
+
const statusSummary = Object.entries(statusCounts)
|
|
991
|
+
.map(([k, v]) => `${k}=${v}`)
|
|
992
|
+
.join(', ');
|
|
993
|
+
|
|
994
|
+
const entries = debugEntries.map((e, idx) => {
|
|
995
|
+
const argsPreview = (e.call || []).slice(1).map((a) => {
|
|
996
|
+
if (a === undefined || a === null) return '';
|
|
997
|
+
const s = String(a).replace(/\s+/g, ' ').trim();
|
|
998
|
+
return s.length > 120 ? s.slice(0, 117) + '...' : s;
|
|
999
|
+
}).filter((s) => s.length > 0).join(' │ ');
|
|
1000
|
+
const rows = [
|
|
1001
|
+
['tag:', e.tag],
|
|
1002
|
+
['args:', argsPreview || '(none)'],
|
|
1003
|
+
['status:', e.status + (e.exitCode !== null && e.exitCode !== undefined ? ` (exit=${e.exitCode})` : '')],
|
|
1004
|
+
['latency_ms:', e.ms],
|
|
1005
|
+
];
|
|
1006
|
+
return {
|
|
1007
|
+
title: `TOOL ${idx + 1}/${debugEntries.length}`,
|
|
1008
|
+
rows,
|
|
1009
|
+
body: truncateForDebug(e.result),
|
|
1010
|
+
};
|
|
1011
|
+
});
|
|
1012
|
+
|
|
1013
|
+
const block = formatDebugBlock({
|
|
1014
|
+
title: 'DEBUG TOOL RESULTS',
|
|
1015
|
+
iteration: iteration + 1,
|
|
1016
|
+
blocks: [
|
|
1017
|
+
['SUMMARY', [
|
|
1018
|
+
['tools_executed:', debugEntries.length],
|
|
1019
|
+
['total_latency_ms:', totalMs],
|
|
1020
|
+
['status_breakdown:', statusSummary],
|
|
1021
|
+
['aborted:', aborted ? 'yes' : 'no'],
|
|
1022
|
+
]],
|
|
1023
|
+
],
|
|
1024
|
+
entries,
|
|
1025
|
+
});
|
|
1026
|
+
emitDebug(block);
|
|
1027
|
+
}
|
|
1028
|
+
|
|
582
1029
|
if (aborted) {
|
|
583
1030
|
const warnMsg = isAborted()
|
|
584
1031
|
? 'Agent interrupted.'
|
|
@@ -586,24 +1033,37 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
|
|
|
586
1033
|
if (cb.onError) {
|
|
587
1034
|
cb.onError({ message: warnMsg, isWarning: true });
|
|
588
1035
|
} else {
|
|
1036
|
+
// Non-TUI fallback — see comment above on the Found-actions path.
|
|
589
1037
|
process.stdout.write(`\n ${FG_YELLOW}⚠${RST} ${FG_GRAY}${warnMsg}${RST}`);
|
|
590
1038
|
}
|
|
591
1039
|
// Push whatever results accumulated before the denial so the LLM has
|
|
592
1040
|
// context if the user asks to continue.
|
|
593
1041
|
if (results.length > 0) {
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
1042
|
+
if (isNativeCall) {
|
|
1043
|
+
for (let i = 0; i < results.length; i++) {
|
|
1044
|
+
messages.push({ role: 'tool', tool_call_id: nativeToolCallIds[i], content: results[i] });
|
|
1045
|
+
}
|
|
1046
|
+
} else {
|
|
1047
|
+
messages.push({
|
|
1048
|
+
role: 'user',
|
|
1049
|
+
content: `Tool execution results (partial — stopped after user denied an action):\n\n${results.join('\n\n')}`,
|
|
1050
|
+
});
|
|
1051
|
+
}
|
|
598
1052
|
}
|
|
599
1053
|
break;
|
|
600
1054
|
}
|
|
601
1055
|
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
}
|
|
1056
|
+
if (isNativeCall) {
|
|
1057
|
+
for (let i = 0; i < results.length; i++) {
|
|
1058
|
+
messages.push({ role: 'tool', tool_call_id: nativeToolCallIds[i], content: results[i] });
|
|
1059
|
+
}
|
|
1060
|
+
} else {
|
|
1061
|
+
const feedback = results.join('\n\n');
|
|
1062
|
+
messages.push({
|
|
1063
|
+
role: 'user',
|
|
1064
|
+
content: `Tool execution results:\n\n${feedback}\n\nContinue with the task. If everything is done, summarize what was accomplished.`,
|
|
1065
|
+
});
|
|
1066
|
+
}
|
|
607
1067
|
}
|
|
608
1068
|
|
|
609
1069
|
return { messages, metrics };
|
|
@@ -616,4 +1076,5 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
|
|
|
616
1076
|
|
|
617
1077
|
module.exports = { // CommonJS export object for this module
|
|
618
1078
|
createAgentRunner, // factory function declared earlier in this file
|
|
1079
|
+
formatDebugBlock, // debug-block formatter; also called inside the agent loop to build the blocks passed to emitDebug
|
|
619
1080
|
};
|