@semalt-ai/code 1.8.1 → 1.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/agent.js CHANGED
@@ -2,8 +2,14 @@
2
2
 
3
3
  const { logToolCall } = require('./audit');
4
4
  const { Metrics } = require('./metrics');
5
- const { SYSTEM_PROMPT } = require('./prompts');
5
+ const { getSystemPrompt } = require('./prompts');
6
6
  const { TAG_REGISTRY } = require('./constants');
7
+ const { mapInvokeToCall } = require('./tools');
8
+ const { UI_THEME } = require('./ui/theme');
9
+ const { RST } = require('./ui/ansi');
10
+ const { getCols: _getCols, repeatToWidth } = require('./ui/utils');
11
+ const writer = require('./ui/writer');
12
+ const messages = require('./ui/messages');
7
13
 
8
14
  class StreamParser {
9
15
  constructor(onToken, onTagOpen, onTagContent, onTagClose) {
@@ -40,14 +46,18 @@ class StreamParser {
40
46
  const tagRaw = this.buffer.slice(1, gtIdx).trim();
41
47
  const selfClose = tagRaw.endsWith('/');
42
48
  const tagBody = selfClose ? tagRaw.slice(0, -1).trim() : tagRaw;
43
- const spaceIdx = tagBody.search(/\s/);
44
- const tagName = (spaceIdx === -1 ? tagBody : tagBody.slice(0, spaceIdx)).toLowerCase();
45
- const attrStr = spaceIdx === -1 ? '' : tagBody.slice(spaceIdx + 1);
49
+ // Split on whitespace OR `=` so both MiniMax-style `<parameter name="x">`
50
+ // and Qwen3-Coder `<parameter=x>` resolve to the same tagName.
51
+ const delimIdx = tagBody.search(/[\s=]/);
52
+ const tagName = (delimIdx === -1 ? tagBody : tagBody.slice(0, delimIdx)).toLowerCase();
53
+ const attrStr = delimIdx === -1 ? '' : tagBody.slice(delimIdx + 1);
46
54
 
47
55
  const attrs = {};
48
- const attrRe = /(\w+)="([^"]*)"/g;
56
+ const attrReDouble = /(\w+)="([^"]*)"/g;
57
+ const attrReSingle = /(\w+)='([^']*)'/g;
49
58
  let m;
50
- while ((m = attrRe.exec(attrStr)) !== null) attrs[m[1]] = m[2];
59
+ while ((m = attrReDouble.exec(attrStr)) !== null) attrs[m[1]] = m[2];
60
+ while ((m = attrReSingle.exec(attrStr)) !== null) attrs[m[1]] = m[2];
51
61
 
52
62
  this.buffer = this.buffer.slice(gtIdx + 1);
53
63
 
@@ -70,11 +80,37 @@ class StreamParser {
70
80
  } else {
71
81
  const closing = '</' + this.insideTag + '>';
72
82
  const closeIdx = this.buffer.toLowerCase().indexOf(closing);
83
+ const entry = TAG_REGISTRY[this.insideTag];
84
+ const streamInner = entry && entry.type === 'final';
73
85
  if (closeIdx === -1) {
74
- this.tagContent += this.buffer;
75
- this.buffer = '';
86
+ if (streamInner) {
87
+ // Emit content live through onToken, but hold back any trailing
88
+ // substring that could be a prefix of the closing tag (chunk
89
+ // boundary splitting `</final_answer>` into e.g. `</fin` + `al…`).
90
+ const lowBuf = this.buffer.toLowerCase();
91
+ const lowClose = closing;
92
+ let safeUpTo = this.buffer.length;
93
+ const ltIdx = lowBuf.lastIndexOf('<');
94
+ if (ltIdx !== -1) {
95
+ const tail = lowBuf.slice(ltIdx);
96
+ if (lowClose.startsWith(tail)) safeUpTo = ltIdx;
97
+ }
98
+ if (safeUpTo > 0) {
99
+ const emit = this.buffer.slice(0, safeUpTo);
100
+ this.onToken(emit);
101
+ this.tagContent += emit;
102
+ this.buffer = this.buffer.slice(safeUpTo);
103
+ }
104
+ } else {
105
+ this.tagContent += this.buffer;
106
+ this.buffer = '';
107
+ }
76
108
  break;
77
109
  }
110
+ if (streamInner) {
111
+ const emit = this.buffer.slice(0, closeIdx);
112
+ if (emit) this.onToken(emit);
113
+ }
78
114
  this.tagContent += this.buffer.slice(0, closeIdx);
79
115
  this.buffer = this.buffer.slice(closeIdx + closing.length);
80
116
  this.onTagContent(this.insideTag, this.tagContent);
@@ -99,7 +135,7 @@ function cleanAssistantContent(raw) {
99
135
  }
100
136
 
101
137
  for (const [tag, entry] of Object.entries(TAG_REGISTRY)) {
102
- if (entry.type === 'strip') {
138
+ if (entry.type === 'strip' || entry.type === 'final') {
103
139
  // Strip only the wrapper tags; keep the inner content
104
140
  text = text.replace(new RegExp(`<${tag}[^>]*>`, 'gi'), '');
105
141
  text = text.replace(new RegExp(`<\\/${tag}>`, 'gi'), '');
@@ -112,13 +148,277 @@ function cleanAssistantContent(raw) {
112
148
  }
113
149
  }
114
150
 
115
- text = text.replace(/<\/?[a-zA-Z_][a-zA-Z0-9_]*(\s[^>]*)?>/g, '');
116
- text = text.replace(/\n{2,}/g, '\n');
117
-
118
151
  return text.trim();
119
152
  }
120
153
 
121
- function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agentExecFile, ui }) {
// Rough token estimate using a "four characters per token" heuristic.
//
// text: value to measure. Falsy values count as empty. Values that expose a
//       finite numeric `length` (strings, arrays) are measured through it;
//       anything else is stringified first.
// Returns an integer estimate; never NaN.
function estimateTokens(text) {
  if (!text) return 0;
  // Numbers, booleans and plain objects have no `length`; dividing
  // `undefined` by 4 would yield NaN and poison any running total.
  const length = Number.isFinite(text.length) ? text.length : String(text).length;
  return Math.floor(length / 4);
}
157
+
// Fallback classifier for user-initiated aborts. Depending on where the
// signal fires, an abort can show up as `new Error('Aborted')` (our own abort
// paths in api.js) or as an AbortError / ABORT_ERR from Node's built-ins.
// Callers should treat the abort signal itself as authoritative and use this
// helper only when the signal is not conclusive.
//
// err: the caught value (may be null/undefined).
// Returns true when `err` looks like an abort, false otherwise.
function isAbortError(err) {
  if (!err) return false;
  const abortCodes = ['ABORT_ERR', 'ERR_ABORTED'];
  return (
    err.name === 'AbortError'
    || abortCodes.includes(err.code)
    // Exact, case-insensitive match only: "Request aborted" must not count.
    || (typeof err.message === 'string' && /^Aborted$/i.test(err.message))
  );
}
169
+
// Sleep for `ms` milliseconds, waking early if `signal` aborts.
//
// ms:     delay in milliseconds.
// signal: optional AbortSignal-like object (`aborted`, `addEventListener`,
//         `removeEventListener`). When omitted this is a plain timer.
// Returns a promise that always resolves (never rejects); callers inspect the
// signal afterwards to learn whether the wait was cut short.
function abortableSleep(ms, signal) {
  return new Promise((resolve) => {
    if (signal && signal.aborted) { resolve(); return; }
    if (!signal) { setTimeout(resolve, ms); return; }

    let timer = null;
    const onAbort = () => {
      clearTimeout(timer);
      resolve();
    };
    timer = setTimeout(() => {
      // Detach the listener once the timer wins; otherwise every completed
      // sleep leaves a dead listener (and its closure) on the signal.
      signal.removeEventListener('abort', onAbort);
      resolve();
    }, ms);
    signal.addEventListener('abort', onAbort, { once: true });
  });
}
182
+
// Classify a model reply into one of four labels:
//   'empty'     - reply is missing or whitespace-only
//   'tool_call' - reply contains a tool_call / function_call style opening tag
//   'command'   - no such tag, but at least one parsed tool call was supplied
//   'text'      - anything else
//
// reply:     raw reply text.
// toolCalls: parsed call list for the reply (may be null/undefined).
function detectFormat(reply, toolCalls) {
  const isBlank = !reply || reply.trim() === '';
  if (isBlank) return 'empty';

  const hasToolCallTag = /<(minimax:tool_call|qwen:tool_call|tool_call|function_call)\b/i.test(reply);
  if (hasToolCallTag) return 'tool_call';

  return toolCalls?.length > 0 ? 'command' : 'text';
}
189
+
// Spot path-requiring tags (create_file, write_file, append_file) whose
// opening tag carries no quoted `path` attribute — typically because the
// model put something like `<attrs: path=...>` inside the body instead.
// Returning a specific hint lets the agent loop push a corrective user
// message and keep going instead of silently stalling.
//
// text: raw reply text. Non-string or empty input yields no issues.
// Returns an array of `{ tag, hint }`, at most one entry per tag, in the
// order create_file, write_file, append_file.
function detectMalformedTags(text) {
  if (typeof text !== 'string' || text === '') return [];

  const PATH_REQUIRED = ['create_file', 'write_file', 'append_file'];
  const issues = [];
  for (const tag of PATH_REQUIRED) {
    // Case-insensitive: tag names are lowercased by the stream parser and
    // stripped with `gi` in cleanAssistantContent, so `<Create_File>` must be
    // recognised here as well.
    const openTagRe = new RegExp(`<${tag}\\b([^>]*)>`, 'gi');
    for (const match of text.matchAll(openTagRe)) {
      const attrs = match[1] || '';
      if (/\bpath\s*=\s*['"]/.test(attrs)) continue;
      issues.push({
        tag,
        hint: `Use <${tag} path="/absolute/path">FILE CONTENT HERE</${tag}>. Put the path as a quoted attribute on the opening tag, and the actual file contents between the tags — not a nested pseudo-tag.`,
      });
      // One hint per tag is enough; don't spam the model per occurrence.
      break;
    }
  }
  return issues;
}
215
+
// One-line preview of a call tuple: "<tag> first-arg".
//
// call: tuple-like value whose index 0 is the tag and index 1 the first
//       argument; a falsy `call` yields the fixed "NONE" marker string.
// Whitespace runs in the argument collapse to single spaces, and the argument
// is capped at 80 characters (77 plus "...").
function previewCommand(call) {
  if (!call) return 'NONE — CLIENT WILL STALL';

  const label = `<${call[0] || 'unknown'}>`;
  const flattened = String(call[1] || '').replace(/\s+/g, ' ').trim();
  if (!flattened) return label;

  const shown = flattened.length <= 80 ? flattened : `${flattened.slice(0, 77)}...`;
  return `${label} ${shown}`;
}
224
+
// Render a framed, colourised debug report and return it as one
// newline-joined string.
//
// sections fields read here:
//   title      - block label; defaults to 'DEBUG'
//   iteration  - value shown in the "[iteration N]" tag
//   blocks     - optional list of [sectionTitle, rows]
//   raw        - optional raw response text (section shown when !== undefined)
//   rawFooter  - optional rows shown after the raw section
//   entries    - optional list of { title, rows?, body? }
//   warnings   - optional list of warning strings
// `rows` is always a list of [key, value] pairs; null/undefined values render
// as an em dash.
function formatDebugBlock(sections) {
  // The debug block is rendered as a tool-output message in the TUI. Chat
  // history indents output by 5 cols; account for that so the frame still
  // reaches the visible right edge instead of wrapping.
  const totalW = Math.max(40, _getCols());
  const frameW = Math.max(20, totalW - 7);
  const H = UI_THEME.muted; // frame glyphs
  const L = UI_THEME.info; // block label ("DEBUG TOOL RESULTS")
  const S = UI_THEME.subtle; // iteration tag, secondary text
  const K = UI_THEME.accent; // section-header bullets ("▸ SUMMARY")
  const W = UI_THEME.warning; // warning markers

  const header = sections.title || 'DEBUG';
  const iter = `[iteration ${sections.iteration}]`;

  const out = [];
  const pushSection = (title) => out.push(`${K}▸ ${RST}${L}${title}${RST}`);
  // Shared key/value renderer: keys are padded to the widest key plus two so
  // the values line up in a column.
  const pushRows = (rows) => {
    const width = Math.max(...rows.map((r) => r[0].length));
    for (const [k, v] of rows) {
      const val = (v === undefined || v === null) ? '—' : String(v);
      out.push(` ${S}${k.padEnd(width + 2)}${RST}${val}`);
    }
  };

  // Top frame: " LABEL [iteration N] ═════… " — fills to width.
  const headPrefix = `${H}══ ${RST}${L}${header}${RST} ${S}${iter}${RST} `;
  const headVisible = 4 + header.length + 1 + iter.length + 1; // "══ " + label + " " + iter + " "
  out.push(`${headPrefix}${H}${repeatToWidth('═', frameW, headVisible)}${RST}`);

  // `blocks` is guarded like every other optional section so a caller that
  // only supplies `entries` or `raw` does not crash the debug path.
  for (const [title, rows] of sections.blocks || []) {
    pushSection(title);
    pushRows(rows);
    out.push('');
  }

  if (sections.raw !== undefined) {
    pushSection('RAW RESPONSE');
    out.push(sections.raw ? sections.raw : `${S}(empty)${RST}`);
    out.push('');
    if (sections.rawFooter && sections.rawFooter.length) {
      pushSection('STREAM FOOTER');
      pushRows(sections.rawFooter);
      out.push('');
    }
  }

  if (sections.entries && sections.entries.length) {
    for (const entry of sections.entries) {
      pushSection(entry.title);
      if (entry.rows) pushRows(entry.rows);
      if (entry.body !== undefined) {
        const body = entry.body === '' ? `${S}(empty)${RST}` : entry.body;
        for (const line of String(body).split('\n')) out.push(' ' + line);
      }
      out.push('');
    }
  }

  if (sections.warnings && sections.warnings.length) {
    pushSection('WARNINGS');
    for (const w of sections.warnings) out.push(` ${W}⚠ ${w}${RST}`);
    out.push('');
  }

  // Bottom frame: plain full-width rule in muted.
  out.push(`${H}${repeatToWidth('═', frameW)}${RST}`);
  return out.join('\n');
}
297
+
// Clamp text for display in a debug block.
//
// text:     value to clamp; null/undefined yield ''. Other values are
//           stringified.
// maxLines: maximum number of content lines kept (default 40).
// maxChars: maximum number of characters kept (default 2000).
// Returns the clamped text followed by at most one "… [truncated, …]" notice.
// When the line limit applies, the notice reports lines omitted from the
// ORIGINAL text; otherwise, when the char limit applies, it reports omitted
// characters.
function truncateForDebug(text, maxLines = 40, maxChars = 2000) {
  if (text === undefined || text === null) return '';
  const full = String(text);

  const overChars = full.length > maxChars;
  const kept = overChars ? full.slice(0, maxChars) : full;

  // Count lines on the content only. Appending the char notice first would
  // make the notice itself count as a line and be reported as "1 more lines".
  const lines = kept.split('\n');
  if (lines.length > maxLines) {
    const totalLines = overChars ? full.split('\n').length : lines.length;
    return lines.slice(0, maxLines).join('\n') + `\n… [truncated, ${totalLines - maxLines} more lines]`;
  }

  return overChars
    ? kept + `\n… [truncated, ${full.length - maxChars} more chars]`
    : kept;
}
310
+
// Per-tag meta extractor: turns a tool-executor return value into the compact
// meta object for that tag — exit code for shell tags, byte counts for file
// operations, status code plus bytes for HTTP tags, item counts for listing
// and search tags. Pure by design: it reads only `tag` and `result`.
//
// tag:    tool tag name.
// result: executor return value.
// Returns the meta object, or null when `result` is missing, carries an
// `error`, or the tag has no meta mapping.
function _metaForTool(tag, result) {
  if (!result || result.error) return null;

  // A reported numeric `bytes` always wins; otherwise fall back per tag.
  const reportedBytes = (fallback) => (typeof result.bytes === 'number' ? result.bytes : fallback());
  const utf8Size = (value) => (value ? Buffer.byteLength(String(value), 'utf8') : 0);
  const lengthOf = (list) => (Array.isArray(list) ? list.length : 0);

  const exitCode = () => ({ exit_code: result.exit_code });
  const readBytes = () => ({ bytes: reportedBytes(() => utf8Size(result.content)) });
  const writtenBytes = () => ({ bytes: reportedBytes(() => 0) });
  const httpTransfer = () => ({
    status_code: result.status_code,
    bytes: reportedBytes(() => utf8Size(result.body)),
  });

  const extractors = new Map([
    ['shell', exitCode],
    ['exec', exitCode],
    ['read', readBytes],
    ['read_file', readBytes],
    ['write', writtenBytes],
    ['write_file', writtenBytes],
    ['create_file', writtenBytes],
    ['append', writtenBytes],
    ['append_file', writtenBytes],
    ['upload', writtenBytes],
    ['list_dir', () => ({ count: lengthOf(result.items) })],
    ['search_files', () => ({ count: lengthOf(result.files) })],
    ['search_in_file', () => ({ count: lengthOf(result.matches) })],
    ['replace_in_file', () => ({ count: typeof result.count === 'number' ? result.count : 0 })],
    ['http_get', httpTransfer],
    ['download', httpTransfer],
    ['file_stat', () => ({
      // size_kb is converted to a rounded byte count.
      bytes: result.size_kb ? Math.round(parseFloat(result.size_kb) * 1024) : 0,
      kind: result.type || null,
    })],
  ]);

  const extract = extractors.get(tag);
  return extract ? extract() : null;
}
363
+
// Convert a [action, arg1, arg2, …] call tuple into a named `attrs` bag. The
// per-tag positional-argument contract lives in this one table, so a new tool
// added to the tuple schema gets its mapping here too.
//
// call: call tuple; anything that is not a non-empty array yields {}.
// Returns the attrs object for the tag, or {} for an unknown tag. Missing
// string arguments default to '' (search_files `dir` defaults to '.');
// edit_file `line` is passed through untouched.
function _attrsFromCall(call) {
  if (!Array.isArray(call) || call.length === 0) return {};
  const [tag, ...args] = call;

  // Positional argument with a fallback for falsy/missing values.
  const arg = (index, fallback = '') => args[index] || fallback;

  const command = () => ({ command: arg(0) });
  const pathOnly = () => ({ path: arg(0) });
  const pathAndContent = () => ({ path: arg(0), content: arg(1) });
  const srcAndDst = () => ({ src: arg(0), dst: arg(1) });
  const urlOnly = () => ({ url: arg(0) });

  const builders = new Map([
    ['shell', command],
    ['exec', command],
    ['read', pathOnly],
    ['read_file', pathOnly],
    ['list_dir', pathOnly],
    ['delete_file', pathOnly],
    ['make_dir', pathOnly],
    ['remove_dir', pathOnly],
    ['file_stat', pathOnly],
    ['upload', pathOnly],
    ['write', pathAndContent],
    ['write_file', pathAndContent],
    ['create_file', pathAndContent],
    ['append', pathAndContent],
    ['append_file', pathAndContent],
    ['move_file', srcAndDst],
    ['copy_file', srcAndDst],
    ['edit_file', () => ({ path: arg(0), line: args[1], content: arg(2) })],
    ['search_files', () => ({ pattern: arg(0), dir: arg(1, '.') })],
    ['search_in_file', () => ({ path: arg(0), pattern: arg(1) })],
    ['replace_in_file', () => ({ path: arg(0), search: arg(1), replace: arg(2), flags: arg(3) })],
    ['get_env', () => ({ name: arg(0) })],
    ['set_env', () => ({ name: arg(0), value: arg(1) })],
    ['download', urlOnly],
    ['http_get', urlOnly],
    ['ask_user', () => ({ question: arg(0) })],
    ['store_memory', () => ({ key: arg(0), value: arg(1) })],
    ['recall_memory', () => ({ key: arg(0) })],
  ]);

  const build = builders.get(tag);
  return build ? build() : {};
}
420
+
421
+ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agentExecFile, ui, getConfig }) {
122
422
  const { BOLD, FG_DARK, FG_GRAY, FG_TEAL, FG_YELLOW, RST, THEME, getCols } = ui;
123
423
 
124
424
  function formatFileResult(call, result) {
@@ -140,20 +440,8 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
140
440
  case 'file_stat':
141
441
  return `Stat ${result.path}: size=${result.size_kb} KB, mtime=${result.mtime}, type=${result.type}, mode=${result.mode}`;
142
442
  case 'http_get': {
143
- if (result.chunked) {
144
- return `HTTP GET ${args[0]} (${result.status_code}) [Part 1/${result.total_parts}]:\n${result.body}\n\n[Response is large and was split into ${result.total_parts} parts. Use <http_get_next key="${args[0]}"/> to retrieve the next part.]`;
145
- }
146
443
  return `HTTP GET ${args[0]} (${result.status_code}):\n${result.body}`;
147
444
  }
148
- case 'http_get_next': {
149
- if (result.done && !result.body) {
150
- return `http_get_next "${args[0]}": No more content available.`;
151
- }
152
- const more = result.done
153
- ? ' [Final part]'
154
- : `\n\n[Use <http_get_next key="${args[0]}"/> to retrieve part ${result.part + 1}/${result.total_parts}.]`;
155
- return `HTTP content "${args[0]}" [Part ${result.part}/${result.total_parts}]:\n${result.body}${more}`;
156
- }
157
445
  case 'ask_user':
158
446
  return `User answered "${result.question}": ${result.answer}`;
159
447
  case 'store_memory':
@@ -257,10 +545,6 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
257
545
  const raw = attrs.raw || '';
258
546
  return formatFileResult(['http_get', url, raw], await agentExecFile('http_get', url, raw));
259
547
  }
260
- case 'http_get_next': {
261
- const key = attrs.key || content;
262
- return formatFileResult(['http_get_next', key], await agentExecFile('http_get_next', key));
263
- }
264
548
  case 'ask_user': {
265
549
  const q = attrs.question || content;
266
550
  return formatFileResult(['ask_user', q], await agentExecFile('ask_user', q));
@@ -295,9 +579,9 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
295
579
  return;
296
580
  }
297
581
 
298
- if (entry.type === 'strip') return;
582
+ if (entry.type === 'strip' || entry.type === 'final') return;
299
583
 
300
- // Tool execution happens in the toolCalls loop after streaming; handleTag only handles visual/strip.
584
+ // Tool execution happens in the toolCalls loop after streaming; handleTag only handles visual/strip/final.
301
585
  }
302
586
 
303
587
  async function runAgentLoop(messages, model, maxIterations = Infinity, tokenLimit = null, opts = {}) {
@@ -312,9 +596,27 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
312
596
  const isAborted = getAbortFlag || (() => false);
313
597
  const cb = callbacks;
314
598
  const metrics = new Metrics(tokenLimit);
315
- const activeSystemPrompt = overrideSystemPrompt !== null ? overrideSystemPrompt : SYSTEM_PROMPT;
316
599
  const mode = overrideMode || 'system_role';
317
600
 
601
+ // Route debug blocks to the UI callback when present (interactive TUI mode
602
+ // overwrites stderr with redraws, losing the output). Fall back to stderr
603
+ // for one-shot/non-TTY flows where there's no UI to host the block.
604
+ const emitDebug = (block) => {
605
+ if (typeof cb.onDebug === 'function') cb.onDebug(block);
606
+ // audit: allowed — stderr debug under --debug flag (no UI hosting available).
607
+ else process.stderr.write('\n' + block + '\n');
608
+ };
609
+
610
+ // Resolve native_tools from the active profile (matched by api_base+model).
611
+ // Fallback to true if no matching profile — mirrors config-normalization default.
612
+ const _cfg = typeof getConfig === 'function' ? getConfig() : {};
613
+ const _profile = Array.isArray(_cfg.models)
614
+ ? _cfg.models.find((p) => p && p.api_base === _cfg.api_base && p.model === model)
615
+ : null;
616
+ const nativeTools = _profile && _profile.native_tools === false ? false : true;
617
+
618
+ const activeSystemPrompt = overrideSystemPrompt !== null ? overrideSystemPrompt : getSystemPrompt(nativeTools);
619
+
318
620
  for (let iteration = 0; iteration < maxIterations; iteration++) {
319
621
  if (isAborted()) break;
320
622
  const linePrefix = `${FG_TEAL}${BOLD}◆ ${RST}`;
@@ -366,49 +668,116 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
366
668
  }
367
669
  : null;
368
670
 
369
- if (debug) {
370
- const header = `\n───── messages sent to agent (iteration ${iteration + 1}) ─────\n`;
371
- const footer = `\n───── end messages ─────\n`;
372
- process.stderr.write(header + JSON.stringify(messagesWithSystem, null, 2) + footer);
373
- }
374
-
375
671
  const MAX_RETRIES = 3;
672
+ const RETRYABLE_STATUS = new Set([408, 425, 429, 500, 502, 503, 504]);
673
+ const NON_RETRYABLE_STATUS = new Set([400, 401, 403, 404, 413, 422]);
376
674
  let result = null;
377
675
  let lastApiErr = null;
378
676
 
379
- for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
380
- if (attempt === 1) {
381
- callbacks.onRequestSent?.();
382
- } else {
383
- cb.onRetry?.(attempt, MAX_RETRIES);
384
- await new Promise((r) => setTimeout(r, 1000));
385
- }
386
- try {
387
- result = await chatStream(messagesWithSystem, {
388
- model,
389
- linePrefix: wrappedOnToken ? '' : linePrefix,
390
- showThink,
391
- onToken: wrappedOnToken,
392
- silent: !!wrappedOnToken,
393
- });
394
- lastApiErr = null;
395
- break;
396
- } catch (err) {
397
- lastApiErr = err;
398
- if (debug) {
399
- const header = `\n───── raw http error (iteration ${iteration + 1}, attempt ${attempt}/${MAX_RETRIES}) ─────\n`;
400
- const footer = `\n───── end raw http error ─────\n`;
401
- const status = err.statusCode ? `HTTP ${err.statusCode}` : 'network error';
402
- const headerLines = err.responseHeaders
403
- ? Object.entries(err.responseHeaders).map(([k, v]) => `${k}: ${v}`).join('\n')
404
- : '';
405
- const body = err.rawBody !== undefined ? err.rawBody : (err.stack || err.message || String(err));
406
- const parts = [status];
407
- if (headerLines) parts.push(headerLines);
408
- parts.push(body || '(empty body)');
409
- process.stderr.write(header + parts.join('\n\n') + footer);
677
+ // AbortController per iteration: watcher polls isAborted() every 50ms
678
+ // and flips controller.abort() as soon as the flag flips.
679
+ const controller = new AbortController();
680
+ const abortWatcher = setInterval(() => {
681
+ if (isAborted() && !controller.signal.aborted) controller.abort();
682
+ }, 50);
683
+
684
+ try {
685
+ for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
686
+ if (attempt === 1) {
687
+ callbacks.onRequestSent?.();
688
+ }
689
+ try {
690
+ result = await chatStream(messagesWithSystem, {
691
+ model,
692
+ nativeTools,
693
+ linePrefix: wrappedOnToken ? '' : linePrefix,
694
+ showThink,
695
+ onToken: wrappedOnToken,
696
+ silent: !!wrappedOnToken,
697
+ signal: controller.signal,
698
+ onTrim: (info) => {
699
+ // Setter (rather than re-reading config per iteration) keeps
700
+ // the Metrics instance authoritative: a 400-overflow discovery
701
+ // mid-loop immediately updates 85%-warning thresholds and the
702
+ // status bar without threading config access into agent.js.
703
+ if (info && info.reason === 'overflow-400' && typeof info.limit === 'number' && info.limit > 0) {
704
+ metrics.setModelTokenLimit(info.limit);
705
+ }
706
+ if (cb.onError) {
707
+ cb.onError({
708
+ message: `Context trimmed (${info.reason}): ${info.dropped} message(s) dropped, kept ~${info.keptTokens} tokens (limit ${info.limit}).`,
709
+ isWarning: true,
710
+ });
711
+ }
712
+ },
713
+ });
714
+ lastApiErr = null;
715
+ break;
716
+ } catch (err) {
717
+ // User-initiated abort: not a transient failure. Skip the retry
718
+ // counter, the "Retrying (N/M)..." status update, the debug dump,
719
+ // and the post-loop error surface. The "Interrupted." feedback is
720
+ // already shown by the input-field abort listener.
721
+ if (controller.signal.aborted || isAborted() || isAbortError(err)) {
722
+ lastApiErr = null;
723
+ break;
724
+ }
725
+ lastApiErr = err;
726
+ if (debug) {
727
+ const status = err.statusCode ? `HTTP ${err.statusCode}` : 'network error';
728
+ const body = err.rawBody !== undefined ? err.rawBody : (err.stack || err.message || String(err));
729
+ const block = formatDebugBlock({
730
+ iteration: iteration + 1,
731
+ blocks: [
732
+ ['REQUEST', [
733
+ ['model:', model],
734
+ ['endpoint:', err.endpoint || '(unknown)'],
735
+ ['timestamp:', new Date().toISOString()],
736
+ ['native_tools:', nativeTools],
737
+ ['attempt:', `${attempt}/${MAX_RETRIES}`],
738
+ ]],
739
+ ['RESPONSE', [
740
+ ['status:', status],
741
+ ['detail:', err.detail || ''],
742
+ ]],
743
+ ],
744
+ raw: body || '(empty body)',
745
+ warnings: [`HTTP error on attempt ${attempt}/${MAX_RETRIES}: ${err.message}`],
746
+ });
747
+ emitDebug(block);
748
+ }
749
+ const sc = err.statusCode;
750
+ const retryable = !sc || RETRYABLE_STATUS.has(sc);
751
+ if (!retryable || NON_RETRYABLE_STATUS.has(sc)) break;
752
+ if (attempt >= MAX_RETRIES) break;
753
+ // Backoff: base 1000ms doubling (1s, 2s, 4s). For 429, honor
754
+ // Retry-After header when it's a plausible seconds value.
755
+ let delayMs = 1000 * Math.pow(2, attempt - 1);
756
+ if (sc === 429) {
757
+ const ra = err.responseHeaders && err.responseHeaders['retry-after'];
758
+ const raNum = ra !== undefined ? Number(ra) : NaN;
759
+ if (Number.isFinite(raNum) && raNum >= 0 && raNum <= 30) {
760
+ delayMs = Math.round(raNum * 1000);
761
+ }
762
+ }
763
+ cb.onRetry?.(attempt + 1, MAX_RETRIES);
764
+ await abortableSleep(delayMs, controller.signal);
765
+ // Ctrl+C pressed during backoff: bail without the next attempt.
766
+ if (controller.signal.aborted || isAborted()) {
767
+ lastApiErr = null;
768
+ break;
769
+ }
410
770
  }
411
771
  }
772
+ } finally {
773
+ clearInterval(abortWatcher);
774
+ }
775
+
776
+ // User-initiated abort: exit the turn quietly. Skip the empty-reply
777
+ // "connection dropped" warning below — the abort listener already
778
+ // surfaced "Interrupted." and the outer prompt will return.
779
+ if (controller.signal.aborted || isAborted()) {
780
+ break;
412
781
  }
413
782
 
414
783
  if (lastApiErr) {
@@ -420,33 +789,52 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
420
789
  const usage = result ? result.usage : null;
421
790
  metrics.endTurn(usage, model);
422
791
 
423
- if (debug) {
424
- const header = `\n───── raw ai response (iteration ${iteration + 1}) ─────\n`;
425
- const footer = `\n───── end raw response ─────\n`;
426
- process.stderr.write(header + (reply || '(empty)') + footer);
427
- }
428
-
429
792
  if (cb.onMetricsUpdate) {
430
793
  cb.onMetricsUpdate({
431
794
  totalTokens: metrics.totalTokens(),
432
795
  contextTokens: metrics.contextTokens(),
433
796
  turns: metrics.turns.length,
797
+ tokenLimit: metrics.tokenLimitStatus(),
434
798
  });
435
799
  }
436
800
 
437
801
  const limitStatus = metrics.tokenLimitStatus();
438
- if (limitStatus !== null && limitStatus.pct >= 85) {
802
+ if (limitStatus !== null && limitStatus.pct !== null && limitStatus.pct >= 85) {
439
803
  const warnMsg = `Context at ${limitStatus.pct}% of limit (${limitStatus.used}/${limitStatus.limit} tokens). Consider /compact.`;
440
804
  if (cb.onError) {
441
805
  cb.onError({ message: warnMsg, isWarning: true });
442
806
  } else {
443
- process.stdout.write(
444
- `\n ${THEME.warn}⚠ ${warnMsg}${THEME.reset}\n`
445
- );
807
+ messages.sysWarn(warnMsg);
446
808
  }
447
809
  }
448
810
 
449
811
  if (!reply) {
812
+ if (debug && result) {
813
+ const block = formatDebugBlock({
814
+ iteration: iteration + 1,
815
+ blocks: [
816
+ ['REQUEST', [
817
+ ['model:', result.request?.model || model],
818
+ ['endpoint:', result.endpoint || '(unknown)'],
819
+ ['timestamp:', new Date().toISOString()],
820
+ ['native_tools:', result.request?.native_tools ?? nativeTools],
821
+ ]],
822
+ ['RESPONSE', [
823
+ ['finish_reason:', result.finish_reason || '(unknown)'],
824
+ ['completion_tokens:', usage?.completion_tokens ?? 0],
825
+ ['latency_ms:', result.elapsed_ms ?? '?'],
826
+ ]],
827
+ ['PARSED', [
828
+ ['detected_format:', 'empty'],
829
+ ['commands_found:', 0],
830
+ ['first_command:', 'NONE — CLIENT WILL STALL'],
831
+ ]],
832
+ ],
833
+ raw: '',
834
+ warnings: ['Agent returned an empty response — connection to model may have dropped'],
835
+ });
836
+ emitDebug(block);
837
+ }
450
838
  // Empty reply from the model — stream resolved with no content and no
451
839
  // tool_calls. Most common causes: server-side disconnect mid-stream,
452
840
  // context-window overflow that slipped past the 400/413 handler, or a
@@ -459,69 +847,185 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
459
847
  break;
460
848
  }
461
849
 
462
- const toolCalls = extractToolCalls(reply);
850
+ // Native function-calling path: the model returned structured tool_calls.
851
+ // Convert them to the internal [action, ...args] tuple shape so downstream
852
+ // dispatch is unchanged, and remember the OpenAI id per call so results
853
+ // can be routed back to their tool_call on the next turn.
854
+ const nativeToolCalls = Array.isArray(result?.toolCalls) ? result.toolCalls : [];
855
+ let toolCalls;
856
+ let nativeToolCallIds = [];
857
+ if (nativeToolCalls.length > 0) {
858
+ toolCalls = [];
859
+ for (const tc of nativeToolCalls) {
860
+ let args;
861
+ try {
862
+ args = tc.function?.arguments ? JSON.parse(tc.function.arguments) : {};
863
+ } catch (err) {
864
+ if (cb.onError) cb.onError({ message: `Failed to parse tool_call arguments for ${tc.function?.name || '(unknown)'}: ${err.message}`, isWarning: true });
865
+ continue;
866
+ }
867
+ const call = mapInvokeToCall(tc.function?.name, args);
868
+ if (call) {
869
+ toolCalls.push(call);
870
+ nativeToolCallIds.push(tc.id);
871
+ }
872
+ }
873
+ } else {
874
+ toolCalls = extractToolCalls(reply, { model });
875
+ }
876
+ const isNativeCall = nativeToolCalls.length > 0;
463
877
  const cleanedReply = cleanAssistantContent(reply);
464
878
 
465
- // Detect mid-tag truncation: an opening tool tag in the raw reply with
466
- // no matching close. This happens when the model streams a large
467
- // `<write_file>…` body and hits max_tokens or a server-side cutoff
468
- // before the closing tag arrives. cleanAssistantContent strips the
469
- // unclosed tag + its trailing content, so cleanedReply looks
470
- // legitimate (just the planning preamble) and extractToolCalls finds
471
- // zero calls the loop would break silently and the user sees the
472
- // planning text followed by nothing. Surface it so the user can retry,
473
- // shorten the request, or bump max_tokens.
474
- let truncatedTag = null;
475
- for (const [tag, entry] of Object.entries(TAG_REGISTRY)) {
476
- if (entry.type !== 'tool') continue;
477
- let opens = 0;
478
- for (const m of reply.matchAll(new RegExp(`<${tag}([^>]*)>`, 'gi'))) {
479
- // Skip self-closing (`<tag .../>`) — they don't need a matching close.
480
- if (!m[1].trimEnd().endsWith('/')) opens++;
879
+ if (debug && result) {
880
+ const lastUserMsg = [...messagesWithSystem].reverse().find((m) => m.role === 'user');
881
+ const sysMsg = messagesWithSystem.find((m) => m.role === 'system');
882
+ const systemPromptTok = estimateTokens(sysMsg ? sysMsg.content : activeSystemPrompt);
883
+ const currentInputTok = estimateTokens(lastUserMsg ? lastUserMsg.content : '');
884
+ const historyTok = messagesWithSystem.reduce((sum, m) => {
885
+ if (m === sysMsg || m === lastUserMsg) return sum;
886
+ return sum + estimateTokens(m.content || '');
887
+ }, 0);
888
+ const promptTokens = usage && usage.prompt_tokens != null
889
+ ? usage.prompt_tokens
890
+ : systemPromptTok + historyTok + currentInputTok;
891
+ const completionTokens = usage && usage.completion_tokens != null
892
+ ? usage.completion_tokens
893
+ : estimateTokens(reply);
894
+ const thinkingTokens = result.reasoning ? estimateTokens(result.reasoning) : 0;
895
+ const visibleTokens = Math.max(completionTokens - thinkingTokens, 0);
896
+ const contextLimit = tokenLimit || null;
897
+ const ctxPct = contextLimit ? Math.round((promptTokens / contextLimit) * 100) : null;
898
+ const detected = detectFormat(reply, toolCalls);
899
+ const firstCmd = toolCalls.length > 0 ? previewCommand(toolCalls[0]) : previewCommand(null);
900
+ const toolTags = Object.entries(TAG_REGISTRY)
901
+ .filter(([, e]) => e.type === 'tool')
902
+ .map(([t]) => t);
903
+
904
+ const warnings = [];
905
+ if (result.finish_reason === 'length') warnings.push('finish_reason=length → response truncated, increase max_tokens');
906
+ if (detected === 'tool_call' && toolCalls.length === 0) {
907
+ warnings.push('commands_found=0 → agent emitted no command, client will stall');
481
908
  }
482
- if (opens === 0) continue;
483
- const closes = (reply.match(new RegExp(`<\\/${tag}>`, 'gi')) || []).length;
484
- if (opens > closes) { truncatedTag = tag; break; }
909
+ if (ctxPct !== null && ctxPct > 80) warnings.push(`context_used=${ctxPct}% → approaching context limit`);
910
+
911
+ const block = formatDebugBlock({
912
+ iteration: iteration + 1,
913
+ blocks: [
914
+ ['REQUEST', [
915
+ ['model:', result.request?.model || model],
916
+ ['endpoint:', result.endpoint || '(unknown)'],
917
+ ['timestamp:', new Date().toISOString()],
918
+ ['native_tools:', result.request?.native_tools ?? nativeTools],
919
+ ]],
920
+ ['CONTEXT', [
921
+ ['total_messages:', messagesWithSystem.length],
922
+ ['system_prompt_tok:', systemPromptTok],
923
+ ['history_tok:', historyTok],
924
+ ['current_input_tok:', currentInputTok],
925
+ ['context_used:', contextLimit
926
+ ? `${promptTokens} / ${contextLimit} (${ctxPct}%)`
927
+ : `${promptTokens} / unknown`],
928
+ ]],
929
+ ['PARAMETERS', [
930
+ ['max_tokens:', result.request?.max_tokens ?? '(default)'],
931
+ ['temperature:', result.request?.temperature ?? '(default)'],
932
+ ['stop_sequences:', JSON.stringify(result.request?.stop || [])],
933
+ ['reasoning_effort:', '(n/a)'],
934
+ ['tools_enabled:', `${toolTags.length} XML tags (via system prompt)`],
935
+ ]],
936
+ ['RESPONSE', [
937
+ ['finish_reason:', result.finish_reason || '(unknown)'],
938
+ ['completion_tokens:', completionTokens],
939
+ ['thinking_tokens:', thinkingTokens],
940
+ ['visible_tokens:', visibleTokens],
941
+ ['latency_ms:', result.elapsed_ms ?? '?'],
942
+ ]],
943
+ ['PARSED', [
944
+ ['detected_format:', detected],
945
+ ['commands_found:', toolCalls.length],
946
+ ['first_command:', firstCmd],
947
+ ]],
948
+ ],
949
+ raw: reply || '',
950
+ rawFooter: [
951
+ ['finish_reason:', result.finish_reason || '(unknown)'],
952
+ ['total_tokens:', result.usage_from_provider && result.usage
953
+ ? (result.usage.prompt_tokens || 0) + (result.usage.completion_tokens || 0)
954
+ : '(no usage)'],
955
+ ['content_chars:', (result.content || '').length],
956
+ ['reasoning_chars:', (result.reasoning_details || '').length],
957
+ ['tool_calls_seen:', result.tool_calls_count > 0 ? `yes (${result.tool_calls_count})` : 'no'],
958
+ ['native_mode:', isNativeCall ? `yes (${nativeToolCalls.length} call${nativeToolCalls.length === 1 ? '' : 's'})` : 'no'],
959
+ ],
960
+ warnings,
961
+ });
962
+ emitDebug(block);
485
963
  }
964
+
965
+ // Detect mid-tag truncation: StreamParser tracks `insideTag` for the
966
+ // currently-unclosed tag. If it's still set after the stream settles,
967
+ // the response was cut off while inside a tool tag (hit max_tokens or
968
+ // a server-side cutoff). cleanAssistantContent strips the unclosed
969
+ // tag + trailing content, so extractToolCalls would find nothing and
970
+ // the loop would break silently. Surface it.
971
+ //
972
+ // When `cb.onToken` is unset (non-streaming UI), the parser was never
973
+ // fed — push the final reply through it once so `insideTag` reflects
974
+ // the terminal state.
975
+ if (!wrappedOnToken && reply) parser.push(reply);
976
+ const truncatedTag = parser.insideTag && TAG_REGISTRY[parser.insideTag]?.type === 'tool'
977
+ ? parser.insideTag
978
+ : null;
486
979
  if (truncatedTag && cb.onError) {
487
980
  cb.onError({ message: `Response truncated mid-<${truncatedTag}> tag — likely hit max_tokens or a server-side cutoff. Try again, shorten the request, or raise the model's max_tokens.`, isWarning: true });
488
981
  }
489
982
 
490
- messages.push({ role: 'assistant', content: cleanedReply });
983
+ const assistantMsg = { role: 'assistant', content: cleanedReply };
984
+ if (isNativeCall) assistantMsg.tool_calls = nativeToolCalls;
985
+ messages.push(assistantMsg);
491
986
  // When showThink is off and the turn has tool calls, suppress the text bubble —
492
987
  // pre-tool reasoning is noise, tool result bubbles already convey what happened.
493
988
  const displayReply = (!showThink && toolCalls.length > 0) ? '' : cleanedReply;
494
989
  if (cb.onAssistantMessage) cb.onAssistantMessage(displayReply);
495
990
 
496
- // If nothing meaningful came back (no text to show, no tools to run) but
497
- // the reply string wasn't strictly empty, it's usually model wrapper
498
- // noise or a stripped-only response. Still a dead-end for the user.
499
- if (toolCalls.length === 0 && !cleanedReply.trim()) {
500
- if (cb.onError) {
501
- cb.onError({ message: 'Agent reply had no visible content and no actions — stopping.', isWarning: true });
502
- }
503
- break;
504
- }
505
-
506
991
  if (toolCalls.length === 0) {
507
- // Model narrated next steps but didn't emit a tool tag. Happens when the
508
- // model ends a plan with "Let me do that for you." and stops. If we just
509
- // break, the user sees a dangling promise and thinks the connection dropped.
510
- if (iteration > 0 && /\b(let me|i['’]?ll|i will|i'?m going to|next[, ]|now[, ]? ?(i|we)|going to (create|write|build|add|make|run|do|set up|install))\b/i.test(cleanedReply)) {
992
+ // Detect malformed known-tag syntax (e.g. <create_file> with no path
993
+ // attribute, usually paired with nonsense like <attrs: path=...> inside
994
+ // the body). Push a corrective feedback message and keep looping so
995
+ // the model self-corrects instead of silently stalling on the user.
996
+ const malformed = detectMalformedTags(reply);
997
+ if (malformed.length > 0) {
998
+ const hintBlock = malformed.map((m) => `- <${m.tag}>: ${m.hint}`).join('\n');
999
+ const summary = malformed.map((m) => `<${m.tag}>`).join(', ');
511
1000
  if (cb.onError) {
512
- cb.onError({ message: 'Agent described next steps but did not emit a tool call. Reply "continue" (or similar) to push it forward, or restart if it keeps stalling.', isWarning: true });
1001
+ cb.onError({ message: `Detected malformed tool tag(s): ${summary}. Asking the model to retry with correct syntax.`, isWarning: true });
513
1002
  }
1003
+ messages.push({
1004
+ role: 'user',
1005
+ content: `Your last response contained malformed tool tags that the parser could not execute:\n\n${hintBlock}\n\nRe-emit the tool calls using the exact syntax above. Do not nest pseudo-tags like <attrs: ...> inside the body.`,
1006
+ });
1007
+ continue;
514
1008
  }
1009
+
1010
+ // No tool calls and non-empty content (the empty case was already
1011
+ // handled by the `!reply` guard above). This is the model's final
1012
+ // answer for this turn — end the loop and return control to the user.
515
1013
  break;
516
1014
  }
517
1015
  if (isAborted()) break;
518
1016
 
519
1017
  if (!cb.onToolStart) {
520
- process.stdout.write(`\n ${FG_TEAL}◆${RST} ${FG_GRAY}Found ${toolCalls.length} action(s) to execute${RST}\n`);
1018
+ writer.scrollback(`\n ${FG_TEAL}◆${RST} ${FG_GRAY}Found ${toolCalls.length} action(s) to execute${RST}`);
521
1019
  }
522
1020
 
523
1021
  const results = [];
1022
+ const debugEntries = debug ? [] : null;
524
1023
  let aborted = false;
1024
+ // Per-invocation id. Paired across onToolStart/onToolEnd so the UI
1025
+ // layer can track each concurrent tool's activity-region slot and
1026
+ // commit its final line atomically via endActivity. Monotonic —
1027
+ // never reused even if the agent runs the same tag twice.
1028
+ let invocationCounter = 0;
525
1029
 
526
1030
  for (const call of toolCalls) {
527
1031
  if (isAborted()) { aborted = true; break; }
@@ -529,8 +1033,11 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
529
1033
  const tag = call[0] || 'unknown';
530
1034
  const arg = call[1] || '';
531
1035
  const toolStart = Date.now();
1036
+ const invocationId = `tool-${iteration}-${invocationCounter++}-${tag}`;
1037
+ const attrs = _attrsFromCall(call);
1038
+ const startCtx = { id: invocationId, call, attrs, startedAt: toolStart };
532
1039
 
533
- if (cb.onToolStart) cb.onToolStart(tag, arg);
1040
+ if (cb.onToolStart) cb.onToolStart(tag, arg, startCtx);
534
1041
 
535
1042
  try {
536
1043
  if (tag === 'shell') {
@@ -538,16 +1045,29 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
538
1045
  const ms = Date.now() - toolStart;
539
1046
  if (shellResult.stderr === 'Permission denied by user') {
540
1047
  const resultStr = `Command \`${arg}\`: Permission denied by user.`;
541
- if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms);
1048
+ if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms, { id: invocationId, call, attrs, meta: null, error: { message: 'denied' }, denied: true });
542
1049
  results.push(resultStr);
1050
+ if (debugEntries) debugEntries.push({ tag, call, ms, status: 'denied', exitCode: null, result: resultStr });
543
1051
  aborted = true;
544
1052
  break;
545
1053
  } else {
546
1054
  let out = shellResult.stdout;
547
1055
  if (shellResult.stderr) out += `\nSTDERR: ${shellResult.stderr}`;
548
1056
  const resultStr = `Command \`${arg}\`:\nExit code: ${shellResult.exit_code}\n${out}`;
549
- if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms);
1057
+ const meta = _metaForTool(tag, shellResult);
1058
+ const error = shellResult.exit_code !== 0
1059
+ ? { message: `exit ${shellResult.exit_code}`, code: shellResult.exit_code }
1060
+ : null;
1061
+ if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms, { id: invocationId, call, attrs, meta, error });
550
1062
  results.push(resultStr);
1063
+ if (debugEntries) debugEntries.push({
1064
+ tag,
1065
+ call,
1066
+ ms,
1067
+ status: shellResult.exit_code === 0 ? 'ok' : 'nonzero_exit',
1068
+ exitCode: shellResult.exit_code,
1069
+ result: resultStr,
1070
+ });
551
1071
  }
552
1072
  continue;
553
1073
  }
@@ -557,28 +1077,87 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
557
1077
 
558
1078
  if (fileResult.error === 'Permission denied') {
559
1079
  const resultStr = `${tag} ${call[1] || ''}: Permission denied by user.`;
560
- if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms);
1080
+ if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms, { id: invocationId, call, attrs, meta: null, error: { message: 'denied' }, denied: true });
561
1081
  results.push(resultStr);
1082
+ if (debugEntries) debugEntries.push({ tag, call, ms, status: 'denied', exitCode: null, result: resultStr });
562
1083
  aborted = true;
563
1084
  break;
564
1085
  } else {
565
1086
  const resultStr = formatFileResult(call, fileResult);
566
- if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms);
1087
+ const meta = _metaForTool(tag, fileResult);
1088
+ const error = fileResult.error
1089
+ ? { message: fileResult.error, code: fileResult.error_code || null }
1090
+ : null;
1091
+ if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms, { id: invocationId, call, attrs, meta, error });
567
1092
  results.push(resultStr);
1093
+ if (debugEntries) debugEntries.push({
1094
+ tag,
1095
+ call,
1096
+ ms,
1097
+ status: fileResult.error ? 'error' : 'ok',
1098
+ exitCode: null,
1099
+ result: resultStr,
1100
+ });
568
1101
  }
569
1102
  } catch (err) {
570
1103
  const ms = Date.now() - toolStart;
571
- if (cb.onToolEnd) cb.onToolEnd(tag, `Error: ${err.message}`, ms);
1104
+ if (cb.onToolEnd) cb.onToolEnd(tag, `Error: ${err.message}`, ms, { id: invocationId, call, attrs, meta: null, error: err });
572
1105
  if (cb.onError) {
573
1106
  cb.onError({ message: `Tool error (${tag}): ${err.message}`, isWarning: true });
574
1107
  } else {
575
- process.stdout.write(`\n ${THEME.warn}⚠ Tool error (${tag}): ${err.message}${THEME.reset}\n`);
1108
+ messages.toolError(tag, err.message);
576
1109
  }
577
1110
  logToolCall(tag, { args: call.slice(1) }, false, 'error');
578
1111
  results.push(`${tag}: Error — ${err.message}`);
1112
+ if (debugEntries) debugEntries.push({ tag, call, ms, status: 'exception', exitCode: null, result: `Error — ${err.message}` });
579
1113
  }
580
1114
  }
581
1115
 
1116
+ if (debug && debugEntries && debugEntries.length > 0) {
1117
+ const totalMs = debugEntries.reduce((s, e) => s + (e.ms || 0), 0);
1118
+ const statusCounts = debugEntries.reduce((acc, e) => {
1119
+ acc[e.status] = (acc[e.status] || 0) + 1;
1120
+ return acc;
1121
+ }, {});
1122
+ const statusSummary = Object.entries(statusCounts)
1123
+ .map(([k, v]) => `${k}=${v}`)
1124
+ .join(', ');
1125
+
1126
+ const entries = debugEntries.map((e, idx) => {
1127
+ const argsPreview = (e.call || []).slice(1).map((a) => {
1128
+ if (a === undefined || a === null) return '';
1129
+ const s = String(a).replace(/\s+/g, ' ').trim();
1130
+ return s.length > 120 ? s.slice(0, 117) + '...' : s;
1131
+ }).filter((s) => s.length > 0).join(' │ ');
1132
+ const rows = [
1133
+ ['tag:', e.tag],
1134
+ ['args:', argsPreview || '(none)'],
1135
+ ['status:', e.status + (e.exitCode !== null && e.exitCode !== undefined ? ` (exit=${e.exitCode})` : '')],
1136
+ ['latency_ms:', e.ms],
1137
+ ];
1138
+ return {
1139
+ title: `TOOL ${idx + 1}/${debugEntries.length}`,
1140
+ rows,
1141
+ body: truncateForDebug(e.result),
1142
+ };
1143
+ });
1144
+
1145
+ const block = formatDebugBlock({
1146
+ title: 'DEBUG TOOL RESULTS',
1147
+ iteration: iteration + 1,
1148
+ blocks: [
1149
+ ['SUMMARY', [
1150
+ ['tools_executed:', debugEntries.length],
1151
+ ['total_latency_ms:', totalMs],
1152
+ ['status_breakdown:', statusSummary],
1153
+ ['aborted:', aborted ? 'yes' : 'no'],
1154
+ ]],
1155
+ ],
1156
+ entries,
1157
+ });
1158
+ emitDebug(block);
1159
+ }
1160
+
582
1161
  if (aborted) {
583
1162
  const warnMsg = isAborted()
584
1163
  ? 'Agent interrupted.'
@@ -586,24 +1165,36 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
586
1165
  if (cb.onError) {
587
1166
  cb.onError({ message: warnMsg, isWarning: true });
588
1167
  } else {
589
- process.stdout.write(`\n ${FG_YELLOW}⚠${RST} ${FG_GRAY}${warnMsg}${RST}`);
1168
+ messages.sysWarn(warnMsg);
590
1169
  }
591
1170
  // Push whatever results accumulated before the denial so the LLM has
592
1171
  // context if the user asks to continue.
593
1172
  if (results.length > 0) {
594
- messages.push({
595
- role: 'user',
596
- content: `Tool execution results (partial stopped after user denied an action):\n\n${results.join('\n\n')}`,
597
- });
1173
+ if (isNativeCall) {
1174
+ for (let i = 0; i < results.length; i++) {
1175
+ messages.push({ role: 'tool', tool_call_id: nativeToolCallIds[i], content: results[i] });
1176
+ }
1177
+ } else {
1178
+ messages.push({
1179
+ role: 'user',
1180
+ content: `Tool execution results (partial — stopped after user denied an action):\n\n${results.join('\n\n')}`,
1181
+ });
1182
+ }
598
1183
  }
599
1184
  break;
600
1185
  }
601
1186
 
602
- const feedback = results.join('\n\n');
603
- messages.push({
604
- role: 'user',
605
- content: `Tool execution results:\n\n${feedback}\n\nContinue with the task. If everything is done, summarize what was accomplished.`,
606
- });
1187
+ if (isNativeCall) {
1188
+ for (let i = 0; i < results.length; i++) {
1189
+ messages.push({ role: 'tool', tool_call_id: nativeToolCallIds[i], content: results[i] });
1190
+ }
1191
+ } else {
1192
+ const feedback = results.join('\n\n');
1193
+ messages.push({
1194
+ role: 'user',
1195
+ content: `Tool execution results:\n\n${feedback}\n\nContinue with the task. If everything is done, summarize what was accomplished.`,
1196
+ });
1197
+ }
607
1198
  }
608
1199
 
609
1200
  return { messages, metrics };
@@ -616,4 +1207,5 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
616
1207
 
617
1208
  module.exports = {
618
1209
  createAgentRunner,
1210
+ formatDebugBlock,
619
1211
  };