@semalt-ai/code 1.8.1 → 1.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/agent.js CHANGED
@@ -2,8 +2,12 @@
2
2
 
3
3
  const { logToolCall } = require('./audit');
4
4
  const { Metrics } = require('./metrics');
5
- const { SYSTEM_PROMPT } = require('./prompts');
5
+ const { getSystemPrompt } = require('./prompts');
6
6
  const { TAG_REGISTRY } = require('./constants');
7
+ const { mapInvokeToCall } = require('./tools');
8
+ const { UI_THEME } = require('./ui/theme');
9
+ const { RST } = require('./ui/ansi');
10
+ const { getCols: _getCols, repeatToWidth } = require('./ui/utils');
7
11
 
8
12
  class StreamParser {
9
13
  constructor(onToken, onTagOpen, onTagContent, onTagClose) {
@@ -40,14 +44,18 @@ class StreamParser {
40
44
  const tagRaw = this.buffer.slice(1, gtIdx).trim();
41
45
  const selfClose = tagRaw.endsWith('/');
42
46
  const tagBody = selfClose ? tagRaw.slice(0, -1).trim() : tagRaw;
43
- const spaceIdx = tagBody.search(/\s/);
44
- const tagName = (spaceIdx === -1 ? tagBody : tagBody.slice(0, spaceIdx)).toLowerCase();
45
- const attrStr = spaceIdx === -1 ? '' : tagBody.slice(spaceIdx + 1);
47
+ // Split on whitespace OR `=` so both MiniMax-style `<parameter name="x">`
48
+ // and Qwen3-Coder `<parameter=x>` resolve to the same tagName.
49
+ const delimIdx = tagBody.search(/[\s=]/);
50
+ const tagName = (delimIdx === -1 ? tagBody : tagBody.slice(0, delimIdx)).toLowerCase();
51
+ const attrStr = delimIdx === -1 ? '' : tagBody.slice(delimIdx + 1);
46
52
 
47
53
  const attrs = {};
48
- const attrRe = /(\w+)="([^"]*)"/g;
54
+ const attrReDouble = /(\w+)="([^"]*)"/g;
55
+ const attrReSingle = /(\w+)='([^']*)'/g;
49
56
  let m;
50
- while ((m = attrRe.exec(attrStr)) !== null) attrs[m[1]] = m[2];
57
+ while ((m = attrReDouble.exec(attrStr)) !== null) attrs[m[1]] = m[2];
58
+ while ((m = attrReSingle.exec(attrStr)) !== null) attrs[m[1]] = m[2];
51
59
 
52
60
  this.buffer = this.buffer.slice(gtIdx + 1);
53
61
 
@@ -70,11 +78,37 @@ class StreamParser {
70
78
  } else {
71
79
  const closing = '</' + this.insideTag + '>';
72
80
  const closeIdx = this.buffer.toLowerCase().indexOf(closing);
81
+ const entry = TAG_REGISTRY[this.insideTag];
82
+ const streamInner = entry && entry.type === 'final';
73
83
  if (closeIdx === -1) {
74
- this.tagContent += this.buffer;
75
- this.buffer = '';
84
+ if (streamInner) {
85
+ // Emit content live through onToken, but hold back any trailing
86
+ // substring that could be a prefix of the closing tag (chunk
87
+ // boundary splitting `</final_answer>` into e.g. `</fin` + `al…`).
88
+ const lowBuf = this.buffer.toLowerCase();
89
+ const lowClose = closing;
90
+ let safeUpTo = this.buffer.length;
91
+ const ltIdx = lowBuf.lastIndexOf('<');
92
+ if (ltIdx !== -1) {
93
+ const tail = lowBuf.slice(ltIdx);
94
+ if (lowClose.startsWith(tail)) safeUpTo = ltIdx;
95
+ }
96
+ if (safeUpTo > 0) {
97
+ const emit = this.buffer.slice(0, safeUpTo);
98
+ this.onToken(emit);
99
+ this.tagContent += emit;
100
+ this.buffer = this.buffer.slice(safeUpTo);
101
+ }
102
+ } else {
103
+ this.tagContent += this.buffer;
104
+ this.buffer = '';
105
+ }
76
106
  break;
77
107
  }
108
+ if (streamInner) {
109
+ const emit = this.buffer.slice(0, closeIdx);
110
+ if (emit) this.onToken(emit);
111
+ }
78
112
  this.tagContent += this.buffer.slice(0, closeIdx);
79
113
  this.buffer = this.buffer.slice(closeIdx + closing.length);
80
114
  this.onTagContent(this.insideTag, this.tagContent);
@@ -99,7 +133,7 @@ function cleanAssistantContent(raw) {
99
133
  }
100
134
 
101
135
  for (const [tag, entry] of Object.entries(TAG_REGISTRY)) {
102
- if (entry.type === 'strip') {
136
+ if (entry.type === 'strip' || entry.type === 'final') {
103
137
  // Strip only the wrapper tags; keep the inner content
104
138
  text = text.replace(new RegExp(`<${tag}[^>]*>`, 'gi'), '');
105
139
  text = text.replace(new RegExp(`<\\/${tag}>`, 'gi'), '');
@@ -112,13 +146,142 @@ function cleanAssistantContent(raw) {
112
146
  }
113
147
  }
114
148
 
115
- text = text.replace(/<\/?[a-zA-Z_][a-zA-Z0-9_]*(\s[^>]*)?>/g, '');
116
- text = text.replace(/\n{2,}/g, '\n');
117
-
118
149
  return text.trim();
119
150
  }
120
151
 
121
- function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agentExecFile, ui }) {
152
+ function estimateTokens(text) {
153
+ return Math.floor((text || '').length / 4);
154
+ }
155
+
156
+ function detectFormat(reply, toolCalls) {
157
+ if (!reply || !reply.trim()) return 'empty';
158
+ if (/<(minimax:tool_call|qwen:tool_call|tool_call|function_call)\b/i.test(reply)) return 'tool_call';
159
+ if (toolCalls && toolCalls.length > 0) return 'command';
160
+ return 'text';
161
+ }
162
+
163
+ // Spot known-tag names that are present in the reply but didn't produce any
164
+ // parsed tool calls. Common culprits are attribute-required tags (create_file,
165
+ // write_file, etc.) emitted without the `path` attribute, usually because the
166
+ // model put nonsense like `<attrs: path=...>` inside the body. Returning a
167
+ // specific hint lets the agent loop push a corrective user message and keep
168
+ // going instead of silently stalling.
169
+ function detectMalformedTags(text) {
170
+ const issues = [];
171
+ const PATH_REQUIRED = ['create_file', 'write_file', 'append_file'];
172
+ for (const tag of PATH_REQUIRED) {
173
+ const re = new RegExp(`<${tag}\\b([^>]*)>`, 'g');
174
+ for (const m of text.matchAll(re)) {
175
+ const attrs = m[1] || '';
176
+ if (!/\bpath\s*=\s*['"]/.test(attrs)) {
177
+ issues.push({
178
+ tag,
179
+ hint: `Use <${tag} path="/absolute/path">FILE CONTENT HERE</${tag}>. Put the path as a quoted attribute on the opening tag, and the actual file contents between the tags — not a nested pseudo-tag.`,
180
+ });
181
+ }
182
+ }
183
+ }
184
+ // Deduplicate by tag so we don't spam the model with the same hint per occurrence.
185
+ const seen = new Set();
186
+ return issues.filter((i) => (seen.has(i.tag) ? false : (seen.add(i.tag), true)));
187
+ }
188
+
189
+ function previewCommand(call) {
190
+ if (!call) return 'NONE — CLIENT WILL STALL';
191
+ const tag = call[0] || 'unknown';
192
+ const arg = call[1] || '';
193
+ const oneLine = String(arg).replace(/\s+/g, ' ').trim();
194
+ const trimmed = oneLine.length > 80 ? oneLine.slice(0, 77) + '...' : oneLine;
195
+ return trimmed ? `<${tag}> ${trimmed}` : `<${tag}>`;
196
+ }
197
+
198
+ function formatDebugBlock(sections) {
199
+ // The debug block is rendered as a tool-output message in the TUI. Chat
200
+ // history indents output by 5 cols; account for that so the frame still
201
+ // reaches the visible right edge instead of wrapping.
202
+ const totalW = Math.max(40, _getCols());
203
+ const frameW = Math.max(20, totalW - 7);
204
+ const H = UI_THEME.muted; // frame glyphs
205
+ const L = UI_THEME.info; // block label ("DEBUG TOOL RESULTS")
206
+ const S = UI_THEME.subtle; // iteration tag, secondary text
207
+ const K = UI_THEME.accent; // section-header bullets ("▸ SUMMARY")
208
+ const W = UI_THEME.warning; // warning markers
209
+
210
+ const header = sections.title || 'DEBUG';
211
+ const iter = `[iteration ${sections.iteration}]`;
212
+
213
+ const out = [];
214
+ // Top frame: " LABEL [iteration N] ═════… " — fills to width.
215
+ const headPrefix = `${H}══ ${RST}${L}${header}${RST} ${S}${iter}${RST} `;
216
+ const headVisible = 4 + header.length + 1 + iter.length + 1; // "══ " + label + " " + iter + " "
217
+ out.push(`${headPrefix}${H}${repeatToWidth('═', frameW, headVisible)}${RST}`);
218
+
219
+ const pushSection = (title) => out.push(`${K}▸ ${RST}${L}${title}${RST}`);
220
+
221
+ for (const [title, rows] of sections.blocks) {
222
+ pushSection(title);
223
+ const width = Math.max(...rows.map((r) => r[0].length));
224
+ for (const [k, v] of rows) {
225
+ const val = (v === undefined || v === null) ? '—' : String(v);
226
+ out.push(` ${S}${k.padEnd(width + 2)}${RST}${val}`);
227
+ }
228
+ out.push('');
229
+ }
230
+ if (sections.raw !== undefined) {
231
+ pushSection('RAW RESPONSE');
232
+ out.push(sections.raw ? sections.raw : `${S}(empty)${RST}`);
233
+ out.push('');
234
+ if (sections.rawFooter && sections.rawFooter.length) {
235
+ pushSection('STREAM FOOTER');
236
+ const width = Math.max(...sections.rawFooter.map((r) => r[0].length));
237
+ for (const [k, v] of sections.rawFooter) {
238
+ const val = (v === undefined || v === null) ? '—' : String(v);
239
+ out.push(` ${S}${k.padEnd(width + 2)}${RST}${val}`);
240
+ }
241
+ out.push('');
242
+ }
243
+ }
244
+ if (sections.entries && sections.entries.length) {
245
+ for (const entry of sections.entries) {
246
+ pushSection(entry.title);
247
+ if (entry.rows) {
248
+ const width = Math.max(...entry.rows.map((r) => r[0].length));
249
+ for (const [k, v] of entry.rows) {
250
+ const val = (v === undefined || v === null) ? '—' : String(v);
251
+ out.push(` ${S}${k.padEnd(width + 2)}${RST}${val}`);
252
+ }
253
+ }
254
+ if (entry.body !== undefined) {
255
+ const body = entry.body === '' ? `${S}(empty)${RST}` : entry.body;
256
+ for (const line of String(body).split('\n')) out.push(' ' + line);
257
+ }
258
+ out.push('');
259
+ }
260
+ }
261
+ if (sections.warnings && sections.warnings.length) {
262
+ pushSection('WARNINGS');
263
+ for (const w of sections.warnings) out.push(` ${W}⚠ ${w}${RST}`);
264
+ out.push('');
265
+ }
266
+ // Bottom frame: plain full-width rule in muted.
267
+ out.push(`${H}${repeatToWidth('═', frameW)}${RST}`);
268
+ return out.join('\n');
269
+ }
270
+
271
+ function truncateForDebug(text, maxLines = 40, maxChars = 2000) {
272
+ if (text === undefined || text === null) return '';
273
+ let s = String(text);
274
+ if (s.length > maxChars) {
275
+ s = s.slice(0, maxChars) + `\n… [truncated, ${String(text).length - maxChars} more chars]`;
276
+ }
277
+ const lines = s.split('\n');
278
+ if (lines.length > maxLines) {
279
+ return lines.slice(0, maxLines).join('\n') + `\n… [truncated, ${lines.length - maxLines} more lines]`;
280
+ }
281
+ return s;
282
+ }
283
+
284
+ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agentExecFile, ui, getConfig }) {
122
285
  const { BOLD, FG_DARK, FG_GRAY, FG_TEAL, FG_YELLOW, RST, THEME, getCols } = ui;
123
286
 
124
287
  function formatFileResult(call, result) {
@@ -140,20 +303,8 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
140
303
  case 'file_stat':
141
304
  return `Stat ${result.path}: size=${result.size_kb} KB, mtime=${result.mtime}, type=${result.type}, mode=${result.mode}`;
142
305
  case 'http_get': {
143
- if (result.chunked) {
144
- return `HTTP GET ${args[0]} (${result.status_code}) [Part 1/${result.total_parts}]:\n${result.body}\n\n[Response is large and was split into ${result.total_parts} parts. Use <http_get_next key="${args[0]}"/> to retrieve the next part.]`;
145
- }
146
306
  return `HTTP GET ${args[0]} (${result.status_code}):\n${result.body}`;
147
307
  }
148
- case 'http_get_next': {
149
- if (result.done && !result.body) {
150
- return `http_get_next "${args[0]}": No more content available.`;
151
- }
152
- const more = result.done
153
- ? ' [Final part]'
154
- : `\n\n[Use <http_get_next key="${args[0]}"/> to retrieve part ${result.part + 1}/${result.total_parts}.]`;
155
- return `HTTP content "${args[0]}" [Part ${result.part}/${result.total_parts}]:\n${result.body}${more}`;
156
- }
157
308
  case 'ask_user':
158
309
  return `User answered "${result.question}": ${result.answer}`;
159
310
  case 'store_memory':
@@ -257,10 +408,6 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
257
408
  const raw = attrs.raw || '';
258
409
  return formatFileResult(['http_get', url, raw], await agentExecFile('http_get', url, raw));
259
410
  }
260
- case 'http_get_next': {
261
- const key = attrs.key || content;
262
- return formatFileResult(['http_get_next', key], await agentExecFile('http_get_next', key));
263
- }
264
411
  case 'ask_user': {
265
412
  const q = attrs.question || content;
266
413
  return formatFileResult(['ask_user', q], await agentExecFile('ask_user', q));
@@ -295,9 +442,9 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
295
442
  return;
296
443
  }
297
444
 
298
- if (entry.type === 'strip') return;
445
+ if (entry.type === 'strip' || entry.type === 'final') return;
299
446
 
300
- // Tool execution happens in the toolCalls loop after streaming; handleTag only handles visual/strip.
447
+ // Tool execution happens in the toolCalls loop after streaming; handleTag only handles visual/strip/final.
301
448
  }
302
449
 
303
450
  async function runAgentLoop(messages, model, maxIterations = Infinity, tokenLimit = null, opts = {}) {
@@ -312,9 +459,32 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
312
459
  const isAborted = getAbortFlag || (() => false);
313
460
  const cb = callbacks;
314
461
  const metrics = new Metrics(tokenLimit);
315
- const activeSystemPrompt = overrideSystemPrompt !== null ? overrideSystemPrompt : SYSTEM_PROMPT;
316
462
  const mode = overrideMode || 'system_role';
317
463
 
464
+ // Route debug blocks to the UI callback when present (interactive TUI mode
465
+ // overwrites stderr with redraws, losing the output). Fall back to stderr
466
+ // for one-shot/non-TTY flows where there's no UI to host the block.
467
+ const emitDebug = (block) => {
468
+ if (typeof cb.onDebug === 'function') cb.onDebug(block);
469
+ else process.stderr.write('\n' + block + '\n');
470
+ };
471
+
472
+ // Resolve native_tools from the active profile (matched by api_base+model).
473
+ // Fallback to true if no matching profile — mirrors config-normalization default.
474
+ const _cfg = typeof getConfig === 'function' ? getConfig() : {};
475
+ const _profile = Array.isArray(_cfg.models)
476
+ ? _cfg.models.find((p) => p && p.api_base === _cfg.api_base && p.model === model)
477
+ : null;
478
+ const nativeTools = _profile && _profile.native_tools === false ? false : true;
479
+
480
+ const activeSystemPrompt = overrideSystemPrompt !== null ? overrideSystemPrompt : getSystemPrompt(nativeTools);
481
+
482
+ // Response contract: every model response must end with a tool call or
483
+ // <final_answer>...</final_answer>. Anything else is degraded — push a
484
+ // synthetic nudge and retry, capped to prevent runaway loops.
485
+ const MAX_DEGRADED_RETRIES = 2;
486
+ let degradedRetries = 0;
487
+
318
488
  for (let iteration = 0; iteration < maxIterations; iteration++) {
319
489
  if (isAborted()) break;
320
490
  const linePrefix = `${FG_TEAL}${BOLD}◆ ${RST}`;
@@ -366,49 +536,96 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
366
536
  }
367
537
  : null;
368
538
 
369
- if (debug) {
370
- const header = `\n───── messages sent to agent (iteration ${iteration + 1}) ─────\n`;
371
- const footer = `\n───── end messages ─────\n`;
372
- process.stderr.write(header + JSON.stringify(messagesWithSystem, null, 2) + footer);
373
- }
374
-
375
539
  const MAX_RETRIES = 3;
540
+ const RETRYABLE_STATUS = new Set([408, 425, 429, 500, 502, 503, 504]);
541
+ const NON_RETRYABLE_STATUS = new Set([400, 401, 403, 404, 413, 422]);
376
542
  let result = null;
377
543
  let lastApiErr = null;
378
544
 
379
- for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
380
- if (attempt === 1) {
381
- callbacks.onRequestSent?.();
382
- } else {
383
- cb.onRetry?.(attempt, MAX_RETRIES);
384
- await new Promise((r) => setTimeout(r, 1000));
385
- }
386
- try {
387
- result = await chatStream(messagesWithSystem, {
388
- model,
389
- linePrefix: wrappedOnToken ? '' : linePrefix,
390
- showThink,
391
- onToken: wrappedOnToken,
392
- silent: !!wrappedOnToken,
393
- });
394
- lastApiErr = null;
395
- break;
396
- } catch (err) {
397
- lastApiErr = err;
398
- if (debug) {
399
- const header = `\n───── raw http error (iteration ${iteration + 1}, attempt ${attempt}/${MAX_RETRIES}) ─────\n`;
400
- const footer = `\n───── end raw http error ─────\n`;
401
- const status = err.statusCode ? `HTTP ${err.statusCode}` : 'network error';
402
- const headerLines = err.responseHeaders
403
- ? Object.entries(err.responseHeaders).map(([k, v]) => `${k}: ${v}`).join('\n')
404
- : '';
405
- const body = err.rawBody !== undefined ? err.rawBody : (err.stack || err.message || String(err));
406
- const parts = [status];
407
- if (headerLines) parts.push(headerLines);
408
- parts.push(body || '(empty body)');
409
- process.stderr.write(header + parts.join('\n\n') + footer);
545
+ // AbortController per iteration: watcher polls isAborted() every 50ms
546
+ // and flips controller.abort() as soon as the flag flips.
547
+ const controller = new AbortController();
548
+ const abortWatcher = setInterval(() => {
549
+ if (isAborted() && !controller.signal.aborted) controller.abort();
550
+ }, 50);
551
+
552
+ try {
553
+ for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
554
+ if (attempt === 1) {
555
+ callbacks.onRequestSent?.();
556
+ }
557
+ try {
558
+ result = await chatStream(messagesWithSystem, {
559
+ model,
560
+ nativeTools,
561
+ linePrefix: wrappedOnToken ? '' : linePrefix,
562
+ showThink,
563
+ onToken: wrappedOnToken,
564
+ silent: !!wrappedOnToken,
565
+ signal: controller.signal,
566
+ onTrim: (info) => {
567
+ // Setter (rather than re-reading config per iteration) keeps
568
+ // the Metrics instance authoritative: a 400-overflow discovery
569
+ // mid-loop immediately updates 85%-warning thresholds and the
570
+ // status bar without threading config access into agent.js.
571
+ if (info && info.reason === 'overflow-400' && typeof info.limit === 'number' && info.limit > 0) {
572
+ metrics.setModelTokenLimit(info.limit);
573
+ }
574
+ if (cb.onError) {
575
+ cb.onError({
576
+ message: `Context trimmed (${info.reason}): ${info.dropped} message(s) dropped, kept ~${info.keptTokens} tokens (limit ${info.limit}).`,
577
+ isWarning: true,
578
+ });
579
+ }
580
+ },
581
+ });
582
+ lastApiErr = null;
583
+ break;
584
+ } catch (err) {
585
+ lastApiErr = err;
586
+ if (debug) {
587
+ const status = err.statusCode ? `HTTP ${err.statusCode}` : 'network error';
588
+ const body = err.rawBody !== undefined ? err.rawBody : (err.stack || err.message || String(err));
589
+ const block = formatDebugBlock({
590
+ iteration: iteration + 1,
591
+ blocks: [
592
+ ['REQUEST', [
593
+ ['model:', model],
594
+ ['endpoint:', err.endpoint || '(unknown)'],
595
+ ['timestamp:', new Date().toISOString()],
596
+ ['native_tools:', nativeTools],
597
+ ['attempt:', `${attempt}/${MAX_RETRIES}`],
598
+ ]],
599
+ ['RESPONSE', [
600
+ ['status:', status],
601
+ ['detail:', err.detail || ''],
602
+ ]],
603
+ ],
604
+ raw: body || '(empty body)',
605
+ warnings: [`HTTP error on attempt ${attempt}/${MAX_RETRIES}: ${err.message}`],
606
+ });
607
+ emitDebug(block);
608
+ }
609
+ const sc = err.statusCode;
610
+ const retryable = !sc || RETRYABLE_STATUS.has(sc);
611
+ if (!retryable || NON_RETRYABLE_STATUS.has(sc)) break;
612
+ if (attempt >= MAX_RETRIES) break;
613
+ // Backoff: base 1000ms doubling (1s, 2s, 4s). For 429, honor
614
+ // Retry-After header when it's a plausible seconds value.
615
+ let delayMs = 1000 * Math.pow(2, attempt - 1);
616
+ if (sc === 429) {
617
+ const ra = err.responseHeaders && err.responseHeaders['retry-after'];
618
+ const raNum = ra !== undefined ? Number(ra) : NaN;
619
+ if (Number.isFinite(raNum) && raNum >= 0 && raNum <= 30) {
620
+ delayMs = Math.round(raNum * 1000);
621
+ }
622
+ }
623
+ cb.onRetry?.(attempt + 1, MAX_RETRIES);
624
+ await new Promise((r) => setTimeout(r, delayMs));
410
625
  }
411
626
  }
627
+ } finally {
628
+ clearInterval(abortWatcher);
412
629
  }
413
630
 
414
631
  if (lastApiErr) {
@@ -420,26 +637,25 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
420
637
  const usage = result ? result.usage : null;
421
638
  metrics.endTurn(usage, model);
422
639
 
423
- if (debug) {
424
- const header = `\n───── raw ai response (iteration ${iteration + 1}) ─────\n`;
425
- const footer = `\n───── end raw response ─────\n`;
426
- process.stderr.write(header + (reply || '(empty)') + footer);
427
- }
428
-
429
640
  if (cb.onMetricsUpdate) {
430
641
  cb.onMetricsUpdate({
431
642
  totalTokens: metrics.totalTokens(),
432
643
  contextTokens: metrics.contextTokens(),
433
644
  turns: metrics.turns.length,
645
+ tokenLimit: metrics.tokenLimitStatus(),
434
646
  });
435
647
  }
436
648
 
437
649
  const limitStatus = metrics.tokenLimitStatus();
438
- if (limitStatus !== null && limitStatus.pct >= 85) {
650
+ if (limitStatus !== null && limitStatus.pct !== null && limitStatus.pct >= 85) {
439
651
  const warnMsg = `Context at ${limitStatus.pct}% of limit (${limitStatus.used}/${limitStatus.limit} tokens). Consider /compact.`;
440
652
  if (cb.onError) {
441
653
  cb.onError({ message: warnMsg, isWarning: true });
442
654
  } else {
655
+ // Non-TUI fallback (cb.onError is unset only for one-shot CLI
656
+ // commands like `cmdCode`, which don't run the shared live-region
657
+ // writer). Direct stdout write is safe here: no status-bar timer
658
+ // or bubble renderer is competing for stdout.
443
659
  process.stdout.write(
444
660
  `\n ${THEME.warn}⚠ ${warnMsg}${THEME.reset}\n`
445
661
  );
@@ -447,6 +663,32 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
447
663
  }
448
664
 
449
665
  if (!reply) {
666
+ if (debug && result) {
667
+ const block = formatDebugBlock({
668
+ iteration: iteration + 1,
669
+ blocks: [
670
+ ['REQUEST', [
671
+ ['model:', result.request?.model || model],
672
+ ['endpoint:', result.endpoint || '(unknown)'],
673
+ ['timestamp:', new Date().toISOString()],
674
+ ['native_tools:', result.request?.native_tools ?? nativeTools],
675
+ ]],
676
+ ['RESPONSE', [
677
+ ['finish_reason:', result.finish_reason || '(unknown)'],
678
+ ['completion_tokens:', usage?.completion_tokens ?? 0],
679
+ ['latency_ms:', result.elapsed_ms ?? '?'],
680
+ ]],
681
+ ['PARSED', [
682
+ ['detected_format:', 'empty'],
683
+ ['commands_found:', 0],
684
+ ['first_command:', 'NONE — CLIENT WILL STALL'],
685
+ ]],
686
+ ],
687
+ raw: '',
688
+ warnings: ['Agent returned an empty response — connection to model may have dropped'],
689
+ });
690
+ emitDebug(block);
691
+ }
450
692
  // Empty reply from the model — stream resolved with no content and no
451
693
  // tool_calls. Most common causes: server-side disconnect mid-stream,
452
694
  // context-window overflow that slipped past the 400/413 handler, or a
@@ -459,68 +701,208 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
459
701
  break;
460
702
  }
461
703
 
462
- const toolCalls = extractToolCalls(reply);
704
+ // Native function-calling path: the model returned structured tool_calls.
705
+ // Convert them to the internal [action, ...args] tuple shape so downstream
706
+ // dispatch is unchanged, and remember the OpenAI id per call so results
707
+ // can be routed back to their tool_call on the next turn.
708
+ const nativeToolCalls = Array.isArray(result?.toolCalls) ? result.toolCalls : [];
709
+ let toolCalls;
710
+ let nativeToolCallIds = [];
711
+ if (nativeToolCalls.length > 0) {
712
+ toolCalls = [];
713
+ for (const tc of nativeToolCalls) {
714
+ let args;
715
+ try {
716
+ args = tc.function?.arguments ? JSON.parse(tc.function.arguments) : {};
717
+ } catch (err) {
718
+ if (cb.onError) cb.onError({ message: `Failed to parse tool_call arguments for ${tc.function?.name || '(unknown)'}: ${err.message}`, isWarning: true });
719
+ continue;
720
+ }
721
+ const call = mapInvokeToCall(tc.function?.name, args);
722
+ if (call) {
723
+ toolCalls.push(call);
724
+ nativeToolCallIds.push(tc.id);
725
+ }
726
+ }
727
+ } else {
728
+ toolCalls = extractToolCalls(reply, { model });
729
+ }
730
+ const isNativeCall = nativeToolCalls.length > 0;
463
731
  const cleanedReply = cleanAssistantContent(reply);
732
+ // Protocol contract: a valid response ends with a tool call OR a
733
+ // <final_answer>...</final_answer> block. Anything else is degraded.
734
+ const hasFinal = /<final_answer\b[\s\S]*?<\/final_answer>/i.test(reply);
735
+
736
+ if (debug && result) {
737
+ const lastUserMsg = [...messagesWithSystem].reverse().find((m) => m.role === 'user');
738
+ const sysMsg = messagesWithSystem.find((m) => m.role === 'system');
739
+ const systemPromptTok = estimateTokens(sysMsg ? sysMsg.content : activeSystemPrompt);
740
+ const currentInputTok = estimateTokens(lastUserMsg ? lastUserMsg.content : '');
741
+ const historyTok = messagesWithSystem.reduce((sum, m) => {
742
+ if (m === sysMsg || m === lastUserMsg) return sum;
743
+ return sum + estimateTokens(m.content || '');
744
+ }, 0);
745
+ const promptTokens = usage && usage.prompt_tokens != null
746
+ ? usage.prompt_tokens
747
+ : systemPromptTok + historyTok + currentInputTok;
748
+ const completionTokens = usage && usage.completion_tokens != null
749
+ ? usage.completion_tokens
750
+ : estimateTokens(reply);
751
+ const thinkingTokens = result.reasoning ? estimateTokens(result.reasoning) : 0;
752
+ const visibleTokens = Math.max(completionTokens - thinkingTokens, 0);
753
+ const contextLimit = tokenLimit || null;
754
+ const ctxPct = contextLimit ? Math.round((promptTokens / contextLimit) * 100) : null;
755
+ const detected = detectFormat(reply, toolCalls);
756
+ const firstCmd = toolCalls.length > 0 ? previewCommand(toolCalls[0]) : previewCommand(null);
757
+ const toolTags = Object.entries(TAG_REGISTRY)
758
+ .filter(([, e]) => e.type === 'tool')
759
+ .map(([t]) => t);
464
760
 
465
- // Detect mid-tag truncation: an opening tool tag in the raw reply with
466
- // no matching close. This happens when the model streams a large
467
- // `<write_file>…` body and hits max_tokens or a server-side cutoff
468
- // before the closing tag arrives. cleanAssistantContent strips the
469
- // unclosed tag + its trailing content, so cleanedReply looks
470
- // legitimate (just the planning preamble) and extractToolCalls finds
471
- // zero calls — the loop would break silently and the user sees the
472
- // planning text followed by nothing. Surface it so the user can retry,
473
- // shorten the request, or bump max_tokens.
474
- let truncatedTag = null;
475
- for (const [tag, entry] of Object.entries(TAG_REGISTRY)) {
476
- if (entry.type !== 'tool') continue;
477
- let opens = 0;
478
- for (const m of reply.matchAll(new RegExp(`<${tag}([^>]*)>`, 'gi'))) {
479
- // Skip self-closing (`<tag .../>`) — they don't need a matching close.
480
- if (!m[1].trimEnd().endsWith('/')) opens++;
761
+ const warnings = [];
762
+ if (result.finish_reason === 'length') warnings.push('finish_reason=length → response truncated, increase max_tokens');
763
+ if (detected === 'tool_call' && toolCalls.length === 0) {
764
+ warnings.push('commands_found=0 agent emitted no command, client will stall');
481
765
  }
482
- if (opens === 0) continue;
483
- const closes = (reply.match(new RegExp(`<\\/${tag}>`, 'gi')) || []).length;
484
- if (opens > closes) { truncatedTag = tag; break; }
766
+ if (ctxPct !== null && ctxPct > 80) warnings.push(`context_used=${ctxPct}% → approaching context limit`);
767
+
768
+ const block = formatDebugBlock({
769
+ iteration: iteration + 1,
770
+ blocks: [
771
+ ['REQUEST', [
772
+ ['model:', result.request?.model || model],
773
+ ['endpoint:', result.endpoint || '(unknown)'],
774
+ ['timestamp:', new Date().toISOString()],
775
+ ['native_tools:', result.request?.native_tools ?? nativeTools],
776
+ ]],
777
+ ['CONTEXT', [
778
+ ['total_messages:', messagesWithSystem.length],
779
+ ['system_prompt_tok:', systemPromptTok],
780
+ ['history_tok:', historyTok],
781
+ ['current_input_tok:', currentInputTok],
782
+ ['context_used:', contextLimit
783
+ ? `${promptTokens} / ${contextLimit} (${ctxPct}%)`
784
+ : `${promptTokens} / unknown`],
785
+ ]],
786
+ ['PARAMETERS', [
787
+ ['max_tokens:', result.request?.max_tokens ?? '(default)'],
788
+ ['temperature:', result.request?.temperature ?? '(default)'],
789
+ ['stop_sequences:', JSON.stringify(result.request?.stop || [])],
790
+ ['reasoning_effort:', '(n/a)'],
791
+ ['tools_enabled:', `${toolTags.length} XML tags (via system prompt)`],
792
+ ]],
793
+ ['RESPONSE', [
794
+ ['finish_reason:', result.finish_reason || '(unknown)'],
795
+ ['completion_tokens:', completionTokens],
796
+ ['thinking_tokens:', thinkingTokens],
797
+ ['visible_tokens:', visibleTokens],
798
+ ['latency_ms:', result.elapsed_ms ?? '?'],
799
+ ]],
800
+ ['PARSED', [
801
+ ['detected_format:', detected],
802
+ ['commands_found:', toolCalls.length],
803
+ ['first_command:', firstCmd],
804
+ ]],
805
+ ],
806
+ raw: reply || '',
807
+ rawFooter: [
808
+ ['finish_reason:', result.finish_reason || '(unknown)'],
809
+ ['total_tokens:', result.usage_from_provider && result.usage
810
+ ? (result.usage.prompt_tokens || 0) + (result.usage.completion_tokens || 0)
811
+ : '(no usage)'],
812
+ ['content_chars:', (result.content || '').length],
813
+ ['reasoning_chars:', (result.reasoning_details || '').length],
814
+ ['tool_calls_seen:', result.tool_calls_count > 0 ? `yes (${result.tool_calls_count})` : 'no'],
815
+ ['native_mode:', isNativeCall ? `yes (${nativeToolCalls.length} call${nativeToolCalls.length === 1 ? '' : 's'})` : 'no'],
816
+ ],
817
+ warnings,
818
+ });
819
+ emitDebug(block);
485
820
  }
821
+
822
+ // Detect mid-tag truncation: StreamParser tracks `insideTag` for the
823
+ // currently-unclosed tag. If it's still set after the stream settles,
824
+ // the response was cut off while inside a tool tag (hit max_tokens or
825
+ // a server-side cutoff). cleanAssistantContent strips the unclosed
826
+ // tag + trailing content, so extractToolCalls would find nothing and
827
+ // the loop would break silently. Surface it.
828
+ //
829
+ // When `cb.onToken` is unset (non-streaming UI), the parser was never
830
+ // fed — push the final reply through it once so `insideTag` reflects
831
+ // the terminal state.
832
+ if (!wrappedOnToken && reply) parser.push(reply);
833
+ const truncatedTag = parser.insideTag && TAG_REGISTRY[parser.insideTag]?.type === 'tool'
834
+ ? parser.insideTag
835
+ : null;
486
836
  if (truncatedTag && cb.onError) {
487
837
  cb.onError({ message: `Response truncated mid-<${truncatedTag}> tag — likely hit max_tokens or a server-side cutoff. Try again, shorten the request, or raise the model's max_tokens.`, isWarning: true });
488
838
  }
489
839
 
490
- messages.push({ role: 'assistant', content: cleanedReply });
840
+ const assistantMsg = { role: 'assistant', content: cleanedReply };
841
+ if (isNativeCall) assistantMsg.tool_calls = nativeToolCalls;
842
+ messages.push(assistantMsg);
491
843
  // When showThink is off and the turn has tool calls, suppress the text bubble —
492
844
  // pre-tool reasoning is noise, tool result bubbles already convey what happened.
493
845
  const displayReply = (!showThink && toolCalls.length > 0) ? '' : cleanedReply;
494
846
  if (cb.onAssistantMessage) cb.onAssistantMessage(displayReply);
495
847
 
496
- // If nothing meaningful came back (no text to show, no tools to run) but
497
- // the reply string wasn't strictly empty, it's usually model wrapper
498
- // noise or a stripped-only response. Still a dead-end for the user.
499
- if (toolCalls.length === 0 && !cleanedReply.trim()) {
500
- if (cb.onError) {
501
- cb.onError({ message: 'Agent reply had no visible content and no actions — stopping.', isWarning: true });
848
+ if (toolCalls.length === 0) {
849
+ // Detect malformed known-tag syntax (e.g. <create_file> with no path
850
+ // attribute, usually paired with nonsense like <attrs: path=...> inside
851
+ // the body). Push a corrective feedback message and keep looping so
852
+ // the model self-corrects instead of silently stalling on the user.
853
+ const malformed = detectMalformedTags(reply);
854
+ if (malformed.length > 0) {
855
+ const hintBlock = malformed.map((m) => `- <${m.tag}>: ${m.hint}`).join('\n');
856
+ const summary = malformed.map((m) => `<${m.tag}>`).join(', ');
857
+ if (cb.onError) {
858
+ cb.onError({ message: `Detected malformed tool tag(s): ${summary}. Asking the model to retry with correct syntax.`, isWarning: true });
859
+ }
860
+ messages.push({
861
+ role: 'user',
862
+ content: `Your last response contained malformed tool tags that the parser could not execute:\n\n${hintBlock}\n\nRe-emit the tool calls using the exact syntax above. Do not nest pseudo-tags like <attrs: ...> inside the body.`,
863
+ });
864
+ continue;
502
865
  }
503
- break;
504
- }
505
866
 
506
- if (toolCalls.length === 0) {
507
- // Model narrated next steps but didn't emit a tool tag. Happens when the
508
- // model ends a plan with "Let me do that for you." and stops. If we just
509
- // break, the user sees a dangling promise and thinks the connection dropped.
510
- if (iteration > 0 && /\b(let me|i['’]?ll|i will|i'?m going to|next[, ]|now[, ]? ?(i|we)|going to (create|write|build|add|make|run|do|set up|install))\b/i.test(cleanedReply)) {
867
+ if (hasFinal) {
868
+ // Model declared it is done — honor the protocol and terminate.
869
+ // An empty <final_answer></final_answer> is the model's choice;
870
+ // we don't police content.
871
+ degradedRetries = 0;
872
+ break;
873
+ }
874
+
875
+ // Protocol violation: neither a tool call nor a <final_answer>. Nudge
876
+ // the model to restate in-protocol, capped to prevent runaway loops.
877
+ if (degradedRetries >= MAX_DEGRADED_RETRIES) {
511
878
  if (cb.onError) {
512
- cb.onError({ message: 'Agent described next steps but did not emit a tool call. Reply "continue" (or similar) to push it forward, or restart if it keeps stalling.', isWarning: true });
879
+ cb.onError({ message: `Agent violated the response contract after ${MAX_DEGRADED_RETRIES} retries no tool call or <final_answer> block emitted. Stopping.`, isWarning: false });
513
880
  }
881
+ break;
514
882
  }
515
- break;
883
+ degradedRetries++;
884
+ if (cb.onError) {
885
+ cb.onError({ message: 'Response missing tool call or <final_answer> — nudging model to retry in-protocol.', isWarning: true });
886
+ }
887
+ messages.push({
888
+ role: 'user',
889
+ content: 'Your previous response contained neither a tool call nor a <final_answer> block, which violates the response contract. If you need to perform an action, emit the appropriate tool tag now. If you are done, wrap your reply in <final_answer>...</final_answer>. Do not describe intended actions in prose.',
890
+ });
891
+ continue;
516
892
  }
893
+ // Non-degraded response (has tool calls) — reset the retry counter.
894
+ degradedRetries = 0;
517
895
  if (isAborted()) break;
518
896
 
519
897
  if (!cb.onToolStart) {
898
+ // Non-TUI fallback: only one-shot CLI commands leave cb.onToolStart
899
+ // unset. The shared live-region writer isn't running, so a direct
900
+ // write here can't interleave with a bubble/status redraw.
520
901
  process.stdout.write(`\n ${FG_TEAL}◆${RST} ${FG_GRAY}Found ${toolCalls.length} action(s) to execute${RST}\n`);
521
902
  }
522
903
 
523
904
  const results = [];
905
+ const debugEntries = debug ? [] : null;
524
906
  let aborted = false;
525
907
 
526
908
  for (const call of toolCalls) {
@@ -540,6 +922,7 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
540
922
  const resultStr = `Command \`${arg}\`: Permission denied by user.`;
541
923
  if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms);
542
924
  results.push(resultStr);
925
+ if (debugEntries) debugEntries.push({ tag, call, ms, status: 'denied', exitCode: null, result: resultStr });
543
926
  aborted = true;
544
927
  break;
545
928
  } else {
@@ -548,6 +931,14 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
548
931
  const resultStr = `Command \`${arg}\`:\nExit code: ${shellResult.exit_code}\n${out}`;
549
932
  if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms);
550
933
  results.push(resultStr);
934
+ if (debugEntries) debugEntries.push({
935
+ tag,
936
+ call,
937
+ ms,
938
+ status: shellResult.exit_code === 0 ? 'ok' : 'nonzero_exit',
939
+ exitCode: shellResult.exit_code,
940
+ result: resultStr,
941
+ });
551
942
  }
552
943
  continue;
553
944
  }
@@ -559,12 +950,21 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
559
950
  const resultStr = `${tag} ${call[1] || ''}: Permission denied by user.`;
560
951
  if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms);
561
952
  results.push(resultStr);
953
+ if (debugEntries) debugEntries.push({ tag, call, ms, status: 'denied', exitCode: null, result: resultStr });
562
954
  aborted = true;
563
955
  break;
564
956
  } else {
565
957
  const resultStr = formatFileResult(call, fileResult);
566
958
  if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms);
567
959
  results.push(resultStr);
960
+ if (debugEntries) debugEntries.push({
961
+ tag,
962
+ call,
963
+ ms,
964
+ status: fileResult.error ? 'error' : 'ok',
965
+ exitCode: null,
966
+ result: resultStr,
967
+ });
568
968
  }
569
969
  } catch (err) {
570
970
  const ms = Date.now() - toolStart;
@@ -572,13 +972,60 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
572
972
  if (cb.onError) {
573
973
  cb.onError({ message: `Tool error (${tag}): ${err.message}`, isWarning: true });
574
974
  } else {
975
+ // Non-TUI fallback — see comment on the onToolStart branch above.
575
976
  process.stdout.write(`\n ${THEME.warn}⚠ Tool error (${tag}): ${err.message}${THEME.reset}\n`);
576
977
  }
577
978
  logToolCall(tag, { args: call.slice(1) }, false, 'error');
578
979
  results.push(`${tag}: Error — ${err.message}`);
980
+ if (debugEntries) debugEntries.push({ tag, call, ms, status: 'exception', exitCode: null, result: `Error — ${err.message}` });
579
981
  }
580
982
  }
581
983
 
984
+ if (debug && debugEntries && debugEntries.length > 0) {
985
+ const totalMs = debugEntries.reduce((s, e) => s + (e.ms || 0), 0);
986
+ const statusCounts = debugEntries.reduce((acc, e) => {
987
+ acc[e.status] = (acc[e.status] || 0) + 1;
988
+ return acc;
989
+ }, {});
990
+ const statusSummary = Object.entries(statusCounts)
991
+ .map(([k, v]) => `${k}=${v}`)
992
+ .join(', ');
993
+
994
+ const entries = debugEntries.map((e, idx) => {
995
+ const argsPreview = (e.call || []).slice(1).map((a) => {
996
+ if (a === undefined || a === null) return '';
997
+ const s = String(a).replace(/\s+/g, ' ').trim();
998
+ return s.length > 120 ? s.slice(0, 117) + '...' : s;
999
+ }).filter((s) => s.length > 0).join(' │ ');
1000
+ const rows = [
1001
+ ['tag:', e.tag],
1002
+ ['args:', argsPreview || '(none)'],
1003
+ ['status:', e.status + (e.exitCode !== null && e.exitCode !== undefined ? ` (exit=${e.exitCode})` : '')],
1004
+ ['latency_ms:', e.ms],
1005
+ ];
1006
+ return {
1007
+ title: `TOOL ${idx + 1}/${debugEntries.length}`,
1008
+ rows,
1009
+ body: truncateForDebug(e.result),
1010
+ };
1011
+ });
1012
+
1013
+ const block = formatDebugBlock({
1014
+ title: 'DEBUG TOOL RESULTS',
1015
+ iteration: iteration + 1,
1016
+ blocks: [
1017
+ ['SUMMARY', [
1018
+ ['tools_executed:', debugEntries.length],
1019
+ ['total_latency_ms:', totalMs],
1020
+ ['status_breakdown:', statusSummary],
1021
+ ['aborted:', aborted ? 'yes' : 'no'],
1022
+ ]],
1023
+ ],
1024
+ entries,
1025
+ });
1026
+ emitDebug(block);
1027
+ }
1028
+
582
1029
  if (aborted) {
583
1030
  const warnMsg = isAborted()
584
1031
  ? 'Agent interrupted.'
@@ -586,24 +1033,37 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
586
1033
  if (cb.onError) {
587
1034
  cb.onError({ message: warnMsg, isWarning: true });
588
1035
  } else {
1036
+ // Non-TUI fallback — see comment above on the Found-actions path.
589
1037
  process.stdout.write(`\n ${FG_YELLOW}⚠${RST} ${FG_GRAY}${warnMsg}${RST}`);
590
1038
  }
591
1039
  // Push whatever results accumulated before the denial so the LLM has
592
1040
  // context if the user asks to continue.
593
1041
  if (results.length > 0) {
594
- messages.push({
595
- role: 'user',
596
- content: `Tool execution results (partial stopped after user denied an action):\n\n${results.join('\n\n')}`,
597
- });
1042
+ if (isNativeCall) {
1043
+ for (let i = 0; i < results.length; i++) {
1044
+ messages.push({ role: 'tool', tool_call_id: nativeToolCallIds[i], content: results[i] });
1045
+ }
1046
+ } else {
1047
+ messages.push({
1048
+ role: 'user',
1049
+ content: `Tool execution results (partial — stopped after user denied an action):\n\n${results.join('\n\n')}`,
1050
+ });
1051
+ }
598
1052
  }
599
1053
  break;
600
1054
  }
601
1055
 
602
- const feedback = results.join('\n\n');
603
- messages.push({
604
- role: 'user',
605
- content: `Tool execution results:\n\n${feedback}\n\nContinue with the task. If everything is done, summarize what was accomplished.`,
606
- });
1056
+ if (isNativeCall) {
1057
+ for (let i = 0; i < results.length; i++) {
1058
+ messages.push({ role: 'tool', tool_call_id: nativeToolCallIds[i], content: results[i] });
1059
+ }
1060
+ } else {
1061
+ const feedback = results.join('\n\n');
1062
+ messages.push({
1063
+ role: 'user',
1064
+ content: `Tool execution results:\n\n${feedback}\n\nContinue with the task. If everything is done, summarize what was accomplished.`,
1065
+ });
1066
+ }
607
1067
  }
608
1068
 
609
1069
  return { messages, metrics };
@@ -616,4 +1076,5 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
616
1076
 
617
1077
  module.exports = {
618
1078
  createAgentRunner,
1079
+ formatDebugBlock,
619
1080
  };