npm - @semalt-ai/code - Versions diffs - 1.8.3 → 1.8.5 - Mend

@semalt-ai/code 1.8.3 → 1.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

package/.claude/settings.local.json +3 -1
package/CLAUDE.md +4 -1
package/TECHNICAL_DEBT.md +66 -0
package/index.js +23 -9
package/lib/agent.js +407 -129
package/lib/api.js +105 -39
package/lib/args.js +22 -0
package/lib/commands.js +367 -132
package/lib/config.js +14 -0
package/lib/constants.js +1 -1
package/lib/debug.js +106 -0
package/lib/permissions.js +9 -8
package/lib/proc.js +96 -0
package/lib/prompts.js +8 -10
package/lib/tool_specs.js +14 -7
package/lib/tools.js +299 -118
package/lib/ui/chat-history.js +37 -8
package/lib/ui/create-ui.js +63 -38
package/lib/ui/diff.js +4 -3
package/lib/ui/format.js +321 -0
package/lib/ui/input-field.js +134 -59
package/lib/ui/layout.js +0 -2
package/lib/ui/messages.js +44 -0
package/lib/ui/select.js +114 -0
package/lib/ui/status-bar.js +135 -28
package/lib/ui/stream.js +8 -12
package/lib/ui/terminal.js +12 -4
package/lib/ui/theme.js +25 -4
package/lib/ui/utils.js +94 -27
package/lib/ui/writer.js +391 -45
package/lib/ui.js +6 -6
package/package.json +1 -1
package/lib/ui/legacy.js +0 -130

package/lib/agent.js CHANGED

@@ -3,11 +3,16 @@
 const { logToolCall } = require('./audit');
 const { Metrics } = require('./metrics');
 const { getSystemPrompt } = require('./prompts');
+const { isNativeToolsActive } = require('./config');
 const { TAG_REGISTRY } = require('./constants');
 const { mapInvokeToCall } = require('./tools');
+const { TOOL_SPECS } = require('./tool_specs');
 const { UI_THEME } = require('./ui/theme');
 const { RST } = require('./ui/ansi');
 const { getCols: _getCols, repeatToWidth } = require('./ui/utils');
+const writer = require('./ui/writer');
+const messages = require('./ui/messages');
+const dbg = require('./debug');
 class StreamParser {
   constructor(onToken, onTagOpen, onTagContent, onTagClose) {
@@ -153,7 +158,33 @@ function estimateTokens(text) {
   return Math.floor((text || '').length / 4);
 }
-function detectFormat(reply, toolCalls) {
+// User-initiated aborts surface through several shapes depending on where in
+// the Node http stack the signal fires: `new Error('Aborted')` from our own
+// abort paths in api.js, or AbortError/ABORT_ERR from Node's built-ins. The
+// authoritative check is the signal itself — this helper is the fallback.
+function isAbortError(err) {
+  if (!err) return false;
+  if (err.name === 'AbortError') return true;
+  if (err.code === 'ABORT_ERR' || err.code === 'ERR_ABORTED') return true;
+  if (typeof err.message === 'string' && /^Aborted$/i.test(err.message)) return true;
+  return false;
+}
+function abortableSleep(ms, signal) {
+  return new Promise((resolve) => {
+    if (signal && signal.aborted) { resolve(); return; }
+    const t = setTimeout(resolve, ms);
+    if (signal) {
+      signal.addEventListener('abort', () => {
+        clearTimeout(t);
+        resolve();
+      }, { once: true });
+    }
+  });
+}
+function detectFormat(reply, toolCalls, nativeToolCalls) {
+  if (nativeToolCalls && nativeToolCalls.length > 0) return 'native_tool_calls';
   if (!reply || !reply.trim()) return 'empty';
   if (/<(minimax:tool_call|qwen:tool_call|tool_call|function_call)\b/i.test(reply)) return 'tool_call';
   if (toolCalls && toolCalls.length > 0) return 'command';
@@ -195,6 +226,26 @@ function previewCommand(call) {
   return trimmed ? `<${tag}> ${trimmed}` : `<${tag}>`;
 }
+// Classify why mapInvokeToCall returned null for a native tool_call so the
+// debug block (and the corrective retry hint) can surface the specific cause
+// instead of a generic "unknown name or invalid args". Source of truth is
+// TOOL_SPECS — its `required` array tells us which positional args the
+// native API advertised, and `wrapper:true` flags parser envelopes that
+// must never appear as a model-emitted tool name.
+function describeNativeRejection(toolName, params) {
+  const lowerName = (toolName || '').toLowerCase();
+  const spec = TOOL_SPECS[lowerName];
+  if (!spec || spec.wrapper) {
+    return 'unknown name (not in TOOL_SPECS / not supported by mapInvokeToCall)';
+  }
+  const required = (spec.parameters && spec.parameters.required) || [];
+  const missing = required.filter((r) => params[r] === undefined || params[r] === null);
+  if (missing.length > 0) {
+    return `missing required arg: ${missing.join(', ')}`;
+  }
+  return 'mapInvokeToCall returned null without specific reason';
+}
 function formatDebugBlock(sections) {
   // The debug block is rendered as a tool-output message in the TUI. Chat
   // history indents output by 5 cols; account for that so the frame still
@@ -281,7 +332,117 @@ function truncateForDebug(text, maxLines = 40, maxChars = 2000) {
   return s;
 }
-function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agentExecFile, ui, getConfig }) {
+// Per-tag meta extractor. Converts a tool-executor return value into the
+// compact meta object consumed by the tool-line formatter — exit codes for
+// shell, byte counts for file ops, status_code + bytes for HTTP, etc. A
+// pure function by design: no UI state, no config reads. The callback
+// layer (commands.js) feeds the meta into formatToolLine together with
+// the tag, so the formatter can produce the 4-segment line in either the
+// pending (live region) or final (scrollback) context.
+function _metaForTool(tag, result) {
+  if (!result || result.error) return null;
+  switch (tag) {
+    case 'shell':
+    case 'exec':
+      return { exit_code: result.exit_code };
+    case 'read':
+    case 'read_file':
+      return {
+        bytes: typeof result.bytes === 'number'
+          ? result.bytes
+          : (result.content ? Buffer.byteLength(String(result.content), 'utf8') : 0),
+      };
+    case 'write':
+    case 'write_file':
+    case 'create_file':
+    case 'append':
+    case 'append_file':
+    case 'upload':
+      return { bytes: typeof result.bytes === 'number' ? result.bytes : 0 };
+    case 'list_dir':
+      return { count: Array.isArray(result.items) ? result.items.length : 0 };
+    case 'search_files':
+      return { count: Array.isArray(result.files) ? result.files.length : 0 };
+    case 'search_in_file':
+      return { count: Array.isArray(result.matches) ? result.matches.length : 0 };
+    case 'replace_in_file':
+      return { count: typeof result.count === 'number' ? result.count : 0 };
+    case 'http_get':
+    case 'download':
+      return {
+        status_code: result.status_code,
+        bytes: typeof result.bytes === 'number'
+          ? result.bytes
+          : (result.body ? Buffer.byteLength(String(result.body), 'utf8') : 0),
+      };
+    case 'file_stat':
+      return {
+        bytes: result.size_kb ? Math.round(parseFloat(result.size_kb) * 1024) : 0,
+        kind: result.type || null,
+      };
+    default:
+      return null;
+  }
+}
+// Turn a [action, arg1, arg2, …] call tuple into the `attrs` bag that
+// formatToolLine looks up when building the operation string. Centralized
+// here so the per-tag positional-arg contract is written down in exactly
+// one place — any new tool added to the agent-loop tuple schema also gets
+// its attrs mapping here.
+function _attrsFromCall(call) {
+  if (!Array.isArray(call) || call.length === 0) return {};
+  const [tag, ...args] = call;
+  switch (tag) {
+    case 'shell':
+    case 'exec':
+      return { command: args[0] || '' };
+    case 'read':
+    case 'read_file':
+    case 'list_dir':
+    case 'delete_file':
+    case 'make_dir':
+    case 'remove_dir':
+    case 'file_stat':
+      return { path: args[0] || '' };
+    case 'write':
+    case 'write_file':
+    case 'create_file':
+    case 'append':
+    case 'append_file':
+      return { path: args[0] || '', content: args[1] || '' };
+    case 'upload':
+      return { path: args[0] || '' };
+    case 'move_file':
+    case 'copy_file':
+      return { src: args[0] || '', dst: args[1] || '' };
+    case 'edit_file':
+      return { path: args[0] || '', line: args[1], content: args[2] || '' };
+    case 'search_files':
+      return { pattern: args[0] || '', dir: args[1] || '.' };
+    case 'search_in_file':
+      return { path: args[0] || '', pattern: args[1] || '' };
+    case 'replace_in_file':
+      return { path: args[0] || '', search: args[1] || '', replace: args[2] || '', flags: args[3] || '' };
+    case 'get_env':
+      return { name: args[0] || '' };
+    case 'set_env':
+      return { name: args[0] || '', value: args[1] || '' };
+    case 'download':
+    case 'http_get':
+      return { url: args[0] || '' };
+    case 'ask_user':
+      return { question: args[0] || '' };
+    case 'store_memory':
+      return { key: args[0] || '', value: args[1] || '' };
+    case 'recall_memory':
+      return { key: args[0] || '' };
+    default:
+      return {};
+  }
+}
+function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agentExecFile, describePermission, permissionManager, ui, getConfig }) {
   const { BOLD, FG_DARK, FG_GRAY, FG_TEAL, FG_YELLOW, RST, THEME, getCols } = ui;
   function formatFileResult(call, result) {
@@ -405,8 +566,7 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
       }
       case 'http_get': {
         const url = attrs.url || content;
-        const raw = attrs.raw || '';
-        return formatFileResult(['http_get', url, raw], await agentExecFile('http_get', url, raw));
+        return formatFileResult(['http_get', url], await agentExecFile('http_get', url));
       }
       case 'ask_user': {
         const q = attrs.question || content;
@@ -461,30 +621,26 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
     const metrics = new Metrics(tokenLimit);
     const mode = overrideMode || 'system_role';
-    // Route debug blocks to the UI callback when present (interactive TUI mode
-    // overwrites stderr with redraws, losing the output). Fall back to stderr
-    // for one-shot/non-TTY flows where there's no UI to host the block.
+    // Route debug blocks based on debug mode.
+    //   file mode   — write to the debug file. Never touch the TUI.
+    //   simple mode — UI callback when present (chat-bubble in interactive
+    //                 TUI), fall back to stderr for one-shot/non-TTY flows.
+    //   off mode    — discard. (debug=true can also come from in-chat /debug
+    //                 toggle with no global mode active.)
     const emitDebug = (block) => {
+      if (dbg.isFile()) {
+        dbg.log(block);
+        return;
+      }
       if (typeof cb.onDebug === 'function') cb.onDebug(block);
+      // audit: allowed — stderr debug under --debug flag (no UI hosting available).
       else process.stderr.write('\n' + block + '\n');
     };
-    // Resolve native_tools from the active profile (matched by api_base+model).
-    // Fallback to true if no matching profile — mirrors config-normalization default.
-    const _cfg = typeof getConfig === 'function' ? getConfig() : {};
-    const _profile = Array.isArray(_cfg.models)
-      ? _cfg.models.find((p) => p && p.api_base === _cfg.api_base && p.model === model)
-      : null;
-    const nativeTools = _profile && _profile.native_tools === false ? false : true;
+    const nativeTools = isNativeToolsActive(model);
     const activeSystemPrompt = overrideSystemPrompt !== null ? overrideSystemPrompt : getSystemPrompt(nativeTools);
-    // Response contract: every model response must end with a tool call or
-    // <final_answer>...</final_answer>. Anything else is degraded — push a
-    // synthetic nudge and retry, capped to prevent runaway loops.
-    const MAX_DEGRADED_RETRIES = 2;
-    let degradedRetries = 0;
     for (let iteration = 0; iteration < maxIterations; iteration++) {
       if (isAborted()) break;
       const linePrefix = `${FG_TEAL}${BOLD}◆ ${RST}`;
@@ -582,6 +738,14 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
             lastApiErr = null;
             break;
           } catch (err) {
+            // User-initiated abort: not a transient failure. Skip the retry
+            // counter, the "Retrying (N/M)..." status update, the debug dump,
+            // and the post-loop error surface. The "Interrupted." feedback is
+            // already shown by the input-field abort listener.
+            if (controller.signal.aborted || isAborted() || isAbortError(err)) {
+              lastApiErr = null;
+              break;
+            }
             lastApiErr = err;
             if (debug) {
               const status = err.statusCode ? `HTTP ${err.statusCode}` : 'network error';
@@ -621,13 +785,25 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
               }
             }
             cb.onRetry?.(attempt + 1, MAX_RETRIES);
-            await new Promise((r) => setTimeout(r, delayMs));
+            await abortableSleep(delayMs, controller.signal);
+            // Ctrl+C pressed during backoff: bail without the next attempt.
+            if (controller.signal.aborted || isAborted()) {
+              lastApiErr = null;
+              break;
+            }
           }
         }
       } finally {
         clearInterval(abortWatcher);
       }
+      // User-initiated abort: exit the turn quietly. Skip the empty-reply
+      // "connection dropped" warning below — the abort listener already
+      // surfaced "Interrupted." and the outer prompt will return.
+      if (controller.signal.aborted || isAborted()) {
+        break;
+      }
       if (lastApiErr) {
         if (cb.onError) cb.onError(lastApiErr);
         break;
@@ -652,13 +828,7 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
         if (cb.onError) {
           cb.onError({ message: warnMsg, isWarning: true });
         } else {
-          // Non-TUI fallback (cb.onError is unset only for one-shot CLI
-          // commands like `cmdCode`, which don't run the shared live-region
-          // writer). Direct stdout write is safe here: no status-bar timer
-          // or bubble renderer is competing for stdout.
-          process.stdout.write(
-            `\n  ${THEME.warn}⚠ ${warnMsg}${THEME.reset}\n`
-          );
+          messages.sysWarn(warnMsg);
         }
       }
@@ -708,20 +878,35 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
       const nativeToolCalls = Array.isArray(result?.toolCalls) ? result.toolCalls : [];
       let toolCalls;
       let nativeToolCallIds = [];
+      // Per-call rejection records for native tool_calls that could not be
+      // converted to executable form (parse error or unknown name / missing
+      // required arg). Used downstream to (a) keep the assistant's tool_calls
+      // ↔ tool-result map consistent, and (b) feed a corrective hint back to
+      // the model so it retries instead of stalling.
+      const nativeRejections = [];
       if (nativeToolCalls.length > 0) {
         toolCalls = [];
         for (const tc of nativeToolCalls) {
+          const fnName = tc.function?.name || '(unknown)';
+          const argsRaw = tc.function?.arguments || '';
+          const argsPreview = argsRaw.length > 200 ? argsRaw.slice(0, 200) + '…' : argsRaw;
           let args;
           try {
-            args = tc.function?.arguments ? JSON.parse(tc.function.arguments) : {};
+            args = argsRaw ? JSON.parse(argsRaw) : {};
           } catch (err) {
-            if (cb.onError) cb.onError({ message: `Failed to parse tool_call arguments for ${tc.function?.name || '(unknown)'}: ${err.message}`, isWarning: true });
+            const reason = `JSON parse failed: ${err.message}`;
+            if (cb.onError) cb.onError({ message: `${fnName}: ${reason} Args: ${argsPreview}`, isWarning: true });
+            nativeRejections.push({ id: tc.id, name: fnName, argsPreview, reason });
             continue;
           }
-          const call = mapInvokeToCall(tc.function?.name, args);
+          const call = mapInvokeToCall(fnName, args);
           if (call) {
             toolCalls.push(call);
             nativeToolCallIds.push(tc.id);
+          } else {
+            const reason = describeNativeRejection(fnName, args);
+            if (cb.onError) cb.onError({ message: `${fnName}: ${reason} Args: ${argsPreview}`, isWarning: true });
+            nativeRejections.push({ id: tc.id, name: fnName, argsPreview, reason });
           }
         }
       } else {
@@ -729,9 +914,6 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
       }
       const isNativeCall = nativeToolCalls.length > 0;
       const cleanedReply = cleanAssistantContent(reply);
-      // Protocol contract: a valid response ends with a tool call OR a
-      // <final_answer>...</final_answer> block. Anything else is degraded.
-      const hasFinal = /<final_answer\b[\s\S]*?<\/final_answer>/i.test(reply);
       if (debug && result) {
         const lastUserMsg = [...messagesWithSystem].reverse().find((m) => m.role === 'user');
@@ -752,17 +934,27 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
         const visibleTokens = Math.max(completionTokens - thinkingTokens, 0);
         const contextLimit = tokenLimit || null;
         const ctxPct = contextLimit ? Math.round((promptTokens / contextLimit) * 100) : null;
-        const detected = detectFormat(reply, toolCalls);
+        const detected = detectFormat(reply, toolCalls, nativeToolCalls);
         const firstCmd = toolCalls.length > 0 ? previewCommand(toolCalls[0]) : previewCommand(null);
         const toolTags = Object.entries(TAG_REGISTRY)
           .filter(([, e]) => e.type === 'tool')
           .map(([t]) => t);
+        const callableSpecCount = Object.values(TOOL_SPECS).filter((s) => !s.wrapper).length;
         const warnings = [];
         if (result.finish_reason === 'length') warnings.push('finish_reason=length  → response truncated, increase max_tokens');
         if (detected === 'tool_call' && toolCalls.length === 0) {
           warnings.push('commands_found=0      → agent emitted no command, client will stall');
         }
+        if (detected === 'native_tool_calls' && toolCalls.length === 0) {
+          const lines = [`commands_found=0      → all ${nativeToolCalls.length} native tool_call(s) rejected:`];
+          for (const r of nativeRejections) {
+            lines.push(`    • name="${r.name}"`);
+            lines.push(`      args=${r.argsPreview || '(empty)'}`);
+            lines.push(`      reason=${r.reason}`);
+          }
+          warnings.push(lines.join('\n'));
+        }
         if (ctxPct !== null && ctxPct > 80) warnings.push(`context_used=${ctxPct}%    → approaching context limit`);
         const block = formatDebugBlock({
@@ -788,7 +980,9 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
               ['temperature:', result.request?.temperature ?? '(default)'],
               ['stop_sequences:', JSON.stringify(result.request?.stop || [])],
               ['reasoning_effort:', '(n/a)'],
-              ['tools_enabled:', `${toolTags.length} XML tags (via system prompt)`],
+              ['tools_enabled:', nativeTools
+                ? `${callableSpecCount} functions (via tools API)`
+                : `${toolTags.length} XML tags (via system prompt)`],
             ]],
             ['RESPONSE', [
               ['finish_reason:', result.finish_reason || '(unknown)'],
@@ -838,7 +1032,13 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
       }
       const assistantMsg = { role: 'assistant', content: cleanedReply };
-      if (isNativeCall) assistantMsg.tool_calls = nativeToolCalls;
+      // Only attach tool_calls for the calls we actually accepted. Attaching
+      // rejected calls here would leave them without matching `tool` results
+      // on the next turn — strict providers reject the resulting history.
+      if (isNativeCall && nativeToolCallIds.length > 0) {
+        const acceptedSet = new Set(nativeToolCallIds);
+        assistantMsg.tool_calls = nativeToolCalls.filter((tc) => acceptedSet.has(tc.id));
+      }
       messages.push(assistantMsg);
       // When showThink is off and the turn has tool calls, suppress the text bubble —
       // pre-tool reasoning is noise, tool result bubbles already convey what happened.
@@ -846,6 +1046,29 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
       if (cb.onAssistantMessage) cb.onAssistantMessage(displayReply);
       if (toolCalls.length === 0) {
+        // Native mode: tool_calls came in but none could be converted (parse
+        // error or unknown name / missing required arg). Push a corrective
+        // user hint so the model retries instead of stalling. Without this
+        // the loop would break silently — that's the bug the migration set
+        // out to fix.
+        if (isNativeCall && nativeRejections.length > 0) {
+          const summary = nativeRejections
+            .map((r) => `- ${r.name}: ${r.reason}`)
+            .join('\n');
+          if (cb.onError) {
+            const names = nativeRejections.map((r) => r.name).join(', ');
+            cb.onError({
+              message: `Native tool_call(s) rejected: ${names}. Asking the model to retry with a valid call.`,
+              isWarning: true,
+            });
+          }
+          messages.push({
+            role: 'user',
+            content: `Your last response contained tool_calls that could not be executed:\n\n${summary}\n\nRetry with a valid tool name and complete required arguments per the tools schema.`,
+          });
+          continue;
+        }
         // Detect malformed known-tag syntax (e.g. <create_file> with no path
         // attribute, usually paired with nonsense like <attrs: path=...> inside
         // the body). Push a corrective feedback message and keep looping so
@@ -864,121 +1087,172 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
           continue;
         }
-        if (hasFinal) {
-          // Model declared it is done — honor the protocol and terminate.
-          // An empty <final_answer></final_answer> is the model's choice;
-          // we don't police content.
-          degradedRetries = 0;
-          break;
-        }
-        // Protocol violation: neither a tool call nor a <final_answer>. Nudge
-        // the model to restate in-protocol, capped to prevent runaway loops.
-        if (degradedRetries >= MAX_DEGRADED_RETRIES) {
-          if (cb.onError) {
-            cb.onError({ message: `Agent violated the response contract after ${MAX_DEGRADED_RETRIES} retries — no tool call or <final_answer> block emitted. Stopping.`, isWarning: false });
-          }
-          break;
-        }
-        degradedRetries++;
-        if (cb.onError) {
-          cb.onError({ message: 'Response missing tool call or <final_answer> — nudging model to retry in-protocol.', isWarning: true });
-        }
-        messages.push({
-          role: 'user',
-          content: 'Your previous response contained neither a tool call nor a <final_answer> block, which violates the response contract. If you need to perform an action, emit the appropriate tool tag now. If you are done, wrap your reply in <final_answer>...</final_answer>. Do not describe intended actions in prose.',
-        });
-        continue;
+        // No tool calls and non-empty content (the empty case was already
+        // handled by the `!reply` guard above). This is the model's final
+        // answer for this turn — end the loop and return control to the user.
+        break;
       }
-      // Non-degraded response (has tool calls) — reset the retry counter.
-      degradedRetries = 0;
       if (isAborted()) break;
       if (!cb.onToolStart) {
-        // Non-TUI fallback: only one-shot CLI commands leave cb.onToolStart
-        // unset. The shared live-region writer isn't running, so a direct
-        // write here can't interleave with a bubble/status redraw.
-        process.stdout.write(`\n  ${FG_TEAL}◆${RST} ${FG_GRAY}Found ${toolCalls.length} action(s) to execute${RST}\n`);
+        writer.scrollback(`\n  ${FG_TEAL}◆${RST} ${FG_GRAY}Found ${toolCalls.length} action(s) to execute${RST}`);
       }
       const results = [];
       const debugEntries = debug ? [] : null;
       let aborted = false;
+      // Per-invocation id. Paired across onToolStart/onToolEnd so the UI
+      // layer can track each concurrent tool's activity-region slot and
+      // commit its final line atomically via endActivity. Monotonic —
+      // never reused even if the agent runs the same tag twice.
+      let invocationCounter = 0;
+      // Re-arm the abort watcher for the tool-execution phase. The API-call
+      // finally cleared the previous one, so without this a Ctrl+C while a
+      // long shell command is running would never reach the AbortSignal we
+      // now thread into agentExecShell — the child would keep running and
+      // the UI would show "Interrupted" without actually killing anything.
+      const toolAbortWatcher = setInterval(() => {
+        if (isAborted() && !controller.signal.aborted) controller.abort();
+      }, 50);
-      for (const call of toolCalls) {
-        if (isAborted()) { aborted = true; break; }
+      try {
+        for (const call of toolCalls) {
+          if (isAborted()) { aborted = true; break; }
+          const tag = call[0] || 'unknown';
+          const arg = call[1] || '';
+          const attrs = _attrsFromCall(call);
+          // Permission gate, lifted out of the executors. Asking before
+          // onToolStart fires means the activity bubble (and its 1Hz
+          // ticker) doesn't pre-date grant — and on denial no bubble
+          // appears at all. The picker's own onCloseModal scrollback
+          // line ("✗ <description>") is the visual record of the denial.
+          let permDesc = null;
+          try {
+            permDesc = describePermission ? await describePermission(call) : null;
+          } catch (err) {
+            if (cb.onError) cb.onError({ message: `describePermission(${tag}): ${err.message}`, isWarning: true });
+          }
+          if (permDesc) {
+            if (cb.onPermissionAsk) cb.onPermissionAsk(tag, arg);
+            let approved = true;
+            try {
+              approved = await permissionManager.askPermission(permDesc.actionType, permDesc.description, permDesc.tag);
+            } catch (err) {
+              if (cb.onError) cb.onError({ message: `askPermission(${tag}): ${err.message}`, isWarning: true });
+              approved = false;
+            }
+            if (!approved) {
+              const resultStr = (tag === 'shell' || tag === 'exec')
+                ? `Command \`${arg}\`: Permission denied by user.`
+                : `${tag} ${arg}: Permission denied by user.`;
+              logToolCall(permDesc.tag, { args: call.slice(1) }, false, 'denied');
+              results.push(resultStr);
+              if (debugEntries) debugEntries.push({ tag, call, ms: 0, status: 'denied', exitCode: null, result: resultStr });
+              aborted = true;
+              break;
+            }
+          }
-        const tag = call[0] || 'unknown';
-        const arg = call[1] || '';
-        const toolStart = Date.now();
+          const toolStart = Date.now();
+          const invocationId = `tool-${iteration}-${invocationCounter++}-${tag}`;
+          const startCtx = { id: invocationId, call, attrs, startedAt: toolStart };
-        if (cb.onToolStart) cb.onToolStart(tag, arg);
+          if (cb.onToolStart) cb.onToolStart(tag, arg, startCtx);
-        try {
-          if (tag === 'shell') {
-            const shellResult = await agentExecShell(arg);
+          try {
+            if (tag === 'shell') {
+              const shellResult = await agentExecShell(arg, { signal: controller.signal });
+              const ms = Date.now() - toolStart;
+              if (shellResult.aborted) {
+                // User pressed Ctrl+C mid-command. The child process tree
+                // has already been terminated by killTreeEscalating in
+                // tools.js. Surface a clear message to the model so it can
+                // plan around the interruption instead of blindly retrying
+                // the same long-running command on the next turn.
+                const elapsedS = shellResult.elapsed_s || 0;
+                const oneLine = String(arg).replace(/\s+/g, ' ').trim();
+                const truncatedCmd = oneLine.length > 80 ? oneLine.slice(0, 77) + '...' : oneLine;
+                const resultStr = `User interrupted execution after ${elapsedS}s. Tool was running: ${truncatedCmd}. Plan around this — do not retry the same long-running command.`;
+                if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms, { id: invocationId, call, attrs, meta: null, error: { message: 'aborted' } });
+                results.push(resultStr);
+                if (debugEntries) debugEntries.push({ tag, call, ms, status: 'aborted', exitCode: null, result: resultStr });
+                aborted = true;
+                break;
+              } else {
+                let out = shellResult.stdout;
+                if (shellResult.stderr) out += `\nSTDERR: ${shellResult.stderr}`;
+                const resultStr = `Command \`${arg}\`:\nExit code: ${shellResult.exit_code}\n${out}`;
+                const meta = _metaForTool(tag, shellResult);
+                const error = shellResult.exit_code !== 0
+                  ? { message: `exit ${shellResult.exit_code}`, code: shellResult.exit_code }
+                  : null;
+                if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms, { id: invocationId, call, attrs, meta, error });
+                results.push(resultStr);
+                if (debugEntries) debugEntries.push({
+                  tag,
+                  call,
+                  ms,
+                  status: shellResult.exit_code === 0 ? 'ok' : 'nonzero_exit',
+                  exitCode: shellResult.exit_code,
+                  result: resultStr,
+                });
+              }
+              continue;
+            }
+            const fileResult = await agentExecFile(...call, { signal: controller.signal });
             const ms = Date.now() - toolStart;
-            if (shellResult.stderr === 'Permission denied by user') {
-              const resultStr = `Command \`${arg}\`: Permission denied by user.`;
-              if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms);
+            if (fileResult.aborted) {
+              // User pressed Ctrl+C while a file/network tool was running.
+              // The per-tool abort listener has already torn down the in-flight
+              // op (closed the FS read, destroyed the HTTP request, stopped the
+              // recursive walk). Surface a clear note to the model so the next
+              // turn doesn't replay the same long-running operation.
+              const elapsedS = fileResult.elapsed_s || 0;
+              const oneLine = String(arg).replace(/\s+/g, ' ').trim();
+              const truncatedArg = oneLine.length > 80 ? oneLine.slice(0, 77) + '...' : oneLine;
+              const resultStr = `User interrupted execution after ${elapsedS}s. Tool was running: ${tag} ${truncatedArg}. Plan around this — do not retry the same long-running operation.`;
+              if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms, { id: invocationId, call, attrs, meta: null, error: { message: 'aborted' } });
               results.push(resultStr);
-              if (debugEntries) debugEntries.push({ tag, call, ms, status: 'denied', exitCode: null, result: resultStr });
+              if (debugEntries) debugEntries.push({ tag, call, ms, status: 'aborted', exitCode: null, result: resultStr });
               aborted = true;
               break;
             } else {
-              let out = shellResult.stdout;
-              if (shellResult.stderr) out += `\nSTDERR: ${shellResult.stderr}`;
-              const resultStr = `Command \`${arg}\`:\nExit code: ${shellResult.exit_code}\n${out}`;
-              if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms);
+              const resultStr = formatFileResult(call, fileResult);
+              const meta = _metaForTool(tag, fileResult);
+              const error = fileResult.error
+                ? { message: fileResult.error, code: fileResult.error_code || null }
+                : null;
+              if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms, { id: invocationId, call, attrs, meta, error });
               results.push(resultStr);
               if (debugEntries) debugEntries.push({
                 tag,
                 call,
                 ms,
-                status: shellResult.exit_code === 0 ? 'ok' : 'nonzero_exit',
-                exitCode: shellResult.exit_code,
+                status: fileResult.error ? 'error' : 'ok',
+                exitCode: null,
                 result: resultStr,
               });
             }
-            continue;
-          }
-          const fileResult = await agentExecFile(...call);
-          const ms = Date.now() - toolStart;
-          if (fileResult.error === 'Permission denied') {
-            const resultStr = `${tag} ${call[1] || ''}: Permission denied by user.`;
-            if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms);
-            results.push(resultStr);
-            if (debugEntries) debugEntries.push({ tag, call, ms, status: 'denied', exitCode: null, result: resultStr });
-            aborted = true;
-            break;
-          } else {
-            const resultStr = formatFileResult(call, fileResult);
-            if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms);
-            results.push(resultStr);
-            if (debugEntries) debugEntries.push({
-              tag,
-              call,
-              ms,
-              status: fileResult.error ? 'error' : 'ok',
-              exitCode: null,
-              result: resultStr,
-            });
-          }
-        } catch (err) {
-          const ms = Date.now() - toolStart;
-          if (cb.onToolEnd) cb.onToolEnd(tag, `Error: ${err.message}`, ms);
-          if (cb.onError) {
-            cb.onError({ message: `Tool error (${tag}): ${err.message}`, isWarning: true });
-          } else {
-            // Non-TUI fallback — see comment on the onToolStart branch above.
-            process.stdout.write(`\n  ${THEME.warn}⚠ Tool error (${tag}): ${err.message}${THEME.reset}\n`);
+          } catch (err) {
+            const ms = Date.now() - toolStart;
+            if (cb.onToolEnd) cb.onToolEnd(tag, `Error: ${err.message}`, ms, { id: invocationId, call, attrs, meta: null, error: err });
+            if (cb.onError) {
+              cb.onError({ message: `Tool error (${tag}): ${err.message}`, isWarning: true });
+            } else {
+              messages.toolError(tag, err.message);
+            }
+            logToolCall(tag, { args: call.slice(1) }, false, 'error');
+            results.push(`${tag}: Error — ${err.message}`);
+            if (debugEntries) debugEntries.push({ tag, call, ms, status: 'exception', exitCode: null, result: `Error — ${err.message}` });
           }
-          logToolCall(tag, { args: call.slice(1) }, false, 'error');
-          results.push(`${tag}: Error — ${err.message}`);
-          if (debugEntries) debugEntries.push({ tag, call, ms, status: 'exception', exitCode: null, result: `Error — ${err.message}` });
         }
+      } finally {
+        clearInterval(toolAbortWatcher);
       }
       if (debug && debugEntries && debugEntries.length > 0) {
@@ -1033,12 +1307,16 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
         if (cb.onError) {
           cb.onError({ message: warnMsg, isWarning: true });
         } else {
-          // Non-TUI fallback — see comment above on the Found-actions path.
-          process.stdout.write(`\n  ${FG_YELLOW}⚠${RST} ${FG_GRAY}${warnMsg}${RST}`);
+          messages.sysWarn(warnMsg);
         }
-        // Push whatever results accumulated before the denial so the LLM has
-        // context if the user asks to continue.
+        // Push whatever results accumulated before the stop so the LLM has
+        // context if the user asks to continue. The reason matters: an abort
+        // (Ctrl+C) and a denial are both surfaced through the same `aborted`
+        // flag, but the model should know which happened so it doesn't
+        // immediately retry a runaway command after the user explicitly
+        // killed it.
         if (results.length > 0) {
+          const reason = isAborted() ? 'user interrupted' : 'after user denied an action';
           if (isNativeCall) {
             for (let i = 0; i < results.length; i++) {
               messages.push({ role: 'tool', tool_call_id: nativeToolCallIds[i], content: results[i] });
@@ -1046,7 +1324,7 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
           } else {
             messages.push({
               role: 'user',
-              content: `Tool execution results (partial — stopped after user denied an action):\n\n${results.join('\n\n')}`,
+              content: `Tool execution results (partial — stopped: ${reason}):\n\n${results.join('\n\n')}`,
             });
           }
         }