npm - @semalt-ai/code - Versions diffs - 1.7.0 → 1.8.1 - Mend

@semalt-ai/code 1.7.0 → 1.8.1

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (30)

package/.claude/settings.local.json +8 -0
package/ARCHITECTURE.md +99 -0
package/CLAUDE.md +349 -0
package/index.js +69 -7
package/lib/agent.js +577 -39
package/lib/api.js +285 -79
package/lib/args.js +31 -0
package/lib/audit.js +31 -0
package/lib/commands.js +1006 -307
package/lib/config.js +51 -5
package/lib/constants.js +72 -0
package/lib/context.js +2 -6
package/lib/metrics.js +94 -0
package/lib/permissions.js +180 -49
package/lib/prompts.js +96 -13
package/lib/storage.js +96 -0
package/lib/tools.js +1009 -35
package/lib/ui/ansi.js +65 -0
package/lib/ui/chat-history.js +217 -0
package/lib/ui/create-ui.js +474 -0
package/lib/ui/diff.js +243 -0
package/lib/ui/input-field.js +1176 -0
package/lib/ui/layout.js +53 -0
package/lib/ui/legacy.js +130 -0
package/lib/ui/status-bar.js +131 -0
package/lib/ui/stream.js +158 -0
package/lib/ui/utils.js +45 -0
package/lib/ui.js +42 -598
package/package.json +1 -1
package/path +1 -0

package/lib/agent.js CHANGED

@@ -1,60 +1,602 @@
 'use strict';
+const { logToolCall } = require('./audit');
+const { Metrics } = require('./metrics');
+const { SYSTEM_PROMPT } = require('./prompts');
+const { TAG_REGISTRY } = require('./constants');
+class StreamParser {
+  constructor(onToken, onTagOpen, onTagContent, onTagClose) {
+    this.onToken = onToken;
+    this.onTagOpen = onTagOpen;
+    this.onTagContent = onTagContent;
+    this.onTagClose = onTagClose;
+    this.buffer = '';
+    this.insideTag = null;
+    this.tagAttrs = {};
+    this.tagContent = '';
+  }
+  push(chunk) {
+    this.buffer += chunk;
+    this._process();
+  }
+  _process() {
+    while (true) {
+      if (this.insideTag === null) {
+        const ltIdx = this.buffer.indexOf('<');
+        if (ltIdx === -1) {
+          if (this.buffer) this.onToken(this.buffer);
+          this.buffer = '';
+          break;
+        }
+        if (ltIdx > 0) {
+          this.onToken(this.buffer.slice(0, ltIdx));
+          this.buffer = this.buffer.slice(ltIdx);
+        }
+        const gtIdx = this.buffer.indexOf('>');
+        if (gtIdx === -1) break;
+        const tagRaw = this.buffer.slice(1, gtIdx).trim();
+        const selfClose = tagRaw.endsWith('/');
+        const tagBody = selfClose ? tagRaw.slice(0, -1).trim() : tagRaw;
+        const spaceIdx = tagBody.search(/\s/);
+        const tagName = (spaceIdx === -1 ? tagBody : tagBody.slice(0, spaceIdx)).toLowerCase();
+        const attrStr = spaceIdx === -1 ? '' : tagBody.slice(spaceIdx + 1);
+        const attrs = {};
+        const attrRe = /(\w+)="([^"]*)"/g;
+        let m;
+        while ((m = attrRe.exec(attrStr)) !== null) attrs[m[1]] = m[2];
+        this.buffer = this.buffer.slice(gtIdx + 1);
+        const entry = TAG_REGISTRY[tagName];
+        if (!entry) {
+          this.onToken('<' + tagRaw + '>');
+          continue;
+        }
+        this.onTagOpen(tagName, attrs);
+        if (selfClose) {
+          this.onTagContent(tagName, '');
+          this.onTagClose(tagName, '', attrs);
+        } else {
+          this.insideTag = tagName;
+          this.tagAttrs = attrs;
+          this.tagContent = '';
+        }
+      } else {
+        const closing = '</' + this.insideTag + '>';
+        const closeIdx = this.buffer.toLowerCase().indexOf(closing);
+        if (closeIdx === -1) {
+          this.tagContent += this.buffer;
+          this.buffer = '';
+          break;
+        }
+        this.tagContent += this.buffer.slice(0, closeIdx);
+        this.buffer = this.buffer.slice(closeIdx + closing.length);
+        this.onTagContent(this.insideTag, this.tagContent);
+        this.onTagClose(this.insideTag, this.tagContent, this.tagAttrs);
+        this.insideTag = null;
+        this.tagContent = '';
+        this.tagAttrs = {};
+      }
+    }
+  }
+}
+function cleanAssistantContent(raw) {
+  let text = raw;
+  // Qwen3-style: response starts with implicit thinking (no opening tag), closed by </tag>.
+  // Strip everything from the start up to and including the first orphan closing think tag.
+  for (const [tag, entry] of Object.entries(TAG_REGISTRY)) {
+    if (entry.type === 'visual') {
+      text = text.replace(new RegExp(`^[\\s\\S]*?<\\/${tag}>\\s*`, 'i'), '');
+    }
+  }
+  for (const [tag, entry] of Object.entries(TAG_REGISTRY)) {
+    if (entry.type === 'strip') {
+      // Strip only the wrapper tags; keep the inner content
+      text = text.replace(new RegExp(`<${tag}[^>]*>`, 'gi'), '');
+      text = text.replace(new RegExp(`<\\/${tag}>`, 'gi'), '');
+    } else {
+      // Strip entire tag block including content (visual / tool)
+      text = text.replace(new RegExp(`<${tag}[^>]*>[\\s\\S]*?<\\/${tag}>`, 'gi'), '');
+      text = text.replace(new RegExp(`<${tag}[^>]*/>`, 'gi'), '');
+      // Strip unclosed opening tag and everything after it (truncated streaming)
+      text = text.replace(new RegExp(`<${tag}[^>]*>[\\s\\S]*$`, 'gi'), '');
+    }
+  }
+  text = text.replace(/<\/?[a-zA-Z_][a-zA-Z0-9_]*(\s[^>]*)?>/g, '');
+  text = text.replace(/\n{2,}/g, '\n');
+  return text.trim();
+}
 function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agentExecFile, ui }) {
-  const { BOLD, FG_DARK, FG_GRAY, FG_TEAL, FG_YELLOW, RST, getCols } = ui;
+  const { BOLD, FG_DARK, FG_GRAY, FG_TEAL, FG_YELLOW, RST, THEME, getCols } = ui;
-  async function runAgentLoop(messages, model, maxIterations = 10) {
-    const cols = getCols();
+  function formatFileResult(call, result) {
+    const [action, ...args] = call;
+    if (result.error) return `${action} ${args[0] || ''}: Error — ${result.error}`;
+    switch (action) {
+      case 'read':
+        return `File ${args[0]}:\n${result.content}`;
+      case 'write':
+        return `Wrote ${result.bytes} bytes to ${args[0]}`;
+      case 'append':
+        return `Appended ${result.bytes} bytes to ${args[0]}`;
+      case 'list_dir':
+        return `Directory ${args[0]}:\n${result.items.join('\n')}`;
+      case 'search_files':
+        return result.files.length
+          ? `Files matching "${args[0]}" in ${args[1] || '.'}:\n${result.files.join('\n')}`
+          : `No files found matching "${args[0]}" in ${args[1] || '.'}`;
+      case 'file_stat':
+        return `Stat ${result.path}: size=${result.size_kb} KB, mtime=${result.mtime}, type=${result.type}, mode=${result.mode}`;
+      case 'http_get': {
+        if (result.chunked) {
+          return `HTTP GET ${args[0]} (${result.status_code}) [Part 1/${result.total_parts}]:\n${result.body}\n\n[Response is large and was split into ${result.total_parts} parts. Use <http_get_next key="${args[0]}"/> to retrieve the next part.]`;
+        }
+        return `HTTP GET ${args[0]} (${result.status_code}):\n${result.body}`;
+      }
+      case 'http_get_next': {
+        if (result.done && !result.body) {
+          return `http_get_next "${args[0]}": No more content available.`;
+        }
+        const more = result.done
+          ? ' [Final part]'
+          : `\n\n[Use <http_get_next key="${args[0]}"/> to retrieve part ${result.part + 1}/${result.total_parts}.]`;
+        return `HTTP content "${args[0]}" [Part ${result.part}/${result.total_parts}]:\n${result.body}${more}`;
+      }
+      case 'ask_user':
+        return `User answered "${result.question}": ${result.answer}`;
+      case 'store_memory':
+        return `Stored memory key "${result.key}"`;
+      case 'recall_memory':
+        return result.found
+          ? `Memory "${result.key}": ${result.value}`
+          : `Memory "${result.key}": not found`;
+      case 'list_memories':
+        return result.keys.length
+          ? `Memory keys:\n${result.keys.join('\n')}`
+          : 'No memories stored';
+      case 'system_info':
+        return `System: ${result.platform}/${result.arch}, host=${result.hostname}, user=${result.user}, mem=${result.free_mem_mb}/${result.total_mem_mb} MB free, node=${result.node_version}, cwd=${result.cwd}`;
+      case 'delete_file':
+        return `Deleted ${args[0]}`;
+      case 'make_dir':
+        return `Created directory ${args[0]}`;
+      case 'remove_dir':
+        return `Removed directory ${args[0]}`;
+      case 'get_env':
+        return `${args[0]}=${result.value !== null ? result.value : '(not set)'}`;
+      case 'set_env':
+        return `Set env ${args[0]}=${args[1]}`;
+      case 'move_file':
+        return `Moved ${args[0]} → ${args[1]}`;
+      case 'copy_file':
+        return `Copied ${args[0]} → ${args[1]}`;
+      case 'edit_file':
+        return `Edited line ${args[1]} in ${args[0]}`;
+      case 'search_in_file': {
+        const matchLines = result.matches.map((m) => `  Line ${m.line}: ${m.content}`).join('\n');
+        return `Search in ${args[0]} for "${args[1]}":\n${matchLines || '  (no matches)'}`;
+      }
+      case 'replace_in_file':
+        return `Replaced ${result.count} occurrence(s) in ${args[0]}`;
+      case 'download':
+        return `Downloaded to ${result.path}`;
+      case 'upload':
+        return `Uploaded ${result.bytes} bytes to ${args[0]}`;
+      default:
+        return `${action}: done`;
+    }
+  }
+  async function executeTool(tag, content, attrs) {
+    switch (tag) {
+      case 'exec': {
+        const r = await agentExecShell(content);
+        if (r.stderr === 'Permission denied by user') {
+          return `Command \`${content}\`: Permission denied by user.`;
+        }
+        let out = r.stdout;
+        if (r.stderr) out += `\nSTDERR: ${r.stderr}`;
+        return `Command \`${content}\`:\nExit code: ${r.exit_code}\n${out}`;
+      }
+      case 'read_file': {
+        const p = attrs.path || content;
+        return formatFileResult(['read', p], await agentExecFile('read', p));
+      }
+      case 'write_file':
+      case 'create_file': {
+        const p = attrs.path;
+        if (!p) return `Error: ${tag} requires a path attribute`;
+        return formatFileResult(['write', p], await agentExecFile('write', p, content));
+      }
+      case 'append_file': {
+        const p = attrs.path;
+        if (!p) return 'Error: append_file requires a path attribute';
+        return formatFileResult(['append', p], await agentExecFile('append', p, content));
+      }
+      case 'delete_file': {
+        const p = attrs.path || content;
+        return formatFileResult(['delete_file', p], await agentExecFile('delete_file', p));
+      }
+      case 'list_dir': {
+        const p = attrs.path || content;
+        return formatFileResult(['list_dir', p], await agentExecFile('list_dir', p));
+      }
+      case 'make_dir': {
+        const p = attrs.path || content;
+        return formatFileResult(['make_dir', p], await agentExecFile('make_dir', p));
+      }
+      case 'move_file': {
+        return formatFileResult(['move_file', attrs.src, attrs.dst], await agentExecFile('move_file', attrs.src, attrs.dst));
+      }
+      case 'copy_file': {
+        return formatFileResult(['copy_file', attrs.src, attrs.dst], await agentExecFile('copy_file', attrs.src, attrs.dst));
+      }
+      case 'file_stat': {
+        const p = attrs.path || content;
+        return formatFileResult(['file_stat', p], await agentExecFile('file_stat', p));
+      }
+      case 'search_files': {
+        const pat = attrs.pattern || content;
+        const dir = attrs.dir || '.';
+        return formatFileResult(['search_files', pat, dir], await agentExecFile('search_files', pat, dir));
+      }
+      case 'http_get': {
+        const url = attrs.url || content;
+        const raw = attrs.raw || '';
+        return formatFileResult(['http_get', url, raw], await agentExecFile('http_get', url, raw));
+      }
+      case 'http_get_next': {
+        const key = attrs.key || content;
+        return formatFileResult(['http_get_next', key], await agentExecFile('http_get_next', key));
+      }
+      case 'ask_user': {
+        const q = attrs.question || content;
+        return formatFileResult(['ask_user', q], await agentExecFile('ask_user', q));
+      }
+      case 'store_memory': {
+        const k = attrs.key;
+        if (!k) return 'Error: store_memory requires a key attribute';
+        return formatFileResult(['store_memory', k], await agentExecFile('store_memory', k, content));
+      }
+      case 'recall_memory': {
+        const k = attrs.key || content;
+        return formatFileResult(['recall_memory', k], await agentExecFile('recall_memory', k));
+      }
+      case 'list_memories': {
+        return formatFileResult(['list_memories'], await agentExecFile('list_memories'));
+      }
+      case 'system_info': {
+        return formatFileResult(['system_info'], await agentExecFile('system_info'));
+      }
+      default:
+        return `Error: tool "${tag}" not implemented`;
+    }
+  }
+  async function handleTag(tag, content, attrs, callbacks, showThink) {
+    const entry = TAG_REGISTRY[tag];
+    if (!entry) return;
+    if (entry.type === 'visual' && entry.display === 'think_bubble') {
+      if (!showThink) return;
+      callbacks.onThinkEnd?.(content.trim());
+      return;
+    }
+    if (entry.type === 'strip') return;
+    // Tool execution happens in the toolCalls loop after streaming; handleTag only handles visual/strip.
+  }
+  async function runAgentLoop(messages, model, maxIterations = Infinity, tokenLimit = null, opts = {}) {
+    const {
+      showThink = false,
+      debug = false,
+      callbacks = {},
+      systemPrompt: overrideSystemPrompt = null,
+      systemPromptMode: overrideMode = null,
+      getAbortFlag = null,
+    } = opts;
+    const isAborted = getAbortFlag || (() => false);
+    const cb = callbacks;
+    const metrics = new Metrics(tokenLimit);
+    const activeSystemPrompt = overrideSystemPrompt !== null ? overrideSystemPrompt : SYSTEM_PROMPT;
+    const mode = overrideMode || 'system_role';
     for (let iteration = 0; iteration < maxIterations; iteration++) {
-      console.log();
-      console.log(`  ${FG_DARK}${'─'.repeat(Math.min(cols, 70) - 4)}${RST}`);
-      process.stdout.write(`  ${FG_TEAL}${BOLD}◆ Semalt.AI${RST}`);
-      if (iteration > 0) process.stdout.write(` ${FG_DARK}(step ${iteration + 1})${RST}`);
-      console.log();
-      console.log(`  ${FG_DARK}${'─'.repeat(Math.min(cols, 70) - 4)}${RST}`);
-      console.log();
-      process.stdout.write('  ');
+      if (isAborted()) break;
+      const linePrefix = `${FG_TEAL}${BOLD}◆ ${RST}`;
+      metrics.startTurn();
+      if (cb.onThinking) cb.onThinking();
+      // Build messagesWithSystem fresh on every API call; messages[] never stores the system entry
+      let messagesWithSystem;
+      if (mode === 'first_user') {
+        messagesWithSystem = [
+          { role: 'user', content: activeSystemPrompt },
+          { role: 'assistant', content: 'Understood.' },
+          ...messages,
+        ];
+      } else if (mode === 'prepend') {
+        const firstUserIdx = messages.findIndex((m) => m.role === 'user');
+        if (firstUserIdx === -1) {
+          messagesWithSystem = messages;
+        } else {
+          messagesWithSystem = messages.map((m, i) =>
+            i === firstUserIdx
+              ? { ...m, content: `${activeSystemPrompt}\n\n---\n\n${m.content}` }
+              : m
+          );
+        }
+      } else {
+        // 'system_role' (default)
+        messagesWithSystem = [{ role: 'system', content: activeSystemPrompt }, ...messages];
+      }
+      // Wire onToken callback: first token triggers onStreamStart
+      const parser = new StreamParser(
+        (text)               => callbacks.onToken?.(text),
+        (tag, attrs)         => callbacks.onTagOpen?.(tag, attrs),
+        (tag, content)       => {},
+        (tag, content, attrs) => handleTag(tag, content, attrs, callbacks, showThink)
+      );
+      let streamStarted = false;
+      const wrappedOnToken = cb.onToken
+        ? (token) => {
+            if (!streamStarted) {
+              streamStarted = true;
+              if (cb.onStreamStart) cb.onStreamStart();
+            }
+            parser.push(token);
+          }
+        : null;
-      const reply = await chatStream(messages, { model });
-      if (!reply) break;
+      if (debug) {
+        const header = `\n───── messages sent to agent (iteration ${iteration + 1}) ─────\n`;
+        const footer = `\n───── end messages ─────\n`;
+        process.stderr.write(header + JSON.stringify(messagesWithSystem, null, 2) + footer);
+      }
-      messages.push({ role: 'assistant', content: reply });
+      const MAX_RETRIES = 3;
+      let result = null;
+      let lastApiErr = null;
+      for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
+        if (attempt === 1) {
+          callbacks.onRequestSent?.();
+        } else {
+          cb.onRetry?.(attempt, MAX_RETRIES);
+          await new Promise((r) => setTimeout(r, 1000));
+        }
+        try {
+          result = await chatStream(messagesWithSystem, {
+            model,
+            linePrefix: wrappedOnToken ? '' : linePrefix,
+            showThink,
+            onToken: wrappedOnToken,
+            silent: !!wrappedOnToken,
+          });
+          lastApiErr = null;
+          break;
+        } catch (err) {
+          lastApiErr = err;
+          if (debug) {
+            const header = `\n───── raw http error (iteration ${iteration + 1}, attempt ${attempt}/${MAX_RETRIES}) ─────\n`;
+            const footer = `\n───── end raw http error ─────\n`;
+            const status = err.statusCode ? `HTTP ${err.statusCode}` : 'network error';
+            const headerLines = err.responseHeaders
+              ? Object.entries(err.responseHeaders).map(([k, v]) => `${k}: ${v}`).join('\n')
+              : '';
+            const body = err.rawBody !== undefined ? err.rawBody : (err.stack || err.message || String(err));
+            const parts = [status];
+            if (headerLines) parts.push(headerLines);
+            parts.push(body || '(empty body)');
+            process.stderr.write(header + parts.join('\n\n') + footer);
+          }
+        }
+      }
+      if (lastApiErr) {
+        if (cb.onError) cb.onError(lastApiErr);
+        break;
+      }
+      const reply = result ? result.content : '';
+      const usage = result ? result.usage : null;
+      metrics.endTurn(usage, model);
+      if (debug) {
+        const header = `\n───── raw ai response (iteration ${iteration + 1}) ─────\n`;
+        const footer = `\n───── end raw response ─────\n`;
+        process.stderr.write(header + (reply || '(empty)') + footer);
+      }
+      if (cb.onMetricsUpdate) {
+        cb.onMetricsUpdate({
+          totalTokens: metrics.totalTokens(),
+          contextTokens: metrics.contextTokens(),
+          turns: metrics.turns.length,
+        });
+      }
+      const limitStatus = metrics.tokenLimitStatus();
+      if (limitStatus !== null && limitStatus.pct >= 85) {
+        const warnMsg = `Context at ${limitStatus.pct}% of limit (${limitStatus.used}/${limitStatus.limit} tokens). Consider /compact.`;
+        if (cb.onError) {
+          cb.onError({ message: warnMsg, isWarning: true });
+        } else {
+          process.stdout.write(
+            `\n  ${THEME.warn}⚠ ${warnMsg}${THEME.reset}\n`
+          );
+        }
+      }
+      if (!reply) {
+        // Empty reply from the model — stream resolved with no content and no
+        // tool_calls. Most common causes: server-side disconnect mid-stream,
+        // context-window overflow that slipped past the 400/413 handler, or a
+        // model that returns only a stop token. Surface it so the user isn't
+        // left staring at an idle prompt.
+        if (cb.onError) {
+          const hint = iteration > 0 ? ' (after tool execution)' : '';
+          cb.onError({ message: `Agent returned an empty response${hint}. The connection to the model may have dropped — try again or /compact if context is large.`, isWarning: true });
+        }
+        break;
+      }
       const toolCalls = extractToolCalls(reply);
-      if (toolCalls.length === 0) break;
+      const cleanedReply = cleanAssistantContent(reply);
+      // Detect mid-tag truncation: an opening tool tag in the raw reply with
+      // no matching close. This happens when the model streams a large
+      // `<write_file>…` body and hits max_tokens or a server-side cutoff
+      // before the closing tag arrives. cleanAssistantContent strips the
+      // unclosed tag + its trailing content, so cleanedReply looks
+      // legitimate (just the planning preamble) and extractToolCalls finds
+      // zero calls — the loop would break silently and the user sees the
+      // planning text followed by nothing. Surface it so the user can retry,
+      // shorten the request, or bump max_tokens.
+      let truncatedTag = null;
+      for (const [tag, entry] of Object.entries(TAG_REGISTRY)) {
+        if (entry.type !== 'tool') continue;
+        let opens = 0;
+        for (const m of reply.matchAll(new RegExp(`<${tag}([^>]*)>`, 'gi'))) {
+          // Skip self-closing (`<tag .../>`) — they don't need a matching close.
+          if (!m[1].trimEnd().endsWith('/')) opens++;
+        }
+        if (opens === 0) continue;
+        const closes = (reply.match(new RegExp(`<\\/${tag}>`, 'gi')) || []).length;
+        if (opens > closes) { truncatedTag = tag; break; }
+      }
+      if (truncatedTag && cb.onError) {
+        cb.onError({ message: `Response truncated mid-<${truncatedTag}> tag — likely hit max_tokens or a server-side cutoff. Try again, shorten the request, or raise the model's max_tokens.`, isWarning: true });
+      }
+      messages.push({ role: 'assistant', content: cleanedReply });
+      // When showThink is off and the turn has tool calls, suppress the text bubble —
+      // pre-tool reasoning is noise, tool result bubbles already convey what happened.
+      const displayReply = (!showThink && toolCalls.length > 0) ? '' : cleanedReply;
+      if (cb.onAssistantMessage) cb.onAssistantMessage(displayReply);
-      console.log(`\n  ${FG_TEAL}◆${RST} ${FG_GRAY}Found ${toolCalls.length} action(s) to execute${RST}`);
+      // If nothing meaningful came back (no text to show, no tools to run) but
+      // the reply string wasn't strictly empty, it's usually model wrapper
+      // noise or a stripped-only response. Still a dead-end for the user.
+      if (toolCalls.length === 0 && !cleanedReply.trim()) {
+        if (cb.onError) {
+          cb.onError({ message: 'Agent reply had no visible content and no actions — stopping.', isWarning: true });
+        }
+        break;
+      }
+      if (toolCalls.length === 0) {
+        // Model narrated next steps but didn't emit a tool tag. Happens when the
+        // model ends a plan with "Let me do that for you." and stops. If we just
+        // break, the user sees a dangling promise and thinks the connection dropped.
+        if (iteration > 0 && /\b(let me|i['’]?ll|i will|i'?m going to|next[, ]|now[, ]? ?(i|we)|going to (create|write|build|add|make|run|do|set up|install))\b/i.test(cleanedReply)) {
+          if (cb.onError) {
+            cb.onError({ message: 'Agent described next steps but did not emit a tool call. Reply "continue" (or similar) to push it forward, or restart if it keeps stalling.', isWarning: true });
+          }
+        }
+        break;
+      }
+      if (isAborted()) break;
+      if (!cb.onToolStart) {
+        process.stdout.write(`\n  ${FG_TEAL}◆${RST} ${FG_GRAY}Found ${toolCalls.length} action(s) to execute${RST}\n`);
+      }
       const results = [];
       let aborted = false;
       for (const call of toolCalls) {
-        if (call[0] === 'shell') {
-          const result = await agentExecShell(call[1]);
-          if (result.stderr === 'Permission denied by user') {
-            results.push(`Command \`${call[1]}\`: Permission denied by user.`);
+        if (isAborted()) { aborted = true; break; }
+        const tag = call[0] || 'unknown';
+        const arg = call[1] || '';
+        const toolStart = Date.now();
+        if (cb.onToolStart) cb.onToolStart(tag, arg);
+        try {
+          if (tag === 'shell') {
+            const shellResult = await agentExecShell(arg);
+            const ms = Date.now() - toolStart;
+            if (shellResult.stderr === 'Permission denied by user') {
+              const resultStr = `Command \`${arg}\`: Permission denied by user.`;
+              if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms);
+              results.push(resultStr);
+              aborted = true;
+              break;
+            } else {
+              let out = shellResult.stdout;
+              if (shellResult.stderr) out += `\nSTDERR: ${shellResult.stderr}`;
+              const resultStr = `Command \`${arg}\`:\nExit code: ${shellResult.exit_code}\n${out}`;
+              if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms);
+              results.push(resultStr);
+            }
+            continue;
+          }
+          const fileResult = await agentExecFile(...call);
+          const ms = Date.now() - toolStart;
+          if (fileResult.error === 'Permission denied') {
+            const resultStr = `${tag} ${call[1] || ''}: Permission denied by user.`;
+            if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms);
+            results.push(resultStr);
             aborted = true;
+            break;
           } else {
-            let out = result.stdout;
-            if (result.stderr) out += `\nSTDERR: ${result.stderr}`;
-            results.push(`Command \`${call[1]}\`:\nExit code: ${result.exit_code}\n${out}`);
+            const resultStr = formatFileResult(call, fileResult);
+            if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms);
+            results.push(resultStr);
           }
-          continue;
+        } catch (err) {
+          const ms = Date.now() - toolStart;
+          if (cb.onToolEnd) cb.onToolEnd(tag, `Error: ${err.message}`, ms);
+          if (cb.onError) {
+            cb.onError({ message: `Tool error (${tag}): ${err.message}`, isWarning: true });
+          } else {
+            process.stdout.write(`\n  ${THEME.warn}⚠ Tool error (${tag}): ${err.message}${THEME.reset}\n`);
+          }
+          logToolCall(tag, { args: call.slice(1) }, false, 'error');
+          results.push(`${tag}: Error — ${err.message}`);
         }
+      }
-        if (call[0] === 'read') {
-          const result = await agentExecFile('read', call[1]);
-          if (result.error) results.push(`Read ${call[1]}: Error — ${result.error}`);
-          else results.push(`File ${call[1]}:\n${result.content}`);
-          continue;
+      if (aborted) {
+        const warnMsg = isAborted()
+          ? 'Agent interrupted.'
+          : 'Action denied — stopping.';
+        if (cb.onError) {
+          cb.onError({ message: warnMsg, isWarning: true });
+        } else {
+          process.stdout.write(`\n  ${FG_YELLOW}⚠${RST} ${FG_GRAY}${warnMsg}${RST}`);
         }
-        if (call[0] === 'write') {
-          const result = await agentExecFile('write', call[1], call[2]);
-          if (result.error) results.push(`Write ${call[1]}: Error — ${result.error}`);
-          else results.push(`Wrote ${result.bytes} bytes to ${call[1]}`);
+        // Push whatever results accumulated before the denial so the LLM has
+        // context if the user asks to continue.
+        if (results.length > 0) {
+          messages.push({
+            role: 'user',
+            content: `Tool execution results (partial — stopped after user denied an action):\n\n${results.join('\n\n')}`,
+          });
         }
+        break;
       }
       const feedback = results.join('\n\n');
@@ -62,13 +604,9 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
         role: 'user',
         content: `Tool execution results:\n\n${feedback}\n\nContinue with the task. If everything is done, summarize what was accomplished.`,
       });
-      if (aborted) {
-        console.log(`\n  ${FG_YELLOW}⚠${RST} ${FG_GRAY}Some actions were denied. Continuing with partial results.${RST}`);
-      }
     }
-    return messages;
+    return { messages, metrics };
   }
   return {