npm - banana-code - Versions diffs - 1.3.1 → 1.4.0 - Mend

banana-code 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/banana.js +85 -11
package/lib/agenticRunner.js +236 -10
package/lib/claudeCodeProvider.js +540 -0
package/lib/config.js +49 -15
package/lib/contextBuilder.js +11 -4
package/lib/fileManager.js +9 -11
package/lib/fsUtils.js +30 -0
package/lib/historyManager.js +3 -5
package/lib/modelRegistry.js +2 -1
package/lib/providerManager.js +7 -1
package/lib/providerStore.js +38 -4
package/lib/streamHandler.js +25 -4
package/package.json +48 -43
package/prompts/code-agent-qwen.md +1 -0
package/prompts/code-agent.md +1 -0

package/banana.js CHANGED Viewed

@@ -81,7 +81,7 @@ let pendingHumanQuestion = null; // { resolve, question }
 // CONFIGURATION
 // =============================================================================
-const VERSION = '1.3.1';
+const VERSION = '1.4.0';
 const { PAD } = require('./lib/borderRenderer'); // Single source of truth for left padding
 const DEBUG_DISABLED_VALUES = new Set(['0', 'false', 'off', 'no']);
 const NEXT_TURN_RESERVE_TOKENS = 1200;
@@ -389,7 +389,7 @@ ${P}${c.yellow}/hooks${c.reset}               Manage lifecycle hooks (add, edit,
 ${P}${c.yellow}/steer <text>${c.reset}       Steer next turn (or interrupt + redirect current turn)
 ${P}${c.yellow}/model [name]${c.reset}      Show/switch model
 ${P}${c.yellow}/model search <query>${c.reset} Search OpenRouter models and add one
-${P}${c.yellow}/connect [provider]${c.reset} Connect provider (Anthropic, OpenAI OAuth, OpenRouter)
+${P}${c.yellow}/connect [provider]${c.reset} Connect provider (Anthropic, OpenAI, OpenRouter, Claude Code)
 ${P}${c.yellow}/prompt [name]${c.reset}     Show/switch prompt (base, code-agent, or any .md)
 ${P}${c.banana}${c.dim}Config Commands:${c.reset}
@@ -442,6 +442,15 @@ function initProject() {
   tokenCounter = new TokenCounter(config);
   imageHandler = new ImageHandler(projectDir);
+  const lastRunSnapshot = config.getRunSnapshot();
+  if (lastRunSnapshot && lastRunSnapshot.completed === false) {
+    const when = lastRunSnapshot.savedAt ? new Date(lastRunSnapshot.savedAt).toLocaleString() : 'recently';
+    console.log(`${PAD}${c.yellow}⚠ Previous run appears to have ended unexpectedly.${c.reset} ${c.dim}(${when})${c.reset}`);
+    if (lastRunSnapshot.userMessage) {
+      console.log(`${PAD}${c.dim}  Last request: ${String(lastRunSnapshot.userMessage).slice(0, 120)}${c.reset}`);
+    }
+  }
   // Initialize LM Studio + provider manager
   const lmStudioUrl = config.get('lmStudioUrl') || 'http://localhost:1234';
   lmStudio = new LmStudio({ baseUrl: lmStudioUrl });
@@ -913,6 +922,7 @@ function normalizeProviderKey(raw) {
   const key = String(raw || '').trim().toLowerCase();
   if (!key) return null;
   if (key === 'lmstudio' || key === 'local') return 'local';
+  if (key === 'claude' || key === 'claudecode' || key === 'claude-code') return 'claude-code';
   if (PROVIDERS.includes(key)) return key;
   return null;
 }
@@ -1306,7 +1316,9 @@ async function connectProviderInteractive(provider) {
       label: PROVIDER_LABELS[p] || p,
       description: p === 'openai'
         ? 'OAuth device login for Codex subscription'
-        : 'Connect with API key',
+        : p === 'claude-code'
+          ? 'Use your Claude Code CLI subscription (no API key needed)'
+          : 'Connect with API key',
       tags: ['provider'],
       active: providerStore.isConnected(p)
     }));
@@ -1351,6 +1363,29 @@ async function connectProviderInteractive(provider) {
     return;
   }
+  if (provider === 'claude-code') {
+    console.log(`\n${PAD}${c.cyan}Claude Code CLI Connection${c.reset}`);
+    console.log(`${PAD}${c.dim}Checking for Claude Code CLI...${c.reset}`);
+    const { ClaudeCodeClient } = require('./lib/claudeCodeProvider');
+    const claudeClient = new ClaudeCodeClient();
+    const connected = await claudeClient.isConnected();
+    if (!connected) {
+      console.log(`${PAD}${c.red}✗ Claude Code CLI not found.${c.reset}`);
+      console.log(`${PAD}${c.dim}Install it: npm install -g @anthropic-ai/claude-code${c.reset}`);
+      console.log(`${PAD}${c.dim}Then run: claude login${c.reset}\n`);
+      return;
+    }
+    providerStore.connectClaudeCode();
+    modelRegistry.refreshRemoteModels();
+    console.log(`${PAD}${c.green}✓ Connected Claude Code CLI${c.reset}`);
+    console.log(`${PAD}${c.dim}Uses your existing Claude subscription (no API key needed).${c.reset}`);
+    console.log(`${PAD}${c.dim}Use /model to switch to Claude Code models.${c.reset}\n`);
+    return;
+  }
   throw new Error(`Unsupported provider: ${provider}`);
 }
@@ -1589,9 +1624,15 @@ async function handleCommand(input) {
       tokenCounter.resetSession();
       imageHandler.clearPending();
       setContextEstimate(0);
+      promptDuringWork = false;
+      renderWorkingPrompt = null;
+      if (rl) {
+        rl.write(null, { ctrl: true, name: 'u' });
+      }
       if (statusBar) {
+        statusBar.setInputHint('');
         statusBar.update({ sessionIn: 0, sessionOut: 0 });
-        statusBar.reinstall();
+        statusBar.uninstall();
       }
       console.clear();
       // Push cursor to bottom of scroll region so prompt isn't stranded at the top
@@ -1603,6 +1644,9 @@ async function handleCommand(input) {
         if (padding > 0) process.stdout.write('\n'.repeat(padding));
       }
       refreshIdleContextEstimate();
+      if (statusBar) {
+        statusBar.reinstall();
+      }
       if (rl) {
         rl.setPrompt(buildPromptPrefix());
         rl.prompt(false);
@@ -2118,7 +2162,7 @@ async function handleCommand(input) {
       if (normalizedSub === 'disconnect') {
         const provider = normalizeProviderKey(secondArg);
         if (!provider || provider === 'local') {
-          console.log(`\n${PAD}${c.yellow}Usage: /connect disconnect <anthropic|openai|openrouter>${c.reset}\n`);
+          console.log(`\n${PAD}${c.yellow}Usage: /connect disconnect <anthropic|openai|openrouter|claude-code>${c.reset}\n`);
           return true;
         }
         const wasActiveProvider = (modelRegistry.getCurrentModel()?.provider || 'local') === provider;
@@ -2135,7 +2179,7 @@ async function handleCommand(input) {
       if (normalizedSub === 'use') {
         const provider = normalizeProviderKey(secondArg);
         if (!provider) {
-          console.log(`\n${PAD}${c.yellow}Usage: /connect use <local|anthropic|openai|openrouter>${c.reset}\n`);
+          console.log(`\n${PAD}${c.yellow}Usage: /connect use <local|anthropic|openai|openrouter|claude-code>${c.reset}\n`);
           return true;
         }
@@ -2160,7 +2204,7 @@ async function handleCommand(input) {
       if (!provider || provider === 'local') {
         console.log(`\n${PAD}${c.yellow}Usage:${c.reset}`);
         console.log(`${PAD}${c.dim}  /connect${c.reset}`);
-        console.log(`${PAD}${c.dim}  /connect <anthropic|openai|openrouter>${c.reset}`);
+        console.log(`${PAD}${c.dim}  /connect <anthropic|openai|openrouter|claude-code>${c.reset}`);
         console.log(`${PAD}${c.dim}  /connect status${c.reset}`);
         console.log(`${PAD}${c.dim}  /connect disconnect <provider>${c.reset}`);
         console.log(`${PAD}${c.dim}  /connect use <local|provider>${c.reset}\n`);
@@ -2684,9 +2728,19 @@ async function sendMessage(message) {
     fullMessage += '\n\n[Image analysis above is primary source of truth. Focus on image content, not file listing.]';
   }
+  config.saveRunSnapshot({
+    projectDir,
+    activeModel: modelRegistry.getCurrent(),
+    userMessage: message,
+    fullMessagePreview: fullMessage.slice(0, 2000),
+    conversationLength: conversationHistory.length
+  });
   try {
     await sendAgenticMessage(fullMessage, pendingImages, message);
+    config.completeRunSnapshot({ status: 'completed' });
   } catch (error) {
+    config.completeRunSnapshot({ status: 'failed', error: error.message });
     const provider = activeProviderKey();
     const providerLabel = providerManager.getProviderLabel(provider);
     console.log(`\n${PAD}${c.red}✗ Error: ${error.message}${c.reset}`);
@@ -2985,6 +3039,11 @@ async function sendStreamingMessage(message, images = [], rawMessage = '') {
   try {
     await streamHandler.handleStream(response);
+    const streamResult = streamHandler.getResult();
+    if (!streamResult.completed && streamResult.warning) {
+      fullResponse = `${streamResult.warning}\n\n${fullResponse}`.trim();
+      console.log(`\n${PAD}${c.yellow}⚠ ${streamResult.warning}${c.reset}`);
+    }
   } catch (error) {
     stopStatus();
     // Check if this was an abort
@@ -5172,10 +5231,12 @@ Examples:
   // then subsequent lines arrive as new 'line' events. We detect paste by
   // buffering lines that arrive within PASTE_DELAY_MS of each other.
   const PASTE_DELAY_MS = 400; // 400ms to handle large pastes and Windows Terminal dialog latency
+  const PASTE_STRAGGLER_WINDOW_MS = 1200; // Late lines can arrive after submit on Windows Terminal
   let pasteBuffer = [];
   let pasteTimer = null;
   let waitingForInput = false;
   let lastFlushTime = 0; // Track when paste buffer last flushed (to catch stragglers)
+  let lastPasteStragglerWarningAt = 0;
   showGeminiKeyPrompt = (callback) => {
     awaitingGeminiKey = true;
@@ -5342,13 +5403,17 @@ Examples:
         return;
       }
-      // Straggler paste lines: arrived after flush but within 2s and before AI started.
-      // This happens on Windows when the paste confirmation dialog adds latency between lines.
-      if (!currentAbortController && lastFlushTime && (Date.now() - lastFlushTime) < 2000) {
+      // Straggler paste lines: arrived after flush but before the paste has fully settled.
+      // On Windows Terminal, delayed lines can arrive after the first chunk was submitted,
+      // and without this guard they'd be misread as mid-turn steering.
+      if (lastFlushTime && (Date.now() - lastFlushTime) < PASTE_STRAGGLER_WINDOW_MS) {
         const trimmed = String(input || '').trim();
         if (trimmed) {
           appendDebugLog(`[paste-straggler] Dropped line arrived ${Date.now() - lastFlushTime}ms after flush: ${trimmed.slice(0, 60)}\n`);
-          console.log(`${PAD}${c.yellow}Lines were dropped from your paste.${c.reset} ${c.dim}Try pasting again, or disable the paste warning in Windows Terminal settings.${c.reset}`);
+          if (lastPasteStragglerWarningAt !== lastFlushTime) {
+            lastPasteStragglerWarningAt = lastFlushTime;
+            console.log(`${PAD}${c.yellow}Ignored delayed paste lines from the previous submission.${c.reset} ${c.dim}If this keeps happening, disable the Windows Terminal paste warning or paste again after the prompt settles.${c.reset}`);
+          }
         }
         return;
       }
@@ -5476,6 +5541,9 @@ process.on('SIGINT', () => {
   if (config && config.get('autoSaveHistory') && conversationHistory.length > 0) {
     config.saveConversation('autosave', conversationHistory);
   }
+  if (config) {
+    config.completeRunSnapshot({ status: 'cancelled' });
+  }
   if (watcher) watcher.stop();
   console.log(`\n${PAD}${c.cyan}👋 See you later!${c.reset}\n`);
   process.exit(0);
@@ -5484,6 +5552,12 @@ process.on('SIGINT', () => {
 main().catch(error => {
   logSessionEnd('crash', ` error=${error.message}`);
   if (statusBar) statusBar.uninstall();
+  if (config && config.get('autoSaveHistory') && conversationHistory.length > 0) {
+    config.saveConversation('autosave-crash', conversationHistory);
+  }
+  if (config) {
+    config.completeRunSnapshot({ status: 'crashed', error: error.message });
+  }
   console.error(`${c.red}Fatal error: ${error.message}${c.reset}`);
   if (watcher) watcher.stop();
   process.exit(1);

package/lib/agenticRunner.js CHANGED Viewed

@@ -549,6 +549,11 @@ const READ_ONLY_TOOLS = TOOLS.filter(t => READ_ONLY_TOOL_NAMES.has(t.function.na
 const IGNORE_PATTERNS = ['node_modules', '.git', '.next', 'dist', 'build', '.banana'];
 const MAX_ITERATIONS = 50;
+const MAX_TOOL_CALLS_PER_TURN = 24; // Upper bound on how many calls sanitizeToolCalls keeps from one batch
+const MAX_IDENTICAL_TOOL_CALLS_PER_TURN = 1; // How many calls with the same name and arguments are kept per batch; further repeats are dropped
+const MAX_TOOL_CALLS_BY_NAME_PER_TURN = { // Per-tool caps keyed by function name; tools not listed here have no per-name cap
+  list_files: 6
+};
 const WRITE_TOOL_NAMES = new Set(['create_file', 'edit_file', 'run_command']);
 const CONTEXT_TRIM_THRESHOLD = 0.60; // 60% of context limit - start trimming early
 const CONTEXT_TRIM_KEEP_RECENT = 6;  // Keep last N messages intact
@@ -885,9 +890,50 @@ function executeEditFile(projectDir, filePath, content) {
   }
 }
+/**
+ * Classify a shell command by whether it changes git or filesystem state and
+ * should therefore be followed by a read-only verification step.
+ *
+ * Matching is keyword-based on the trimmed, lower-cased command text. When a
+ * command matches both a git mutation and a filesystem mutation pattern, the
+ * git classification is returned because it is tested first.
+ *
+ * @param {*} command - Command text; coerced with String(), falsy values become ''.
+ * @returns {{requiresVerification: boolean, category: (string|null), verificationHint: (string|null), verificationEvidenceFor: string[], readOnlyCommand: boolean}}
+ *   `category` is 'git_state', 'filesystem_state', or null when no mutation
+ *   pattern matched. `verificationEvidenceFor` lists the categories whose
+ *   read-only pattern matched; it is filled in even for mutating commands.
+ *   `readOnlyCommand` is true only when no mutation pattern matched and at
+ *   least one read-only pattern did.
+ */ function classifyCommandVerification(command) {
+  const lowerCommand = String(command || '').trim().toLowerCase();
+  const gitMutationRe = /\bgit\s+(pull|checkout|switch|reset|merge|rebase|cherry-pick|restore|clean|stash\s+(pop|apply|drop)|apply|commit|push)\b/;
+  const fsMutationRe = /\b(copy|move|ren|rename|mkdir|rmdir|del|erase|xcopy|robocopy|attrib)\b/; // NOTE(review): cp/mv/rm are not in this list, so those commands are never flagged - confirm intent
+  const gitReadOnlyRe = /\bgit\s+(status|rev-parse|branch|log|diff|show|ls-files|show-ref)\b/; // NOTE(review): any `git branch ...` counts as read-only here, including forms with flags - confirm
+  const fsReadOnlyRe = /\b(dir|type|findstr|where)\b/; // NOTE(review): bare words such as "type" or "where" also match inside unrelated commands (e.g. after "--") - confirm
+  const verificationEvidenceFor = []; // Collected before the mutation checks so every return path carries it
+  if (gitReadOnlyRe.test(lowerCommand)) verificationEvidenceFor.push('git_state');
+  if (fsReadOnlyRe.test(lowerCommand)) verificationEvidenceFor.push('filesystem_state');
+  if (gitMutationRe.test(lowerCommand)) {
+    return {
+      requiresVerification: true,
+      category: 'git_state',
+      verificationHint: 'Before claiming success, run a read-only git check such as `git status --short`, `git rev-parse HEAD`, or compare `HEAD` to `@{u}`.',
+      verificationEvidenceFor,
+      readOnlyCommand: false
+    };
+  }
+  if (fsMutationRe.test(lowerCommand)) {
+    return {
+      requiresVerification: true,
+      category: 'filesystem_state',
+      verificationHint: 'Before claiming success, run a read-only check such as `dir`, `type`, or `findstr` to confirm the change is actually present.',
+      verificationEvidenceFor,
+      readOnlyCommand: false
+    };
+  }
+  return { // No mutation keyword matched
+    requiresVerification: false,
+    category: null,
+    verificationHint: null,
+    verificationEvidenceFor,
+    readOnlyCommand: verificationEvidenceFor.length > 0
+  };
+}
 async function executeRunCommand(projectDir, command, options = {}) {
   const signal = options.signal;
   const timeoutMs = options.timeoutMs ?? 30000;
+  const verificationMeta = classifyCommandVerification(command);
   // Basic safety check - block destructive commands
   const dangerous = /\b(rm\s+-rf|del\s+\/[sqf]|format\s+[a-z]:)\b/i;
@@ -946,6 +992,13 @@ async function executeRunCommand(projectDir, command, options = {}) {
         const limit = 15000;
         finish(resolve, {
           success: true,
+          command,
+          outcome: 'completed',
+          requiresVerification: verificationMeta.requiresVerification,
+          verificationCategory: verificationMeta.category,
+          verificationHint: verificationMeta.verificationHint,
+          verificationEvidenceFor: verificationMeta.verificationEvidenceFor,
+          readOnlyCommand: verificationMeta.readOnlyCommand,
           output: output.substring(0, limit),
           ...(output.length > limit ? { truncated: true, totalLength: output.length } : {})
         });
@@ -953,6 +1006,8 @@ async function executeRunCommand(projectDir, command, options = {}) {
         const limit = 10000;
         finish(resolve, {
           error: `Command failed with exit code ${code}`,
+          command,
+          outcome: code === 124 ? 'timed_out' : 'failed',
           output: output.substring(0, limit),
           exitCode: code,
           ...(output.length > limit ? { truncated: true, totalLength: output.length } : {})
@@ -967,6 +1022,7 @@ async function executeRunCommand(projectDir, command, options = {}) {
       finish(resolve, {
         error: `Command timed out after ${timeoutMs}ms`,
         output: raw.substring(0, 10000),
+        outcome: 'timed_out',
         exitCode: 124,
         ...(raw.length > 10000 ? { truncated: true, totalLength: raw.length } : {})
       });
@@ -1124,6 +1180,77 @@ function stripControlTokens(text) {
   return cleaned.replace(/^\s+$/, '');
 }
+/**
+ * Serialize a value to JSON-like text with object keys emitted in sorted
+ * order, so objects that differ only in key order produce the same string.
+ * Arrays keep their element order and are serialized recursively; any other
+ * value is passed to JSON.stringify.
+ *
+ * @param {*} value - Value to serialize.
+ * @returns {string|undefined} Serialized text; undefined only in the cases
+ *   where JSON.stringify itself returns undefined (for example when `value`
+ *   is undefined).
+ */ function stableStringify(value) {
+  if (Array.isArray(value)) {
+    return `[${value.map(stableStringify).join(',')}]`;
+  }
+  if (value && typeof value === 'object') { // The truthiness test sends null to the JSON.stringify fallback below
+    const keys = Object.keys(value).sort();
+    return `{${keys.map(key => `${JSON.stringify(key)}:${stableStringify(value[key])}`).join(',')}}`;
+  }
+  return JSON.stringify(value);
+}
+/**
+ * Parse the raw JSON argument string of a tool call.
+ *
+ * @param {*} rawArgs - Expected to be a JSON string.
+ * @returns {*} The parsed JSON value, or an empty object when `rawArgs` is not
+ *   a string or is not valid JSON. The parse error is discarded.
+ *
+ * NOTE(review): every non-string or malformed input collapses to the same
+ * empty object, so callers cannot tell such inputs apart from each other or
+ * from a genuine "{}" - confirm that is acceptable for signature-based
+ * de-duplication.
+ */ function parseToolArgs(rawArgs) {
+  if (typeof rawArgs !== 'string') return {};
+  try {
+    return JSON.parse(rawArgs);
+  } catch {
+    return {};
+  }
+}
+/**
+ * Filter one batch of tool calls, dropping invalid, repeated and excess entries.
+ *
+ * Each call is checked in this order, and the first failing check decides the
+ * drop reason:
+ *   1. 'invalid'            - the call has no function name;
+ *   2. 'duplicate'          - the same name + arguments signature was already
+ *                             kept MAX_IDENTICAL_TOOL_CALLS_PER_TURN times;
+ *   3. 'per_tool_overflow'  - the tool has a cap in
+ *                             MAX_TOOL_CALLS_BY_NAME_PER_TURN and it is reached;
+ *   4. 'overflow'           - MAX_TOOL_CALLS_PER_TURN calls were already kept.
+ * Counters advance only for calls that are kept. The input array is not
+ * modified; kept entries are the same objects that were passed in.
+ *
+ * @param {Array|null|undefined} toolCalls - Calls shaped like
+ *   `{ function: { name, arguments } }`; a nullish value is treated as empty.
+ * @returns {{toolCalls: Array, dropped: Array, summary: {invalid: number, duplicate: number, perToolOverflow: number, overflow: number}}}
+ *   `toolCalls` holds the kept calls in their original order, `dropped` holds
+ *   `{ reason, toolCall, signature? }` records, and `summary` counts the
+ *   dropped records per reason.
+ *
+ * NOTE(review): the signature is built from parseToolArgs, which yields an
+ * empty object for non-string or malformed arguments. Distinct calls of the
+ * same tool with such arguments therefore share one signature and all but the
+ * first are dropped as duplicates - confirm this is intended.
+ */ function sanitizeToolCalls(toolCalls) {
+  const kept = [];
+  const dropped = [];
+  const signatureCounts = new Map(); // signature -> number of kept calls with that signature
+  const toolNameCounts = new Map(); // function name -> number of kept calls for that tool
+  for (const toolCall of toolCalls || []) {
+    const functionName = toolCall?.function?.name;
+    if (!functionName) {
+      dropped.push({ reason: 'invalid', toolCall });
+      continue;
+    }
+    const args = parseToolArgs(toolCall.function.arguments);
+    const signature = `${functionName}:${stableStringify(args)}`; // Key-order-independent identity of the call
+    const seenCount = signatureCounts.get(signature) || 0;
+    const sameToolCount = toolNameCounts.get(functionName) || 0;
+    if (seenCount >= MAX_IDENTICAL_TOOL_CALLS_PER_TURN) {
+      dropped.push({ reason: 'duplicate', toolCall, signature });
+      continue;
+    }
+    const perToolLimit = MAX_TOOL_CALLS_BY_NAME_PER_TURN[functionName];
+    if (perToolLimit && sameToolCount >= perToolLimit) { // Tools without a configured cap skip this check
+      dropped.push({ reason: 'per_tool_overflow', toolCall, signature });
+      continue;
+    }
+    if (kept.length >= MAX_TOOL_CALLS_PER_TURN) {
+      dropped.push({ reason: 'overflow', toolCall, signature });
+      continue;
+    }
+    signatureCounts.set(signature, seenCount + 1);
+    toolNameCounts.set(functionName, sameToolCount + 1);
+    kept.push(toolCall);
+  }
+  return {
+    toolCalls: kept,
+    dropped,
+    summary: {
+      invalid: dropped.filter(item => item.reason === 'invalid').length,
+      duplicate: dropped.filter(item => item.reason === 'duplicate').length,
+      perToolOverflow: dropped.filter(item => item.reason === 'per_tool_overflow').length,
+      overflow: dropped.filter(item => item.reason === 'overflow').length
+    }
+  };
+}
 // ─── Repetition Detection ─────────────────────────────────────────────────────
 /**
@@ -1155,6 +1282,8 @@ async function consumeStream(response, onToken) {
   let thinkBuffer = '';   // accumulates text inside a think block
   let inThink = false;
   let repetitionDetected = false;
+  let doneSignalReceived = false;
+  let warning = null;
   const flush = (text) => {
     const clean = stripControlTokens(text);
@@ -1184,7 +1313,11 @@ async function consumeStream(response, onToken) {
     for (const line of lines) {
       const trimmed = line.trim();
-      if (!trimmed || trimmed === 'data: [DONE]') continue;
+      if (!trimmed) continue;
+      if (trimmed === 'data: [DONE]') {
+        doneSignalReceived = true;
+        continue;
+      }
       if (!trimmed.startsWith('data: ')) continue;
       try {
@@ -1224,7 +1357,16 @@ async function consumeStream(response, onToken) {
     }
   }
-  return fullContent;
+  if (!doneSignalReceived) {
+    warning = 'Warning: final stream ended without an explicit completion signal. The response may be incomplete.';
+  }
+  return {
+    content: fullContent,
+    completed: doneSignalReceived,
+    warning,
+    repetitionDetected
+  };
 }
 // ─── Agentic Loop ───────────────────────────────────────────────────────────
@@ -1252,6 +1394,7 @@ class AgenticRunner {
     this.lastTurnMessagesEstimate = 0;
     this.totalCacheReadTokens = 0;
     this.totalCacheCreationTokens = 0;
+    this.lastRunOutcome = { status: 'running', phase: 'start', warning: null };
   }
   /**
@@ -1302,9 +1445,12 @@ class AgenticRunner {
     let iterations = 0;
     const toolCallHistory = []; // Track tool calls for loop detection
     const failedMcpTools = new Set(); // Track MCP tools that returned "Unknown tool" errors
+    const pendingCommandVerifications = new Map(); // category -> verification hint
     let readOnlyStreak = 0; // Consecutive iterations with only read-only tool calls
     let loopWarningCount = 0; // How many times loop detection has fired
+    let verificationReminderCount = 0; // How many times we had to demand verification before finalizing
     // Model-tier-aware read-only thresholds: smarter models get more research leeway
     // options.model is the raw model ID (e.g. "claude-sonnet-4-6-20250514", "gpt-4o", "silverback")
     const modelId = (options.model || '').toLowerCase();
@@ -1483,12 +1629,27 @@ class AgenticRunner {
       // Some models use finish_reason "tool_calls", others use "stop" or "function_call"
       // but still include tool_calls in the message. Check for the array itself.
       if (assistantMessage.tool_calls && assistantMessage.tool_calls.length > 0) {
-        // Cap tool calls per response to prevent runaway models spamming dozens of calls
-        const MAX_TOOL_CALLS_PER_RESPONSE = 8;
-        if (assistantMessage.tool_calls.length > MAX_TOOL_CALLS_PER_RESPONSE) {
-          appendDebugLog(`  [TOOL CALL CAP] Model returned ${assistantMessage.tool_calls.length} tool calls, capping to ${MAX_TOOL_CALLS_PER_RESPONSE}\n`);
-          this.onWarning(`Model tried to make ${assistantMessage.tool_calls.length} tool calls at once. Capping to ${MAX_TOOL_CALLS_PER_RESPONSE}.`);
-          assistantMessage.tool_calls = assistantMessage.tool_calls.slice(0, MAX_TOOL_CALLS_PER_RESPONSE);
+        const originalToolCallCount = assistantMessage.tool_calls.length;
+        const sanitizedBatch = sanitizeToolCalls(assistantMessage.tool_calls);
+        assistantMessage.tool_calls = sanitizedBatch.toolCalls;
+        if (sanitizedBatch.dropped.length > 0) {
+          appendDebugLog(
+            `  [tool batch sanitized] original=${originalToolCallCount} kept=${assistantMessage.tool_calls.length} ` +
+            `duplicate=${sanitizedBatch.summary.duplicate} per_tool_overflow=${sanitizedBatch.summary.perToolOverflow} ` +
+            `overflow=${sanitizedBatch.summary.overflow} invalid=${sanitizedBatch.summary.invalid}\n`
+          );
+          this.onWarning(
+            `Trimmed a noisy tool batch from ${originalToolCallCount} calls to ${assistantMessage.tool_calls.length}.`
+          );
+        }
+        if (assistantMessage.tool_calls.length === 0) {
+          messages.push({
+            role: 'system',
+            content: 'Your previous tool batch was invalid or excessively repetitive. Do NOT emit more tools right now. Answer the user directly with what you already know, or explain what specific missing context is still needed.'
+          });
+          continue;
         }
         // Add assistant message to history, preserving the reasoning field
@@ -1568,6 +1729,14 @@ class AgenticRunner {
           // Track command execution for hooks
           if (functionName === 'run_command' && !result.error) {
+            if (result.requiresVerification && result.verificationCategory) {
+              pendingCommandVerifications.set(result.verificationCategory, result.verificationHint || 'Run a read-only verification command before claiming success.');
+            }
+            if (Array.isArray(result.verificationEvidenceFor)) {
+              for (const category of result.verificationEvidenceFor) {
+                pendingCommandVerifications.delete(category);
+              }
+            }
             if (this.onCommandComplete) this.onCommandComplete(args.command, result);
           }
@@ -1673,6 +1842,12 @@ class AgenticRunner {
           nudgeParts.push(`Non-existent MCP tools (do NOT retry): ${[...failedMcpTools].join(', ')}`);
         }
+        if (pendingCommandVerifications.size > 0) {
+          nudgeParts.push(
+            `State-changing commands are still UNVERIFIED. Before telling the user the task is done, run a read-only verification step. ${[...pendingCommandVerifications.values()].join(' ')}`
+          );
+        }
         if (nudgeParts.length > 0) {
           messages.push({
             role: 'system',
@@ -1680,6 +1855,17 @@ class AgenticRunner {
           });
         }
+        if (sanitizedBatch.dropped.length > 0) {
+          messages.push({
+            role: 'system',
+            content:
+              `Your previous response tried to call too many or duplicate tools. ` +
+              `Dropped: ${sanitizedBatch.summary.duplicate} duplicate, ${sanitizedBatch.summary.perToolOverflow} excessive same-tool calls, ` +
+              `${sanitizedBatch.summary.overflow} overflow, ${sanitizedBatch.summary.invalid} invalid. ` +
+              `Next turn, use fewer tools and avoid repeating the same call with identical arguments.`
+          });
+        }
         // Track read-only streaks (iterations with no writes or commands)
         // Skip streak tracking in plan mode - plan mode is inherently read-only
         const thisIterToolNames = assistantMessage.tool_calls.map(t => t.function.name);
@@ -1752,6 +1938,7 @@ class AgenticRunner {
               this._lastWrittenFiles = [...writtenFiles];
               logRunTotals('loop-break');
               const loopResponse = finalContent || 'I got stuck in a loop and could not complete the task. Please try rephrasing your request.';
+              this.lastRunOutcome = { status: 'completed_with_warnings', phase: 'loop-break', warning: 'Loop breaker forced finalization.' };
               await this.emitStreaming(loopResponse);
               this.onContent(loopResponse);
               return loopResponse;
@@ -1787,6 +1974,7 @@ class AgenticRunner {
           this._lastWrittenFiles = [...writtenFiles];
           logRunTotals('no-progress-break');
           const npResponse = npContent || 'I spent too many iterations researching without making progress. Please try a more specific request.';
+          this.lastRunOutcome = { status: 'completed_with_warnings', phase: 'no-progress-break', warning: 'No-progress breaker forced finalization.' };
           await this.emitStreaming(npResponse);
           this.onContent(npResponse);
           return npResponse;
@@ -1801,6 +1989,18 @@ class AgenticRunner {
         // Final response - no more tool calls.
         // The non-streaming chat() call already returned content. Use it directly
         // instead of making a redundant streaming call that may return empty/truncated.
+        if (pendingCommandVerifications.size > 0 && verificationReminderCount < 1) {
+          verificationReminderCount++;
+          messages.push({
+            role: 'system',
+            content:
+              `STOP. You are about to answer, but you still have unverified state-changing command results. ` +
+              `Before claiming completion, run at least one read-only verification step for these categories: ${[...pendingCommandVerifications.keys()].join(', ')}. ` +
+              `${[...pendingCommandVerifications.values()].join(' ')}`
+          });
+          continue;
+        }
         let existingContent = stripControlTokens(assistantMessage.content || '');
         // Extract inline <think>/<thinking> blocks from content (Qwen3.5 embeds reasoning in content)
@@ -1835,11 +2035,23 @@ class AgenticRunner {
         const reasoning = assistantMessage.reasoning || assistantMessage.reasoning_content || inlineReasoning;
+        if (pendingCommandVerifications.size > 0) {
+          const verificationWarning = `Warning: the requested command effects were not independently verified. ${[...pendingCommandVerifications.values()].join(' ')}`;
+          existingContent = existingContent
+            ? `${verificationWarning}\n\n${existingContent}`
+            : verificationWarning;
+        }
         // If the model already produced content in this iteration, use it directly
         if (existingContent) {
           if (reasoning) {
             this.onReasoning(stripControlTokens(reasoning));
           }
+          this.lastRunOutcome = {
+            status: pendingCommandVerifications.size > 0 ? 'completed_with_warnings' : 'completed',
+            phase: 'final-content',
+            warning: pendingCommandVerifications.size > 0 ? 'Completion claims were not fully verified.' : null
+          };
           await this.emitStreaming(existingContent);
           this.onContent(existingContent);
           logRunTotals('final-content');
@@ -1853,6 +2065,7 @@ class AgenticRunner {
           // Some models put the actual answer in reasoning when content is empty.
           // Return a minimal acknowledgment rather than an empty response.
           const fallback = '(Response was in reasoning only - see thinking output above)';
+          this.lastRunOutcome = { status: 'completed_with_warnings', phase: 'final-reasoning-fallback', warning: 'Model returned reasoning without visible content.' };
           await this.emitStreaming(fallback);
           this.onContent(fallback);
           logRunTotals('final-reasoning-fallback');
@@ -1877,6 +2090,7 @@ class AgenticRunner {
           const content = stripControlTokens(thinkMsg?.content || '');
           if (thinkReasoning) this.onReasoning(stripControlTokens(thinkReasoning));
+          this.lastRunOutcome = { status: 'completed', phase: 'final-think-pass', warning: null };
           await this.emitStreaming(content);
           this.onContent(content);
           logRunTotals('final-think-pass');
@@ -1895,10 +2109,21 @@ class AgenticRunner {
           signal: options.signal
         });
-        const content = await consumeStream(streamResponse, (token) => {
+        const streamResult = await consumeStream(streamResponse, (token) => {
           this.onToken(token);
         });
+        const content = streamResult.completed || !streamResult.warning
+          ? streamResult.content
+          : `${streamResult.warning}\n\n${streamResult.content}`.trim();
+        if (!streamResult.completed && streamResult.warning) {
+          this.onWarning(streamResult.warning);
+        }
+        this.lastRunOutcome = {
+          status: streamResult.completed ? 'completed' : 'completed_with_warnings',
+          phase: 'final-stream',
+          warning: streamResult.warning
+        };
         this.onContent(content);
         logRunTotals('final-stream');
         return content;
@@ -1906,9 +2131,10 @@ class AgenticRunner {
     }
     this.onWarning('Max tool iterations reached');
+    this.lastRunOutcome = { status: 'failed', phase: 'max-iterations', warning: 'Max tool iterations reached.' };
     logRunTotals('max-iterations');
     return '';
   }
 }
-module.exports = { AgenticRunner, TOOLS, READ_ONLY_TOOLS, executeTool, setMcpClient };
+module.exports = { AgenticRunner, TOOLS, READ_ONLY_TOOLS, executeTool, setMcpClient, sanitizeToolCalls, classifyCommandVerification };