npm - banana-code - Versions diffs - 1.3.1 → 1.4.1 - Mend

banana-code 1.3.1 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/banana.js +219 -31
package/lib/agenticRunner.js +236 -10
package/lib/claudeCodeProvider.js +540 -0
package/lib/config.js +49 -15
package/lib/contextBuilder.js +11 -4
package/lib/fileManager.js +9 -11
package/lib/fsUtils.js +30 -0
package/lib/historyManager.js +3 -5
package/lib/interactivePicker.js +2 -2
package/lib/modelRegistry.js +3 -2
package/lib/providerManager.js +7 -1
package/lib/providerStore.js +38 -4
package/lib/streamHandler.js +25 -4
package/package.json +48 -43
package/prompts/base.md +33 -23
package/prompts/code-agent-qwen.md +1 -0
package/prompts/code-agent.md +157 -70

package/lib/agenticRunner.js CHANGED Viewed

@@ -549,6 +549,11 @@ const READ_ONLY_TOOLS = TOOLS.filter(t => READ_ONLY_TOOL_NAMES.has(t.function.na
 const IGNORE_PATTERNS = ['node_modules', '.git', '.next', 'dist', 'build', '.banana'];
 const MAX_ITERATIONS = 50;
+const MAX_TOOL_CALLS_PER_TURN = 24; // Upper bound on how many tool calls sanitizeToolCalls keeps from one batch
+const MAX_IDENTICAL_TOOL_CALLS_PER_TURN = 1; // How many calls with the same tool name and identical arguments are kept per batch
+const MAX_TOOL_CALLS_BY_NAME_PER_TURN = { // Per-tool caps keyed by tool name; tools not listed here get no per-tool cap
+  list_files: 6
+};
 const WRITE_TOOL_NAMES = new Set(['create_file', 'edit_file', 'run_command']);
 const CONTEXT_TRIM_THRESHOLD = 0.60; // 60% of context limit - start trimming early
 const CONTEXT_TRIM_KEEP_RECENT = 6;  // Keep last N messages intact
@@ -885,9 +890,50 @@ function executeEditFile(projectDir, filePath, content) {
   }
 }
+/**
+ * Classify a shell command for post-run verification.
+ *
+ * The command text is trimmed, lower-cased and matched against fixed keyword
+ * patterns: git/filesystem mutation verbs mark the result as needing
+ * verification, while git/filesystem read-only verbs are recorded as
+ * verification evidence for their category.
+ *
+ * @param {*} command - Command text; falsy values are treated as an empty string.
+ * @returns {{requiresVerification: boolean, category: (string|null), verificationHint: (string|null), verificationEvidenceFor: string[], readOnlyCommand: boolean}}
+ *   Classification metadata. `category` is 'git_state' or 'filesystem_state'
+ *   when a mutation pattern matches and null otherwise.
+ */
+function classifyCommandVerification(command) {
+  const normalized = String(command || '').trim().toLowerCase();
+  // Read-only probes, listed in the order their categories are reported as evidence.
+  const evidenceProbes = [
+    ['git_state', /\bgit\s+(status|rev-parse|branch|log|diff|show|ls-files|show-ref)\b/],
+    ['filesystem_state', /\b(dir|type|findstr|where)\b/]
+  ];
+  const verificationEvidenceFor = evidenceProbes
+    .filter(([, pattern]) => pattern.test(normalized))
+    .map(([category]) => category);
+  // Mutation rules are tried in order, so a git mutation wins over a filesystem one.
+  const mutationRules = [
+    {
+      category: 'git_state',
+      pattern: /\bgit\s+(pull|checkout|switch|reset|merge|rebase|cherry-pick|restore|clean|stash\s+(pop|apply|drop)|apply|commit|push)\b/,
+      verificationHint: 'Before claiming success, run a read-only git check such as `git status --short`, `git rev-parse HEAD`, or compare `HEAD` to `@{u}`.'
+    },
+    {
+      category: 'filesystem_state',
+      pattern: /\b(copy|move|ren|rename|mkdir|rmdir|del|erase|xcopy|robocopy|attrib)\b/,
+      verificationHint: 'Before claiming success, run a read-only check such as `dir`, `type`, or `findstr` to confirm the change is actually present.'
+    }
+  ];
+  const matchedRule = mutationRules.find(rule => rule.pattern.test(normalized));
+  if (matchedRule) {
+    return {
+      requiresVerification: true,
+      category: matchedRule.category,
+      verificationHint: matchedRule.verificationHint,
+      verificationEvidenceFor,
+      readOnlyCommand: false
+    };
+  }
+  // No mutation detected: the command is "read-only" only if it matched a probe.
+  return {
+    requiresVerification: false,
+    category: null,
+    verificationHint: null,
+    verificationEvidenceFor,
+    readOnlyCommand: verificationEvidenceFor.length > 0
+  };
+}
 async function executeRunCommand(projectDir, command, options = {}) {
   const signal = options.signal;
   const timeoutMs = options.timeoutMs ?? 30000;
+  const verificationMeta = classifyCommandVerification(command);
   // Basic safety check - block destructive commands
   const dangerous = /\b(rm\s+-rf|del\s+\/[sqf]|format\s+[a-z]:)\b/i;
@@ -946,6 +992,13 @@ async function executeRunCommand(projectDir, command, options = {}) {
         const limit = 15000;
         finish(resolve, {
           success: true,
+          command,
+          outcome: 'completed',
+          requiresVerification: verificationMeta.requiresVerification,
+          verificationCategory: verificationMeta.category,
+          verificationHint: verificationMeta.verificationHint,
+          verificationEvidenceFor: verificationMeta.verificationEvidenceFor,
+          readOnlyCommand: verificationMeta.readOnlyCommand,
           output: output.substring(0, limit),
           ...(output.length > limit ? { truncated: true, totalLength: output.length } : {})
         });
@@ -953,6 +1006,8 @@ async function executeRunCommand(projectDir, command, options = {}) {
         const limit = 10000;
         finish(resolve, {
           error: `Command failed with exit code ${code}`,
+          command,
+          outcome: code === 124 ? 'timed_out' : 'failed',
           output: output.substring(0, limit),
           exitCode: code,
           ...(output.length > limit ? { truncated: true, totalLength: output.length } : {})
@@ -967,6 +1022,7 @@ async function executeRunCommand(projectDir, command, options = {}) {
       finish(resolve, {
         error: `Command timed out after ${timeoutMs}ms`,
         output: raw.substring(0, 10000),
+        outcome: 'timed_out',
         exitCode: 124,
         ...(raw.length > 10000 ? { truncated: true, totalLength: raw.length } : {})
       });
@@ -1124,6 +1180,77 @@ function stripControlTokens(text) {
   return cleaned.replace(/^\s+$/, '');
 }
+/**
+ * Serialize a value to a JSON-like string whose object keys are emitted in
+ * sorted order, so structurally equal objects yield the same text regardless
+ * of key insertion order.
+ *
+ * @param {*} value - Value to serialize; arrays and objects are walked recursively.
+ * @returns {string|undefined} Serialized text (whatever JSON.stringify returns for non-object values).
+ */
+function stableStringify(value) {
+  if (Array.isArray(value)) {
+    const items = value.map(stableStringify);
+    return '[' + items.join(',') + ']';
+  }
+  // Primitives and null are delegated to JSON.stringify unchanged.
+  if (!value || typeof value !== 'object') {
+    return JSON.stringify(value);
+  }
+  const entries = [];
+  for (const key of Object.keys(value).sort()) {
+    entries.push(`${JSON.stringify(key)}:${stableStringify(value[key])}`);
+  }
+  return `{${entries.join(',')}}`;
+}
+/**
+ * Normalize a tool call's raw `arguments` payload into a parsed value.
+ *
+ * @param {*} rawArgs - Usually a JSON string; an already-parsed object is
+ *   also accepted (NOTE(review): some providers appear to deliver arguments
+ *   pre-parsed — confirm against the provider adapters).
+ * @returns {*} The object as given, the result of JSON.parse for strings, or
+ *   `{}` when the payload is missing, of another type, or not valid JSON.
+ */
+function parseToolArgs(rawArgs) {
+  // Pass pre-parsed arguments through. Collapsing them to {} would give every
+  // such call the same signature and make distinct calls look like duplicates.
+  if (rawArgs !== null && typeof rawArgs === 'object') return rawArgs;
+  if (typeof rawArgs !== 'string') return {};
+  try {
+    return JSON.parse(rawArgs);
+  } catch {
+    // Malformed JSON is deliberately treated as "no arguments" rather than an error.
+    return {};
+  }
+}
+/**
+ * Filter one batch of tool calls, dropping entries that are invalid,
+ * repeated, or over the configured limits.
+ *
+ * Checks run in this order for each call: missing function name ('invalid'),
+ * identical name+arguments seen too often ('duplicate'), per-tool cap reached
+ * ('per_tool_overflow'), overall cap reached ('overflow'). Only kept calls
+ * count toward the limits.
+ *
+ * @param {Array<object>|null|undefined} toolCalls - Tool calls to filter; a falsy value is treated as an empty batch.
+ * @returns {{toolCalls: Array<object>, dropped: Array<object>, summary: {invalid: number, duplicate: number, perToolOverflow: number, overflow: number}}}
+ *   Kept calls in original order, the dropped entries with their reason, and per-reason counts.
+ */
+function sanitizeToolCalls(toolCalls) {
+  const accepted = [];
+  const dropped = [];
+  const perSignature = new Map();
+  const perName = new Map();
+  const summary = { invalid: 0, duplicate: 0, perToolOverflow: 0, overflow: 0 };
+  const summaryKeyByReason = {
+    invalid: 'invalid',
+    duplicate: 'duplicate',
+    per_tool_overflow: 'perToolOverflow',
+    overflow: 'overflow'
+  };
+  // Record a dropped entry and tally it under its reason in one place.
+  const reject = (entry) => {
+    dropped.push(entry);
+    summary[summaryKeyByReason[entry.reason]] += 1;
+  };
+  for (const toolCall of toolCalls || []) {
+    const functionName = toolCall?.function?.name;
+    if (!functionName) {
+      reject({ reason: 'invalid', toolCall });
+      continue;
+    }
+    // Signature = tool name + key-order-independent rendering of its arguments.
+    const signature = `${functionName}:${stableStringify(parseToolArgs(toolCall.function.arguments))}`;
+    const identicalSoFar = perSignature.get(signature) || 0;
+    const sameNameSoFar = perName.get(functionName) || 0;
+    const perToolLimit = MAX_TOOL_CALLS_BY_NAME_PER_TURN[functionName];
+    let reason = null;
+    if (identicalSoFar >= MAX_IDENTICAL_TOOL_CALLS_PER_TURN) {
+      reason = 'duplicate';
+    } else if (perToolLimit && sameNameSoFar >= perToolLimit) {
+      reason = 'per_tool_overflow';
+    } else if (accepted.length >= MAX_TOOL_CALLS_PER_TURN) {
+      reason = 'overflow';
+    }
+    if (reason) {
+      reject({ reason, toolCall, signature });
+      continue;
+    }
+    perSignature.set(signature, identicalSoFar + 1);
+    perName.set(functionName, sameNameSoFar + 1);
+    accepted.push(toolCall);
+  }
+  return { toolCalls: accepted, dropped, summary };
+}
 // ─── Repetition Detection ─────────────────────────────────────────────────────
 /**
@@ -1155,6 +1282,8 @@ async function consumeStream(response, onToken) {
   let thinkBuffer = '';   // accumulates text inside a think block
   let inThink = false;
   let repetitionDetected = false;
+  let doneSignalReceived = false;
+  let warning = null;
   const flush = (text) => {
     const clean = stripControlTokens(text);
@@ -1184,7 +1313,11 @@ async function consumeStream(response, onToken) {
     for (const line of lines) {
       const trimmed = line.trim();
-      if (!trimmed || trimmed === 'data: [DONE]') continue;
+      if (!trimmed) continue;
+      if (trimmed === 'data: [DONE]') {
+        doneSignalReceived = true;
+        continue;
+      }
       if (!trimmed.startsWith('data: ')) continue;
       try {
@@ -1224,7 +1357,16 @@ async function consumeStream(response, onToken) {
     }
   }
-  return fullContent;
+  if (!doneSignalReceived) {
+    warning = 'Warning: final stream ended without an explicit completion signal. The response may be incomplete.';
+  }
+  return {
+    content: fullContent,
+    completed: doneSignalReceived,
+    warning,
+    repetitionDetected
+  };
 }
 // ─── Agentic Loop ───────────────────────────────────────────────────────────
@@ -1252,6 +1394,7 @@ class AgenticRunner {
     this.lastTurnMessagesEstimate = 0;
     this.totalCacheReadTokens = 0;
     this.totalCacheCreationTokens = 0;
+    this.lastRunOutcome = { status: 'running', phase: 'start', warning: null };
   }
   /**
@@ -1302,9 +1445,12 @@ class AgenticRunner {
     let iterations = 0;
     const toolCallHistory = []; // Track tool calls for loop detection
     const failedMcpTools = new Set(); // Track MCP tools that returned "Unknown tool" errors
+    const pendingCommandVerifications = new Map(); // category -> verification hint
     let readOnlyStreak = 0; // Consecutive iterations with only read-only tool calls
     let loopWarningCount = 0; // How many times loop detection has fired
+    let verificationReminderCount = 0; // How many times we had to demand verification before finalizing
     // Model-tier-aware read-only thresholds: smarter models get more research leeway
     // options.model is the raw model ID (e.g. "claude-sonnet-4-6-20250514", "gpt-4o", "silverback")
     const modelId = (options.model || '').toLowerCase();
@@ -1483,12 +1629,27 @@ class AgenticRunner {
       // Some models use finish_reason "tool_calls", others use "stop" or "function_call"
       // but still include tool_calls in the message. Check for the array itself.
       if (assistantMessage.tool_calls && assistantMessage.tool_calls.length > 0) {
-        // Cap tool calls per response to prevent runaway models spamming dozens of calls
-        const MAX_TOOL_CALLS_PER_RESPONSE = 8;
-        if (assistantMessage.tool_calls.length > MAX_TOOL_CALLS_PER_RESPONSE) {
-          appendDebugLog(`  [TOOL CALL CAP] Model returned ${assistantMessage.tool_calls.length} tool calls, capping to ${MAX_TOOL_CALLS_PER_RESPONSE}\n`);
-          this.onWarning(`Model tried to make ${assistantMessage.tool_calls.length} tool calls at once. Capping to ${MAX_TOOL_CALLS_PER_RESPONSE}.`);
-          assistantMessage.tool_calls = assistantMessage.tool_calls.slice(0, MAX_TOOL_CALLS_PER_RESPONSE);
+        const originalToolCallCount = assistantMessage.tool_calls.length;
+        const sanitizedBatch = sanitizeToolCalls(assistantMessage.tool_calls);
+        assistantMessage.tool_calls = sanitizedBatch.toolCalls;
+        if (sanitizedBatch.dropped.length > 0) {
+          appendDebugLog(
+            `  [tool batch sanitized] original=${originalToolCallCount} kept=${assistantMessage.tool_calls.length} ` +
+            `duplicate=${sanitizedBatch.summary.duplicate} per_tool_overflow=${sanitizedBatch.summary.perToolOverflow} ` +
+            `overflow=${sanitizedBatch.summary.overflow} invalid=${sanitizedBatch.summary.invalid}\n`
+          );
+          this.onWarning(
+            `Trimmed a noisy tool batch from ${originalToolCallCount} calls to ${assistantMessage.tool_calls.length}.`
+          );
+        }
+        if (assistantMessage.tool_calls.length === 0) {
+          messages.push({
+            role: 'system',
+            content: 'Your previous tool batch was invalid or excessively repetitive. Do NOT emit more tools right now. Answer the user directly with what you already know, or explain what specific missing context is still needed.'
+          });
+          continue;
         }
         // Add assistant message to history, preserving the reasoning field
@@ -1568,6 +1729,14 @@ class AgenticRunner {
           // Track command execution for hooks
           if (functionName === 'run_command' && !result.error) {
+            if (result.requiresVerification && result.verificationCategory) {
+              pendingCommandVerifications.set(result.verificationCategory, result.verificationHint || 'Run a read-only verification command before claiming success.');
+            }
+            if (Array.isArray(result.verificationEvidenceFor)) {
+              for (const category of result.verificationEvidenceFor) {
+                pendingCommandVerifications.delete(category);
+              }
+            }
             if (this.onCommandComplete) this.onCommandComplete(args.command, result);
           }
@@ -1673,6 +1842,12 @@ class AgenticRunner {
           nudgeParts.push(`Non-existent MCP tools (do NOT retry): ${[...failedMcpTools].join(', ')}`);
         }
+        if (pendingCommandVerifications.size > 0) {
+          nudgeParts.push(
+            `State-changing commands are still UNVERIFIED. Before telling the user the task is done, run a read-only verification step. ${[...pendingCommandVerifications.values()].join(' ')}`
+          );
+        }
         if (nudgeParts.length > 0) {
           messages.push({
             role: 'system',
@@ -1680,6 +1855,17 @@ class AgenticRunner {
           });
         }
+        if (sanitizedBatch.dropped.length > 0) {
+          messages.push({
+            role: 'system',
+            content:
+              `Your previous response tried to call too many or duplicate tools. ` +
+              `Dropped: ${sanitizedBatch.summary.duplicate} duplicate, ${sanitizedBatch.summary.perToolOverflow} excessive same-tool calls, ` +
+              `${sanitizedBatch.summary.overflow} overflow, ${sanitizedBatch.summary.invalid} invalid. ` +
+              `Next turn, use fewer tools and avoid repeating the same call with identical arguments.`
+          });
+        }
         // Track read-only streaks (iterations with no writes or commands)
         // Skip streak tracking in plan mode - plan mode is inherently read-only
         const thisIterToolNames = assistantMessage.tool_calls.map(t => t.function.name);
@@ -1752,6 +1938,7 @@ class AgenticRunner {
               this._lastWrittenFiles = [...writtenFiles];
               logRunTotals('loop-break');
               const loopResponse = finalContent || 'I got stuck in a loop and could not complete the task. Please try rephrasing your request.';
+              this.lastRunOutcome = { status: 'completed_with_warnings', phase: 'loop-break', warning: 'Loop breaker forced finalization.' };
               await this.emitStreaming(loopResponse);
               this.onContent(loopResponse);
               return loopResponse;
@@ -1787,6 +1974,7 @@ class AgenticRunner {
           this._lastWrittenFiles = [...writtenFiles];
           logRunTotals('no-progress-break');
           const npResponse = npContent || 'I spent too many iterations researching without making progress. Please try a more specific request.';
+          this.lastRunOutcome = { status: 'completed_with_warnings', phase: 'no-progress-break', warning: 'No-progress breaker forced finalization.' };
           await this.emitStreaming(npResponse);
           this.onContent(npResponse);
           return npResponse;
@@ -1801,6 +1989,18 @@ class AgenticRunner {
         // Final response - no more tool calls.
         // The non-streaming chat() call already returned content. Use it directly
         // instead of making a redundant streaming call that may return empty/truncated.
+        if (pendingCommandVerifications.size > 0 && verificationReminderCount < 1) {
+          verificationReminderCount++;
+          messages.push({
+            role: 'system',
+            content:
+              `STOP. You are about to answer, but you still have unverified state-changing command results. ` +
+              `Before claiming completion, run at least one read-only verification step for these categories: ${[...pendingCommandVerifications.keys()].join(', ')}. ` +
+              `${[...pendingCommandVerifications.values()].join(' ')}`
+          });
+          continue;
+        }
         let existingContent = stripControlTokens(assistantMessage.content || '');
         // Extract inline <think>/<thinking> blocks from content (Qwen3.5 embeds reasoning in content)
@@ -1835,11 +2035,23 @@ class AgenticRunner {
         const reasoning = assistantMessage.reasoning || assistantMessage.reasoning_content || inlineReasoning;
+        if (pendingCommandVerifications.size > 0) {
+          const verificationWarning = `Warning: the requested command effects were not independently verified. ${[...pendingCommandVerifications.values()].join(' ')}`;
+          existingContent = existingContent
+            ? `${verificationWarning}\n\n${existingContent}`
+            : verificationWarning;
+        }
         // If the model already produced content in this iteration, use it directly
         if (existingContent) {
           if (reasoning) {
             this.onReasoning(stripControlTokens(reasoning));
           }
+          this.lastRunOutcome = {
+            status: pendingCommandVerifications.size > 0 ? 'completed_with_warnings' : 'completed',
+            phase: 'final-content',
+            warning: pendingCommandVerifications.size > 0 ? 'Completion claims were not fully verified.' : null
+          };
           await this.emitStreaming(existingContent);
           this.onContent(existingContent);
           logRunTotals('final-content');
@@ -1853,6 +2065,7 @@ class AgenticRunner {
           // Some models put the actual answer in reasoning when content is empty.
           // Return a minimal acknowledgment rather than an empty response.
           const fallback = '(Response was in reasoning only - see thinking output above)';
+          this.lastRunOutcome = { status: 'completed_with_warnings', phase: 'final-reasoning-fallback', warning: 'Model returned reasoning without visible content.' };
           await this.emitStreaming(fallback);
           this.onContent(fallback);
           logRunTotals('final-reasoning-fallback');
@@ -1877,6 +2090,7 @@ class AgenticRunner {
           const content = stripControlTokens(thinkMsg?.content || '');
           if (thinkReasoning) this.onReasoning(stripControlTokens(thinkReasoning));
+          this.lastRunOutcome = { status: 'completed', phase: 'final-think-pass', warning: null };
           await this.emitStreaming(content);
           this.onContent(content);
           logRunTotals('final-think-pass');
@@ -1895,10 +2109,21 @@ class AgenticRunner {
           signal: options.signal
         });
-        const content = await consumeStream(streamResponse, (token) => {
+        const streamResult = await consumeStream(streamResponse, (token) => {
           this.onToken(token);
         });
+        const content = streamResult.completed || !streamResult.warning
+          ? streamResult.content
+          : `${streamResult.warning}\n\n${streamResult.content}`.trim();
+        if (!streamResult.completed && streamResult.warning) {
+          this.onWarning(streamResult.warning);
+        }
+        this.lastRunOutcome = {
+          status: streamResult.completed ? 'completed' : 'completed_with_warnings',
+          phase: 'final-stream',
+          warning: streamResult.warning
+        };
         this.onContent(content);
         logRunTotals('final-stream');
         return content;
@@ -1906,9 +2131,10 @@ class AgenticRunner {
     }
     this.onWarning('Max tool iterations reached');
+    this.lastRunOutcome = { status: 'failed', phase: 'max-iterations', warning: 'Max tool iterations reached.' };
     logRunTotals('max-iterations');
     return '';
   }
 }
-module.exports = { AgenticRunner, TOOLS, READ_ONLY_TOOLS, executeTool, setMcpClient };
+module.exports = { AgenticRunner, TOOLS, READ_ONLY_TOOLS, executeTool, setMcpClient, sanitizeToolCalls, classifyCommandVerification };