banana-code 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/banana.js CHANGED
@@ -81,7 +81,7 @@ let pendingHumanQuestion = null; // { resolve, question }
81
81
  // CONFIGURATION
82
82
  // =============================================================================
83
83
 
84
- const VERSION = '1.3.1';
84
+ const VERSION = '1.4.0';
85
85
  const { PAD } = require('./lib/borderRenderer'); // Single source of truth for left padding
86
86
  const DEBUG_DISABLED_VALUES = new Set(['0', 'false', 'off', 'no']);
87
87
  const NEXT_TURN_RESERVE_TOKENS = 1200;
@@ -389,7 +389,7 @@ ${P}${c.yellow}/hooks${c.reset} Manage lifecycle hooks (add, edit,
389
389
  ${P}${c.yellow}/steer <text>${c.reset} Steer next turn (or interrupt + redirect current turn)
390
390
  ${P}${c.yellow}/model [name]${c.reset} Show/switch model
391
391
  ${P}${c.yellow}/model search <query>${c.reset} Search OpenRouter models and add one
392
- ${P}${c.yellow}/connect [provider]${c.reset} Connect provider (Anthropic, OpenAI OAuth, OpenRouter)
392
+ ${P}${c.yellow}/connect [provider]${c.reset} Connect provider (Anthropic, OpenAI, OpenRouter, Claude Code)
393
393
  ${P}${c.yellow}/prompt [name]${c.reset} Show/switch prompt (base, code-agent, or any .md)
394
394
 
395
395
  ${P}${c.banana}${c.dim}Config Commands:${c.reset}
@@ -442,6 +442,15 @@ function initProject() {
442
442
  tokenCounter = new TokenCounter(config);
443
443
  imageHandler = new ImageHandler(projectDir);
444
444
 
445
+ const lastRunSnapshot = config.getRunSnapshot();
446
+ if (lastRunSnapshot && lastRunSnapshot.completed === false) {
447
+ const when = lastRunSnapshot.savedAt ? new Date(lastRunSnapshot.savedAt).toLocaleString() : 'recently';
448
+ console.log(`${PAD}${c.yellow}⚠ Previous run appears to have ended unexpectedly.${c.reset} ${c.dim}(${when})${c.reset}`);
449
+ if (lastRunSnapshot.userMessage) {
450
+ console.log(`${PAD}${c.dim} Last request: ${String(lastRunSnapshot.userMessage).slice(0, 120)}${c.reset}`);
451
+ }
452
+ }
453
+
445
454
  // Initialize LM Studio + provider manager
446
455
  const lmStudioUrl = config.get('lmStudioUrl') || 'http://localhost:1234';
447
456
  lmStudio = new LmStudio({ baseUrl: lmStudioUrl });
@@ -913,6 +922,7 @@ function normalizeProviderKey(raw) {
913
922
  const key = String(raw || '').trim().toLowerCase();
914
923
  if (!key) return null;
915
924
  if (key === 'lmstudio' || key === 'local') return 'local';
925
+ if (key === 'claude' || key === 'claudecode' || key === 'claude-code') return 'claude-code';
916
926
  if (PROVIDERS.includes(key)) return key;
917
927
  return null;
918
928
  }
@@ -1306,7 +1316,9 @@ async function connectProviderInteractive(provider) {
1306
1316
  label: PROVIDER_LABELS[p] || p,
1307
1317
  description: p === 'openai'
1308
1318
  ? 'OAuth device login for Codex subscription'
1309
- : 'Connect with API key',
1319
+ : p === 'claude-code'
1320
+ ? 'Use your Claude Code CLI subscription (no API key needed)'
1321
+ : 'Connect with API key',
1310
1322
  tags: ['provider'],
1311
1323
  active: providerStore.isConnected(p)
1312
1324
  }));
@@ -1351,6 +1363,29 @@ async function connectProviderInteractive(provider) {
1351
1363
  return;
1352
1364
  }
1353
1365
 
1366
+ if (provider === 'claude-code') {
1367
+ console.log(`\n${PAD}${c.cyan}Claude Code CLI Connection${c.reset}`);
1368
+ console.log(`${PAD}${c.dim}Checking for Claude Code CLI...${c.reset}`);
1369
+
1370
+ const { ClaudeCodeClient } = require('./lib/claudeCodeProvider');
1371
+ const claudeClient = new ClaudeCodeClient();
1372
+ const connected = await claudeClient.isConnected();
1373
+
1374
+ if (!connected) {
1375
+ console.log(`${PAD}${c.red}✗ Claude Code CLI not found.${c.reset}`);
1376
+ console.log(`${PAD}${c.dim}Install it: npm install -g @anthropic-ai/claude-code${c.reset}`);
1377
+ console.log(`${PAD}${c.dim}Then run: claude login${c.reset}\n`);
1378
+ return;
1379
+ }
1380
+
1381
+ providerStore.connectClaudeCode();
1382
+ modelRegistry.refreshRemoteModels();
1383
+ console.log(`${PAD}${c.green}✓ Connected Claude Code CLI${c.reset}`);
1384
+ console.log(`${PAD}${c.dim}Uses your existing Claude subscription (no API key needed).${c.reset}`);
1385
+ console.log(`${PAD}${c.dim}Use /model to switch to Claude Code models.${c.reset}\n`);
1386
+ return;
1387
+ }
1388
+
1354
1389
  throw new Error(`Unsupported provider: ${provider}`);
1355
1390
  }
1356
1391
 
@@ -1589,9 +1624,15 @@ async function handleCommand(input) {
1589
1624
  tokenCounter.resetSession();
1590
1625
  imageHandler.clearPending();
1591
1626
  setContextEstimate(0);
1627
+ promptDuringWork = false;
1628
+ renderWorkingPrompt = null;
1629
+ if (rl) {
1630
+ rl.write(null, { ctrl: true, name: 'u' });
1631
+ }
1592
1632
  if (statusBar) {
1633
+ statusBar.setInputHint('');
1593
1634
  statusBar.update({ sessionIn: 0, sessionOut: 0 });
1594
- statusBar.reinstall();
1635
+ statusBar.uninstall();
1595
1636
  }
1596
1637
  console.clear();
1597
1638
  // Push cursor to bottom of scroll region so prompt isn't stranded at the top
@@ -1603,6 +1644,9 @@ async function handleCommand(input) {
1603
1644
  if (padding > 0) process.stdout.write('\n'.repeat(padding));
1604
1645
  }
1605
1646
  refreshIdleContextEstimate();
1647
+ if (statusBar) {
1648
+ statusBar.reinstall();
1649
+ }
1606
1650
  if (rl) {
1607
1651
  rl.setPrompt(buildPromptPrefix());
1608
1652
  rl.prompt(false);
@@ -2118,7 +2162,7 @@ async function handleCommand(input) {
2118
2162
  if (normalizedSub === 'disconnect') {
2119
2163
  const provider = normalizeProviderKey(secondArg);
2120
2164
  if (!provider || provider === 'local') {
2121
- console.log(`\n${PAD}${c.yellow}Usage: /connect disconnect <anthropic|openai|openrouter>${c.reset}\n`);
2165
+ console.log(`\n${PAD}${c.yellow}Usage: /connect disconnect <anthropic|openai|openrouter|claude-code>${c.reset}\n`);
2122
2166
  return true;
2123
2167
  }
2124
2168
  const wasActiveProvider = (modelRegistry.getCurrentModel()?.provider || 'local') === provider;
@@ -2135,7 +2179,7 @@ async function handleCommand(input) {
2135
2179
  if (normalizedSub === 'use') {
2136
2180
  const provider = normalizeProviderKey(secondArg);
2137
2181
  if (!provider) {
2138
- console.log(`\n${PAD}${c.yellow}Usage: /connect use <local|anthropic|openai|openrouter>${c.reset}\n`);
2182
+ console.log(`\n${PAD}${c.yellow}Usage: /connect use <local|anthropic|openai|openrouter|claude-code>${c.reset}\n`);
2139
2183
  return true;
2140
2184
  }
2141
2185
 
@@ -2160,7 +2204,7 @@ async function handleCommand(input) {
2160
2204
  if (!provider || provider === 'local') {
2161
2205
  console.log(`\n${PAD}${c.yellow}Usage:${c.reset}`);
2162
2206
  console.log(`${PAD}${c.dim} /connect${c.reset}`);
2163
- console.log(`${PAD}${c.dim} /connect <anthropic|openai|openrouter>${c.reset}`);
2207
+ console.log(`${PAD}${c.dim} /connect <anthropic|openai|openrouter|claude-code>${c.reset}`);
2164
2208
  console.log(`${PAD}${c.dim} /connect status${c.reset}`);
2165
2209
  console.log(`${PAD}${c.dim} /connect disconnect <provider>${c.reset}`);
2166
2210
  console.log(`${PAD}${c.dim} /connect use <local|provider>${c.reset}\n`);
@@ -2684,9 +2728,19 @@ async function sendMessage(message) {
2684
2728
  fullMessage += '\n\n[Image analysis above is primary source of truth. Focus on image content, not file listing.]';
2685
2729
  }
2686
2730
 
2731
+ config.saveRunSnapshot({
2732
+ projectDir,
2733
+ activeModel: modelRegistry.getCurrent(),
2734
+ userMessage: message,
2735
+ fullMessagePreview: fullMessage.slice(0, 2000),
2736
+ conversationLength: conversationHistory.length
2737
+ });
2738
+
2687
2739
  try {
2688
2740
  await sendAgenticMessage(fullMessage, pendingImages, message);
2741
+ config.completeRunSnapshot({ status: 'completed' });
2689
2742
  } catch (error) {
2743
+ config.completeRunSnapshot({ status: 'failed', error: error.message });
2690
2744
  const provider = activeProviderKey();
2691
2745
  const providerLabel = providerManager.getProviderLabel(provider);
2692
2746
  console.log(`\n${PAD}${c.red}✗ Error: ${error.message}${c.reset}`);
@@ -2985,6 +3039,11 @@ async function sendStreamingMessage(message, images = [], rawMessage = '') {
2985
3039
 
2986
3040
  try {
2987
3041
  await streamHandler.handleStream(response);
3042
+ const streamResult = streamHandler.getResult();
3043
+ if (!streamResult.completed && streamResult.warning) {
3044
+ fullResponse = `${streamResult.warning}\n\n${fullResponse}`.trim();
3045
+ console.log(`\n${PAD}${c.yellow}⚠ ${streamResult.warning}${c.reset}`);
3046
+ }
2988
3047
  } catch (error) {
2989
3048
  stopStatus();
2990
3049
  // Check if this was an abort
@@ -5172,10 +5231,12 @@ Examples:
5172
5231
  // then subsequent lines arrive as new 'line' events. We detect paste by
5173
5232
  // buffering lines that arrive within PASTE_DELAY_MS of each other.
5174
5233
  const PASTE_DELAY_MS = 400; // 400ms to handle large pastes and Windows Terminal dialog latency
5234
+ const PASTE_STRAGGLER_WINDOW_MS = 1200; // Late lines can arrive after submit on Windows Terminal
5175
5235
  let pasteBuffer = [];
5176
5236
  let pasteTimer = null;
5177
5237
  let waitingForInput = false;
5178
5238
  let lastFlushTime = 0; // Track when paste buffer last flushed (to catch stragglers)
5239
+ let lastPasteStragglerWarningAt = 0;
5179
5240
 
5180
5241
  showGeminiKeyPrompt = (callback) => {
5181
5242
  awaitingGeminiKey = true;
@@ -5342,13 +5403,17 @@ Examples:
5342
5403
  return;
5343
5404
  }
5344
5405
 
5345
- // Straggler paste lines: arrived after flush but within 2s and before AI started.
5346
- // This happens on Windows when the paste confirmation dialog adds latency between lines.
5347
- if (!currentAbortController && lastFlushTime && (Date.now() - lastFlushTime) < 2000) {
5406
+ // Straggler paste lines: arrived after flush but before the paste has fully settled.
5407
+ // On Windows Terminal, delayed lines can arrive after the first chunk was submitted,
5408
+ // and without this guard they'd be misread as mid-turn steering.
5409
+ if (lastFlushTime && (Date.now() - lastFlushTime) < PASTE_STRAGGLER_WINDOW_MS) {
5348
5410
  const trimmed = String(input || '').trim();
5349
5411
  if (trimmed) {
5350
5412
  appendDebugLog(`[paste-straggler] Dropped line arrived ${Date.now() - lastFlushTime}ms after flush: ${trimmed.slice(0, 60)}\n`);
5351
- console.log(`${PAD}${c.yellow}Lines were dropped from your paste.${c.reset} ${c.dim}Try pasting again, or disable the paste warning in Windows Terminal settings.${c.reset}`);
5413
+ if (lastPasteStragglerWarningAt !== lastFlushTime) {
5414
+ lastPasteStragglerWarningAt = lastFlushTime;
5415
+ console.log(`${PAD}${c.yellow}Ignored delayed paste lines from the previous submission.${c.reset} ${c.dim}If this keeps happening, disable the Windows Terminal paste warning or paste again after the prompt settles.${c.reset}`);
5416
+ }
5352
5417
  }
5353
5418
  return;
5354
5419
  }
@@ -5476,6 +5541,9 @@ process.on('SIGINT', () => {
5476
5541
  if (config && config.get('autoSaveHistory') && conversationHistory.length > 0) {
5477
5542
  config.saveConversation('autosave', conversationHistory);
5478
5543
  }
5544
+ if (config) {
5545
+ config.completeRunSnapshot({ status: 'cancelled' });
5546
+ }
5479
5547
  if (watcher) watcher.stop();
5480
5548
  console.log(`\n${PAD}${c.cyan}👋 See you later!${c.reset}\n`);
5481
5549
  process.exit(0);
@@ -5484,6 +5552,12 @@ process.on('SIGINT', () => {
5484
5552
  main().catch(error => {
5485
5553
  logSessionEnd('crash', ` error=${error.message}`);
5486
5554
  if (statusBar) statusBar.uninstall();
5555
+ if (config && config.get('autoSaveHistory') && conversationHistory.length > 0) {
5556
+ config.saveConversation('autosave-crash', conversationHistory);
5557
+ }
5558
+ if (config) {
5559
+ config.completeRunSnapshot({ status: 'crashed', error: error.message });
5560
+ }
5487
5561
  console.error(`${c.red}Fatal error: ${error.message}${c.reset}`);
5488
5562
  if (watcher) watcher.stop();
5489
5563
  process.exit(1);
@@ -549,6 +549,11 @@ const READ_ONLY_TOOLS = TOOLS.filter(t => READ_ONLY_TOOL_NAMES.has(t.function.na
549
549
 
550
550
  const IGNORE_PATTERNS = ['node_modules', '.git', '.next', 'dist', 'build', '.banana'];
551
551
  const MAX_ITERATIONS = 50;
552
+ const MAX_TOOL_CALLS_PER_TURN = 24;
553
+ const MAX_IDENTICAL_TOOL_CALLS_PER_TURN = 1;
554
+ const MAX_TOOL_CALLS_BY_NAME_PER_TURN = {
555
+ list_files: 6
556
+ };
552
557
  const WRITE_TOOL_NAMES = new Set(['create_file', 'edit_file', 'run_command']);
553
558
  const CONTEXT_TRIM_THRESHOLD = 0.60; // 60% of context limit - start trimming early
554
559
  const CONTEXT_TRIM_KEEP_RECENT = 6; // Keep last N messages intact
@@ -885,9 +890,50 @@ function executeEditFile(projectDir, filePath, content) {
885
890
  }
886
891
  }
887
892
 
893
/**
 * Classify a shell command for post-run verification.
 *
 * The classification is a keyword heuristic over the lower-cased command text;
 * the command is never executed or parsed as a shell grammar.
 *
 * @param {string} command - Raw command line (null/undefined is treated as '').
 * @returns {{
 *   requiresVerification: boolean,
 *   category: ('git_state'|'filesystem_state'|null),
 *   verificationHint: (string|null),
 *   verificationEvidenceFor: string[],
 *   readOnlyCommand: boolean
 * }} `requiresVerification`/`category`/`verificationHint` describe a detected
 *   state-changing command (git mutations take precedence over filesystem
 *   mutations). `verificationEvidenceFor` lists the categories for which this
 *   command contains a read-only check; it is populated even for mutating
 *   commands so a compound such as `git pull && git status` carries its own
 *   evidence. `readOnlyCommand` is true only when no mutation was detected and
 *   at least one read-only check was.
 */
function classifyCommandVerification(command) {
  const lowerCommand = String(command || '').trim().toLowerCase();

  const gitMutationRe = /\bgit\s+(pull|checkout|switch|reset|merge|rebase|cherry-pick|restore|clean|stash\s+(pop|apply|drop)|apply|commit|push)\b/;
  const gitReadOnlyRe = /\bgit\s+(status|rev-parse|branch|log|diff|show|ls-files|show-ref)\b/;

  // Windows built-ins: matched as whole words anywhere in the command line.
  const fsMutationRe = /\b(copy|move|ren|rename|mkdir|rmdir|del|erase|xcopy|robocopy|attrib)\b/;
  const fsReadOnlyRe = /\b(dir|type|findstr|where)\b/;

  // POSIX counterparts. These names are short and collide with common flags and
  // arguments (`docker run --rm`, `git rev-parse HEAD`), so they only count when
  // they sit in command position: at the start, or right after ; & | or (.
  const posixFsMutationRe = /(?:^|[;&|(]\s*)(?:sudo\s+)?(?:cp|mv|rm|touch|chmod|chown|ln)\b/;
  const posixFsReadOnlyRe = /(?:^|[;&|(]\s*)(?:ls|cat|head|tail|stat|grep)\b/;

  const verificationEvidenceFor = [];
  if (gitReadOnlyRe.test(lowerCommand)) {
    verificationEvidenceFor.push('git_state');
  }
  if (fsReadOnlyRe.test(lowerCommand) || posixFsReadOnlyRe.test(lowerCommand)) {
    verificationEvidenceFor.push('filesystem_state');
  }

  // Git mutations win over filesystem mutations when a command contains both.
  if (gitMutationRe.test(lowerCommand)) {
    return {
      requiresVerification: true,
      category: 'git_state',
      verificationHint: 'Before claiming success, run a read-only git check such as `git status --short`, `git rev-parse HEAD`, or compare `HEAD` to `@{u}`.',
      verificationEvidenceFor,
      readOnlyCommand: false
    };
  }

  if (fsMutationRe.test(lowerCommand) || posixFsMutationRe.test(lowerCommand)) {
    return {
      requiresVerification: true,
      category: 'filesystem_state',
      verificationHint: 'Before claiming success, run a read-only check such as `dir`/`ls`, `type`/`cat`, or `findstr`/`grep` to confirm the change is actually present.',
      verificationEvidenceFor,
      readOnlyCommand: false
    };
  }

  return {
    requiresVerification: false,
    category: null,
    verificationHint: null,
    verificationEvidenceFor,
    readOnlyCommand: verificationEvidenceFor.length > 0
  };
}
932
+
888
933
  async function executeRunCommand(projectDir, command, options = {}) {
889
934
  const signal = options.signal;
890
935
  const timeoutMs = options.timeoutMs ?? 30000;
936
+ const verificationMeta = classifyCommandVerification(command);
891
937
 
892
938
  // Basic safety check - block destructive commands
893
939
  const dangerous = /\b(rm\s+-rf|del\s+\/[sqf]|format\s+[a-z]:)\b/i;
@@ -946,6 +992,13 @@ async function executeRunCommand(projectDir, command, options = {}) {
946
992
  const limit = 15000;
947
993
  finish(resolve, {
948
994
  success: true,
995
+ command,
996
+ outcome: 'completed',
997
+ requiresVerification: verificationMeta.requiresVerification,
998
+ verificationCategory: verificationMeta.category,
999
+ verificationHint: verificationMeta.verificationHint,
1000
+ verificationEvidenceFor: verificationMeta.verificationEvidenceFor,
1001
+ readOnlyCommand: verificationMeta.readOnlyCommand,
949
1002
  output: output.substring(0, limit),
950
1003
  ...(output.length > limit ? { truncated: true, totalLength: output.length } : {})
951
1004
  });
@@ -953,6 +1006,8 @@ async function executeRunCommand(projectDir, command, options = {}) {
953
1006
  const limit = 10000;
954
1007
  finish(resolve, {
955
1008
  error: `Command failed with exit code ${code}`,
1009
+ command,
1010
+ outcome: code === 124 ? 'timed_out' : 'failed',
956
1011
  output: output.substring(0, limit),
957
1012
  exitCode: code,
958
1013
  ...(output.length > limit ? { truncated: true, totalLength: output.length } : {})
@@ -967,6 +1022,7 @@ async function executeRunCommand(projectDir, command, options = {}) {
967
1022
  finish(resolve, {
968
1023
  error: `Command timed out after ${timeoutMs}ms`,
969
1024
  output: raw.substring(0, 10000),
1025
+ outcome: 'timed_out',
970
1026
  exitCode: 124,
971
1027
  ...(raw.length > 10000 ? { truncated: true, totalLength: raw.length } : {})
972
1028
  });
@@ -1124,6 +1180,77 @@ function stripControlTokens(text) {
1124
1180
  return cleaned.replace(/^\s+$/, '');
1125
1181
  }
1126
1182
 
1183
/**
 * Serialize a value to JSON-like text with object keys emitted in sorted
 * order, so two objects that differ only in key insertion order produce the
 * same string.
 *
 * @param {*} value - Value to serialize; arrays and objects are walked recursively.
 * @returns {string|undefined} The serialized text. Non-object values are handed
 *   to JSON.stringify as-is, so anything it cannot represent yields undefined.
 */
function stableStringify(value) {
  // Primitives (and null) have no key order to normalise.
  if (value === null || typeof value !== 'object') {
    return JSON.stringify(value);
  }

  if (Array.isArray(value)) {
    const items = [];
    for (const element of value) {
      items.push(stableStringify(element));
    }
    return `[${items.join(',')}]`;
  }

  const pairs = [];
  for (const name of Object.keys(value).sort()) {
    pairs.push(`${JSON.stringify(name)}:${stableStringify(value[name])}`);
  }
  return `{${pairs.join(',')}}`;
}
1193
+
1194
/**
 * Normalize a tool call's `arguments` payload into a parsed value.
 *
 * @param {*} rawArgs - Either a JSON string or an already-parsed object/array.
 * @returns {*} The parsed JSON for a string payload; the payload itself when it
 *   is already an object; `{}` when the payload is missing, of another type, or
 *   is a string that is not valid JSON.
 */
function parseToolArgs(rawArgs) {
  // Already-parsed payloads must keep their contents: collapsing them to {}
  // would make every call to the same tool look identical to a caller that
  // compares argument signatures.
  if (rawArgs !== null && typeof rawArgs === 'object') {
    return rawArgs;
  }
  if (typeof rawArgs !== 'string') {
    return {};
  }
  try {
    return JSON.parse(rawArgs);
  } catch {
    // Malformed JSON is treated as "no arguments" rather than failing the batch.
    return {};
  }
}
1202
+
1203
/**
 * Filter one batch of model-emitted tool calls, keeping them in order and
 * dropping calls that are malformed, repeated, or over a cap.
 *
 * A call is dropped for the first rule it violates, checked in this order:
 *   invalid            - no function name
 *   duplicate          - same name + arguments already kept
 *                        MAX_IDENTICAL_TOOL_CALLS_PER_TURN times
 *   per_tool_overflow  - the tool has a cap in MAX_TOOL_CALLS_BY_NAME_PER_TURN
 *                        and that many calls to it were already kept
 *   overflow           - MAX_TOOL_CALLS_PER_TURN calls were already kept
 * Only kept calls advance the counters, so a dropped call never uses up quota.
 *
 * @param {Array<object>|null|undefined} toolCalls - Calls shaped like
 *   `{ function: { name, arguments } }`; a missing batch is treated as empty.
 * @returns {{toolCalls: object[], dropped: object[], summary: {invalid: number,
 *   duplicate: number, perToolOverflow: number, overflow: number}}} Kept calls,
 *   dropped entries (`{ reason, toolCall }` plus `signature` when a name was
 *   present), and per-reason drop counts.
 */
function sanitizeToolCalls(toolCalls) {
  const accepted = [];
  const rejected = [];
  const acceptedBySignature = new Map();
  const acceptedByName = new Map();
  const tally = { invalid: 0, duplicate: 0, perToolOverflow: 0, overflow: 0 };
  const tallyKeyForReason = {
    invalid: 'invalid',
    duplicate: 'duplicate',
    per_tool_overflow: 'perToolOverflow',
    overflow: 'overflow'
  };

  // Record a dropped call and bump the matching summary counter in one step.
  const reject = (entry) => {
    rejected.push(entry);
    tally[tallyKeyForReason[entry.reason]] += 1;
  };

  for (const call of toolCalls || []) {
    const name = call?.function?.name;
    if (!name) {
      reject({ reason: 'invalid', toolCall: call });
      continue;
    }

    // Key-order-insensitive identity for "same tool, same arguments".
    const signature = `${name}:${stableStringify(parseToolArgs(call.function.arguments))}`;
    const timesAccepted = acceptedBySignature.get(signature) || 0;
    const timesNamed = acceptedByName.get(name) || 0;
    const nameCap = MAX_TOOL_CALLS_BY_NAME_PER_TURN[name];

    let reason = null;
    if (timesAccepted >= MAX_IDENTICAL_TOOL_CALLS_PER_TURN) {
      reason = 'duplicate';
    } else if (nameCap && timesNamed >= nameCap) {
      reason = 'per_tool_overflow';
    } else if (accepted.length >= MAX_TOOL_CALLS_PER_TURN) {
      reason = 'overflow';
    }

    if (reason) {
      reject({ reason, toolCall: call, signature });
      continue;
    }

    acceptedBySignature.set(signature, timesAccepted + 1);
    acceptedByName.set(name, timesNamed + 1);
    accepted.push(call);
  }

  return {
    toolCalls: accepted,
    dropped: rejected,
    summary: tally
  };
}
1253
+
1127
1254
  // ─── Repetition Detection ─────────────────────────────────────────────────────
1128
1255
 
1129
1256
  /**
@@ -1155,6 +1282,8 @@ async function consumeStream(response, onToken) {
1155
1282
  let thinkBuffer = ''; // accumulates text inside a think block
1156
1283
  let inThink = false;
1157
1284
  let repetitionDetected = false;
1285
+ let doneSignalReceived = false;
1286
+ let warning = null;
1158
1287
 
1159
1288
  const flush = (text) => {
1160
1289
  const clean = stripControlTokens(text);
@@ -1184,7 +1313,11 @@ async function consumeStream(response, onToken) {
1184
1313
 
1185
1314
  for (const line of lines) {
1186
1315
  const trimmed = line.trim();
1187
- if (!trimmed || trimmed === 'data: [DONE]') continue;
1316
+ if (!trimmed) continue;
1317
+ if (trimmed === 'data: [DONE]') {
1318
+ doneSignalReceived = true;
1319
+ continue;
1320
+ }
1188
1321
  if (!trimmed.startsWith('data: ')) continue;
1189
1322
 
1190
1323
  try {
@@ -1224,7 +1357,16 @@ async function consumeStream(response, onToken) {
1224
1357
  }
1225
1358
  }
1226
1359
 
1227
- return fullContent;
1360
+ if (!doneSignalReceived) {
1361
+ warning = 'Warning: final stream ended without an explicit completion signal. The response may be incomplete.';
1362
+ }
1363
+
1364
+ return {
1365
+ content: fullContent,
1366
+ completed: doneSignalReceived,
1367
+ warning,
1368
+ repetitionDetected
1369
+ };
1228
1370
  }
1229
1371
 
1230
1372
  // ─── Agentic Loop ───────────────────────────────────────────────────────────
@@ -1252,6 +1394,7 @@ class AgenticRunner {
1252
1394
  this.lastTurnMessagesEstimate = 0;
1253
1395
  this.totalCacheReadTokens = 0;
1254
1396
  this.totalCacheCreationTokens = 0;
1397
+ this.lastRunOutcome = { status: 'running', phase: 'start', warning: null };
1255
1398
  }
1256
1399
 
1257
1400
  /**
@@ -1302,9 +1445,12 @@ class AgenticRunner {
1302
1445
  let iterations = 0;
1303
1446
  const toolCallHistory = []; // Track tool calls for loop detection
1304
1447
  const failedMcpTools = new Set(); // Track MCP tools that returned "Unknown tool" errors
1448
+ const pendingCommandVerifications = new Map(); // category -> verification hint
1305
1449
  let readOnlyStreak = 0; // Consecutive iterations with only read-only tool calls
1306
1450
  let loopWarningCount = 0; // How many times loop detection has fired
1307
1451
 
1452
+ let verificationReminderCount = 0; // How many times we had to demand verification before finalizing
1453
+
1308
1454
  // Model-tier-aware read-only thresholds: smarter models get more research leeway
1309
1455
  // options.model is the raw model ID (e.g. "claude-sonnet-4-6-20250514", "gpt-4o", "silverback")
1310
1456
  const modelId = (options.model || '').toLowerCase();
@@ -1483,12 +1629,27 @@ class AgenticRunner {
1483
1629
  // Some models use finish_reason "tool_calls", others use "stop" or "function_call"
1484
1630
  // but still include tool_calls in the message. Check for the array itself.
1485
1631
  if (assistantMessage.tool_calls && assistantMessage.tool_calls.length > 0) {
1486
- // Cap tool calls per response to prevent runaway models spamming dozens of calls
1487
- const MAX_TOOL_CALLS_PER_RESPONSE = 8;
1488
- if (assistantMessage.tool_calls.length > MAX_TOOL_CALLS_PER_RESPONSE) {
1489
- appendDebugLog(` [TOOL CALL CAP] Model returned ${assistantMessage.tool_calls.length} tool calls, capping to ${MAX_TOOL_CALLS_PER_RESPONSE}\n`);
1490
- this.onWarning(`Model tried to make ${assistantMessage.tool_calls.length} tool calls at once. Capping to ${MAX_TOOL_CALLS_PER_RESPONSE}.`);
1491
- assistantMessage.tool_calls = assistantMessage.tool_calls.slice(0, MAX_TOOL_CALLS_PER_RESPONSE);
1632
+ const originalToolCallCount = assistantMessage.tool_calls.length;
1633
+ const sanitizedBatch = sanitizeToolCalls(assistantMessage.tool_calls);
1634
+ assistantMessage.tool_calls = sanitizedBatch.toolCalls;
1635
+
1636
+ if (sanitizedBatch.dropped.length > 0) {
1637
+ appendDebugLog(
1638
+ ` [tool batch sanitized] original=${originalToolCallCount} kept=${assistantMessage.tool_calls.length} ` +
1639
+ `duplicate=${sanitizedBatch.summary.duplicate} per_tool_overflow=${sanitizedBatch.summary.perToolOverflow} ` +
1640
+ `overflow=${sanitizedBatch.summary.overflow} invalid=${sanitizedBatch.summary.invalid}\n`
1641
+ );
1642
+ this.onWarning(
1643
+ `Trimmed a noisy tool batch from ${originalToolCallCount} calls to ${assistantMessage.tool_calls.length}.`
1644
+ );
1645
+ }
1646
+
1647
+ if (assistantMessage.tool_calls.length === 0) {
1648
+ messages.push({
1649
+ role: 'system',
1650
+ content: 'Your previous tool batch was invalid or excessively repetitive. Do NOT emit more tools right now. Answer the user directly with what you already know, or explain what specific missing context is still needed.'
1651
+ });
1652
+ continue;
1492
1653
  }
1493
1654
 
1494
1655
  // Add assistant message to history, preserving the reasoning field
@@ -1568,6 +1729,14 @@ class AgenticRunner {
1568
1729
 
1569
1730
  // Track command execution for hooks
1570
1731
  if (functionName === 'run_command' && !result.error) {
1732
+ if (result.requiresVerification && result.verificationCategory) {
1733
+ pendingCommandVerifications.set(result.verificationCategory, result.verificationHint || 'Run a read-only verification command before claiming success.');
1734
+ }
1735
+ if (Array.isArray(result.verificationEvidenceFor)) {
1736
+ for (const category of result.verificationEvidenceFor) {
1737
+ pendingCommandVerifications.delete(category);
1738
+ }
1739
+ }
1571
1740
  if (this.onCommandComplete) this.onCommandComplete(args.command, result);
1572
1741
  }
1573
1742
 
@@ -1673,6 +1842,12 @@ class AgenticRunner {
1673
1842
  nudgeParts.push(`Non-existent MCP tools (do NOT retry): ${[...failedMcpTools].join(', ')}`);
1674
1843
  }
1675
1844
 
1845
+ if (pendingCommandVerifications.size > 0) {
1846
+ nudgeParts.push(
1847
+ `State-changing commands are still UNVERIFIED. Before telling the user the task is done, run a read-only verification step. ${[...pendingCommandVerifications.values()].join(' ')}`
1848
+ );
1849
+ }
1850
+
1676
1851
  if (nudgeParts.length > 0) {
1677
1852
  messages.push({
1678
1853
  role: 'system',
@@ -1680,6 +1855,17 @@ class AgenticRunner {
1680
1855
  });
1681
1856
  }
1682
1857
 
1858
+ if (sanitizedBatch.dropped.length > 0) {
1859
+ messages.push({
1860
+ role: 'system',
1861
+ content:
1862
+ `Your previous response tried to call too many or duplicate tools. ` +
1863
+ `Dropped: ${sanitizedBatch.summary.duplicate} duplicate, ${sanitizedBatch.summary.perToolOverflow} excessive same-tool calls, ` +
1864
+ `${sanitizedBatch.summary.overflow} overflow, ${sanitizedBatch.summary.invalid} invalid. ` +
1865
+ `Next turn, use fewer tools and avoid repeating the same call with identical arguments.`
1866
+ });
1867
+ }
1868
+
1683
1869
  // Track read-only streaks (iterations with no writes or commands)
1684
1870
  // Skip streak tracking in plan mode - plan mode is inherently read-only
1685
1871
  const thisIterToolNames = assistantMessage.tool_calls.map(t => t.function.name);
@@ -1752,6 +1938,7 @@ class AgenticRunner {
1752
1938
  this._lastWrittenFiles = [...writtenFiles];
1753
1939
  logRunTotals('loop-break');
1754
1940
  const loopResponse = finalContent || 'I got stuck in a loop and could not complete the task. Please try rephrasing your request.';
1941
+ this.lastRunOutcome = { status: 'completed_with_warnings', phase: 'loop-break', warning: 'Loop breaker forced finalization.' };
1755
1942
  await this.emitStreaming(loopResponse);
1756
1943
  this.onContent(loopResponse);
1757
1944
  return loopResponse;
@@ -1787,6 +1974,7 @@ class AgenticRunner {
1787
1974
  this._lastWrittenFiles = [...writtenFiles];
1788
1975
  logRunTotals('no-progress-break');
1789
1976
  const npResponse = npContent || 'I spent too many iterations researching without making progress. Please try a more specific request.';
1977
+ this.lastRunOutcome = { status: 'completed_with_warnings', phase: 'no-progress-break', warning: 'No-progress breaker forced finalization.' };
1790
1978
  await this.emitStreaming(npResponse);
1791
1979
  this.onContent(npResponse);
1792
1980
  return npResponse;
@@ -1801,6 +1989,18 @@ class AgenticRunner {
1801
1989
  // Final response - no more tool calls.
1802
1990
  // The non-streaming chat() call already returned content. Use it directly
1803
1991
  // instead of making a redundant streaming call that may return empty/truncated.
1992
+ if (pendingCommandVerifications.size > 0 && verificationReminderCount < 1) {
1993
+ verificationReminderCount++;
1994
+ messages.push({
1995
+ role: 'system',
1996
+ content:
1997
+ `STOP. You are about to answer, but you still have unverified state-changing command results. ` +
1998
+ `Before claiming completion, run at least one read-only verification step for these categories: ${[...pendingCommandVerifications.keys()].join(', ')}. ` +
1999
+ `${[...pendingCommandVerifications.values()].join(' ')}`
2000
+ });
2001
+ continue;
2002
+ }
2003
+
1804
2004
  let existingContent = stripControlTokens(assistantMessage.content || '');
1805
2005
 
1806
2006
  // Extract inline <think>/<thinking> blocks from content (Qwen3.5 embeds reasoning in content)
@@ -1835,11 +2035,23 @@ class AgenticRunner {
1835
2035
 
1836
2036
  const reasoning = assistantMessage.reasoning || assistantMessage.reasoning_content || inlineReasoning;
1837
2037
 
2038
+ if (pendingCommandVerifications.size > 0) {
2039
+ const verificationWarning = `Warning: the requested command effects were not independently verified. ${[...pendingCommandVerifications.values()].join(' ')}`;
2040
+ existingContent = existingContent
2041
+ ? `${verificationWarning}\n\n${existingContent}`
2042
+ : verificationWarning;
2043
+ }
2044
+
1838
2045
  // If the model already produced content in this iteration, use it directly
1839
2046
  if (existingContent) {
1840
2047
  if (reasoning) {
1841
2048
  this.onReasoning(stripControlTokens(reasoning));
1842
2049
  }
2050
+ this.lastRunOutcome = {
2051
+ status: pendingCommandVerifications.size > 0 ? 'completed_with_warnings' : 'completed',
2052
+ phase: 'final-content',
2053
+ warning: pendingCommandVerifications.size > 0 ? 'Completion claims were not fully verified.' : null
2054
+ };
1843
2055
  await this.emitStreaming(existingContent);
1844
2056
  this.onContent(existingContent);
1845
2057
  logRunTotals('final-content');
@@ -1853,6 +2065,7 @@ class AgenticRunner {
1853
2065
  // Some models put the actual answer in reasoning when content is empty.
1854
2066
  // Return a minimal acknowledgment rather than an empty response.
1855
2067
  const fallback = '(Response was in reasoning only - see thinking output above)';
2068
+ this.lastRunOutcome = { status: 'completed_with_warnings', phase: 'final-reasoning-fallback', warning: 'Model returned reasoning without visible content.' };
1856
2069
  await this.emitStreaming(fallback);
1857
2070
  this.onContent(fallback);
1858
2071
  logRunTotals('final-reasoning-fallback');
@@ -1877,6 +2090,7 @@ class AgenticRunner {
1877
2090
  const content = stripControlTokens(thinkMsg?.content || '');
1878
2091
 
1879
2092
  if (thinkReasoning) this.onReasoning(stripControlTokens(thinkReasoning));
2093
+ this.lastRunOutcome = { status: 'completed', phase: 'final-think-pass', warning: null };
1880
2094
  await this.emitStreaming(content);
1881
2095
  this.onContent(content);
1882
2096
  logRunTotals('final-think-pass');
@@ -1895,10 +2109,21 @@ class AgenticRunner {
1895
2109
  signal: options.signal
1896
2110
  });
1897
2111
 
1898
- const content = await consumeStream(streamResponse, (token) => {
2112
+ const streamResult = await consumeStream(streamResponse, (token) => {
1899
2113
  this.onToken(token);
1900
2114
  });
2115
+ const content = streamResult.completed || !streamResult.warning
2116
+ ? streamResult.content
2117
+ : `${streamResult.warning}\n\n${streamResult.content}`.trim();
1901
2118
 
2119
+ if (!streamResult.completed && streamResult.warning) {
2120
+ this.onWarning(streamResult.warning);
2121
+ }
2122
+ this.lastRunOutcome = {
2123
+ status: streamResult.completed ? 'completed' : 'completed_with_warnings',
2124
+ phase: 'final-stream',
2125
+ warning: streamResult.warning
2126
+ };
1902
2127
  this.onContent(content);
1903
2128
  logRunTotals('final-stream');
1904
2129
  return content;
@@ -1906,9 +2131,10 @@ class AgenticRunner {
1906
2131
  }
1907
2132
 
1908
2133
  this.onWarning('Max tool iterations reached');
2134
+ this.lastRunOutcome = { status: 'failed', phase: 'max-iterations', warning: 'Max tool iterations reached.' };
1909
2135
  logRunTotals('max-iterations');
1910
2136
  return '';
1911
2137
  }
1912
2138
  }
1913
2139
 
1914
- module.exports = { AgenticRunner, TOOLS, READ_ONLY_TOOLS, executeTool, setMcpClient };
2140
+ module.exports = { AgenticRunner, TOOLS, READ_ONLY_TOOLS, executeTool, setMcpClient, sanitizeToolCalls, classifyCommandVerification };