banana-code 1.3.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/banana.js +85 -11
- package/lib/agenticRunner.js +236 -10
- package/lib/claudeCodeProvider.js +540 -0
- package/lib/config.js +49 -15
- package/lib/contextBuilder.js +11 -4
- package/lib/fileManager.js +9 -11
- package/lib/fsUtils.js +30 -0
- package/lib/historyManager.js +3 -5
- package/lib/modelRegistry.js +2 -1
- package/lib/providerManager.js +7 -1
- package/lib/providerStore.js +38 -4
- package/lib/streamHandler.js +25 -4
- package/package.json +48 -43
- package/prompts/code-agent-qwen.md +1 -0
- package/prompts/code-agent.md +1 -0
package/banana.js
CHANGED
|
@@ -81,7 +81,7 @@ let pendingHumanQuestion = null; // { resolve, question }
|
|
|
81
81
|
// CONFIGURATION
|
|
82
82
|
// =============================================================================
|
|
83
83
|
|
|
84
|
-
const VERSION = '1.
|
|
84
|
+
const VERSION = '1.4.0';
|
|
85
85
|
const { PAD } = require('./lib/borderRenderer'); // Single source of truth for left padding
|
|
86
86
|
const DEBUG_DISABLED_VALUES = new Set(['0', 'false', 'off', 'no']);
|
|
87
87
|
const NEXT_TURN_RESERVE_TOKENS = 1200;
|
|
@@ -389,7 +389,7 @@ ${P}${c.yellow}/hooks${c.reset} Manage lifecycle hooks (add, edit,
|
|
|
389
389
|
${P}${c.yellow}/steer <text>${c.reset} Steer next turn (or interrupt + redirect current turn)
|
|
390
390
|
${P}${c.yellow}/model [name]${c.reset} Show/switch model
|
|
391
391
|
${P}${c.yellow}/model search <query>${c.reset} Search OpenRouter models and add one
|
|
392
|
-
${P}${c.yellow}/connect [provider]${c.reset} Connect provider (Anthropic, OpenAI
|
|
392
|
+
${P}${c.yellow}/connect [provider]${c.reset} Connect provider (Anthropic, OpenAI, OpenRouter, Claude Code)
|
|
393
393
|
${P}${c.yellow}/prompt [name]${c.reset} Show/switch prompt (base, code-agent, or any .md)
|
|
394
394
|
|
|
395
395
|
${P}${c.banana}${c.dim}Config Commands:${c.reset}
|
|
@@ -442,6 +442,15 @@ function initProject() {
|
|
|
442
442
|
tokenCounter = new TokenCounter(config);
|
|
443
443
|
imageHandler = new ImageHandler(projectDir);
|
|
444
444
|
|
|
445
|
+
const lastRunSnapshot = config.getRunSnapshot();
|
|
446
|
+
if (lastRunSnapshot && lastRunSnapshot.completed === false) {
|
|
447
|
+
const when = lastRunSnapshot.savedAt ? new Date(lastRunSnapshot.savedAt).toLocaleString() : 'recently';
|
|
448
|
+
console.log(`${PAD}${c.yellow}⚠ Previous run appears to have ended unexpectedly.${c.reset} ${c.dim}(${when})${c.reset}`);
|
|
449
|
+
if (lastRunSnapshot.userMessage) {
|
|
450
|
+
console.log(`${PAD}${c.dim} Last request: ${String(lastRunSnapshot.userMessage).slice(0, 120)}${c.reset}`);
|
|
451
|
+
}
|
|
452
|
+
}
|
|
453
|
+
|
|
445
454
|
// Initialize LM Studio + provider manager
|
|
446
455
|
const lmStudioUrl = config.get('lmStudioUrl') || 'http://localhost:1234';
|
|
447
456
|
lmStudio = new LmStudio({ baseUrl: lmStudioUrl });
|
|
@@ -913,6 +922,7 @@ function normalizeProviderKey(raw) {
|
|
|
913
922
|
const key = String(raw || '').trim().toLowerCase();
|
|
914
923
|
if (!key) return null;
|
|
915
924
|
if (key === 'lmstudio' || key === 'local') return 'local';
|
|
925
|
+
if (key === 'claude' || key === 'claudecode' || key === 'claude-code') return 'claude-code';
|
|
916
926
|
if (PROVIDERS.includes(key)) return key;
|
|
917
927
|
return null;
|
|
918
928
|
}
|
|
@@ -1306,7 +1316,9 @@ async function connectProviderInteractive(provider) {
|
|
|
1306
1316
|
label: PROVIDER_LABELS[p] || p,
|
|
1307
1317
|
description: p === 'openai'
|
|
1308
1318
|
? 'OAuth device login for Codex subscription'
|
|
1309
|
-
:
|
|
1319
|
+
: p === 'claude-code'
|
|
1320
|
+
? 'Use your Claude Code CLI subscription (no API key needed)'
|
|
1321
|
+
: 'Connect with API key',
|
|
1310
1322
|
tags: ['provider'],
|
|
1311
1323
|
active: providerStore.isConnected(p)
|
|
1312
1324
|
}));
|
|
@@ -1351,6 +1363,29 @@ async function connectProviderInteractive(provider) {
|
|
|
1351
1363
|
return;
|
|
1352
1364
|
}
|
|
1353
1365
|
|
|
1366
|
+
if (provider === 'claude-code') {
|
|
1367
|
+
console.log(`\n${PAD}${c.cyan}Claude Code CLI Connection${c.reset}`);
|
|
1368
|
+
console.log(`${PAD}${c.dim}Checking for Claude Code CLI...${c.reset}`);
|
|
1369
|
+
|
|
1370
|
+
const { ClaudeCodeClient } = require('./lib/claudeCodeProvider');
|
|
1371
|
+
const claudeClient = new ClaudeCodeClient();
|
|
1372
|
+
const connected = await claudeClient.isConnected();
|
|
1373
|
+
|
|
1374
|
+
if (!connected) {
|
|
1375
|
+
console.log(`${PAD}${c.red}✗ Claude Code CLI not found.${c.reset}`);
|
|
1376
|
+
console.log(`${PAD}${c.dim}Install it: npm install -g @anthropic-ai/claude-code${c.reset}`);
|
|
1377
|
+
console.log(`${PAD}${c.dim}Then run: claude login${c.reset}\n`);
|
|
1378
|
+
return;
|
|
1379
|
+
}
|
|
1380
|
+
|
|
1381
|
+
providerStore.connectClaudeCode();
|
|
1382
|
+
modelRegistry.refreshRemoteModels();
|
|
1383
|
+
console.log(`${PAD}${c.green}✓ Connected Claude Code CLI${c.reset}`);
|
|
1384
|
+
console.log(`${PAD}${c.dim}Uses your existing Claude subscription (no API key needed).${c.reset}`);
|
|
1385
|
+
console.log(`${PAD}${c.dim}Use /model to switch to Claude Code models.${c.reset}\n`);
|
|
1386
|
+
return;
|
|
1387
|
+
}
|
|
1388
|
+
|
|
1354
1389
|
throw new Error(`Unsupported provider: ${provider}`);
|
|
1355
1390
|
}
|
|
1356
1391
|
|
|
@@ -1589,9 +1624,15 @@ async function handleCommand(input) {
|
|
|
1589
1624
|
tokenCounter.resetSession();
|
|
1590
1625
|
imageHandler.clearPending();
|
|
1591
1626
|
setContextEstimate(0);
|
|
1627
|
+
promptDuringWork = false;
|
|
1628
|
+
renderWorkingPrompt = null;
|
|
1629
|
+
if (rl) {
|
|
1630
|
+
rl.write(null, { ctrl: true, name: 'u' });
|
|
1631
|
+
}
|
|
1592
1632
|
if (statusBar) {
|
|
1633
|
+
statusBar.setInputHint('');
|
|
1593
1634
|
statusBar.update({ sessionIn: 0, sessionOut: 0 });
|
|
1594
|
-
statusBar.
|
|
1635
|
+
statusBar.uninstall();
|
|
1595
1636
|
}
|
|
1596
1637
|
console.clear();
|
|
1597
1638
|
// Push cursor to bottom of scroll region so prompt isn't stranded at the top
|
|
@@ -1603,6 +1644,9 @@ async function handleCommand(input) {
|
|
|
1603
1644
|
if (padding > 0) process.stdout.write('\n'.repeat(padding));
|
|
1604
1645
|
}
|
|
1605
1646
|
refreshIdleContextEstimate();
|
|
1647
|
+
if (statusBar) {
|
|
1648
|
+
statusBar.reinstall();
|
|
1649
|
+
}
|
|
1606
1650
|
if (rl) {
|
|
1607
1651
|
rl.setPrompt(buildPromptPrefix());
|
|
1608
1652
|
rl.prompt(false);
|
|
@@ -2118,7 +2162,7 @@ async function handleCommand(input) {
|
|
|
2118
2162
|
if (normalizedSub === 'disconnect') {
|
|
2119
2163
|
const provider = normalizeProviderKey(secondArg);
|
|
2120
2164
|
if (!provider || provider === 'local') {
|
|
2121
|
-
console.log(`\n${PAD}${c.yellow}Usage: /connect disconnect <anthropic|openai|openrouter>${c.reset}\n`);
|
|
2165
|
+
console.log(`\n${PAD}${c.yellow}Usage: /connect disconnect <anthropic|openai|openrouter|claude-code>${c.reset}\n`);
|
|
2122
2166
|
return true;
|
|
2123
2167
|
}
|
|
2124
2168
|
const wasActiveProvider = (modelRegistry.getCurrentModel()?.provider || 'local') === provider;
|
|
@@ -2135,7 +2179,7 @@ async function handleCommand(input) {
|
|
|
2135
2179
|
if (normalizedSub === 'use') {
|
|
2136
2180
|
const provider = normalizeProviderKey(secondArg);
|
|
2137
2181
|
if (!provider) {
|
|
2138
|
-
console.log(`\n${PAD}${c.yellow}Usage: /connect use <local|anthropic|openai|openrouter>${c.reset}\n`);
|
|
2182
|
+
console.log(`\n${PAD}${c.yellow}Usage: /connect use <local|anthropic|openai|openrouter|claude-code>${c.reset}\n`);
|
|
2139
2183
|
return true;
|
|
2140
2184
|
}
|
|
2141
2185
|
|
|
@@ -2160,7 +2204,7 @@ async function handleCommand(input) {
|
|
|
2160
2204
|
if (!provider || provider === 'local') {
|
|
2161
2205
|
console.log(`\n${PAD}${c.yellow}Usage:${c.reset}`);
|
|
2162
2206
|
console.log(`${PAD}${c.dim} /connect${c.reset}`);
|
|
2163
|
-
console.log(`${PAD}${c.dim} /connect <anthropic|openai|openrouter>${c.reset}`);
|
|
2207
|
+
console.log(`${PAD}${c.dim} /connect <anthropic|openai|openrouter|claude-code>${c.reset}`);
|
|
2164
2208
|
console.log(`${PAD}${c.dim} /connect status${c.reset}`);
|
|
2165
2209
|
console.log(`${PAD}${c.dim} /connect disconnect <provider>${c.reset}`);
|
|
2166
2210
|
console.log(`${PAD}${c.dim} /connect use <local|provider>${c.reset}\n`);
|
|
@@ -2684,9 +2728,19 @@ async function sendMessage(message) {
|
|
|
2684
2728
|
fullMessage += '\n\n[Image analysis above is primary source of truth. Focus on image content, not file listing.]';
|
|
2685
2729
|
}
|
|
2686
2730
|
|
|
2731
|
+
config.saveRunSnapshot({
|
|
2732
|
+
projectDir,
|
|
2733
|
+
activeModel: modelRegistry.getCurrent(),
|
|
2734
|
+
userMessage: message,
|
|
2735
|
+
fullMessagePreview: fullMessage.slice(0, 2000),
|
|
2736
|
+
conversationLength: conversationHistory.length
|
|
2737
|
+
});
|
|
2738
|
+
|
|
2687
2739
|
try {
|
|
2688
2740
|
await sendAgenticMessage(fullMessage, pendingImages, message);
|
|
2741
|
+
config.completeRunSnapshot({ status: 'completed' });
|
|
2689
2742
|
} catch (error) {
|
|
2743
|
+
config.completeRunSnapshot({ status: 'failed', error: error.message });
|
|
2690
2744
|
const provider = activeProviderKey();
|
|
2691
2745
|
const providerLabel = providerManager.getProviderLabel(provider);
|
|
2692
2746
|
console.log(`\n${PAD}${c.red}✗ Error: ${error.message}${c.reset}`);
|
|
@@ -2985,6 +3039,11 @@ async function sendStreamingMessage(message, images = [], rawMessage = '') {
|
|
|
2985
3039
|
|
|
2986
3040
|
try {
|
|
2987
3041
|
await streamHandler.handleStream(response);
|
|
3042
|
+
const streamResult = streamHandler.getResult();
|
|
3043
|
+
if (!streamResult.completed && streamResult.warning) {
|
|
3044
|
+
fullResponse = `${streamResult.warning}\n\n${fullResponse}`.trim();
|
|
3045
|
+
console.log(`\n${PAD}${c.yellow}⚠ ${streamResult.warning}${c.reset}`);
|
|
3046
|
+
}
|
|
2988
3047
|
} catch (error) {
|
|
2989
3048
|
stopStatus();
|
|
2990
3049
|
// Check if this was an abort
|
|
@@ -5172,10 +5231,12 @@ Examples:
|
|
|
5172
5231
|
// then subsequent lines arrive as new 'line' events. We detect paste by
|
|
5173
5232
|
// buffering lines that arrive within PASTE_DELAY_MS of each other.
|
|
5174
5233
|
const PASTE_DELAY_MS = 400; // 400ms to handle large pastes and Windows Terminal dialog latency
|
|
5234
|
+
const PASTE_STRAGGLER_WINDOW_MS = 1200; // Late lines can arrive after submit on Windows Terminal
|
|
5175
5235
|
let pasteBuffer = [];
|
|
5176
5236
|
let pasteTimer = null;
|
|
5177
5237
|
let waitingForInput = false;
|
|
5178
5238
|
let lastFlushTime = 0; // Track when paste buffer last flushed (to catch stragglers)
|
|
5239
|
+
let lastPasteStragglerWarningAt = 0;
|
|
5179
5240
|
|
|
5180
5241
|
showGeminiKeyPrompt = (callback) => {
|
|
5181
5242
|
awaitingGeminiKey = true;
|
|
@@ -5342,13 +5403,17 @@ Examples:
|
|
|
5342
5403
|
return;
|
|
5343
5404
|
}
|
|
5344
5405
|
|
|
5345
|
-
// Straggler paste lines: arrived after flush but
|
|
5346
|
-
//
|
|
5347
|
-
|
|
5406
|
+
// Straggler paste lines: arrived after flush but before the paste has fully settled.
|
|
5407
|
+
// On Windows Terminal, delayed lines can arrive after the first chunk was submitted,
|
|
5408
|
+
// and without this guard they'd be misread as mid-turn steering.
|
|
5409
|
+
if (lastFlushTime && (Date.now() - lastFlushTime) < PASTE_STRAGGLER_WINDOW_MS) {
|
|
5348
5410
|
const trimmed = String(input || '').trim();
|
|
5349
5411
|
if (trimmed) {
|
|
5350
5412
|
appendDebugLog(`[paste-straggler] Dropped line arrived ${Date.now() - lastFlushTime}ms after flush: ${trimmed.slice(0, 60)}\n`);
|
|
5351
|
-
|
|
5413
|
+
if (lastPasteStragglerWarningAt !== lastFlushTime) {
|
|
5414
|
+
lastPasteStragglerWarningAt = lastFlushTime;
|
|
5415
|
+
console.log(`${PAD}${c.yellow}Ignored delayed paste lines from the previous submission.${c.reset} ${c.dim}If this keeps happening, disable the Windows Terminal paste warning or paste again after the prompt settles.${c.reset}`);
|
|
5416
|
+
}
|
|
5352
5417
|
}
|
|
5353
5418
|
return;
|
|
5354
5419
|
}
|
|
@@ -5476,6 +5541,9 @@ process.on('SIGINT', () => {
|
|
|
5476
5541
|
if (config && config.get('autoSaveHistory') && conversationHistory.length > 0) {
|
|
5477
5542
|
config.saveConversation('autosave', conversationHistory);
|
|
5478
5543
|
}
|
|
5544
|
+
if (config) {
|
|
5545
|
+
config.completeRunSnapshot({ status: 'cancelled' });
|
|
5546
|
+
}
|
|
5479
5547
|
if (watcher) watcher.stop();
|
|
5480
5548
|
console.log(`\n${PAD}${c.cyan}👋 See you later!${c.reset}\n`);
|
|
5481
5549
|
process.exit(0);
|
|
@@ -5484,6 +5552,12 @@ process.on('SIGINT', () => {
|
|
|
5484
5552
|
main().catch(error => {
|
|
5485
5553
|
logSessionEnd('crash', ` error=${error.message}`);
|
|
5486
5554
|
if (statusBar) statusBar.uninstall();
|
|
5555
|
+
if (config && config.get('autoSaveHistory') && conversationHistory.length > 0) {
|
|
5556
|
+
config.saveConversation('autosave-crash', conversationHistory);
|
|
5557
|
+
}
|
|
5558
|
+
if (config) {
|
|
5559
|
+
config.completeRunSnapshot({ status: 'crashed', error: error.message });
|
|
5560
|
+
}
|
|
5487
5561
|
console.error(`${c.red}Fatal error: ${error.message}${c.reset}`);
|
|
5488
5562
|
if (watcher) watcher.stop();
|
|
5489
5563
|
process.exit(1);
|
package/lib/agenticRunner.js
CHANGED
|
@@ -549,6 +549,11 @@ const READ_ONLY_TOOLS = TOOLS.filter(t => READ_ONLY_TOOL_NAMES.has(t.function.na
|
|
|
549
549
|
|
|
550
550
|
const IGNORE_PATTERNS = ['node_modules', '.git', '.next', 'dist', 'build', '.banana'];
|
|
551
551
|
const MAX_ITERATIONS = 50;
|
|
552
|
+
const MAX_TOOL_CALLS_PER_TURN = 24;
|
|
553
|
+
const MAX_IDENTICAL_TOOL_CALLS_PER_TURN = 1;
|
|
554
|
+
const MAX_TOOL_CALLS_BY_NAME_PER_TURN = {
|
|
555
|
+
list_files: 6
|
|
556
|
+
};
|
|
552
557
|
const WRITE_TOOL_NAMES = new Set(['create_file', 'edit_file', 'run_command']);
|
|
553
558
|
const CONTEXT_TRIM_THRESHOLD = 0.60; // 60% of context limit - start trimming early
|
|
554
559
|
const CONTEXT_TRIM_KEEP_RECENT = 6; // Keep last N messages intact
|
|
@@ -885,9 +890,50 @@ function executeEditFile(projectDir, filePath, content) {
|
|
|
885
890
|
}
|
|
886
891
|
}
|
|
887
892
|
|
|
893
|
+
/**
 * Inspect a shell command string and decide whether its effects should be
 * independently verified before the agent reports success.
 *
 * Matching is done on the trimmed, lower-cased command text.
 *
 * @param {string} command - Raw command line; null/undefined is treated as ''.
 * @returns {{
 *   requiresVerification: boolean,
 *   category: (string|null),
 *   verificationHint: (string|null),
 *   verificationEvidenceFor: string[],
 *   readOnlyCommand: boolean
 * }} Classification. `verificationEvidenceFor` lists the categories this
 *   command itself provides read-only evidence for; `readOnlyCommand` is true
 *   only when no mutation pattern matched and at least one read-only pattern did.
 */
function classifyCommandVerification(command) {
  const normalized = String(command || '').trim().toLowerCase();

  // Read-only probes count as evidence even when they appear in the same
  // command line as a mutation (e.g. a chained command).
  const evidence = [];
  if (/\bgit\s+(status|rev-parse|branch|log|diff|show|ls-files|show-ref)\b/.test(normalized)) {
    evidence.push('git_state');
  }
  if (/\b(dir|type|findstr|where)\b/.test(normalized)) {
    evidence.push('filesystem_state');
  }

  // Checked in order: a git mutation takes precedence over a filesystem one.
  const mutationRules = [
    {
      category: 'git_state',
      pattern: /\bgit\s+(pull|checkout|switch|reset|merge|rebase|cherry-pick|restore|clean|stash\s+(pop|apply|drop)|apply|commit|push)\b/,
      hint: 'Before claiming success, run a read-only git check such as `git status --short`, `git rev-parse HEAD`, or compare `HEAD` to `@{u}`.'
    },
    {
      category: 'filesystem_state',
      pattern: /\b(copy|move|ren|rename|mkdir|rmdir|del|erase|xcopy|robocopy|attrib)\b/,
      hint: 'Before claiming success, run a read-only check such as `dir`, `type`, or `findstr` to confirm the change is actually present.'
    }
  ];

  const matchedRule = mutationRules.find(rule => rule.pattern.test(normalized));
  if (matchedRule) {
    return {
      requiresVerification: true,
      category: matchedRule.category,
      verificationHint: matchedRule.hint,
      verificationEvidenceFor: evidence,
      readOnlyCommand: false
    };
  }

  return {
    requiresVerification: false,
    category: null,
    verificationHint: null,
    verificationEvidenceFor: evidence,
    readOnlyCommand: evidence.length > 0
  };
}
|
|
932
|
+
|
|
888
933
|
async function executeRunCommand(projectDir, command, options = {}) {
|
|
889
934
|
const signal = options.signal;
|
|
890
935
|
const timeoutMs = options.timeoutMs ?? 30000;
|
|
936
|
+
const verificationMeta = classifyCommandVerification(command);
|
|
891
937
|
|
|
892
938
|
// Basic safety check - block destructive commands
|
|
893
939
|
const dangerous = /\b(rm\s+-rf|del\s+\/[sqf]|format\s+[a-z]:)\b/i;
|
|
@@ -946,6 +992,13 @@ async function executeRunCommand(projectDir, command, options = {}) {
|
|
|
946
992
|
const limit = 15000;
|
|
947
993
|
finish(resolve, {
|
|
948
994
|
success: true,
|
|
995
|
+
command,
|
|
996
|
+
outcome: 'completed',
|
|
997
|
+
requiresVerification: verificationMeta.requiresVerification,
|
|
998
|
+
verificationCategory: verificationMeta.category,
|
|
999
|
+
verificationHint: verificationMeta.verificationHint,
|
|
1000
|
+
verificationEvidenceFor: verificationMeta.verificationEvidenceFor,
|
|
1001
|
+
readOnlyCommand: verificationMeta.readOnlyCommand,
|
|
949
1002
|
output: output.substring(0, limit),
|
|
950
1003
|
...(output.length > limit ? { truncated: true, totalLength: output.length } : {})
|
|
951
1004
|
});
|
|
@@ -953,6 +1006,8 @@ async function executeRunCommand(projectDir, command, options = {}) {
|
|
|
953
1006
|
const limit = 10000;
|
|
954
1007
|
finish(resolve, {
|
|
955
1008
|
error: `Command failed with exit code ${code}`,
|
|
1009
|
+
command,
|
|
1010
|
+
outcome: code === 124 ? 'timed_out' : 'failed',
|
|
956
1011
|
output: output.substring(0, limit),
|
|
957
1012
|
exitCode: code,
|
|
958
1013
|
...(output.length > limit ? { truncated: true, totalLength: output.length } : {})
|
|
@@ -967,6 +1022,7 @@ async function executeRunCommand(projectDir, command, options = {}) {
|
|
|
967
1022
|
finish(resolve, {
|
|
968
1023
|
error: `Command timed out after ${timeoutMs}ms`,
|
|
969
1024
|
output: raw.substring(0, 10000),
|
|
1025
|
+
outcome: 'timed_out',
|
|
970
1026
|
exitCode: 124,
|
|
971
1027
|
...(raw.length > 10000 ? { truncated: true, totalLength: raw.length } : {})
|
|
972
1028
|
});
|
|
@@ -1124,6 +1180,77 @@ function stripControlTokens(text) {
|
|
|
1124
1180
|
return cleaned.replace(/^\s+$/, '');
|
|
1125
1181
|
}
|
|
1126
1182
|
|
|
1183
|
+
/**
 * Serialize a value to a JSON-like string whose object keys are emitted in
 * sorted order, so two structurally equal objects produce the same text
 * regardless of key insertion order.
 *
 * Non-object values (and null) are delegated to JSON.stringify, which means
 * `undefined` yields `undefined` rather than a string.
 *
 * @param {*} value - Value to serialize.
 * @returns {(string|undefined)} Deterministic serialization of `value`.
 */
function stableStringify(value) {
  // Primitives, null, undefined and functions: plain JSON.stringify.
  if (value === null || typeof value !== 'object') {
    return JSON.stringify(value);
  }

  if (Array.isArray(value)) {
    // join() renders undefined items as empty strings; keep map/join so that
    // behavior is retained.
    const items = value.map(stableStringify);
    return '[' + items.join(',') + ']';
  }

  const sortedKeys = Object.keys(value).sort();
  const members = sortedKeys.map(key => `${JSON.stringify(key)}:${stableStringify(value[key])}`);
  return '{' + members.join(',') + '}';
}
|
|
1193
|
+
|
|
1194
|
+
/**
 * Decode the `arguments` payload of a tool call.
 *
 * - A JSON string is parsed and the parsed value is returned.
 * - A non-null object is assumed to be already-decoded arguments and is
 *   returned unchanged. Previously it was replaced by `{}`, which made every
 *   call to the same tool with object arguments look identical to callers
 *   that build a signature from the result.
 * - Anything else (undefined, null, numbers, ...) and any string that is not
 *   valid JSON yields an empty object; this function never throws.
 *
 * @param {*} rawArgs - Tool-call arguments as a JSON string or decoded object.
 * @returns {*} Decoded arguments, or `{}` when they cannot be decoded.
 */
function parseToolArgs(rawArgs) {
  // Already decoded: keep the real arguments instead of discarding them.
  if (rawArgs !== null && typeof rawArgs === 'object') return rawArgs;
  if (typeof rawArgs !== 'string') return {};
  try {
    return JSON.parse(rawArgs);
  } catch {
    // Malformed JSON is treated as "no arguments" (deliberate best-effort).
    return {};
  }
}
|
|
1202
|
+
|
|
1203
|
+
/**
 * Filter a batch of tool calls emitted by the model in one response.
 *
 * Calls are examined in order and rejected for the first matching reason:
 *   - 'invalid'           : no function name
 *   - 'duplicate'         : same name + arguments already accepted
 *                           MAX_IDENTICAL_TOOL_CALLS_PER_TURN times
 *   - 'per_tool_overflow' : tool has a cap in MAX_TOOL_CALLS_BY_NAME_PER_TURN
 *                           and that many calls were already accepted
 *   - 'overflow'          : MAX_TOOL_CALLS_PER_TURN calls already accepted
 * Only accepted calls advance the counters.
 *
 * @param {Array<object>} toolCalls - Tool calls; null/undefined means none.
 * @returns {{toolCalls: Array<object>, dropped: Array<object>, summary: {invalid: number, duplicate: number, perToolOverflow: number, overflow: number}}}
 *   Accepted calls, rejected calls (each with its reason), and per-reason counts.
 */
function sanitizeToolCalls(toolCalls) {
  const accepted = [];
  const rejected = [];
  const acceptedBySignature = new Map();
  const acceptedByTool = new Map();

  for (const call of toolCalls || []) {
    const name = call?.function?.name;
    if (!name) {
      rejected.push({ reason: 'invalid', toolCall: call });
      continue;
    }

    // Signature = tool name plus key-order-independent arguments.
    const signature = `${name}:${stableStringify(parseToolArgs(call.function.arguments))}`;
    const timesSeen = acceptedBySignature.get(signature) || 0;
    const timesUsed = acceptedByTool.get(name) || 0;
    const toolCap = MAX_TOOL_CALLS_BY_NAME_PER_TURN[name];

    let rejection = null;
    if (timesSeen >= MAX_IDENTICAL_TOOL_CALLS_PER_TURN) {
      rejection = 'duplicate';
    } else if (toolCap && timesUsed >= toolCap) {
      rejection = 'per_tool_overflow';
    } else if (accepted.length >= MAX_TOOL_CALLS_PER_TURN) {
      rejection = 'overflow';
    }

    if (rejection) {
      rejected.push({ reason: rejection, toolCall: call, signature });
      continue;
    }

    acceptedBySignature.set(signature, timesSeen + 1);
    acceptedByTool.set(name, timesUsed + 1);
    accepted.push(call);
  }

  // Single pass over the rejections instead of one filter per reason.
  const tally = { invalid: 0, duplicate: 0, per_tool_overflow: 0, overflow: 0 };
  for (const { reason } of rejected) {
    tally[reason] += 1;
  }

  return {
    toolCalls: accepted,
    dropped: rejected,
    summary: {
      invalid: tally.invalid,
      duplicate: tally.duplicate,
      perToolOverflow: tally.per_tool_overflow,
      overflow: tally.overflow
    }
  };
}
|
|
1253
|
+
|
|
1127
1254
|
// ─── Repetition Detection ─────────────────────────────────────────────────────
|
|
1128
1255
|
|
|
1129
1256
|
/**
|
|
@@ -1155,6 +1282,8 @@ async function consumeStream(response, onToken) {
|
|
|
1155
1282
|
let thinkBuffer = ''; // accumulates text inside a think block
|
|
1156
1283
|
let inThink = false;
|
|
1157
1284
|
let repetitionDetected = false;
|
|
1285
|
+
let doneSignalReceived = false;
|
|
1286
|
+
let warning = null;
|
|
1158
1287
|
|
|
1159
1288
|
const flush = (text) => {
|
|
1160
1289
|
const clean = stripControlTokens(text);
|
|
@@ -1184,7 +1313,11 @@ async function consumeStream(response, onToken) {
|
|
|
1184
1313
|
|
|
1185
1314
|
for (const line of lines) {
|
|
1186
1315
|
const trimmed = line.trim();
|
|
1187
|
-
if (!trimmed
|
|
1316
|
+
if (!trimmed) continue;
|
|
1317
|
+
if (trimmed === 'data: [DONE]') {
|
|
1318
|
+
doneSignalReceived = true;
|
|
1319
|
+
continue;
|
|
1320
|
+
}
|
|
1188
1321
|
if (!trimmed.startsWith('data: ')) continue;
|
|
1189
1322
|
|
|
1190
1323
|
try {
|
|
@@ -1224,7 +1357,16 @@ async function consumeStream(response, onToken) {
|
|
|
1224
1357
|
}
|
|
1225
1358
|
}
|
|
1226
1359
|
|
|
1227
|
-
|
|
1360
|
+
if (!doneSignalReceived) {
|
|
1361
|
+
warning = 'Warning: final stream ended without an explicit completion signal. The response may be incomplete.';
|
|
1362
|
+
}
|
|
1363
|
+
|
|
1364
|
+
return {
|
|
1365
|
+
content: fullContent,
|
|
1366
|
+
completed: doneSignalReceived,
|
|
1367
|
+
warning,
|
|
1368
|
+
repetitionDetected
|
|
1369
|
+
};
|
|
1228
1370
|
}
|
|
1229
1371
|
|
|
1230
1372
|
// ─── Agentic Loop ───────────────────────────────────────────────────────────
|
|
@@ -1252,6 +1394,7 @@ class AgenticRunner {
|
|
|
1252
1394
|
this.lastTurnMessagesEstimate = 0;
|
|
1253
1395
|
this.totalCacheReadTokens = 0;
|
|
1254
1396
|
this.totalCacheCreationTokens = 0;
|
|
1397
|
+
this.lastRunOutcome = { status: 'running', phase: 'start', warning: null };
|
|
1255
1398
|
}
|
|
1256
1399
|
|
|
1257
1400
|
/**
|
|
@@ -1302,9 +1445,12 @@ class AgenticRunner {
|
|
|
1302
1445
|
let iterations = 0;
|
|
1303
1446
|
const toolCallHistory = []; // Track tool calls for loop detection
|
|
1304
1447
|
const failedMcpTools = new Set(); // Track MCP tools that returned "Unknown tool" errors
|
|
1448
|
+
const pendingCommandVerifications = new Map(); // category -> verification hint
|
|
1305
1449
|
let readOnlyStreak = 0; // Consecutive iterations with only read-only tool calls
|
|
1306
1450
|
let loopWarningCount = 0; // How many times loop detection has fired
|
|
1307
1451
|
|
|
1452
|
+
let verificationReminderCount = 0; // How many times we had to demand verification before finalizing
|
|
1453
|
+
|
|
1308
1454
|
// Model-tier-aware read-only thresholds: smarter models get more research leeway
|
|
1309
1455
|
// options.model is the raw model ID (e.g. "claude-sonnet-4-6-20250514", "gpt-4o", "silverback")
|
|
1310
1456
|
const modelId = (options.model || '').toLowerCase();
|
|
@@ -1483,12 +1629,27 @@ class AgenticRunner {
|
|
|
1483
1629
|
// Some models use finish_reason "tool_calls", others use "stop" or "function_call"
|
|
1484
1630
|
// but still include tool_calls in the message. Check for the array itself.
|
|
1485
1631
|
if (assistantMessage.tool_calls && assistantMessage.tool_calls.length > 0) {
|
|
1486
|
-
|
|
1487
|
-
const
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
|
|
1632
|
+
const originalToolCallCount = assistantMessage.tool_calls.length;
|
|
1633
|
+
const sanitizedBatch = sanitizeToolCalls(assistantMessage.tool_calls);
|
|
1634
|
+
assistantMessage.tool_calls = sanitizedBatch.toolCalls;
|
|
1635
|
+
|
|
1636
|
+
if (sanitizedBatch.dropped.length > 0) {
|
|
1637
|
+
appendDebugLog(
|
|
1638
|
+
` [tool batch sanitized] original=${originalToolCallCount} kept=${assistantMessage.tool_calls.length} ` +
|
|
1639
|
+
`duplicate=${sanitizedBatch.summary.duplicate} per_tool_overflow=${sanitizedBatch.summary.perToolOverflow} ` +
|
|
1640
|
+
`overflow=${sanitizedBatch.summary.overflow} invalid=${sanitizedBatch.summary.invalid}\n`
|
|
1641
|
+
);
|
|
1642
|
+
this.onWarning(
|
|
1643
|
+
`Trimmed a noisy tool batch from ${originalToolCallCount} calls to ${assistantMessage.tool_calls.length}.`
|
|
1644
|
+
);
|
|
1645
|
+
}
|
|
1646
|
+
|
|
1647
|
+
if (assistantMessage.tool_calls.length === 0) {
|
|
1648
|
+
messages.push({
|
|
1649
|
+
role: 'system',
|
|
1650
|
+
content: 'Your previous tool batch was invalid or excessively repetitive. Do NOT emit more tools right now. Answer the user directly with what you already know, or explain what specific missing context is still needed.'
|
|
1651
|
+
});
|
|
1652
|
+
continue;
|
|
1492
1653
|
}
|
|
1493
1654
|
|
|
1494
1655
|
// Add assistant message to history, preserving the reasoning field
|
|
@@ -1568,6 +1729,14 @@ class AgenticRunner {
|
|
|
1568
1729
|
|
|
1569
1730
|
// Track command execution for hooks
|
|
1570
1731
|
if (functionName === 'run_command' && !result.error) {
|
|
1732
|
+
if (result.requiresVerification && result.verificationCategory) {
|
|
1733
|
+
pendingCommandVerifications.set(result.verificationCategory, result.verificationHint || 'Run a read-only verification command before claiming success.');
|
|
1734
|
+
}
|
|
1735
|
+
if (Array.isArray(result.verificationEvidenceFor)) {
|
|
1736
|
+
for (const category of result.verificationEvidenceFor) {
|
|
1737
|
+
pendingCommandVerifications.delete(category);
|
|
1738
|
+
}
|
|
1739
|
+
}
|
|
1571
1740
|
if (this.onCommandComplete) this.onCommandComplete(args.command, result);
|
|
1572
1741
|
}
|
|
1573
1742
|
|
|
@@ -1673,6 +1842,12 @@ class AgenticRunner {
|
|
|
1673
1842
|
nudgeParts.push(`Non-existent MCP tools (do NOT retry): ${[...failedMcpTools].join(', ')}`);
|
|
1674
1843
|
}
|
|
1675
1844
|
|
|
1845
|
+
if (pendingCommandVerifications.size > 0) {
|
|
1846
|
+
nudgeParts.push(
|
|
1847
|
+
`State-changing commands are still UNVERIFIED. Before telling the user the task is done, run a read-only verification step. ${[...pendingCommandVerifications.values()].join(' ')}`
|
|
1848
|
+
);
|
|
1849
|
+
}
|
|
1850
|
+
|
|
1676
1851
|
if (nudgeParts.length > 0) {
|
|
1677
1852
|
messages.push({
|
|
1678
1853
|
role: 'system',
|
|
@@ -1680,6 +1855,17 @@ class AgenticRunner {
|
|
|
1680
1855
|
});
|
|
1681
1856
|
}
|
|
1682
1857
|
|
|
1858
|
+
if (sanitizedBatch.dropped.length > 0) {
|
|
1859
|
+
messages.push({
|
|
1860
|
+
role: 'system',
|
|
1861
|
+
content:
|
|
1862
|
+
`Your previous response tried to call too many or duplicate tools. ` +
|
|
1863
|
+
`Dropped: ${sanitizedBatch.summary.duplicate} duplicate, ${sanitizedBatch.summary.perToolOverflow} excessive same-tool calls, ` +
|
|
1864
|
+
`${sanitizedBatch.summary.overflow} overflow, ${sanitizedBatch.summary.invalid} invalid. ` +
|
|
1865
|
+
`Next turn, use fewer tools and avoid repeating the same call with identical arguments.`
|
|
1866
|
+
});
|
|
1867
|
+
}
|
|
1868
|
+
|
|
1683
1869
|
// Track read-only streaks (iterations with no writes or commands)
|
|
1684
1870
|
// Skip streak tracking in plan mode - plan mode is inherently read-only
|
|
1685
1871
|
const thisIterToolNames = assistantMessage.tool_calls.map(t => t.function.name);
|
|
@@ -1752,6 +1938,7 @@ class AgenticRunner {
|
|
|
1752
1938
|
this._lastWrittenFiles = [...writtenFiles];
|
|
1753
1939
|
logRunTotals('loop-break');
|
|
1754
1940
|
const loopResponse = finalContent || 'I got stuck in a loop and could not complete the task. Please try rephrasing your request.';
|
|
1941
|
+
this.lastRunOutcome = { status: 'completed_with_warnings', phase: 'loop-break', warning: 'Loop breaker forced finalization.' };
|
|
1755
1942
|
await this.emitStreaming(loopResponse);
|
|
1756
1943
|
this.onContent(loopResponse);
|
|
1757
1944
|
return loopResponse;
|
|
@@ -1787,6 +1974,7 @@ class AgenticRunner {
|
|
|
1787
1974
|
this._lastWrittenFiles = [...writtenFiles];
|
|
1788
1975
|
logRunTotals('no-progress-break');
|
|
1789
1976
|
const npResponse = npContent || 'I spent too many iterations researching without making progress. Please try a more specific request.';
|
|
1977
|
+
this.lastRunOutcome = { status: 'completed_with_warnings', phase: 'no-progress-break', warning: 'No-progress breaker forced finalization.' };
|
|
1790
1978
|
await this.emitStreaming(npResponse);
|
|
1791
1979
|
this.onContent(npResponse);
|
|
1792
1980
|
return npResponse;
|
|
@@ -1801,6 +1989,18 @@ class AgenticRunner {
|
|
|
1801
1989
|
// Final response - no more tool calls.
|
|
1802
1990
|
// The non-streaming chat() call already returned content. Use it directly
|
|
1803
1991
|
// instead of making a redundant streaming call that may return empty/truncated.
|
|
1992
|
+
if (pendingCommandVerifications.size > 0 && verificationReminderCount < 1) {
|
|
1993
|
+
verificationReminderCount++;
|
|
1994
|
+
messages.push({
|
|
1995
|
+
role: 'system',
|
|
1996
|
+
content:
|
|
1997
|
+
`STOP. You are about to answer, but you still have unverified state-changing command results. ` +
|
|
1998
|
+
`Before claiming completion, run at least one read-only verification step for these categories: ${[...pendingCommandVerifications.keys()].join(', ')}. ` +
|
|
1999
|
+
`${[...pendingCommandVerifications.values()].join(' ')}`
|
|
2000
|
+
});
|
|
2001
|
+
continue;
|
|
2002
|
+
}
|
|
2003
|
+
|
|
1804
2004
|
let existingContent = stripControlTokens(assistantMessage.content || '');
|
|
1805
2005
|
|
|
1806
2006
|
// Extract inline <think>/<thinking> blocks from content (Qwen3.5 embeds reasoning in content)
|
|
@@ -1835,11 +2035,23 @@ class AgenticRunner {
|
|
|
1835
2035
|
|
|
1836
2036
|
const reasoning = assistantMessage.reasoning || assistantMessage.reasoning_content || inlineReasoning;
|
|
1837
2037
|
|
|
2038
|
+
if (pendingCommandVerifications.size > 0) {
|
|
2039
|
+
const verificationWarning = `Warning: the requested command effects were not independently verified. ${[...pendingCommandVerifications.values()].join(' ')}`;
|
|
2040
|
+
existingContent = existingContent
|
|
2041
|
+
? `${verificationWarning}\n\n${existingContent}`
|
|
2042
|
+
: verificationWarning;
|
|
2043
|
+
}
|
|
2044
|
+
|
|
1838
2045
|
// If the model already produced content in this iteration, use it directly
|
|
1839
2046
|
if (existingContent) {
|
|
1840
2047
|
if (reasoning) {
|
|
1841
2048
|
this.onReasoning(stripControlTokens(reasoning));
|
|
1842
2049
|
}
|
|
2050
|
+
this.lastRunOutcome = {
|
|
2051
|
+
status: pendingCommandVerifications.size > 0 ? 'completed_with_warnings' : 'completed',
|
|
2052
|
+
phase: 'final-content',
|
|
2053
|
+
warning: pendingCommandVerifications.size > 0 ? 'Completion claims were not fully verified.' : null
|
|
2054
|
+
};
|
|
1843
2055
|
await this.emitStreaming(existingContent);
|
|
1844
2056
|
this.onContent(existingContent);
|
|
1845
2057
|
logRunTotals('final-content');
|
|
@@ -1853,6 +2065,7 @@ class AgenticRunner {
|
|
|
1853
2065
|
// Some models put the actual answer in reasoning when content is empty.
|
|
1854
2066
|
// Return a minimal acknowledgment rather than an empty response.
|
|
1855
2067
|
const fallback = '(Response was in reasoning only - see thinking output above)';
|
|
2068
|
+
this.lastRunOutcome = { status: 'completed_with_warnings', phase: 'final-reasoning-fallback', warning: 'Model returned reasoning without visible content.' };
|
|
1856
2069
|
await this.emitStreaming(fallback);
|
|
1857
2070
|
this.onContent(fallback);
|
|
1858
2071
|
logRunTotals('final-reasoning-fallback');
|
|
@@ -1877,6 +2090,7 @@ class AgenticRunner {
|
|
|
1877
2090
|
const content = stripControlTokens(thinkMsg?.content || '');
|
|
1878
2091
|
|
|
1879
2092
|
if (thinkReasoning) this.onReasoning(stripControlTokens(thinkReasoning));
|
|
2093
|
+
this.lastRunOutcome = { status: 'completed', phase: 'final-think-pass', warning: null };
|
|
1880
2094
|
await this.emitStreaming(content);
|
|
1881
2095
|
this.onContent(content);
|
|
1882
2096
|
logRunTotals('final-think-pass');
|
|
@@ -1895,10 +2109,21 @@ class AgenticRunner {
|
|
|
1895
2109
|
signal: options.signal
|
|
1896
2110
|
});
|
|
1897
2111
|
|
|
1898
|
-
const
|
|
2112
|
+
const streamResult = await consumeStream(streamResponse, (token) => {
|
|
1899
2113
|
this.onToken(token);
|
|
1900
2114
|
});
|
|
2115
|
+
const content = streamResult.completed || !streamResult.warning
|
|
2116
|
+
? streamResult.content
|
|
2117
|
+
: `${streamResult.warning}\n\n${streamResult.content}`.trim();
|
|
1901
2118
|
|
|
2119
|
+
if (!streamResult.completed && streamResult.warning) {
|
|
2120
|
+
this.onWarning(streamResult.warning);
|
|
2121
|
+
}
|
|
2122
|
+
this.lastRunOutcome = {
|
|
2123
|
+
status: streamResult.completed ? 'completed' : 'completed_with_warnings',
|
|
2124
|
+
phase: 'final-stream',
|
|
2125
|
+
warning: streamResult.warning
|
|
2126
|
+
};
|
|
1902
2127
|
this.onContent(content);
|
|
1903
2128
|
logRunTotals('final-stream');
|
|
1904
2129
|
return content;
|
|
@@ -1906,9 +2131,10 @@ class AgenticRunner {
|
|
|
1906
2131
|
}
|
|
1907
2132
|
|
|
1908
2133
|
this.onWarning('Max tool iterations reached');
|
|
2134
|
+
this.lastRunOutcome = { status: 'failed', phase: 'max-iterations', warning: 'Max tool iterations reached.' };
|
|
1909
2135
|
logRunTotals('max-iterations');
|
|
1910
2136
|
return '';
|
|
1911
2137
|
}
|
|
1912
2138
|
}
|
|
1913
2139
|
|
|
1914
|
-
module.exports = { AgenticRunner, TOOLS, READ_ONLY_TOOLS, executeTool, setMcpClient };
|
|
2140
|
+
module.exports = { AgenticRunner, TOOLS, READ_ONLY_TOOLS, executeTool, setMcpClient, sanitizeToolCalls, classifyCommandVerification };
|