erosolar-cli 2.1.168 → 2.1.169
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/agents/erosolar-code.rules.json +2 -2
- package/agents/general.rules.json +21 -3
- package/dist/capabilities/statusCapability.js +2 -2
- package/dist/capabilities/statusCapability.js.map +1 -1
- package/dist/contracts/agent-schemas.json +5 -5
- package/dist/core/agent.d.ts +70 -24
- package/dist/core/agent.d.ts.map +1 -1
- package/dist/core/agent.js +424 -248
- package/dist/core/agent.js.map +1 -1
- package/dist/core/preferences.d.ts +1 -0
- package/dist/core/preferences.d.ts.map +1 -1
- package/dist/core/preferences.js +8 -1
- package/dist/core/preferences.js.map +1 -1
- package/dist/core/reliabilityPrompt.d.ts +9 -0
- package/dist/core/reliabilityPrompt.d.ts.map +1 -0
- package/dist/core/reliabilityPrompt.js +31 -0
- package/dist/core/reliabilityPrompt.js.map +1 -0
- package/dist/core/schemaValidator.js +3 -3
- package/dist/core/schemaValidator.js.map +1 -1
- package/dist/core/toolPreconditions.d.ts +0 -11
- package/dist/core/toolPreconditions.d.ts.map +1 -1
- package/dist/core/toolPreconditions.js +33 -164
- package/dist/core/toolPreconditions.js.map +1 -1
- package/dist/core/toolRuntime.d.ts.map +1 -1
- package/dist/core/toolRuntime.js +9 -114
- package/dist/core/toolRuntime.js.map +1 -1
- package/dist/core/updateChecker.d.ts +61 -1
- package/dist/core/updateChecker.d.ts.map +1 -1
- package/dist/core/updateChecker.js +147 -3
- package/dist/core/updateChecker.js.map +1 -1
- package/dist/headless/headlessApp.d.ts.map +1 -1
- package/dist/headless/headlessApp.js +0 -39
- package/dist/headless/headlessApp.js.map +1 -1
- package/dist/plugins/tools/nodeDefaults.d.ts.map +1 -1
- package/dist/plugins/tools/nodeDefaults.js +0 -2
- package/dist/plugins/tools/nodeDefaults.js.map +1 -1
- package/dist/providers/openaiResponsesProvider.d.ts.map +1 -1
- package/dist/providers/openaiResponsesProvider.js +79 -74
- package/dist/providers/openaiResponsesProvider.js.map +1 -1
- package/dist/runtime/agentController.d.ts.map +1 -1
- package/dist/runtime/agentController.js +6 -3
- package/dist/runtime/agentController.js.map +1 -1
- package/dist/runtime/agentSession.d.ts +0 -2
- package/dist/runtime/agentSession.d.ts.map +1 -1
- package/dist/runtime/agentSession.js +2 -2
- package/dist/runtime/agentSession.js.map +1 -1
- package/dist/shell/interactiveShell.d.ts +11 -12
- package/dist/shell/interactiveShell.d.ts.map +1 -1
- package/dist/shell/interactiveShell.js +269 -193
- package/dist/shell/interactiveShell.js.map +1 -1
- package/dist/shell/systemPrompt.d.ts.map +1 -1
- package/dist/shell/systemPrompt.js +4 -15
- package/dist/shell/systemPrompt.js.map +1 -1
- package/dist/subagents/taskRunner.js +2 -1
- package/dist/subagents/taskRunner.js.map +1 -1
- package/dist/tools/bashTools.d.ts.map +1 -1
- package/dist/tools/bashTools.js +101 -8
- package/dist/tools/bashTools.js.map +1 -1
- package/dist/tools/diffUtils.d.ts +8 -2
- package/dist/tools/diffUtils.d.ts.map +1 -1
- package/dist/tools/diffUtils.js +72 -13
- package/dist/tools/diffUtils.js.map +1 -1
- package/dist/tools/grepTools.d.ts.map +1 -1
- package/dist/tools/grepTools.js +10 -2
- package/dist/tools/grepTools.js.map +1 -1
- package/dist/tools/searchTools.d.ts.map +1 -1
- package/dist/tools/searchTools.js +4 -2
- package/dist/tools/searchTools.js.map +1 -1
- package/dist/ui/PromptController.d.ts +2 -3
- package/dist/ui/PromptController.d.ts.map +1 -1
- package/dist/ui/PromptController.js +2 -3
- package/dist/ui/PromptController.js.map +1 -1
- package/dist/ui/ShellUIAdapter.d.ts +71 -18
- package/dist/ui/ShellUIAdapter.d.ts.map +1 -1
- package/dist/ui/ShellUIAdapter.js +237 -139
- package/dist/ui/ShellUIAdapter.js.map +1 -1
- package/dist/ui/UnifiedUIController.d.ts +0 -1
- package/dist/ui/UnifiedUIController.d.ts.map +1 -1
- package/dist/ui/UnifiedUIController.js +0 -1
- package/dist/ui/UnifiedUIController.js.map +1 -1
- package/dist/ui/UnifiedUIRenderer.d.ts +122 -7
- package/dist/ui/UnifiedUIRenderer.d.ts.map +1 -1
- package/dist/ui/UnifiedUIRenderer.js +823 -130
- package/dist/ui/UnifiedUIRenderer.js.map +1 -1
- package/dist/ui/animatedStatus.d.ts +129 -0
- package/dist/ui/animatedStatus.d.ts.map +1 -0
- package/dist/ui/animatedStatus.js +384 -0
- package/dist/ui/animatedStatus.js.map +1 -0
- package/dist/ui/display.d.ts +13 -48
- package/dist/ui/display.d.ts.map +1 -1
- package/dist/ui/display.js +22 -105
- package/dist/ui/display.js.map +1 -1
- package/dist/ui/shortcutsHelp.d.ts.map +1 -1
- package/dist/ui/shortcutsHelp.js +0 -1
- package/dist/ui/shortcutsHelp.js.map +1 -1
- package/dist/ui/unified/index.d.ts +1 -1
- package/dist/ui/unified/index.d.ts.map +1 -1
- package/dist/ui/unified/index.js +0 -2
- package/dist/ui/unified/index.js.map +1 -1
- package/package.json +1 -2
- package/dist/StringUtils.d.ts +0 -8
- package/dist/StringUtils.d.ts.map +0 -1
- package/dist/StringUtils.js +0 -11
- package/dist/StringUtils.js.map +0 -1
- package/dist/core/aiFlowSupervisor.d.ts +0 -44
- package/dist/core/aiFlowSupervisor.d.ts.map +0 -1
- package/dist/core/aiFlowSupervisor.js +0 -299
- package/dist/core/aiFlowSupervisor.js.map +0 -1
- package/dist/core/cliTestHarness.d.ts +0 -200
- package/dist/core/cliTestHarness.d.ts.map +0 -1
- package/dist/core/cliTestHarness.js +0 -549
- package/dist/core/cliTestHarness.js.map +0 -1
- package/dist/core/testUtils.d.ts +0 -121
- package/dist/core/testUtils.d.ts.map +0 -1
- package/dist/core/testUtils.js +0 -235
- package/dist/core/testUtils.js.map +0 -1
- package/dist/core/toolValidation.d.ts +0 -116
- package/dist/core/toolValidation.d.ts.map +0 -1
- package/dist/core/toolValidation.js +0 -282
- package/dist/core/toolValidation.js.map +0 -1
- package/dist/ui/compactRenderer.d.ts +0 -139
- package/dist/ui/compactRenderer.d.ts.map +0 -1
- package/dist/ui/compactRenderer.js +0 -398
- package/dist/ui/compactRenderer.js.map +0 -1
- package/dist/ui/streamingFormatter.d.ts +0 -30
- package/dist/ui/streamingFormatter.d.ts.map +0 -1
- package/dist/ui/streamingFormatter.js +0 -91
- package/dist/ui/streamingFormatter.js.map +0 -1
- package/dist/utils/errorUtils.d.ts +0 -16
- package/dist/utils/errorUtils.d.ts.map +0 -1
- package/dist/utils/errorUtils.js +0 -66
- package/dist/utils/errorUtils.js.map +0 -1
package/dist/core/agent.js
CHANGED
|
@@ -4,65 +4,7 @@ import { safeErrorMessage } from './secretStore.js';
|
|
|
4
4
|
* Maximum number of context overflow recovery attempts
|
|
5
5
|
*/
|
|
6
6
|
const MAX_CONTEXT_RECOVERY_ATTEMPTS = 3;
|
|
7
|
-
|
|
8
|
-
* Maximum number of auto-continuation attempts when model expresses intent but doesn't act
|
|
9
|
-
*/
|
|
10
|
-
const MAX_AUTO_CONTINUE_ATTEMPTS = 3;
|
|
11
|
-
/**
|
|
12
|
-
* Streaming safety timeouts (ms)
|
|
13
|
-
* - First chunk timeout: fail fast if the stream never starts
|
|
14
|
-
* - Inactivity timeout: abort if no chunks arrive for an extended period
|
|
15
|
-
*/
|
|
16
|
-
// Allow more headroom before declaring a streaming stall to avoid premature fallbacks.
|
|
17
|
-
const STREAM_FIRST_CHUNK_TIMEOUT_MS = 25000;
|
|
18
|
-
const STREAM_INACTIVITY_TIMEOUT_MS = 60000;
|
|
19
|
-
/**
|
|
20
|
-
* Patterns that indicate the model intends to take action but hasn't yet
|
|
21
|
-
* These suggest the model should be prompted to continue
|
|
22
|
-
*/
|
|
23
|
-
const INTENT_WITHOUT_ACTION_PATTERNS = [
|
|
24
|
-
// "Let me X" patterns - model is stating what it will do
|
|
25
|
-
/\blet me\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|examine|review)/i,
|
|
26
|
-
// "I'll X" / "I will X" patterns
|
|
27
|
-
/\bi['']ll\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|start|begin|now)/i,
|
|
28
|
-
/\bi will\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|start|begin|now)/i,
|
|
29
|
-
// "I'm going to X" patterns
|
|
30
|
-
/\bi['']m going to\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze)/i,
|
|
31
|
-
// "Now I'll X" / "First, I'll X" patterns
|
|
32
|
-
/\b(now|first|next)\s*(,)?\s*i['']ll\s+/i,
|
|
33
|
-
// Explicit continuation signals
|
|
34
|
-
/\bhere['']s (the|my) (plan|approach|solution|implementation)/i,
|
|
35
|
-
// Numbered steps suggesting action to come
|
|
36
|
-
/^\s*\d+\.\s+(create|write|implement|add|update|edit|modify|fix|show|read|check)/im,
|
|
37
|
-
// Bullet points suggesting planned actions
|
|
38
|
-
/^[\s•\-\*]+\s*(create|write|implement|add|update|edit|modify|fix|shows?|reads?|checks?)\s/im,
|
|
39
|
-
];
|
|
40
|
-
const AUTO_CONTINUE_PROMPTS = [
|
|
41
|
-
'Continue. Use tools now: start with Read/read_file to inspect the target file, then call Edit (or Write if available) with file_path/old_string/new_string to apply changes. Keep using tools until the task is done.',
|
|
42
|
-
'You MUST call tools immediately. Issue Read -> Edit/Write tool calls with explicit parameters; no more explaining or planning.',
|
|
43
|
-
'CRITICAL: Call a tool right now. Use Edit with file_path, old_string, new_string (or Write with file_path and content). Respond with tool calls only.',
|
|
44
|
-
];
|
|
45
|
-
/**
|
|
46
|
-
* Check if response indicates intent to act without actually acting
|
|
47
|
-
* This detects when the model says "let me do X" but doesn't call any tools
|
|
48
|
-
*/
|
|
49
|
-
function shouldAutoContinue(content, hasToolCalls) {
|
|
50
|
-
// If there are tool calls, no need to auto-continue
|
|
51
|
-
if (hasToolCalls) {
|
|
52
|
-
return false;
|
|
53
|
-
}
|
|
54
|
-
// If content is very short, likely not an incomplete intent
|
|
55
|
-
if (content.length < 50) {
|
|
56
|
-
return false;
|
|
57
|
-
}
|
|
58
|
-
// Check for intent patterns
|
|
59
|
-
for (const pattern of INTENT_WITHOUT_ACTION_PATTERNS) {
|
|
60
|
-
if (pattern.test(content)) {
|
|
61
|
-
return true;
|
|
62
|
-
}
|
|
63
|
-
}
|
|
64
|
-
return false;
|
|
65
|
-
}
|
|
7
|
+
// Streaming runs without timeouts - we let the model take as long as it needs
|
|
66
8
|
/**
|
|
67
9
|
* Check if an error is a context overflow error
|
|
68
10
|
*/
|
|
@@ -77,15 +19,53 @@ function isContextOverflowError(error) {
|
|
|
77
19
|
message.includes('max_tokens') ||
|
|
78
20
|
message.includes('context window'));
|
|
79
21
|
}
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
22
|
+
/**
|
|
23
|
+
* Check if an error is a transient/retryable error (network issues, rate limits, server errors)
|
|
24
|
+
*/
|
|
25
|
+
function isTransientError(error) {
|
|
26
|
+
if (!(error instanceof Error))
|
|
27
|
+
return false;
|
|
28
|
+
const message = error.message.toLowerCase();
|
|
29
|
+
// Network errors
|
|
30
|
+
const networkPatterns = [
|
|
31
|
+
'econnrefused', 'econnreset', 'enotfound', 'etimedout', 'epipe',
|
|
32
|
+
'network error', 'connection error', 'fetch failed', 'socket hang up',
|
|
33
|
+
'network is unreachable', 'connection refused', 'connection reset',
|
|
34
|
+
];
|
|
35
|
+
if (networkPatterns.some(p => message.includes(p))) {
|
|
36
|
+
return true;
|
|
37
|
+
}
|
|
38
|
+
// Rate limit errors
|
|
39
|
+
if (message.includes('rate limit') || message.includes('429') || message.includes('too many requests')) {
|
|
40
|
+
return true;
|
|
41
|
+
}
|
|
42
|
+
// Server errors (5xx)
|
|
43
|
+
if (message.includes('500') || message.includes('502') || message.includes('503') || message.includes('504')) {
|
|
44
|
+
return true;
|
|
45
|
+
}
|
|
46
|
+
// Temporary service errors
|
|
47
|
+
if (message.includes('service unavailable') || message.includes('temporarily unavailable') ||
|
|
48
|
+
message.includes('overloaded') || message.includes('server error')) {
|
|
49
|
+
return true;
|
|
88
50
|
}
|
|
51
|
+
return false;
|
|
52
|
+
}
|
|
53
|
+
/**
|
|
54
|
+
* Maximum number of transient error retries
|
|
55
|
+
*/
|
|
56
|
+
const MAX_TRANSIENT_RETRIES = 3;
|
|
57
|
+
/**
|
|
58
|
+
* Delay before retry (in ms), with exponential backoff
|
|
59
|
+
*/
|
|
60
|
+
function getRetryDelay(attempt) {
|
|
61
|
+
// Base delay of 1 second, doubles each attempt: 1s, 2s, 4s
|
|
62
|
+
return Math.min(1000 * Math.pow(2, attempt - 1), 10000);
|
|
63
|
+
}
|
|
64
|
+
/**
|
|
65
|
+
* Sleep for the specified milliseconds
|
|
66
|
+
*/
|
|
67
|
+
function sleep(ms) {
|
|
68
|
+
return new Promise(resolve => setTimeout(resolve, ms));
|
|
89
69
|
}
|
|
90
70
|
export class AgentRuntime {
|
|
91
71
|
messages = [];
|
|
@@ -99,7 +79,19 @@ export class AgentRuntime {
|
|
|
99
79
|
modelId;
|
|
100
80
|
workingDirectory;
|
|
101
81
|
cancellationRequested = false;
|
|
102
|
-
|
|
82
|
+
// Loop detection: track last tool calls to detect stuck loops
|
|
83
|
+
lastToolCallSignature = null;
|
|
84
|
+
repeatedToolCallCount = 0;
|
|
85
|
+
static MAX_REPEATED_TOOL_CALLS = 5; // Allow up to 4 identical calls before stopping
|
|
86
|
+
// Behavioral loop detection: track recent tool calls to catch repetitive patterns
|
|
87
|
+
// e.g., calling "execute_bash" with "git status" 5 times even if output differs slightly
|
|
88
|
+
recentToolCalls = [];
|
|
89
|
+
static TOOL_HISTORY_SIZE = 12;
|
|
90
|
+
static BEHAVIORAL_LOOP_THRESHOLD = 3; // Same tool+cmd 3+ times in last 12 = stuck
|
|
91
|
+
// Tool result cache: prevent duplicate identical tool calls by returning cached results
|
|
92
|
+
// Key: tool signature (name + JSON args), Value: result string
|
|
93
|
+
toolResultCache = new Map();
|
|
94
|
+
static TOOL_CACHE_MAX_SIZE = 50; // Keep last 50 tool results
|
|
103
95
|
constructor(options) {
|
|
104
96
|
this.provider = options.provider;
|
|
105
97
|
this.toolRuntime = options.toolRuntime;
|
|
@@ -108,7 +100,6 @@ export class AgentRuntime {
|
|
|
108
100
|
this.providerId = options.providerId ?? 'unknown';
|
|
109
101
|
this.modelId = options.modelId ?? 'unknown';
|
|
110
102
|
this.workingDirectory = options.workingDirectory ?? process.cwd();
|
|
111
|
-
this._autoContinueEnabled = options.autoContinue ?? false;
|
|
112
103
|
const trimmedPrompt = options.systemPrompt.trim();
|
|
113
104
|
this.baseSystemPrompt = trimmedPrompt || null;
|
|
114
105
|
if (trimmedPrompt) {
|
|
@@ -134,25 +125,14 @@ export class AgentRuntime {
|
|
|
134
125
|
isRunning() {
|
|
135
126
|
return this.activeRun !== null;
|
|
136
127
|
}
|
|
137
|
-
/**
|
|
138
|
-
* Check if auto-continuation is enabled.
|
|
139
|
-
*/
|
|
140
|
-
isAutoContinueEnabled() {
|
|
141
|
-
return this._autoContinueEnabled;
|
|
142
|
-
}
|
|
143
|
-
/**
|
|
144
|
-
* Enable or disable auto-continuation.
|
|
145
|
-
*/
|
|
146
|
-
setAutoContinue(enabled) {
|
|
147
|
-
this._autoContinueEnabled = enabled;
|
|
148
|
-
}
|
|
149
128
|
async send(text, useStreaming = false) {
|
|
150
129
|
const prompt = text.trim();
|
|
151
130
|
if (!prompt) {
|
|
152
131
|
return '';
|
|
153
132
|
}
|
|
154
|
-
// Reset cancellation flag at start of new request
|
|
133
|
+
// Reset cancellation flag and loop tracking at start of new request
|
|
155
134
|
this.cancellationRequested = false;
|
|
135
|
+
this.resetBehavioralLoopTracking();
|
|
156
136
|
// Handle multi-line paste: show summary to user, send full content to AI
|
|
157
137
|
if (isMultilinePaste(prompt)) {
|
|
158
138
|
const processed = processPaste(prompt);
|
|
@@ -168,28 +148,9 @@ export class AgentRuntime {
|
|
|
168
148
|
const run = { startedAt: Date.now() };
|
|
169
149
|
this.activeRun = run;
|
|
170
150
|
try {
|
|
151
|
+
// Always use streaming when available - no fallback
|
|
171
152
|
if (useStreaming && this.provider.generateStream) {
|
|
172
|
-
|
|
173
|
-
return await this.processConversationStreaming();
|
|
174
|
-
}
|
|
175
|
-
catch (error) {
|
|
176
|
-
const message = safeErrorMessage(error);
|
|
177
|
-
const reason = error instanceof StreamInterruptionError ? error.reason : undefined;
|
|
178
|
-
const partialResponse = error instanceof StreamInterruptionError ? error.partialResponse : undefined;
|
|
179
|
-
console.warn(`[agent] Streaming failed, falling back to non-streaming: ${message}`);
|
|
180
|
-
// If we captured part of the response, seed it into history and ask the model to continue
|
|
181
|
-
// so we don't restart the answer from scratch during fallback.
|
|
182
|
-
if (partialResponse && partialResponse.trim()) {
|
|
183
|
-
const partial = partialResponse.trim();
|
|
184
|
-
this.messages.push({ role: 'assistant', content: partial });
|
|
185
|
-
this.messages.push({
|
|
186
|
-
role: 'user',
|
|
187
|
-
content: 'Continue your previous response from where it stopped. Do not repeat text you already provided.',
|
|
188
|
-
});
|
|
189
|
-
}
|
|
190
|
-
this.callbacks.onStreamFallback?.({ message, error, reason, partialResponse });
|
|
191
|
-
return await this.processConversation();
|
|
192
|
-
}
|
|
153
|
+
return await this.processConversationStreaming();
|
|
193
154
|
}
|
|
194
155
|
return await this.processConversation();
|
|
195
156
|
}
|
|
@@ -203,7 +164,7 @@ export class AgentRuntime {
|
|
|
203
164
|
}
|
|
204
165
|
async processConversation() {
|
|
205
166
|
let contextRecoveryAttempts = 0;
|
|
206
|
-
let
|
|
167
|
+
let transientRetryAttempts = 0;
|
|
207
168
|
while (true) {
|
|
208
169
|
// Check for cancellation at start of each iteration
|
|
209
170
|
if (this.cancellationRequested) {
|
|
@@ -219,44 +180,59 @@ export class AgentRuntime {
|
|
|
219
180
|
// Reset recovery attempts on successful generation
|
|
220
181
|
contextRecoveryAttempts = 0;
|
|
221
182
|
if (response.type === 'tool_calls') {
|
|
222
|
-
|
|
223
|
-
const
|
|
183
|
+
// BEHAVIORAL LOOP DETECTION: Check if model is stuck calling same tool repeatedly
|
|
184
|
+
const behavioralLoopResult = this.checkBehavioralLoop(response.toolCalls);
|
|
185
|
+
if (behavioralLoopResult) {
|
|
186
|
+
this.emitAssistantMessage(behavioralLoopResult, { isFinal: true, usage, contextStats });
|
|
187
|
+
this.messages.push({ role: 'assistant', content: behavioralLoopResult });
|
|
188
|
+
return behavioralLoopResult;
|
|
189
|
+
}
|
|
190
|
+
// Loop detection: check if same tool calls are being repeated (exact signature match)
|
|
191
|
+
const toolSignature = response.toolCalls
|
|
192
|
+
.map((t) => `${t.name}:${JSON.stringify(t.arguments)}`)
|
|
193
|
+
.sort()
|
|
194
|
+
.join('|');
|
|
195
|
+
if (toolSignature === this.lastToolCallSignature) {
|
|
196
|
+
this.repeatedToolCallCount++;
|
|
197
|
+
if (this.repeatedToolCallCount >= AgentRuntime.MAX_REPEATED_TOOL_CALLS) {
|
|
198
|
+
// Break out of loop - model is stuck
|
|
199
|
+
const loopMsg = `Tool loop detected: same tools called ${this.repeatedToolCallCount} times. Please try a different approach or provide more specific instructions.`;
|
|
200
|
+
this.emitAssistantMessage(loopMsg, { isFinal: true, usage, contextStats });
|
|
201
|
+
this.messages.push({ role: 'assistant', content: loopMsg });
|
|
202
|
+
this.lastToolCallSignature = null;
|
|
203
|
+
this.repeatedToolCallCount = 0;
|
|
204
|
+
return loopMsg;
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
else {
|
|
208
|
+
this.lastToolCallSignature = toolSignature;
|
|
209
|
+
this.repeatedToolCallCount = 1;
|
|
210
|
+
}
|
|
211
|
+
// Emit narration if present - it shows the AI's thought process before tools
|
|
212
|
+
const narration = response.content?.trim();
|
|
224
213
|
if (narration) {
|
|
225
|
-
this.emitAssistantMessage(narration, {
|
|
214
|
+
this.emitAssistantMessage(narration, {
|
|
215
|
+
isFinal: false,
|
|
216
|
+
usage,
|
|
217
|
+
contextStats,
|
|
218
|
+
});
|
|
226
219
|
}
|
|
227
220
|
const assistantMessage = {
|
|
228
221
|
role: 'assistant',
|
|
229
|
-
content:
|
|
222
|
+
content: response.content ?? '',
|
|
230
223
|
};
|
|
231
224
|
if (response.toolCalls?.length) {
|
|
232
225
|
assistantMessage.toolCalls = response.toolCalls;
|
|
233
226
|
}
|
|
234
227
|
this.messages.push(assistantMessage);
|
|
235
228
|
await this.resolveToolCalls(response.toolCalls);
|
|
236
|
-
// Reset auto-continue counter since model is actively working
|
|
237
|
-
autoContinueAttempts = 0;
|
|
238
229
|
continue;
|
|
239
230
|
}
|
|
240
231
|
const reply = response.content?.trim() ?? '';
|
|
241
|
-
//
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
autoContinueAttempts++;
|
|
246
|
-
// Emit the planning content but mark as non-final
|
|
247
|
-
if (reply) {
|
|
248
|
-
this.emitAssistantMessage(reply, { isFinal: false, usage, contextStats });
|
|
249
|
-
}
|
|
250
|
-
this.messages.push({ role: 'assistant', content: reply });
|
|
251
|
-
// Auto-prompt with increasingly direct instructions
|
|
252
|
-
const promptIndex = Math.min(autoContinueAttempts - 1, AUTO_CONTINUE_PROMPTS.length - 1);
|
|
253
|
-
this.messages.push({
|
|
254
|
-
role: 'user',
|
|
255
|
-
content: AUTO_CONTINUE_PROMPTS[promptIndex],
|
|
256
|
-
});
|
|
257
|
-
const autoContinueMessage = `Model expressed intent but didn't use tools. Auto-prompting to continue...`;
|
|
258
|
-
this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, autoContinueMessage);
|
|
259
|
-
continue;
|
|
232
|
+
// Reset loop detection when we get a text response (not just tool calls)
|
|
233
|
+
if (reply.length >= 10) {
|
|
234
|
+
this.lastToolCallSignature = null;
|
|
235
|
+
this.repeatedToolCallCount = 0;
|
|
260
236
|
}
|
|
261
237
|
if (reply) {
|
|
262
238
|
this.emitAssistantMessage(reply, { isFinal: true, usage, contextStats });
|
|
@@ -278,6 +254,14 @@ export class AgentRuntime {
|
|
|
278
254
|
continue;
|
|
279
255
|
}
|
|
280
256
|
}
|
|
257
|
+
// Auto-retry transient errors (network issues, rate limits, server errors)
|
|
258
|
+
if (isTransientError(error) && transientRetryAttempts < MAX_TRANSIENT_RETRIES) {
|
|
259
|
+
transientRetryAttempts++;
|
|
260
|
+
const delayMs = getRetryDelay(transientRetryAttempts);
|
|
261
|
+
this.callbacks.onRetrying?.(transientRetryAttempts, MAX_TRANSIENT_RETRIES, error);
|
|
262
|
+
await sleep(delayMs);
|
|
263
|
+
continue;
|
|
264
|
+
}
|
|
281
265
|
// Re-throw if not recoverable or recovery failed
|
|
282
266
|
throw error;
|
|
283
267
|
}
|
|
@@ -288,7 +272,7 @@ export class AgentRuntime {
|
|
|
288
272
|
return this.processConversation();
|
|
289
273
|
}
|
|
290
274
|
let contextRecoveryAttempts = 0;
|
|
291
|
-
let
|
|
275
|
+
let transientRetryAttempts = 0;
|
|
292
276
|
while (true) {
|
|
293
277
|
// Check for cancellation at start of each iteration
|
|
294
278
|
if (this.cancellationRequested) {
|
|
@@ -321,45 +305,10 @@ export class AgentRuntime {
|
|
|
321
305
|
}
|
|
322
306
|
}
|
|
323
307
|
};
|
|
324
|
-
|
|
325
|
-
const base = reason === 'startup-timeout'
|
|
326
|
-
? 'Streaming stalled before any content arrived.'
|
|
327
|
-
: 'Streaming stalled due to inactivity.';
|
|
328
|
-
return new StreamInterruptionError(reason, `${base} Falling back to non-streaming.`, fullContent || reasoningContent);
|
|
329
|
-
};
|
|
330
|
-
// Timer for first token arrival
|
|
331
|
-
let startupTimer = null;
|
|
332
|
-
const startupTimeoutPromise = new Promise((_, reject) => {
|
|
333
|
-
startupTimer = setTimeout(() => reject(buildTimeoutError('startup-timeout')), STREAM_FIRST_CHUNK_TIMEOUT_MS);
|
|
334
|
-
});
|
|
335
|
-
const createIdleTimeout = () => {
|
|
336
|
-
let idleTimer = null;
|
|
337
|
-
const promise = new Promise((_, reject) => {
|
|
338
|
-
idleTimer = setTimeout(() => reject(buildTimeoutError('idle-timeout')), STREAM_INACTIVITY_TIMEOUT_MS);
|
|
339
|
-
});
|
|
340
|
-
const cancel = () => {
|
|
341
|
-
if (idleTimer) {
|
|
342
|
-
clearTimeout(idleTimer);
|
|
343
|
-
idleTimer = null;
|
|
344
|
-
}
|
|
345
|
-
};
|
|
346
|
-
return { promise, cancel };
|
|
347
|
-
};
|
|
348
|
-
let idleTimeout = createIdleTimeout();
|
|
349
|
-
let firstChunkSeen = false;
|
|
308
|
+
// Simple streaming loop - no timeouts, let the stream run until done
|
|
350
309
|
try {
|
|
351
310
|
while (true) {
|
|
352
|
-
const
|
|
353
|
-
iterator.next(),
|
|
354
|
-
idleTimeout.promise,
|
|
355
|
-
];
|
|
356
|
-
if (!firstChunkSeen) {
|
|
357
|
-
races.push(startupTimeoutPromise);
|
|
358
|
-
}
|
|
359
|
-
const result = (await Promise.race(races));
|
|
360
|
-
// Reset idle timer for the next iteration
|
|
361
|
-
idleTimeout.cancel();
|
|
362
|
-
idleTimeout = createIdleTimeout();
|
|
311
|
+
const result = await iterator.next();
|
|
363
312
|
// Check for cancellation during streaming
|
|
364
313
|
if (this.cancellationRequested) {
|
|
365
314
|
await closeStream();
|
|
@@ -374,17 +323,10 @@ export class AgentRuntime {
|
|
|
374
323
|
break;
|
|
375
324
|
}
|
|
376
325
|
const chunk = result.value;
|
|
377
|
-
if (!firstChunkSeen) {
|
|
378
|
-
firstChunkSeen = true;
|
|
379
|
-
if (startupTimer) {
|
|
380
|
-
clearTimeout(startupTimer);
|
|
381
|
-
startupTimer = null;
|
|
382
|
-
}
|
|
383
|
-
}
|
|
384
326
|
if (chunk.type === 'reasoning' && chunk.content) {
|
|
327
|
+
// Buffer reasoning content - don't stream token-by-token
|
|
328
|
+
// It will be emitted as a complete block when ready
|
|
385
329
|
reasoningContent += chunk.content;
|
|
386
|
-
// Surface reasoning tokens to the UI so thought process is visible
|
|
387
|
-
this.callbacks.onStreamChunk?.(chunk.content, 'reasoning');
|
|
388
330
|
continue;
|
|
389
331
|
}
|
|
390
332
|
if (chunk.type === 'content' && chunk.content) {
|
|
@@ -397,11 +339,19 @@ export class AgentRuntime {
|
|
|
397
339
|
}
|
|
398
340
|
}
|
|
399
341
|
else if (chunk.type === 'tool_call' && chunk.toolCall) {
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
342
|
+
// On first tool call, flush any buffered content
|
|
343
|
+
if (toolCalls.length === 0) {
|
|
344
|
+
// Emit complete reasoning block first
|
|
345
|
+
if (reasoningContent.trim()) {
|
|
346
|
+
this.callbacks.onStreamChunk?.(reasoningContent, 'reasoning');
|
|
347
|
+
}
|
|
348
|
+
// Then emit buffered narration content
|
|
349
|
+
if (suppressStreamNarration && bufferedContent) {
|
|
350
|
+
this.callbacks.onStreamChunk?.(bufferedContent, 'content');
|
|
351
|
+
bufferedContent = '';
|
|
352
|
+
}
|
|
404
353
|
}
|
|
354
|
+
toolCalls.push(chunk.toolCall);
|
|
405
355
|
}
|
|
406
356
|
else if (chunk.type === 'usage' && chunk.usage) {
|
|
407
357
|
usage = chunk.usage;
|
|
@@ -409,61 +359,81 @@ export class AgentRuntime {
|
|
|
409
359
|
}
|
|
410
360
|
}
|
|
411
361
|
finally {
|
|
412
|
-
idleTimeout.cancel();
|
|
413
|
-
if (startupTimer) {
|
|
414
|
-
clearTimeout(startupTimer);
|
|
415
|
-
}
|
|
416
362
|
await closeStream();
|
|
417
363
|
}
|
|
418
364
|
// Reset recovery attempts on successful generation
|
|
419
365
|
contextRecoveryAttempts = 0;
|
|
420
366
|
const contextStats = this.getContextStats();
|
|
421
367
|
const combinedContent = fullContent || reasoningContent;
|
|
422
|
-
// If no tool calls were issued,
|
|
423
|
-
if (
|
|
424
|
-
|
|
425
|
-
|
|
368
|
+
// If no tool calls were issued, emit reasoning and buffered content as complete blocks
|
|
369
|
+
if (toolCalls.length === 0) {
|
|
370
|
+
// Emit complete reasoning block if we have one
|
|
371
|
+
if (reasoningContent.trim()) {
|
|
372
|
+
this.callbacks.onStreamChunk?.(reasoningContent, 'reasoning');
|
|
373
|
+
}
|
|
374
|
+
// Emit buffered narration content
|
|
375
|
+
if (suppressStreamNarration && bufferedContent) {
|
|
376
|
+
this.callbacks.onStreamChunk?.(bufferedContent, 'content');
|
|
377
|
+
bufferedContent = '';
|
|
378
|
+
}
|
|
426
379
|
}
|
|
427
380
|
// Check if we got tool calls
|
|
428
381
|
if (toolCalls.length > 0) {
|
|
429
|
-
|
|
430
|
-
|
|
382
|
+
// BEHAVIORAL LOOP DETECTION: Check if model is stuck calling same tool repeatedly
|
|
383
|
+
// This catches patterns like "git status" called 5 times even with slightly different outputs
|
|
384
|
+
const behavioralLoopResult = this.checkBehavioralLoop(toolCalls);
|
|
385
|
+
if (behavioralLoopResult) {
|
|
386
|
+
this.emitAssistantMessage(behavioralLoopResult, { isFinal: true, usage, contextStats, wasStreamed: true });
|
|
387
|
+
this.messages.push({ role: 'assistant', content: behavioralLoopResult });
|
|
388
|
+
return behavioralLoopResult;
|
|
389
|
+
}
|
|
390
|
+
// Loop detection: check if same tool calls are being repeated (exact signature match)
|
|
391
|
+
const toolSignature = toolCalls
|
|
392
|
+
.map((t) => `${t.name}:${JSON.stringify(t.arguments)}`)
|
|
393
|
+
.sort()
|
|
394
|
+
.join('|');
|
|
395
|
+
if (toolSignature === this.lastToolCallSignature) {
|
|
396
|
+
this.repeatedToolCallCount++;
|
|
397
|
+
if (this.repeatedToolCallCount >= AgentRuntime.MAX_REPEATED_TOOL_CALLS) {
|
|
398
|
+
// Break out of loop - model is stuck
|
|
399
|
+
const loopMsg = `Tool loop detected: same tools called ${this.repeatedToolCallCount} times. Please try a different approach or provide more specific instructions.`;
|
|
400
|
+
this.emitAssistantMessage(loopMsg, { isFinal: true, usage, contextStats, wasStreamed: true });
|
|
401
|
+
this.messages.push({ role: 'assistant', content: loopMsg });
|
|
402
|
+
this.lastToolCallSignature = null;
|
|
403
|
+
this.repeatedToolCallCount = 0;
|
|
404
|
+
return loopMsg;
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
else {
|
|
408
|
+
this.lastToolCallSignature = toolSignature;
|
|
409
|
+
this.repeatedToolCallCount = 1;
|
|
410
|
+
}
|
|
411
|
+
// Content was already streamed via onStreamChunk, just record it for context
|
|
412
|
+
// (wasStreamed=true prevents duplicate display)
|
|
413
|
+
// Note: Acknowledgement injection happens during streaming (when first tool_call chunk arrives)
|
|
414
|
+
const narration = combinedContent.trim();
|
|
431
415
|
if (narration) {
|
|
432
|
-
|
|
433
|
-
|
|
416
|
+
this.emitAssistantMessage(narration, {
|
|
417
|
+
isFinal: false,
|
|
418
|
+
usage,
|
|
419
|
+
contextStats,
|
|
420
|
+
wasStreamed: true,
|
|
421
|
+
});
|
|
434
422
|
}
|
|
435
423
|
const assistantMessage = {
|
|
436
424
|
role: 'assistant',
|
|
437
|
-
content:
|
|
425
|
+
content: combinedContent,
|
|
438
426
|
toolCalls,
|
|
439
427
|
};
|
|
440
428
|
this.messages.push(assistantMessage);
|
|
441
429
|
await this.resolveToolCalls(toolCalls);
|
|
442
|
-
// Reset auto-continue counter since model is actively working
|
|
443
|
-
autoContinueAttempts = 0;
|
|
444
430
|
continue;
|
|
445
431
|
}
|
|
446
|
-
// Check if model expressed intent to act but didn't call tools
|
|
447
|
-
// This catches "Let me create..." without actual tool calls
|
|
448
|
-
// Only auto-continue if the feature is enabled
|
|
449
432
|
const reply = combinedContent.trim();
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
if (reply) {
|
|
455
|
-
this.emitAssistantMessage(reply, { isFinal: false, usage, contextStats, wasStreamed: true });
|
|
456
|
-
}
|
|
457
|
-
this.messages.push({ role: 'assistant', content: reply });
|
|
458
|
-
// Auto-prompt with increasingly direct instructions
|
|
459
|
-
const promptIndex = Math.min(autoContinueAttempts - 1, AUTO_CONTINUE_PROMPTS.length - 1);
|
|
460
|
-
this.messages.push({
|
|
461
|
-
role: 'user',
|
|
462
|
-
content: AUTO_CONTINUE_PROMPTS[promptIndex],
|
|
463
|
-
});
|
|
464
|
-
const autoContinueMessage = `Model expressed intent but didn't use tools. Auto-prompting to continue...`;
|
|
465
|
-
this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, autoContinueMessage);
|
|
466
|
-
continue;
|
|
433
|
+
// Reset loop detection when we get a text response (not just tool calls)
|
|
434
|
+
if (reply.length >= 10) {
|
|
435
|
+
this.lastToolCallSignature = null;
|
|
436
|
+
this.repeatedToolCallCount = 0;
|
|
467
437
|
}
|
|
468
438
|
// Final message - mark as streamed to avoid double-display in UI
|
|
469
439
|
if (reply) {
|
|
@@ -486,6 +456,14 @@ export class AgentRuntime {
|
|
|
486
456
|
continue;
|
|
487
457
|
}
|
|
488
458
|
}
|
|
459
|
+
// Auto-retry transient errors (network issues, rate limits, server errors)
|
|
460
|
+
if (isTransientError(error) && transientRetryAttempts < MAX_TRANSIENT_RETRIES) {
|
|
461
|
+
transientRetryAttempts++;
|
|
462
|
+
const delayMs = getRetryDelay(transientRetryAttempts);
|
|
463
|
+
this.callbacks.onRetrying?.(transientRetryAttempts, MAX_TRANSIENT_RETRIES, error);
|
|
464
|
+
await sleep(delayMs);
|
|
465
|
+
continue;
|
|
466
|
+
}
|
|
489
467
|
// Re-throw if not recoverable or recovery failed
|
|
490
468
|
throw error;
|
|
491
469
|
}
|
|
@@ -517,7 +495,23 @@ export class AgentRuntime {
|
|
|
517
495
|
// Fast path: single tool call
|
|
518
496
|
if (numCalls === 1) {
|
|
519
497
|
const call = toolCalls[0];
|
|
498
|
+
// Check cache first - prevent duplicate identical tool calls
|
|
499
|
+
const cached = this.getCachedToolResult(call);
|
|
500
|
+
if (cached !== null) {
|
|
501
|
+
// Return cached result with indicator that it was from cache
|
|
502
|
+
this.messages.push({
|
|
503
|
+
role: 'tool',
|
|
504
|
+
name: call.name,
|
|
505
|
+
toolCallId: call.id,
|
|
506
|
+
content: `[Cached result - identical call already executed]\n\n${cached}`,
|
|
507
|
+
});
|
|
508
|
+
return;
|
|
509
|
+
}
|
|
510
|
+
this.callbacks.onToolExecution?.(call.name, true);
|
|
520
511
|
const output = await this.toolRuntime.execute(call);
|
|
512
|
+
this.callbacks.onToolExecution?.(call.name, false);
|
|
513
|
+
// Cache the result for future identical calls
|
|
514
|
+
this.cacheToolResult(call, output);
|
|
521
515
|
this.messages.push({
|
|
522
516
|
role: 'tool',
|
|
523
517
|
name: call.name,
|
|
@@ -527,56 +521,106 @@ export class AgentRuntime {
|
|
|
527
521
|
return;
|
|
528
522
|
}
|
|
529
523
|
// PERF: For reasonable batch sizes, execute all in parallel
|
|
524
|
+
// Check cache for each call and only execute non-cached ones
|
|
530
525
|
if (numCalls <= 10) {
|
|
531
|
-
const
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
526
|
+
const cachedResults = [];
|
|
527
|
+
const toExecute = [];
|
|
528
|
+
// Separate cached from non-cached calls
|
|
529
|
+
for (const call of toolCalls) {
|
|
530
|
+
const cached = this.getCachedToolResult(call);
|
|
531
|
+
if (cached !== null) {
|
|
532
|
+
cachedResults.push({ call, output: cached, fromCache: true });
|
|
533
|
+
}
|
|
534
|
+
else {
|
|
535
|
+
toExecute.push(call);
|
|
536
|
+
}
|
|
537
|
+
}
|
|
538
|
+
// Execute non-cached calls in parallel
|
|
539
|
+
if (toExecute.length > 0) {
|
|
540
|
+
const toolNames = toExecute.map(c => c.name).join(', ');
|
|
541
|
+
this.callbacks.onToolExecution?.(toolNames, true);
|
|
542
|
+
const executed = await Promise.all(toExecute.map(async (call) => {
|
|
543
|
+
const output = await this.toolRuntime.execute(call);
|
|
544
|
+
this.cacheToolResult(call, output);
|
|
545
|
+
return { call, output, fromCache: false };
|
|
546
|
+
}));
|
|
547
|
+
this.callbacks.onToolExecution?.(toolNames, false);
|
|
548
|
+
cachedResults.push(...executed);
|
|
549
|
+
}
|
|
550
|
+
// Add all results to messages in the original order
|
|
551
|
+
for (const originalCall of toolCalls) {
|
|
552
|
+
const result = cachedResults.find(r => r.call.id === originalCall.id);
|
|
553
|
+
if (result) {
|
|
554
|
+
const content = result.fromCache
|
|
555
|
+
? `[Cached result - identical call already executed]\n\n${result.output}`
|
|
556
|
+
: result.output;
|
|
557
|
+
this.messages.push({
|
|
558
|
+
role: 'tool',
|
|
559
|
+
name: result.call.name,
|
|
560
|
+
toolCallId: result.call.id,
|
|
561
|
+
content,
|
|
562
|
+
});
|
|
563
|
+
}
|
|
543
564
|
}
|
|
544
565
|
return;
|
|
545
566
|
}
|
|
546
|
-
// PERF: For large batches, use chunked parallel execution
|
|
547
|
-
// This prevents memory pressure from too many concurrent operations
|
|
567
|
+
// PERF: For large batches, use chunked parallel execution with caching
|
|
548
568
|
const CHUNK_SIZE = 8;
|
|
549
|
-
const
|
|
569
|
+
const allResults = [];
|
|
550
570
|
for (let i = 0; i < numCalls; i += CHUNK_SIZE) {
|
|
551
571
|
const chunk = toolCalls.slice(i, i + CHUNK_SIZE);
|
|
552
|
-
const
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
572
|
+
const cachedInChunk = [];
|
|
573
|
+
const toExecuteInChunk = [];
|
|
574
|
+
for (const call of chunk) {
|
|
575
|
+
const cached = this.getCachedToolResult(call);
|
|
576
|
+
if (cached !== null) {
|
|
577
|
+
cachedInChunk.push({ call, output: cached, fromCache: true });
|
|
578
|
+
}
|
|
579
|
+
else {
|
|
580
|
+
toExecuteInChunk.push(call);
|
|
581
|
+
}
|
|
582
|
+
}
|
|
583
|
+
if (toExecuteInChunk.length > 0) {
|
|
584
|
+
const chunkNames = toExecuteInChunk.map(c => c.name).join(', ');
|
|
585
|
+
this.callbacks.onToolExecution?.(chunkNames, true);
|
|
586
|
+
const executed = await Promise.all(toExecuteInChunk.map(async (call) => {
|
|
587
|
+
const output = await this.toolRuntime.execute(call);
|
|
588
|
+
this.cacheToolResult(call, output);
|
|
589
|
+
return { call, output, fromCache: false };
|
|
590
|
+
}));
|
|
591
|
+
this.callbacks.onToolExecution?.(chunkNames, false);
|
|
592
|
+
cachedInChunk.push(...executed);
|
|
593
|
+
}
|
|
594
|
+
allResults.push(...cachedInChunk);
|
|
595
|
+
}
|
|
596
|
+
// Add results to messages in original order
|
|
597
|
+
for (const originalCall of toolCalls) {
|
|
598
|
+
const result = allResults.find(r => r.call.id === originalCall.id);
|
|
599
|
+
if (result) {
|
|
600
|
+
const content = result.fromCache
|
|
601
|
+
? `[Cached result - identical call already executed]\n\n${result.output}`
|
|
602
|
+
: result.output;
|
|
603
|
+
this.messages.push({
|
|
604
|
+
role: 'tool',
|
|
605
|
+
name: result.call.name,
|
|
606
|
+
toolCallId: result.call.id,
|
|
607
|
+
content,
|
|
608
|
+
});
|
|
609
|
+
}
|
|
566
610
|
}
|
|
567
611
|
}
|
|
568
612
|
get providerTools() {
|
|
569
613
|
return this.toolRuntime.listProviderTools();
|
|
570
614
|
}
|
|
571
615
|
/**
|
|
572
|
-
*
|
|
573
|
-
*
|
|
616
|
+
* Whether to suppress tool narration in the content field.
|
|
617
|
+
* Previously suppressed for OpenAI but now we show all thinking/narration.
|
|
574
618
|
*/
|
|
575
619
|
shouldSuppressToolNarration() {
|
|
576
|
-
return
|
|
620
|
+
return false; // Always show thinking/narration
|
|
577
621
|
}
|
|
578
622
|
emitAssistantMessage(content, metadata) {
|
|
579
|
-
if (!content) {
|
|
623
|
+
if (!content || !content.trim()) {
|
|
580
624
|
return;
|
|
581
625
|
}
|
|
582
626
|
const elapsedMs = this.activeRun ? Date.now() - this.activeRun.startedAt : undefined;
|
|
@@ -626,6 +670,138 @@ export class AgentRuntime {
|
|
|
626
670
|
model: this.modelId,
|
|
627
671
|
});
|
|
628
672
|
}
|
|
673
|
+
/**
|
|
674
|
+
* Extract a "command hash" from tool arguments for behavioral loop detection.
|
|
675
|
+
* For execute_bash, this is the actual command. For other tools, key identifying args.
|
|
676
|
+
*/
|
|
677
|
+
extractCmdHash(name, args) {
|
|
678
|
+
// For bash/execute commands, extract the command itself
|
|
679
|
+
if (name === 'execute_bash' || name === 'Bash') {
|
|
680
|
+
const cmd = args['command'];
|
|
681
|
+
if (cmd) {
|
|
682
|
+
// Normalize: trim, take first 100 chars, remove variable parts like timestamps
|
|
683
|
+
return cmd.trim().slice(0, 100).replace(/\d{10,}/g, 'N');
|
|
684
|
+
}
|
|
685
|
+
}
|
|
686
|
+
// For file operations, use the path
|
|
687
|
+
if (name === 'read_file' || name === 'Read' || name === 'read_files') {
|
|
688
|
+
const path = args['path'] || args['file_path'] || args['paths'];
|
|
689
|
+
if (path)
|
|
690
|
+
return `path:${JSON.stringify(path).slice(0, 100)}`;
|
|
691
|
+
}
|
|
692
|
+
if (name === 'list_files' || name === 'Glob') {
|
|
693
|
+
const path = args['path'] || args['pattern'];
|
|
694
|
+
if (path)
|
|
695
|
+
return `path:${JSON.stringify(path).slice(0, 100)}`;
|
|
696
|
+
}
|
|
697
|
+
// For search, use the query/pattern
|
|
698
|
+
if (name === 'Grep' || name === 'grep' || name === 'search') {
|
|
699
|
+
const pattern = args['pattern'] || args['query'];
|
|
700
|
+
if (pattern)
|
|
701
|
+
return `search:${String(pattern).slice(0, 100)}`;
|
|
702
|
+
}
|
|
703
|
+
// Default: use first significant arg value
|
|
704
|
+
const firstArg = Object.values(args)[0];
|
|
705
|
+
if (firstArg) {
|
|
706
|
+
return String(firstArg).slice(0, 100);
|
|
707
|
+
}
|
|
708
|
+
return 'no-args';
|
|
709
|
+
}
|
|
710
|
+
/**
|
|
711
|
+
* Check for behavioral loops - model calling the same tool with similar args repeatedly.
|
|
712
|
+
* Returns an error message if a loop is detected, null otherwise.
|
|
713
|
+
*
|
|
714
|
+
* FUNDAMENTAL PREVENTION: Cached calls are excluded from loop detection since they
|
|
715
|
+
* don't actually execute (the cache provides the result). This means:
|
|
716
|
+
* - First call: executes and caches result
|
|
717
|
+
* - Second identical call: returns cached result, NOT counted toward loop
|
|
718
|
+
* - Only genuinely NEW (non-cached) repetitive calls trigger loop detection
|
|
719
|
+
*
|
|
720
|
+
* This catches patterns like:
|
|
721
|
+
* - "git status -sb" called 3 times with DIFFERENT outputs (cache miss each time)
|
|
722
|
+
* - Repeated file reads where file content changed
|
|
723
|
+
* - Repeated searches with same pattern but new results
|
|
724
|
+
*/
|
|
725
|
+
checkBehavioralLoop(toolCalls) {
|
|
726
|
+
// Filter out calls that will be served from cache - these don't count toward loops
|
|
727
|
+
// since they're handled fundamentally by the caching mechanism
|
|
728
|
+
const nonCachedCalls = toolCalls.filter(call => this.getCachedToolResult(call) === null);
|
|
729
|
+
// If all calls are cached, no loop detection needed
|
|
730
|
+
if (nonCachedCalls.length === 0) {
|
|
731
|
+
return null;
|
|
732
|
+
}
|
|
733
|
+
// Count existing occurrences in recent history
|
|
734
|
+
const existingCounts = new Map();
|
|
735
|
+
for (const { name, cmdHash } of this.recentToolCalls) {
|
|
736
|
+
const key = `${name}:${cmdHash}`;
|
|
737
|
+
existingCounts.set(key, (existingCounts.get(key) ?? 0) + 1);
|
|
738
|
+
}
|
|
739
|
+
// Check if ANY incoming NON-CACHED call would exceed threshold
|
|
740
|
+
for (const call of nonCachedCalls) {
|
|
741
|
+
const cmdHash = this.extractCmdHash(call.name, call.arguments ?? {});
|
|
742
|
+
const key = `${call.name}:${cmdHash}`;
|
|
743
|
+
const currentCount = existingCounts.get(key) ?? 0;
|
|
744
|
+
// If adding this call would reach or exceed threshold, block immediately
|
|
745
|
+
if (currentCount + 1 >= AgentRuntime.BEHAVIORAL_LOOP_THRESHOLD) {
|
|
746
|
+
// Reset history to prevent immediate re-trigger
|
|
747
|
+
this.recentToolCalls = [];
|
|
748
|
+
return `Behavioral loop detected: "${call.name}" called ${currentCount + 1} times with similar arguments. The task appears stuck. Please try a different approach or provide more specific instructions.`;
|
|
749
|
+
}
|
|
750
|
+
}
|
|
751
|
+
// Track only non-cached tool calls (cached ones are handled by caching)
|
|
752
|
+
for (const call of nonCachedCalls) {
|
|
753
|
+
const cmdHash = this.extractCmdHash(call.name, call.arguments ?? {});
|
|
754
|
+
this.recentToolCalls.push({ name: call.name, cmdHash });
|
|
755
|
+
}
|
|
756
|
+
// Keep only recent history
|
|
757
|
+
while (this.recentToolCalls.length > AgentRuntime.TOOL_HISTORY_SIZE) {
|
|
758
|
+
this.recentToolCalls.shift();
|
|
759
|
+
}
|
|
760
|
+
return null;
|
|
761
|
+
}
|
|
762
|
+
/**
|
|
763
|
+
* Reset behavioral loop tracking (called when user provides new input or task completes)
|
|
764
|
+
*/
|
|
765
|
+
resetBehavioralLoopTracking() {
|
|
766
|
+
this.recentToolCalls = [];
|
|
767
|
+
this.lastToolCallSignature = null;
|
|
768
|
+
this.repeatedToolCallCount = 0;
|
|
769
|
+
// Note: we DON'T clear toolResultCache here - cached results remain valid across turns
|
|
770
|
+
// to prevent re-executing identical tool calls within a session
|
|
771
|
+
}
|
|
772
|
+
/**
|
|
773
|
+
* Create a stable cache key for a tool call based on name and arguments
|
|
774
|
+
*/
|
|
775
|
+
getToolCacheKey(call) {
|
|
776
|
+
const args = call.arguments ?? {};
|
|
777
|
+
// Sort keys for consistent ordering
|
|
778
|
+
const sortedArgs = Object.keys(args).sort().reduce((acc, key) => {
|
|
779
|
+
acc[key] = args[key];
|
|
780
|
+
return acc;
|
|
781
|
+
}, {});
|
|
782
|
+
return `${call.name}:${JSON.stringify(sortedArgs)}`;
|
|
783
|
+
}
|
|
784
|
+
/**
|
|
785
|
+
* Get cached result for a tool call, or null if not cached
|
|
786
|
+
*/
|
|
787
|
+
getCachedToolResult(call) {
|
|
788
|
+
const key = this.getToolCacheKey(call);
|
|
789
|
+
return this.toolResultCache.get(key) ?? null;
|
|
790
|
+
}
|
|
791
|
+
/**
|
|
792
|
+
* Cache a tool result for future identical calls
|
|
793
|
+
*/
|
|
794
|
+
cacheToolResult(call, result) {
|
|
795
|
+
const key = this.getToolCacheKey(call);
|
|
796
|
+
// Evict oldest entries if cache is full
|
|
797
|
+
if (this.toolResultCache.size >= AgentRuntime.TOOL_CACHE_MAX_SIZE) {
|
|
798
|
+
const firstKey = this.toolResultCache.keys().next().value;
|
|
799
|
+
if (firstKey) {
|
|
800
|
+
this.toolResultCache.delete(firstKey);
|
|
801
|
+
}
|
|
802
|
+
}
|
|
803
|
+
this.toolResultCache.set(key, result);
|
|
804
|
+
}
|
|
629
805
|
getHistory() {
|
|
630
806
|
return this.messages.map(cloneMessage);
|
|
631
807
|
}
|