erosolar-cli 2.1.170 → 2.1.172
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/agents/erosolar-code.rules.json +2 -2
- package/agents/general.rules.json +3 -21
- package/dist/StringUtils.d.ts +8 -0
- package/dist/StringUtils.d.ts.map +1 -0
- package/dist/StringUtils.js +11 -0
- package/dist/StringUtils.js.map +1 -0
- package/dist/capabilities/statusCapability.js +2 -2
- package/dist/capabilities/statusCapability.js.map +1 -1
- package/dist/contracts/agent-schemas.json +5 -5
- package/dist/core/agent.d.ts +24 -83
- package/dist/core/agent.d.ts.map +1 -1
- package/dist/core/agent.js +248 -499
- package/dist/core/agent.js.map +1 -1
- package/dist/core/aiFlowSupervisor.d.ts +44 -0
- package/dist/core/aiFlowSupervisor.d.ts.map +1 -0
- package/dist/core/aiFlowSupervisor.js +299 -0
- package/dist/core/aiFlowSupervisor.js.map +1 -0
- package/dist/core/cliTestHarness.d.ts +200 -0
- package/dist/core/cliTestHarness.d.ts.map +1 -0
- package/dist/core/cliTestHarness.js +549 -0
- package/dist/core/cliTestHarness.js.map +1 -0
- package/dist/core/preferences.d.ts +0 -1
- package/dist/core/preferences.d.ts.map +1 -1
- package/dist/core/preferences.js +1 -8
- package/dist/core/preferences.js.map +1 -1
- package/dist/core/schemaValidator.js +3 -3
- package/dist/core/schemaValidator.js.map +1 -1
- package/dist/core/testUtils.d.ts +121 -0
- package/dist/core/testUtils.d.ts.map +1 -0
- package/dist/core/testUtils.js +235 -0
- package/dist/core/testUtils.js.map +1 -0
- package/dist/core/toolPreconditions.d.ts +11 -0
- package/dist/core/toolPreconditions.d.ts.map +1 -1
- package/dist/core/toolPreconditions.js +164 -33
- package/dist/core/toolPreconditions.js.map +1 -1
- package/dist/core/toolRuntime.d.ts.map +1 -1
- package/dist/core/toolRuntime.js +114 -9
- package/dist/core/toolRuntime.js.map +1 -1
- package/dist/core/toolValidation.d.ts +116 -0
- package/dist/core/toolValidation.d.ts.map +1 -0
- package/dist/core/toolValidation.js +282 -0
- package/dist/core/toolValidation.js.map +1 -0
- package/dist/core/updateChecker.d.ts +1 -61
- package/dist/core/updateChecker.d.ts.map +1 -1
- package/dist/core/updateChecker.js +3 -147
- package/dist/core/updateChecker.js.map +1 -1
- package/dist/headless/headlessApp.d.ts.map +1 -1
- package/dist/headless/headlessApp.js +39 -0
- package/dist/headless/headlessApp.js.map +1 -1
- package/dist/plugins/tools/nodeDefaults.d.ts.map +1 -1
- package/dist/plugins/tools/nodeDefaults.js +2 -0
- package/dist/plugins/tools/nodeDefaults.js.map +1 -1
- package/dist/providers/openaiResponsesProvider.d.ts.map +1 -1
- package/dist/providers/openaiResponsesProvider.js +74 -79
- package/dist/providers/openaiResponsesProvider.js.map +1 -1
- package/dist/runtime/agentController.d.ts.map +1 -1
- package/dist/runtime/agentController.js +3 -6
- package/dist/runtime/agentController.js.map +1 -1
- package/dist/runtime/agentSession.d.ts +2 -0
- package/dist/runtime/agentSession.d.ts.map +1 -1
- package/dist/runtime/agentSession.js +2 -2
- package/dist/runtime/agentSession.js.map +1 -1
- package/dist/shell/interactiveShell.d.ts +18 -11
- package/dist/shell/interactiveShell.d.ts.map +1 -1
- package/dist/shell/interactiveShell.js +291 -273
- package/dist/shell/interactiveShell.js.map +1 -1
- package/dist/shell/shellApp.d.ts.map +1 -1
- package/dist/shell/shellApp.js +1 -7
- package/dist/shell/shellApp.js.map +1 -1
- package/dist/shell/systemPrompt.d.ts.map +1 -1
- package/dist/shell/systemPrompt.js +15 -4
- package/dist/shell/systemPrompt.js.map +1 -1
- package/dist/subagents/taskRunner.js +1 -2
- package/dist/subagents/taskRunner.js.map +1 -1
- package/dist/tools/bashTools.d.ts.map +1 -1
- package/dist/tools/bashTools.js +8 -101
- package/dist/tools/bashTools.js.map +1 -1
- package/dist/tools/diffUtils.d.ts +2 -8
- package/dist/tools/diffUtils.d.ts.map +1 -1
- package/dist/tools/diffUtils.js +13 -72
- package/dist/tools/diffUtils.js.map +1 -1
- package/dist/tools/grepTools.d.ts.map +1 -1
- package/dist/tools/grepTools.js +2 -10
- package/dist/tools/grepTools.js.map +1 -1
- package/dist/tools/planningTools.d.ts +10 -0
- package/dist/tools/planningTools.d.ts.map +1 -1
- package/dist/tools/planningTools.js +16 -0
- package/dist/tools/planningTools.js.map +1 -1
- package/dist/tools/searchTools.d.ts.map +1 -1
- package/dist/tools/searchTools.js +2 -4
- package/dist/tools/searchTools.js.map +1 -1
- package/dist/ui/PromptController.d.ts +4 -1
- package/dist/ui/PromptController.d.ts.map +1 -1
- package/dist/ui/PromptController.js +7 -1
- package/dist/ui/PromptController.js.map +1 -1
- package/dist/ui/ShellUIAdapter.d.ts +28 -292
- package/dist/ui/ShellUIAdapter.d.ts.map +1 -1
- package/dist/ui/ShellUIAdapter.js +121 -1513
- package/dist/ui/ShellUIAdapter.js.map +1 -1
- package/dist/ui/UnifiedUIRenderer.d.ts +30 -133
- package/dist/ui/UnifiedUIRenderer.d.ts.map +1 -1
- package/dist/ui/UnifiedUIRenderer.js +370 -939
- package/dist/ui/UnifiedUIRenderer.js.map +1 -1
- package/dist/ui/animatedStatus.d.ts +6 -128
- package/dist/ui/animatedStatus.d.ts.map +1 -1
- package/dist/ui/animatedStatus.js +50 -383
- package/dist/ui/animatedStatus.js.map +1 -1
- package/dist/ui/display.d.ts +26 -182
- package/dist/ui/display.d.ts.map +1 -1
- package/dist/ui/display.js +97 -678
- package/dist/ui/display.js.map +1 -1
- package/dist/ui/layout.d.ts +1 -0
- package/dist/ui/layout.d.ts.map +1 -1
- package/dist/ui/layout.js +12 -0
- package/dist/ui/layout.js.map +1 -1
- package/dist/ui/orchestration/UIUpdateCoordinator.d.ts +7 -61
- package/dist/ui/orchestration/UIUpdateCoordinator.d.ts.map +1 -1
- package/dist/ui/orchestration/UIUpdateCoordinator.js +20 -232
- package/dist/ui/orchestration/UIUpdateCoordinator.js.map +1 -1
- package/dist/ui/planOverlay.d.ts +28 -0
- package/dist/ui/planOverlay.d.ts.map +1 -0
- package/dist/ui/planOverlay.js +156 -0
- package/dist/ui/planOverlay.js.map +1 -0
- package/dist/ui/shortcutsHelp.d.ts.map +1 -1
- package/dist/ui/shortcutsHelp.js +1 -0
- package/dist/ui/shortcutsHelp.js.map +1 -1
- package/dist/ui/streamingFormatter.d.ts +30 -0
- package/dist/ui/streamingFormatter.d.ts.map +1 -0
- package/dist/ui/streamingFormatter.js +91 -0
- package/dist/ui/streamingFormatter.js.map +1 -0
- package/dist/ui/unified/index.d.ts +1 -30
- package/dist/ui/unified/index.d.ts.map +1 -1
- package/dist/ui/unified/index.js +2 -45
- package/dist/ui/unified/index.js.map +1 -1
- package/dist/utils/errorUtils.d.ts +16 -0
- package/dist/utils/errorUtils.d.ts.map +1 -0
- package/dist/utils/errorUtils.js +66 -0
- package/dist/utils/errorUtils.js.map +1 -0
- package/package.json +2 -1
- package/dist/core/reliabilityPrompt.d.ts +0 -9
- package/dist/core/reliabilityPrompt.d.ts.map +0 -1
- package/dist/core/reliabilityPrompt.js +0 -31
- package/dist/core/reliabilityPrompt.js.map +0 -1
- package/dist/ui/UnifiedUIController.d.ts +0 -81
- package/dist/ui/UnifiedUIController.d.ts.map +0 -1
- package/dist/ui/UnifiedUIController.js +0 -212
- package/dist/ui/UnifiedUIController.js.map +0 -1
- package/dist/ui/animation/AnimationScheduler.d.ts +0 -192
- package/dist/ui/animation/AnimationScheduler.d.ts.map +0 -1
- package/dist/ui/animation/AnimationScheduler.js +0 -432
- package/dist/ui/animation/AnimationScheduler.js.map +0 -1
- package/dist/ui/inPlaceUpdater.d.ts +0 -181
- package/dist/ui/inPlaceUpdater.d.ts.map +0 -1
- package/dist/ui/inPlaceUpdater.js +0 -515
- package/dist/ui/inPlaceUpdater.js.map +0 -1
- package/dist/ui/interrupts/InterruptManager.d.ts +0 -142
- package/dist/ui/interrupts/InterruptManager.d.ts.map +0 -1
- package/dist/ui/interrupts/InterruptManager.js +0 -439
- package/dist/ui/interrupts/InterruptManager.js.map +0 -1
- package/dist/ui/telemetry/ResponseTracker.d.ts +0 -22
- package/dist/ui/telemetry/ResponseTracker.d.ts.map +0 -1
- package/dist/ui/telemetry/ResponseTracker.js +0 -60
- package/dist/ui/telemetry/ResponseTracker.js.map +0 -1
- package/dist/ui/telemetry/UITelemetry.d.ts +0 -181
- package/dist/ui/telemetry/UITelemetry.d.ts.map +0 -1
- package/dist/ui/telemetry/UITelemetry.js +0 -446
- package/dist/ui/telemetry/UITelemetry.js.map +0 -1
- package/dist/ui/unified/layout.d.ts +0 -12
- package/dist/ui/unified/layout.d.ts.map +0 -1
- package/dist/ui/unified/layout.js +0 -96
- package/dist/ui/unified/layout.js.map +0 -1
package/dist/core/agent.js
CHANGED
|
@@ -4,7 +4,65 @@ import { safeErrorMessage } from './secretStore.js';
|
|
|
4
4
|
* Maximum number of context overflow recovery attempts
|
|
5
5
|
*/
|
|
6
6
|
const MAX_CONTEXT_RECOVERY_ATTEMPTS = 3;
|
|
7
|
-
|
|
7
|
+
/**
|
|
8
|
+
* Maximum number of auto-continuation attempts when model expresses intent but doesn't act
|
|
9
|
+
*/
|
|
10
|
+
const MAX_AUTO_CONTINUE_ATTEMPTS = 3;
|
|
11
|
+
/**
|
|
12
|
+
* Streaming safety timeouts (ms)
|
|
13
|
+
* - First chunk timeout: fail fast if the stream never starts
|
|
14
|
+
* - Inactivity timeout: abort if no chunks arrive for an extended period
|
|
15
|
+
*/
|
|
16
|
+
// Allow more headroom before declaring a streaming stall to avoid premature fallbacks.
|
|
17
|
+
const STREAM_FIRST_CHUNK_TIMEOUT_MS = 25000;
|
|
18
|
+
const STREAM_INACTIVITY_TIMEOUT_MS = 60000;
|
|
19
|
+
/**
|
|
20
|
+
* Patterns that indicate the model intends to take action but hasn't yet
|
|
21
|
+
* These suggest the model should be prompted to continue
|
|
22
|
+
*/
|
|
23
|
+
const INTENT_WITHOUT_ACTION_PATTERNS = [
|
|
24
|
+
// "Let me X" patterns - model is stating what it will do
|
|
25
|
+
/\blet me\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|examine|review)/i,
|
|
26
|
+
// "I'll X" / "I will X" patterns
|
|
27
|
+
/\bi['']ll\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|start|begin|now)/i,
|
|
28
|
+
/\bi will\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|start|begin|now)/i,
|
|
29
|
+
// "I'm going to X" patterns
|
|
30
|
+
/\bi['']m going to\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze)/i,
|
|
31
|
+
// "Now I'll X" / "First, I'll X" patterns
|
|
32
|
+
/\b(now|first|next)\s*(,)?\s*i['']ll\s+/i,
|
|
33
|
+
// Explicit continuation signals
|
|
34
|
+
/\bhere['']s (the|my) (plan|approach|solution|implementation)/i,
|
|
35
|
+
// Numbered steps suggesting action to come
|
|
36
|
+
/^\s*\d+\.\s+(create|write|implement|add|update|edit|modify|fix|show|read|check)/im,
|
|
37
|
+
// Bullet points suggesting planned actions
|
|
38
|
+
/^[\s•\-\*]+\s*(create|write|implement|add|update|edit|modify|fix|shows?|reads?|checks?)\s/im,
|
|
39
|
+
];
|
|
40
|
+
const AUTO_CONTINUE_PROMPTS = [
|
|
41
|
+
'Continue. Use tools now: start with Read/read_file to inspect the target file, then call Edit (or Write if available) with file_path/old_string/new_string to apply changes. Keep using tools until the task is done.',
|
|
42
|
+
'You MUST call tools immediately. Issue Read -> Edit/Write tool calls with explicit parameters; no more explaining or planning.',
|
|
43
|
+
'CRITICAL: Call a tool right now. Use Edit with file_path, old_string, new_string (or Write with file_path and content). Respond with tool calls only.',
|
|
44
|
+
];
|
|
45
|
+
/**
|
|
46
|
+
* Check if response indicates intent to act without actually acting
|
|
47
|
+
* This detects when the model says "let me do X" but doesn't call any tools
|
|
48
|
+
*/
|
|
49
|
+
function shouldAutoContinue(content, hasToolCalls) {
|
|
50
|
+
// If there are tool calls, no need to auto-continue
|
|
51
|
+
if (hasToolCalls) {
|
|
52
|
+
return false;
|
|
53
|
+
}
|
|
54
|
+
// If content is very short, likely not an incomplete intent
|
|
55
|
+
if (content.length < 50) {
|
|
56
|
+
return false;
|
|
57
|
+
}
|
|
58
|
+
// Check for intent patterns
|
|
59
|
+
for (const pattern of INTENT_WITHOUT_ACTION_PATTERNS) {
|
|
60
|
+
if (pattern.test(content)) {
|
|
61
|
+
return true;
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
return false;
|
|
65
|
+
}
|
|
8
66
|
/**
|
|
9
67
|
* Check if an error is a context overflow error
|
|
10
68
|
*/
|
|
@@ -19,53 +77,15 @@ function isContextOverflowError(error) {
|
|
|
19
77
|
message.includes('max_tokens') ||
|
|
20
78
|
message.includes('context window'));
|
|
21
79
|
}
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
const networkPatterns = [
|
|
31
|
-
'econnrefused', 'econnreset', 'enotfound', 'etimedout', 'epipe',
|
|
32
|
-
'network error', 'connection error', 'fetch failed', 'socket hang up',
|
|
33
|
-
'network is unreachable', 'connection refused', 'connection reset',
|
|
34
|
-
];
|
|
35
|
-
if (networkPatterns.some(p => message.includes(p))) {
|
|
36
|
-
return true;
|
|
80
|
+
class StreamInterruptionError extends Error {
|
|
81
|
+
reason;
|
|
82
|
+
partialResponse;
|
|
83
|
+
constructor(reason, message, partialResponse) {
|
|
84
|
+
super(message);
|
|
85
|
+
this.name = 'StreamInterruptionError';
|
|
86
|
+
this.reason = reason;
|
|
87
|
+
this.partialResponse = partialResponse;
|
|
37
88
|
}
|
|
38
|
-
// Rate limit errors
|
|
39
|
-
if (message.includes('rate limit') || message.includes('429') || message.includes('too many requests')) {
|
|
40
|
-
return true;
|
|
41
|
-
}
|
|
42
|
-
// Server errors (5xx)
|
|
43
|
-
if (message.includes('500') || message.includes('502') || message.includes('503') || message.includes('504')) {
|
|
44
|
-
return true;
|
|
45
|
-
}
|
|
46
|
-
// Temporary service errors
|
|
47
|
-
if (message.includes('service unavailable') || message.includes('temporarily unavailable') ||
|
|
48
|
-
message.includes('overloaded') || message.includes('server error')) {
|
|
49
|
-
return true;
|
|
50
|
-
}
|
|
51
|
-
return false;
|
|
52
|
-
}
|
|
53
|
-
/**
|
|
54
|
-
* Maximum number of transient error retries
|
|
55
|
-
*/
|
|
56
|
-
const MAX_TRANSIENT_RETRIES = 3;
|
|
57
|
-
/**
|
|
58
|
-
* Delay before retry (in ms), with exponential backoff
|
|
59
|
-
*/
|
|
60
|
-
function getRetryDelay(attempt) {
|
|
61
|
-
// Base delay of 1 second, doubles each attempt: 1s, 2s, 4s
|
|
62
|
-
return Math.min(1000 * Math.pow(2, attempt - 1), 10000);
|
|
63
|
-
}
|
|
64
|
-
/**
|
|
65
|
-
* Sleep for the specified milliseconds
|
|
66
|
-
*/
|
|
67
|
-
function sleep(ms) {
|
|
68
|
-
return new Promise(resolve => setTimeout(resolve, ms));
|
|
69
89
|
}
|
|
70
90
|
export class AgentRuntime {
|
|
71
91
|
messages = [];
|
|
@@ -79,53 +99,7 @@ export class AgentRuntime {
|
|
|
79
99
|
modelId;
|
|
80
100
|
workingDirectory;
|
|
81
101
|
cancellationRequested = false;
|
|
82
|
-
|
|
83
|
-
lastToolCallSignature = null;
|
|
84
|
-
repeatedToolCallCount = 0;
|
|
85
|
-
static MAX_REPEATED_TOOL_CALLS = 5; // Allow up to 4 identical calls before stopping
|
|
86
|
-
// Behavioral loop detection: track recent tool calls to catch repetitive patterns
|
|
87
|
-
// e.g., calling "execute_bash" with "git status" 5 times even if output differs slightly
|
|
88
|
-
recentToolCalls = [];
|
|
89
|
-
static TOOL_HISTORY_SIZE = 12;
|
|
90
|
-
static BEHAVIORAL_LOOP_THRESHOLD = 3; // Same tool+cmd 3+ times in last 12 = stuck
|
|
91
|
-
// Never cache stateful tools - they must always execute to reflect current system state
|
|
92
|
-
static NON_CACHEABLE_TOOL_NAMES = new Set([
|
|
93
|
-
'bash',
|
|
94
|
-
'execute_bash',
|
|
95
|
-
'execute_command',
|
|
96
|
-
'run_command',
|
|
97
|
-
'edit',
|
|
98
|
-
'edit_file',
|
|
99
|
-
'notebookedit',
|
|
100
|
-
'read',
|
|
101
|
-
'read_file',
|
|
102
|
-
'read_files',
|
|
103
|
-
'list_files',
|
|
104
|
-
'list_dir',
|
|
105
|
-
'glob',
|
|
106
|
-
'grep',
|
|
107
|
-
'search',
|
|
108
|
-
'search_text',
|
|
109
|
-
'git_status',
|
|
110
|
-
'git_diff',
|
|
111
|
-
'git_log',
|
|
112
|
-
'git_commit',
|
|
113
|
-
'git_push',
|
|
114
|
-
]);
|
|
115
|
-
// Skip loop short-circuiting for direct execution tools to avoid blocking user commands
|
|
116
|
-
static LOOP_EXEMPT_TOOL_NAMES = new Set([
|
|
117
|
-
'bash',
|
|
118
|
-
'execute_bash',
|
|
119
|
-
'execute_command',
|
|
120
|
-
'run_command',
|
|
121
|
-
'edit',
|
|
122
|
-
'edit_file',
|
|
123
|
-
'notebookedit',
|
|
124
|
-
]);
|
|
125
|
-
// Tool result cache: prevent duplicate identical tool calls by returning cached results
|
|
126
|
-
// Key: tool signature (name + JSON args), Value: result string
|
|
127
|
-
toolResultCache = new Map();
|
|
128
|
-
static TOOL_CACHE_MAX_SIZE = 50; // Keep last 50 tool results
|
|
102
|
+
_autoContinueEnabled = false;
|
|
129
103
|
constructor(options) {
|
|
130
104
|
this.provider = options.provider;
|
|
131
105
|
this.toolRuntime = options.toolRuntime;
|
|
@@ -134,6 +108,7 @@ export class AgentRuntime {
|
|
|
134
108
|
this.providerId = options.providerId ?? 'unknown';
|
|
135
109
|
this.modelId = options.modelId ?? 'unknown';
|
|
136
110
|
this.workingDirectory = options.workingDirectory ?? process.cwd();
|
|
111
|
+
this._autoContinueEnabled = options.autoContinue ?? false;
|
|
137
112
|
const trimmedPrompt = options.systemPrompt.trim();
|
|
138
113
|
this.baseSystemPrompt = trimmedPrompt || null;
|
|
139
114
|
if (trimmedPrompt) {
|
|
@@ -159,14 +134,25 @@ export class AgentRuntime {
|
|
|
159
134
|
isRunning() {
|
|
160
135
|
return this.activeRun !== null;
|
|
161
136
|
}
|
|
137
|
+
/**
|
|
138
|
+
* Check if auto-continuation is enabled.
|
|
139
|
+
*/
|
|
140
|
+
isAutoContinueEnabled() {
|
|
141
|
+
return this._autoContinueEnabled;
|
|
142
|
+
}
|
|
143
|
+
/**
|
|
144
|
+
* Enable or disable auto-continuation.
|
|
145
|
+
*/
|
|
146
|
+
setAutoContinue(enabled) {
|
|
147
|
+
this._autoContinueEnabled = enabled;
|
|
148
|
+
}
|
|
162
149
|
async send(text, useStreaming = false) {
|
|
163
150
|
const prompt = text.trim();
|
|
164
151
|
if (!prompt) {
|
|
165
152
|
return '';
|
|
166
153
|
}
|
|
167
|
-
// Reset cancellation flag
|
|
154
|
+
// Reset cancellation flag at start of new request
|
|
168
155
|
this.cancellationRequested = false;
|
|
169
|
-
this.resetBehavioralLoopTracking();
|
|
170
156
|
// Handle multi-line paste: show summary to user, send full content to AI
|
|
171
157
|
if (isMultilinePaste(prompt)) {
|
|
172
158
|
const processed = processPaste(prompt);
|
|
@@ -182,9 +168,28 @@ export class AgentRuntime {
|
|
|
182
168
|
const run = { startedAt: Date.now() };
|
|
183
169
|
this.activeRun = run;
|
|
184
170
|
try {
|
|
185
|
-
// Always use streaming when available - no fallback
|
|
186
171
|
if (useStreaming && this.provider.generateStream) {
|
|
187
|
-
|
|
172
|
+
try {
|
|
173
|
+
return await this.processConversationStreaming();
|
|
174
|
+
}
|
|
175
|
+
catch (error) {
|
|
176
|
+
const message = safeErrorMessage(error);
|
|
177
|
+
const reason = error instanceof StreamInterruptionError ? error.reason : undefined;
|
|
178
|
+
const partialResponse = error instanceof StreamInterruptionError ? error.partialResponse : undefined;
|
|
179
|
+
console.warn(`[agent] Streaming failed, falling back to non-streaming: ${message}`);
|
|
180
|
+
// If we captured part of the response, seed it into history and ask the model to continue
|
|
181
|
+
// so we don't restart the answer from scratch during fallback.
|
|
182
|
+
if (partialResponse && partialResponse.trim()) {
|
|
183
|
+
const partial = partialResponse.trim();
|
|
184
|
+
this.messages.push({ role: 'assistant', content: partial });
|
|
185
|
+
this.messages.push({
|
|
186
|
+
role: 'user',
|
|
187
|
+
content: 'Continue your previous response from where it stopped. Do not repeat text you already provided.',
|
|
188
|
+
});
|
|
189
|
+
}
|
|
190
|
+
this.callbacks.onStreamFallback?.({ message, error, reason, partialResponse });
|
|
191
|
+
return await this.processConversation();
|
|
192
|
+
}
|
|
188
193
|
}
|
|
189
194
|
return await this.processConversation();
|
|
190
195
|
}
|
|
@@ -198,7 +203,7 @@ export class AgentRuntime {
|
|
|
198
203
|
}
|
|
199
204
|
async processConversation() {
|
|
200
205
|
let contextRecoveryAttempts = 0;
|
|
201
|
-
let transientRetryAttempts = 0;
|
|
206
|
+
let autoContinueAttempts = 0;
|
|
202
207
|
while (true) {
|
|
203
208
|
// Check for cancellation at start of each iteration
|
|
204
209
|
if (this.cancellationRequested) {
|
|
@@ -214,66 +219,44 @@ export class AgentRuntime {
|
|
|
214
219
|
// Reset recovery attempts on successful generation
|
|
215
220
|
contextRecoveryAttempts = 0;
|
|
216
221
|
if (response.type === 'tool_calls') {
|
|
217
|
-
|
|
218
|
-
const behavioralLoopResult = this.checkBehavioralLoop(response.toolCalls);
|
|
219
|
-
if (behavioralLoopResult) {
|
|
220
|
-
this.emitAssistantMessage(behavioralLoopResult, { isFinal: true, usage, contextStats });
|
|
221
|
-
this.messages.push({ role: 'assistant', content: behavioralLoopResult });
|
|
222
|
-
return behavioralLoopResult;
|
|
223
|
-
}
|
|
224
|
-
// Loop detection: check if same tool calls are being repeated (exact signature match)
|
|
225
|
-
const signatureCalls = response.toolCalls.filter(call => !this.shouldSkipLoopDetection(call));
|
|
226
|
-
const toolSignature = signatureCalls.length
|
|
227
|
-
? signatureCalls
|
|
228
|
-
.map((t) => `${t.name}:${JSON.stringify(t.arguments)}`)
|
|
229
|
-
.sort()
|
|
230
|
-
.join('|')
|
|
231
|
-
: null;
|
|
232
|
-
if (toolSignature && toolSignature === this.lastToolCallSignature) {
|
|
233
|
-
this.repeatedToolCallCount++;
|
|
234
|
-
if (this.repeatedToolCallCount >= AgentRuntime.MAX_REPEATED_TOOL_CALLS) {
|
|
235
|
-
// Break out of loop - model is stuck
|
|
236
|
-
const loopMsg = `Tool loop detected: same tools called ${this.repeatedToolCallCount} times. Please try a different approach or provide more specific instructions.`;
|
|
237
|
-
this.emitAssistantMessage(loopMsg, { isFinal: true, usage, contextStats });
|
|
238
|
-
this.messages.push({ role: 'assistant', content: loopMsg });
|
|
239
|
-
this.lastToolCallSignature = null;
|
|
240
|
-
this.repeatedToolCallCount = 0;
|
|
241
|
-
return loopMsg;
|
|
242
|
-
}
|
|
243
|
-
}
|
|
244
|
-
else if (toolSignature) {
|
|
245
|
-
this.lastToolCallSignature = toolSignature;
|
|
246
|
-
this.repeatedToolCallCount = 1;
|
|
247
|
-
}
|
|
248
|
-
else {
|
|
249
|
-
this.lastToolCallSignature = null;
|
|
250
|
-
this.repeatedToolCallCount = 0;
|
|
251
|
-
}
|
|
252
|
-
// Emit narration if present - it shows the AI's thought process before tools
|
|
253
|
-
const narration = response.content?.trim();
|
|
222
|
+
const suppressNarration = this.shouldSuppressToolNarration();
|
|
223
|
+
const narration = suppressNarration ? '' : response.content?.trim();
|
|
254
224
|
if (narration) {
|
|
255
|
-
this.emitAssistantMessage(narration, {
|
|
256
|
-
isFinal: false,
|
|
257
|
-
usage,
|
|
258
|
-
contextStats,
|
|
259
|
-
});
|
|
225
|
+
this.emitAssistantMessage(narration, { isFinal: false, usage, contextStats });
|
|
260
226
|
}
|
|
261
227
|
const assistantMessage = {
|
|
262
228
|
role: 'assistant',
|
|
263
|
-
content: response.content ?? '',
|
|
229
|
+
content: suppressNarration ? '' : (response.content ?? ''),
|
|
264
230
|
};
|
|
265
231
|
if (response.toolCalls?.length) {
|
|
266
232
|
assistantMessage.toolCalls = response.toolCalls;
|
|
267
233
|
}
|
|
268
234
|
this.messages.push(assistantMessage);
|
|
269
235
|
await this.resolveToolCalls(response.toolCalls);
|
|
236
|
+
// Reset auto-continue counter since model is actively working
|
|
237
|
+
autoContinueAttempts = 0;
|
|
270
238
|
continue;
|
|
271
239
|
}
|
|
272
240
|
const reply = response.content?.trim() ?? '';
|
|
273
|
-
//
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
241
|
+
// Check if model expressed intent to act but didn't call tools
|
|
242
|
+
// This catches "Let me create..." without actual tool calls
|
|
243
|
+
// Only auto-continue if the feature is enabled
|
|
244
|
+
if (this._autoContinueEnabled && shouldAutoContinue(reply, false) && autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS) {
|
|
245
|
+
autoContinueAttempts++;
|
|
246
|
+
// Emit the planning content but mark as non-final
|
|
247
|
+
if (reply) {
|
|
248
|
+
this.emitAssistantMessage(reply, { isFinal: false, usage, contextStats });
|
|
249
|
+
}
|
|
250
|
+
this.messages.push({ role: 'assistant', content: reply });
|
|
251
|
+
// Auto-prompt with increasingly direct instructions
|
|
252
|
+
const promptIndex = Math.min(autoContinueAttempts - 1, AUTO_CONTINUE_PROMPTS.length - 1);
|
|
253
|
+
this.messages.push({
|
|
254
|
+
role: 'user',
|
|
255
|
+
content: AUTO_CONTINUE_PROMPTS[promptIndex],
|
|
256
|
+
});
|
|
257
|
+
const autoContinueMessage = `Model expressed intent but didn't use tools. Auto-prompting to continue...`;
|
|
258
|
+
this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, autoContinueMessage);
|
|
259
|
+
continue;
|
|
277
260
|
}
|
|
278
261
|
if (reply) {
|
|
279
262
|
this.emitAssistantMessage(reply, { isFinal: true, usage, contextStats });
|
|
@@ -295,14 +278,6 @@ export class AgentRuntime {
|
|
|
295
278
|
continue;
|
|
296
279
|
}
|
|
297
280
|
}
|
|
298
|
-
// Auto-retry transient errors (network issues, rate limits, server errors)
|
|
299
|
-
if (isTransientError(error) && transientRetryAttempts < MAX_TRANSIENT_RETRIES) {
|
|
300
|
-
transientRetryAttempts++;
|
|
301
|
-
const delayMs = getRetryDelay(transientRetryAttempts);
|
|
302
|
-
this.callbacks.onRetrying?.(transientRetryAttempts, MAX_TRANSIENT_RETRIES, error);
|
|
303
|
-
await sleep(delayMs);
|
|
304
|
-
continue;
|
|
305
|
-
}
|
|
306
281
|
// Re-throw if not recoverable or recovery failed
|
|
307
282
|
throw error;
|
|
308
283
|
}
|
|
@@ -313,7 +288,7 @@ export class AgentRuntime {
|
|
|
313
288
|
return this.processConversation();
|
|
314
289
|
}
|
|
315
290
|
let contextRecoveryAttempts = 0;
|
|
316
|
-
let transientRetryAttempts = 0;
|
|
291
|
+
let autoContinueAttempts = 0;
|
|
317
292
|
while (true) {
|
|
318
293
|
// Check for cancellation at start of each iteration
|
|
319
294
|
if (this.cancellationRequested) {
|
|
@@ -346,10 +321,45 @@ export class AgentRuntime {
|
|
|
346
321
|
}
|
|
347
322
|
}
|
|
348
323
|
};
|
|
349
|
-
|
|
324
|
+
const buildTimeoutError = (reason) => {
|
|
325
|
+
const base = reason === 'startup-timeout'
|
|
326
|
+
? 'Streaming stalled before any content arrived.'
|
|
327
|
+
: 'Streaming stalled due to inactivity.';
|
|
328
|
+
return new StreamInterruptionError(reason, `${base} Falling back to non-streaming.`, fullContent || reasoningContent);
|
|
329
|
+
};
|
|
330
|
+
// Timer for first token arrival
|
|
331
|
+
let startupTimer = null;
|
|
332
|
+
const startupTimeoutPromise = new Promise((_, reject) => {
|
|
333
|
+
startupTimer = setTimeout(() => reject(buildTimeoutError('startup-timeout')), STREAM_FIRST_CHUNK_TIMEOUT_MS);
|
|
334
|
+
});
|
|
335
|
+
const createIdleTimeout = () => {
|
|
336
|
+
let idleTimer = null;
|
|
337
|
+
const promise = new Promise((_, reject) => {
|
|
338
|
+
idleTimer = setTimeout(() => reject(buildTimeoutError('idle-timeout')), STREAM_INACTIVITY_TIMEOUT_MS);
|
|
339
|
+
});
|
|
340
|
+
const cancel = () => {
|
|
341
|
+
if (idleTimer) {
|
|
342
|
+
clearTimeout(idleTimer);
|
|
343
|
+
idleTimer = null;
|
|
344
|
+
}
|
|
345
|
+
};
|
|
346
|
+
return { promise, cancel };
|
|
347
|
+
};
|
|
348
|
+
let idleTimeout = createIdleTimeout();
|
|
349
|
+
let firstChunkSeen = false;
|
|
350
350
|
try {
|
|
351
351
|
while (true) {
|
|
352
|
-
const result = await iterator.next();
|
|
352
|
+
const races = [
|
|
353
|
+
iterator.next(),
|
|
354
|
+
idleTimeout.promise,
|
|
355
|
+
];
|
|
356
|
+
if (!firstChunkSeen) {
|
|
357
|
+
races.push(startupTimeoutPromise);
|
|
358
|
+
}
|
|
359
|
+
const result = (await Promise.race(races));
|
|
360
|
+
// Reset idle timer for the next iteration
|
|
361
|
+
idleTimeout.cancel();
|
|
362
|
+
idleTimeout = createIdleTimeout();
|
|
353
363
|
// Check for cancellation during streaming
|
|
354
364
|
if (this.cancellationRequested) {
|
|
355
365
|
await closeStream();
|
|
@@ -364,10 +374,17 @@ export class AgentRuntime {
|
|
|
364
374
|
break;
|
|
365
375
|
}
|
|
366
376
|
const chunk = result.value;
|
|
377
|
+
if (!firstChunkSeen) {
|
|
378
|
+
firstChunkSeen = true;
|
|
379
|
+
if (startupTimer) {
|
|
380
|
+
clearTimeout(startupTimer);
|
|
381
|
+
startupTimer = null;
|
|
382
|
+
}
|
|
383
|
+
}
|
|
367
384
|
if (chunk.type === 'reasoning' && chunk.content) {
|
|
368
|
-
// Buffer reasoning content - don't stream token-by-token
|
|
369
|
-
// It will be emitted as a complete block when ready
|
|
370
385
|
reasoningContent += chunk.content;
|
|
386
|
+
// Surface reasoning tokens to the UI so thought process is visible
|
|
387
|
+
this.callbacks.onStreamChunk?.(chunk.content, 'reasoning');
|
|
371
388
|
continue;
|
|
372
389
|
}
|
|
373
390
|
if (chunk.type === 'content' && chunk.content) {
|
|
@@ -380,19 +397,11 @@ export class AgentRuntime {
|
|
|
380
397
|
}
|
|
381
398
|
}
|
|
382
399
|
else if (chunk.type === 'tool_call' && chunk.toolCall) {
|
|
383
|
-
// On first tool call, flush any buffered content
|
|
384
|
-
if (toolCalls.length === 0) {
|
|
385
|
-
// Emit complete reasoning block first
|
|
386
|
-
if (reasoningContent.trim()) {
|
|
387
|
-
this.callbacks.onStreamChunk?.(reasoningContent, 'reasoning');
|
|
388
|
-
}
|
|
389
|
-
// Then emit buffered narration content
|
|
390
|
-
if (suppressStreamNarration && bufferedContent) {
|
|
391
|
-
this.callbacks.onStreamChunk?.(bufferedContent, 'content');
|
|
392
|
-
bufferedContent = '';
|
|
393
|
-
}
|
|
394
|
-
}
|
|
395
400
|
toolCalls.push(chunk.toolCall);
|
|
401
|
+
// Drop any speculative narration once we know the model is actually calling tools
|
|
402
|
+
if (suppressStreamNarration) {
|
|
403
|
+
bufferedContent = '';
|
|
404
|
+
}
|
|
396
405
|
}
|
|
397
406
|
else if (chunk.type === 'usage' && chunk.usage) {
|
|
398
407
|
usage = chunk.usage;
|
|
@@ -400,88 +409,61 @@ export class AgentRuntime {
|
|
|
400
409
|
}
|
|
401
410
|
}
|
|
402
411
|
finally {
|
|
412
|
+
idleTimeout.cancel();
|
|
413
|
+
if (startupTimer) {
|
|
414
|
+
clearTimeout(startupTimer);
|
|
415
|
+
}
|
|
403
416
|
await closeStream();
|
|
404
417
|
}
|
|
405
418
|
// Reset recovery attempts on successful generation
|
|
406
419
|
contextRecoveryAttempts = 0;
|
|
407
420
|
const contextStats = this.getContextStats();
|
|
408
421
|
const combinedContent = fullContent || reasoningContent;
|
|
409
|
-
// If no tool calls were issued,
|
|
410
|
-
if (toolCalls.length === 0) {
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
this.callbacks.onStreamChunk?.(reasoningContent, 'reasoning');
|
|
414
|
-
}
|
|
415
|
-
// Emit buffered narration content
|
|
416
|
-
if (suppressStreamNarration && bufferedContent) {
|
|
417
|
-
this.callbacks.onStreamChunk?.(bufferedContent, 'content');
|
|
418
|
-
bufferedContent = '';
|
|
419
|
-
}
|
|
422
|
+
// If no tool calls were issued, flush any buffered narration now
|
|
423
|
+
if (suppressStreamNarration && toolCalls.length === 0 && bufferedContent) {
|
|
424
|
+
this.callbacks.onStreamChunk?.(bufferedContent, 'content');
|
|
425
|
+
bufferedContent = '';
|
|
420
426
|
}
|
|
421
427
|
// Check if we got tool calls
|
|
422
428
|
if (toolCalls.length > 0) {
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
const behavioralLoopResult = this.checkBehavioralLoop(toolCalls);
|
|
426
|
-
if (behavioralLoopResult) {
|
|
427
|
-
this.emitAssistantMessage(behavioralLoopResult, { isFinal: true, usage, contextStats, wasStreamed: true });
|
|
428
|
-
this.messages.push({ role: 'assistant', content: behavioralLoopResult });
|
|
429
|
-
return behavioralLoopResult;
|
|
430
|
-
}
|
|
431
|
-
// Loop detection: check if same tool calls are being repeated (exact signature match)
|
|
432
|
-
const signatureCalls = toolCalls.filter(call => !this.shouldSkipLoopDetection(call));
|
|
433
|
-
const toolSignature = signatureCalls.length
|
|
434
|
-
? signatureCalls
|
|
435
|
-
.map((t) => `${t.name}:${JSON.stringify(t.arguments)}`)
|
|
436
|
-
.sort()
|
|
437
|
-
.join('|')
|
|
438
|
-
: null;
|
|
439
|
-
if (toolSignature && toolSignature === this.lastToolCallSignature) {
|
|
440
|
-
this.repeatedToolCallCount++;
|
|
441
|
-
if (this.repeatedToolCallCount >= AgentRuntime.MAX_REPEATED_TOOL_CALLS) {
|
|
442
|
-
// Break out of loop - model is stuck
|
|
443
|
-
const loopMsg = `Tool loop detected: same tools called ${this.repeatedToolCallCount} times. Please try a different approach or provide more specific instructions.`;
|
|
444
|
-
this.emitAssistantMessage(loopMsg, { isFinal: true, usage, contextStats, wasStreamed: true });
|
|
445
|
-
this.messages.push({ role: 'assistant', content: loopMsg });
|
|
446
|
-
this.lastToolCallSignature = null;
|
|
447
|
-
this.repeatedToolCallCount = 0;
|
|
448
|
-
return loopMsg;
|
|
449
|
-
}
|
|
450
|
-
}
|
|
451
|
-
else if (toolSignature) {
|
|
452
|
-
this.lastToolCallSignature = toolSignature;
|
|
453
|
-
this.repeatedToolCallCount = 1;
|
|
454
|
-
}
|
|
455
|
-
else {
|
|
456
|
-
this.lastToolCallSignature = null;
|
|
457
|
-
this.repeatedToolCallCount = 0;
|
|
458
|
-
}
|
|
459
|
-
// Content was already streamed via onStreamChunk, just record it for context
|
|
460
|
-
// (wasStreamed=true prevents duplicate display)
|
|
461
|
-
// Note: Acknowledgement injection happens during streaming (when first tool_call chunk arrives)
|
|
462
|
-
const narration = combinedContent.trim();
|
|
429
|
+
const suppressNarration = this.shouldSuppressToolNarration();
|
|
430
|
+
const narration = suppressNarration ? '' : combinedContent.trim();
|
|
463
431
|
if (narration) {
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
usage,
|
|
467
|
-
contextStats,
|
|
468
|
-
wasStreamed: true,
|
|
469
|
-
});
|
|
432
|
+
// Mark as wasStreamed since content was already output via onStreamChunk
|
|
433
|
+
this.emitAssistantMessage(narration, { isFinal: false, usage, contextStats, wasStreamed: true });
|
|
470
434
|
}
|
|
471
435
|
const assistantMessage = {
|
|
472
436
|
role: 'assistant',
|
|
473
|
-
content: combinedContent,
|
|
437
|
+
content: suppressNarration ? '' : combinedContent,
|
|
474
438
|
toolCalls,
|
|
475
439
|
};
|
|
476
440
|
this.messages.push(assistantMessage);
|
|
477
441
|
await this.resolveToolCalls(toolCalls);
|
|
442
|
+
// Reset auto-continue counter since model is actively working
|
|
443
|
+
autoContinueAttempts = 0;
|
|
478
444
|
continue;
|
|
479
445
|
}
|
|
446
|
+
// Check if model expressed intent to act but didn't call tools
|
|
447
|
+
// This catches "Let me create..." without actual tool calls
|
|
448
|
+
// Only auto-continue if the feature is enabled
|
|
480
449
|
const reply = combinedContent.trim();
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
450
|
+
if (this._autoContinueEnabled && shouldAutoContinue(reply, false) && autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS) {
|
|
451
|
+
autoContinueAttempts++;
|
|
452
|
+
// Emit the planning content but mark as non-final
|
|
453
|
+
// Mark as wasStreamed since content was already output via onStreamChunk
|
|
454
|
+
if (reply) {
|
|
455
|
+
this.emitAssistantMessage(reply, { isFinal: false, usage, contextStats, wasStreamed: true });
|
|
456
|
+
}
|
|
457
|
+
this.messages.push({ role: 'assistant', content: reply });
|
|
458
|
+
// Auto-prompt with increasingly direct instructions
|
|
459
|
+
const promptIndex = Math.min(autoContinueAttempts - 1, AUTO_CONTINUE_PROMPTS.length - 1);
|
|
460
|
+
this.messages.push({
|
|
461
|
+
role: 'user',
|
|
462
|
+
content: AUTO_CONTINUE_PROMPTS[promptIndex],
|
|
463
|
+
});
|
|
464
|
+
const autoContinueMessage = `Model expressed intent but didn't use tools. Auto-prompting to continue...`;
|
|
465
|
+
this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, autoContinueMessage);
|
|
466
|
+
continue;
|
|
485
467
|
}
|
|
486
468
|
// Final message - mark as streamed to avoid double-display in UI
|
|
487
469
|
if (reply) {
|
|
@@ -504,14 +486,6 @@ export class AgentRuntime {
|
|
|
504
486
|
continue;
|
|
505
487
|
}
|
|
506
488
|
}
|
|
507
|
-
// Auto-retry transient errors (network issues, rate limits, server errors)
|
|
508
|
-
if (isTransientError(error) && transientRetryAttempts < MAX_TRANSIENT_RETRIES) {
|
|
509
|
-
transientRetryAttempts++;
|
|
510
|
-
const delayMs = getRetryDelay(transientRetryAttempts);
|
|
511
|
-
this.callbacks.onRetrying?.(transientRetryAttempts, MAX_TRANSIENT_RETRIES, error);
|
|
512
|
-
await sleep(delayMs);
|
|
513
|
-
continue;
|
|
514
|
-
}
|
|
515
489
|
// Re-throw if not recoverable or recovery failed
|
|
516
490
|
throw error;
|
|
517
491
|
}
|
|
@@ -543,23 +517,7 @@ export class AgentRuntime {
|
|
|
543
517
|
// Fast path: single tool call
|
|
544
518
|
if (numCalls === 1) {
|
|
545
519
|
const call = toolCalls[0];
|
|
546
|
-
// Check cache first - prevent duplicate identical tool calls
|
|
547
|
-
const cached = this.getCachedToolResult(call);
|
|
548
|
-
if (cached !== null) {
|
|
549
|
-
// Return cached result with indicator that it was from cache
|
|
550
|
-
this.messages.push({
|
|
551
|
-
role: 'tool',
|
|
552
|
-
name: call.name,
|
|
553
|
-
toolCallId: call.id,
|
|
554
|
-
content: `[Cached result - identical call already executed]\n\n${cached}`,
|
|
555
|
-
});
|
|
556
|
-
return;
|
|
557
|
-
}
|
|
558
|
-
this.callbacks.onToolExecution?.(call.name, true);
|
|
559
520
|
const output = await this.toolRuntime.execute(call);
|
|
560
|
-
this.callbacks.onToolExecution?.(call.name, false);
|
|
561
|
-
// Cache the result for future identical calls
|
|
562
|
-
this.cacheToolResult(call, output);
|
|
563
521
|
this.messages.push({
|
|
564
522
|
role: 'tool',
|
|
565
523
|
name: call.name,
|
|
@@ -569,106 +527,56 @@ export class AgentRuntime {
|
|
|
569
527
|
return;
|
|
570
528
|
}
|
|
571
529
|
// PERF: For reasonable batch sizes, execute all in parallel
|
|
572
|
-
// Check cache for each call and only execute non-cached ones
|
|
573
530
|
if (numCalls <= 10) {
|
|
574
|
-
const
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
// Execute non-cached calls in parallel
|
|
587
|
-
if (toExecute.length > 0) {
|
|
588
|
-
const toolNames = toExecute.map(c => c.name).join(', ');
|
|
589
|
-
this.callbacks.onToolExecution?.(toolNames, true);
|
|
590
|
-
const executed = await Promise.all(toExecute.map(async (call) => {
|
|
591
|
-
const output = await this.toolRuntime.execute(call);
|
|
592
|
-
this.cacheToolResult(call, output);
|
|
593
|
-
return { call, output, fromCache: false };
|
|
594
|
-
}));
|
|
595
|
-
this.callbacks.onToolExecution?.(toolNames, false);
|
|
596
|
-
cachedResults.push(...executed);
|
|
597
|
-
}
|
|
598
|
-
// Add all results to messages in the original order
|
|
599
|
-
for (const originalCall of toolCalls) {
|
|
600
|
-
const result = cachedResults.find(r => r.call.id === originalCall.id);
|
|
601
|
-
if (result) {
|
|
602
|
-
const content = result.fromCache
|
|
603
|
-
? `[Cached result - identical call already executed]\n\n${result.output}`
|
|
604
|
-
: result.output;
|
|
605
|
-
this.messages.push({
|
|
606
|
-
role: 'tool',
|
|
607
|
-
name: result.call.name,
|
|
608
|
-
toolCallId: result.call.id,
|
|
609
|
-
content,
|
|
610
|
-
});
|
|
611
|
-
}
|
|
531
|
+
const results = await Promise.all(toolCalls.map(async (call) => ({
|
|
532
|
+
call,
|
|
533
|
+
output: await this.toolRuntime.execute(call),
|
|
534
|
+
})));
|
|
535
|
+
// Add results to messages in the same order as tool calls
|
|
536
|
+
for (const { call, output } of results) {
|
|
537
|
+
this.messages.push({
|
|
538
|
+
role: 'tool',
|
|
539
|
+
name: call.name,
|
|
540
|
+
toolCallId: call.id,
|
|
541
|
+
content: output,
|
|
542
|
+
});
|
|
612
543
|
}
|
|
613
544
|
return;
|
|
614
545
|
}
|
|
615
|
-
// PERF: For large batches, use chunked parallel execution
|
|
546
|
+
// PERF: For large batches, use chunked parallel execution
|
|
547
|
+
// This prevents memory pressure from too many concurrent operations
|
|
616
548
|
const CHUNK_SIZE = 8;
|
|
617
|
-
const
|
|
549
|
+
const results = [];
|
|
618
550
|
for (let i = 0; i < numCalls; i += CHUNK_SIZE) {
|
|
619
551
|
const chunk = toolCalls.slice(i, i + CHUNK_SIZE);
|
|
620
|
-
const
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
const executed = await Promise.all(toExecuteInChunk.map(async (call) => {
|
|
635
|
-
const output = await this.toolRuntime.execute(call);
|
|
636
|
-
this.cacheToolResult(call, output);
|
|
637
|
-
return { call, output, fromCache: false };
|
|
638
|
-
}));
|
|
639
|
-
this.callbacks.onToolExecution?.(chunkNames, false);
|
|
640
|
-
cachedInChunk.push(...executed);
|
|
641
|
-
}
|
|
642
|
-
allResults.push(...cachedInChunk);
|
|
643
|
-
}
|
|
644
|
-
// Add results to messages in original order
|
|
645
|
-
for (const originalCall of toolCalls) {
|
|
646
|
-
const result = allResults.find(r => r.call.id === originalCall.id);
|
|
647
|
-
if (result) {
|
|
648
|
-
const content = result.fromCache
|
|
649
|
-
? `[Cached result - identical call already executed]\n\n${result.output}`
|
|
650
|
-
: result.output;
|
|
651
|
-
this.messages.push({
|
|
652
|
-
role: 'tool',
|
|
653
|
-
name: result.call.name,
|
|
654
|
-
toolCallId: result.call.id,
|
|
655
|
-
content,
|
|
656
|
-
});
|
|
657
|
-
}
|
|
552
|
+
const chunkResults = await Promise.all(chunk.map(async (call) => ({
|
|
553
|
+
call,
|
|
554
|
+
output: await this.toolRuntime.execute(call),
|
|
555
|
+
})));
|
|
556
|
+
results.push(...chunkResults);
|
|
557
|
+
}
|
|
558
|
+
// Add results to messages in order
|
|
559
|
+
for (const { call, output } of results) {
|
|
560
|
+
this.messages.push({
|
|
561
|
+
role: 'tool',
|
|
562
|
+
name: call.name,
|
|
563
|
+
toolCallId: call.id,
|
|
564
|
+
content: output,
|
|
565
|
+
});
|
|
658
566
|
}
|
|
659
567
|
}
|
|
660
568
|
get providerTools() {
|
|
661
569
|
return this.toolRuntime.listProviderTools();
|
|
662
570
|
}
|
|
663
571
|
/**
|
|
664
|
-
*
|
|
665
|
-
*
|
|
572
|
+
* OpenAI models frequently add speculative tool narration in the content field.
|
|
573
|
+
* Suppress that text to avoid surfacing hallucinated tool usage in the UI.
|
|
666
574
|
*/
|
|
667
575
|
shouldSuppressToolNarration() {
|
|
668
|
-
return
|
|
576
|
+
return this.providerId.toLowerCase().includes('openai');
|
|
669
577
|
}
|
|
670
578
|
emitAssistantMessage(content, metadata) {
|
|
671
|
-
if (!content
|
|
579
|
+
if (!content) {
|
|
672
580
|
return;
|
|
673
581
|
}
|
|
674
582
|
const elapsedMs = this.activeRun ? Date.now() - this.activeRun.startedAt : undefined;
|
|
@@ -718,165 +626,6 @@ export class AgentRuntime {
|
|
|
718
626
|
model: this.modelId,
|
|
719
627
|
});
|
|
720
628
|
}
|
|
721
|
-
/**
|
|
722
|
-
* Extract a "command hash" from tool arguments for behavioral loop detection.
|
|
723
|
-
* For execute_bash, this is the actual command. For other tools, key identifying args.
|
|
724
|
-
*/
|
|
725
|
-
extractCmdHash(name, args) {
|
|
726
|
-
// For bash/execute commands, extract the command itself
|
|
727
|
-
if (name === 'execute_bash' || name === 'Bash') {
|
|
728
|
-
const cmd = args['command'];
|
|
729
|
-
if (cmd) {
|
|
730
|
-
// Normalize: trim, take first 100 chars, remove variable parts like timestamps
|
|
731
|
-
return cmd.trim().slice(0, 100).replace(/\d{10,}/g, 'N');
|
|
732
|
-
}
|
|
733
|
-
}
|
|
734
|
-
// For file operations, use the path
|
|
735
|
-
if (name === 'read_file' || name === 'Read' || name === 'read_files') {
|
|
736
|
-
const path = args['path'] || args['file_path'] || args['paths'];
|
|
737
|
-
if (path)
|
|
738
|
-
return `path:${JSON.stringify(path).slice(0, 100)}`;
|
|
739
|
-
}
|
|
740
|
-
if (name === 'list_files' || name === 'Glob') {
|
|
741
|
-
const path = args['path'] || args['pattern'];
|
|
742
|
-
if (path)
|
|
743
|
-
return `path:${JSON.stringify(path).slice(0, 100)}`;
|
|
744
|
-
}
|
|
745
|
-
// For search, use the query/pattern
|
|
746
|
-
if (name === 'Grep' || name === 'grep' || name === 'search') {
|
|
747
|
-
const pattern = args['pattern'] || args['query'];
|
|
748
|
-
if (pattern)
|
|
749
|
-
return `search:${String(pattern).slice(0, 100)}`;
|
|
750
|
-
}
|
|
751
|
-
// Default: use first significant arg value
|
|
752
|
-
const firstArg = Object.values(args)[0];
|
|
753
|
-
if (firstArg) {
|
|
754
|
-
return String(firstArg).slice(0, 100);
|
|
755
|
-
}
|
|
756
|
-
return 'no-args';
|
|
757
|
-
}
|
|
758
|
-
/**
|
|
759
|
-
* Check for behavioral loops - model calling the same tool with similar args repeatedly.
|
|
760
|
-
* Returns an error message if a loop is detected, null otherwise.
|
|
761
|
-
*
|
|
762
|
-
* FUNDAMENTAL PREVENTION: Cached calls are excluded from loop detection since they
|
|
763
|
-
* don't actually execute (the cache provides the result). This means:
|
|
764
|
-
* - First call: executes and caches result
|
|
765
|
-
* - Second identical call: returns cached result, NOT counted toward loop
|
|
766
|
-
* - Only genuinely NEW (non-cached) repetitive calls trigger loop detection
|
|
767
|
-
*
|
|
768
|
-
* Direct execution tools (bash/edit) are also exempt to avoid short-circuiting
|
|
769
|
-
* legitimate repeated user commands.
|
|
770
|
-
*
|
|
771
|
-
* This catches patterns like:
|
|
772
|
-
* - "git status -sb" called 3 times with DIFFERENT outputs (cache miss each time)
|
|
773
|
-
* - Repeated file reads where file content changed
|
|
774
|
-
* - Repeated searches with same pattern but new results
|
|
775
|
-
*/
|
|
776
|
-
checkBehavioralLoop(toolCalls) {
|
|
777
|
-
// Skip loop detection for direct execution tools (bash/edit) to avoid false positives
|
|
778
|
-
const loopEligibleCalls = toolCalls.filter(call => !this.shouldSkipLoopDetection(call));
|
|
779
|
-
if (loopEligibleCalls.length === 0) {
|
|
780
|
-
return null;
|
|
781
|
-
}
|
|
782
|
-
// Filter out calls that will be served from cache - these don't count toward loops
|
|
783
|
-
// since they're handled fundamentally by the caching mechanism
|
|
784
|
-
const nonCachedCalls = loopEligibleCalls.filter(call => this.getCachedToolResult(call) === null);
|
|
785
|
-
// If all calls are cached, no loop detection needed
|
|
786
|
-
if (nonCachedCalls.length === 0) {
|
|
787
|
-
return null;
|
|
788
|
-
}
|
|
789
|
-
// Count existing occurrences in recent history
|
|
790
|
-
const existingCounts = new Map();
|
|
791
|
-
for (const { name, cmdHash } of this.recentToolCalls) {
|
|
792
|
-
const key = `${name}:${cmdHash}`;
|
|
793
|
-
existingCounts.set(key, (existingCounts.get(key) ?? 0) + 1);
|
|
794
|
-
}
|
|
795
|
-
// Check if ANY incoming NON-CACHED call would exceed threshold
|
|
796
|
-
for (const call of nonCachedCalls) {
|
|
797
|
-
const cmdHash = this.extractCmdHash(call.name, call.arguments ?? {});
|
|
798
|
-
const key = `${call.name}:${cmdHash}`;
|
|
799
|
-
const currentCount = existingCounts.get(key) ?? 0;
|
|
800
|
-
// If adding this call would reach or exceed threshold, block immediately
|
|
801
|
-
if (currentCount + 1 >= AgentRuntime.BEHAVIORAL_LOOP_THRESHOLD) {
|
|
802
|
-
// Reset history to prevent immediate re-trigger
|
|
803
|
-
this.recentToolCalls = [];
|
|
804
|
-
return `Behavioral loop detected: "${call.name}" called ${currentCount + 1} times with similar arguments. The task appears stuck. Please try a different approach or provide more specific instructions.`;
|
|
805
|
-
}
|
|
806
|
-
}
|
|
807
|
-
// Track only non-cached tool calls (cached ones are handled by caching)
|
|
808
|
-
for (const call of nonCachedCalls) {
|
|
809
|
-
const cmdHash = this.extractCmdHash(call.name, call.arguments ?? {});
|
|
810
|
-
this.recentToolCalls.push({ name: call.name, cmdHash });
|
|
811
|
-
}
|
|
812
|
-
// Keep only recent history
|
|
813
|
-
while (this.recentToolCalls.length > AgentRuntime.TOOL_HISTORY_SIZE) {
|
|
814
|
-
this.recentToolCalls.shift();
|
|
815
|
-
}
|
|
816
|
-
return null;
|
|
817
|
-
}
|
|
818
|
-
/**
|
|
819
|
-
* Reset behavioral loop tracking (called when user provides new input or task completes)
|
|
820
|
-
*/
|
|
821
|
-
resetBehavioralLoopTracking() {
|
|
822
|
-
this.recentToolCalls = [];
|
|
823
|
-
this.lastToolCallSignature = null;
|
|
824
|
-
this.repeatedToolCallCount = 0;
|
|
825
|
-
// Note: we DON'T clear toolResultCache here for cacheable tools; stateful tools bypass caching
|
|
826
|
-
}
|
|
827
|
-
/**
|
|
828
|
-
* Create a stable cache key for a tool call based on name and arguments
|
|
829
|
-
*/
|
|
830
|
-
getToolCacheKey(call) {
|
|
831
|
-
const args = call.arguments ?? {};
|
|
832
|
-
// Sort keys for consistent ordering
|
|
833
|
-
const sortedArgs = Object.keys(args).sort().reduce((acc, key) => {
|
|
834
|
-
acc[key] = args[key];
|
|
835
|
-
return acc;
|
|
836
|
-
}, {});
|
|
837
|
-
return `${call.name}:${JSON.stringify(sortedArgs)}`;
|
|
838
|
-
}
|
|
839
|
-
/**
|
|
840
|
-
* Only cache tools that are safe to reuse; stateful commands must always execute.
|
|
841
|
-
*/
|
|
842
|
-
isCacheableTool(call) {
|
|
843
|
-
const nameLower = call.name.toLowerCase();
|
|
844
|
-
return !AgentRuntime.NON_CACHEABLE_TOOL_NAMES.has(nameLower);
|
|
845
|
-
}
|
|
846
|
-
/**
|
|
847
|
-
* Direct execution tools should not trigger behavioral loop short-circuiting.
|
|
848
|
-
*/
|
|
849
|
-
shouldSkipLoopDetection(call) {
|
|
850
|
-
const nameLower = call.name.toLowerCase();
|
|
851
|
-
return AgentRuntime.LOOP_EXEMPT_TOOL_NAMES.has(nameLower);
|
|
852
|
-
}
|
|
853
|
-
/**
|
|
854
|
-
* Get cached result for a tool call, or null if not cached
|
|
855
|
-
*/
|
|
856
|
-
getCachedToolResult(call) {
|
|
857
|
-
if (!this.isCacheableTool(call)) {
|
|
858
|
-
return null;
|
|
859
|
-
}
|
|
860
|
-
const key = this.getToolCacheKey(call);
|
|
861
|
-
return this.toolResultCache.get(key) ?? null;
|
|
862
|
-
}
|
|
863
|
-
/**
|
|
864
|
-
* Cache a tool result for future identical calls
|
|
865
|
-
*/
|
|
866
|
-
cacheToolResult(call, result) {
|
|
867
|
-
if (!this.isCacheableTool(call)) {
|
|
868
|
-
return;
|
|
869
|
-
}
|
|
870
|
-
const key = this.getToolCacheKey(call);
|
|
871
|
-
// Evict oldest entries if cache is full
|
|
872
|
-
if (this.toolResultCache.size >= AgentRuntime.TOOL_CACHE_MAX_SIZE) {
|
|
873
|
-
const firstKey = this.toolResultCache.keys().next().value;
|
|
874
|
-
if (firstKey) {
|
|
875
|
-
this.toolResultCache.delete(firstKey);
|
|
876
|
-
}
|
|
877
|
-
}
|
|
878
|
-
this.toolResultCache.set(key, result);
|
|
879
|
-
}
|
|
880
629
|
getHistory() {
|
|
881
630
|
return this.messages.map(cloneMessage);
|
|
882
631
|
}
|