erosolar-cli 2.1.167 → 2.1.168
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents/erosolar-code.rules.json +2 -2
- package/agents/general.rules.json +3 -21
- package/dist/StringUtils.d.ts +8 -0
- package/dist/StringUtils.d.ts.map +1 -0
- package/dist/StringUtils.js +11 -0
- package/dist/StringUtils.js.map +1 -0
- package/dist/capabilities/statusCapability.js +2 -2
- package/dist/capabilities/statusCapability.js.map +1 -1
- package/dist/contracts/agent-schemas.json +0 -5
- package/dist/core/agent.d.ts +11 -72
- package/dist/core/agent.d.ts.map +1 -1
- package/dist/core/agent.js +182 -869
- package/dist/core/agent.js.map +1 -1
- package/dist/core/aiFlowSupervisor.d.ts +44 -0
- package/dist/core/aiFlowSupervisor.d.ts.map +1 -0
- package/dist/core/aiFlowSupervisor.js +299 -0
- package/dist/core/aiFlowSupervisor.js.map +1 -0
- package/dist/core/cliTestHarness.d.ts +200 -0
- package/dist/core/cliTestHarness.d.ts.map +1 -0
- package/dist/core/cliTestHarness.js +549 -0
- package/dist/core/cliTestHarness.js.map +1 -0
- package/dist/core/preferences.d.ts +0 -1
- package/dist/core/preferences.d.ts.map +1 -1
- package/dist/core/preferences.js +2 -9
- package/dist/core/preferences.js.map +1 -1
- package/dist/core/schemaValidator.js +3 -3
- package/dist/core/schemaValidator.js.map +1 -1
- package/dist/core/testUtils.d.ts +121 -0
- package/dist/core/testUtils.d.ts.map +1 -0
- package/dist/core/testUtils.js +235 -0
- package/dist/core/testUtils.js.map +1 -0
- package/dist/core/toolPreconditions.d.ts +11 -0
- package/dist/core/toolPreconditions.d.ts.map +1 -1
- package/dist/core/toolPreconditions.js +164 -33
- package/dist/core/toolPreconditions.js.map +1 -1
- package/dist/core/toolRuntime.d.ts.map +1 -1
- package/dist/core/toolRuntime.js +114 -9
- package/dist/core/toolRuntime.js.map +1 -1
- package/dist/core/toolValidation.d.ts +116 -0
- package/dist/core/toolValidation.d.ts.map +1 -0
- package/dist/core/toolValidation.js +282 -0
- package/dist/core/toolValidation.js.map +1 -0
- package/dist/core/updateChecker.d.ts +1 -61
- package/dist/core/updateChecker.d.ts.map +1 -1
- package/dist/core/updateChecker.js +3 -147
- package/dist/core/updateChecker.js.map +1 -1
- package/dist/headless/headlessApp.d.ts.map +1 -1
- package/dist/headless/headlessApp.js +39 -0
- package/dist/headless/headlessApp.js.map +1 -1
- package/dist/plugins/tools/nodeDefaults.d.ts.map +1 -1
- package/dist/plugins/tools/nodeDefaults.js +2 -0
- package/dist/plugins/tools/nodeDefaults.js.map +1 -1
- package/dist/providers/openaiResponsesProvider.d.ts.map +1 -1
- package/dist/providers/openaiResponsesProvider.js +74 -79
- package/dist/providers/openaiResponsesProvider.js.map +1 -1
- package/dist/runtime/agentController.d.ts.map +1 -1
- package/dist/runtime/agentController.js +0 -6
- package/dist/runtime/agentController.js.map +1 -1
- package/dist/runtime/agentSession.d.ts.map +1 -1
- package/dist/runtime/agentSession.js +2 -3
- package/dist/runtime/agentSession.js.map +1 -1
- package/dist/shell/interactiveShell.d.ts +8 -16
- package/dist/shell/interactiveShell.d.ts.map +1 -1
- package/dist/shell/interactiveShell.js +159 -388
- package/dist/shell/interactiveShell.js.map +1 -1
- package/dist/shell/systemPrompt.d.ts.map +1 -1
- package/dist/shell/systemPrompt.js +15 -4
- package/dist/shell/systemPrompt.js.map +1 -1
- package/dist/subagents/taskRunner.js +1 -2
- package/dist/subagents/taskRunner.js.map +1 -1
- package/dist/tools/bashTools.d.ts.map +1 -1
- package/dist/tools/bashTools.js +8 -101
- package/dist/tools/bashTools.js.map +1 -1
- package/dist/tools/diffUtils.d.ts +2 -8
- package/dist/tools/diffUtils.d.ts.map +1 -1
- package/dist/tools/diffUtils.js +13 -72
- package/dist/tools/diffUtils.js.map +1 -1
- package/dist/tools/grepTools.d.ts.map +1 -1
- package/dist/tools/grepTools.js +2 -10
- package/dist/tools/grepTools.js.map +1 -1
- package/dist/tools/searchTools.d.ts.map +1 -1
- package/dist/tools/searchTools.js +2 -4
- package/dist/tools/searchTools.js.map +1 -1
- package/dist/ui/PromptController.d.ts +0 -2
- package/dist/ui/PromptController.d.ts.map +1 -1
- package/dist/ui/PromptController.js +0 -2
- package/dist/ui/PromptController.js.map +1 -1
- package/dist/ui/ShellUIAdapter.d.ts +18 -71
- package/dist/ui/ShellUIAdapter.d.ts.map +1 -1
- package/dist/ui/ShellUIAdapter.js +139 -237
- package/dist/ui/ShellUIAdapter.js.map +1 -1
- package/dist/ui/UnifiedUIController.d.ts +1 -0
- package/dist/ui/UnifiedUIController.d.ts.map +1 -1
- package/dist/ui/UnifiedUIController.js +1 -0
- package/dist/ui/UnifiedUIController.js.map +1 -1
- package/dist/ui/UnifiedUIRenderer.d.ts +5 -122
- package/dist/ui/UnifiedUIRenderer.d.ts.map +1 -1
- package/dist/ui/UnifiedUIRenderer.js +125 -830
- package/dist/ui/UnifiedUIRenderer.js.map +1 -1
- package/dist/ui/compactRenderer.d.ts +139 -0
- package/dist/ui/compactRenderer.d.ts.map +1 -0
- package/dist/ui/compactRenderer.js +398 -0
- package/dist/ui/compactRenderer.js.map +1 -0
- package/dist/ui/display.d.ts +48 -13
- package/dist/ui/display.d.ts.map +1 -1
- package/dist/ui/display.js +105 -22
- package/dist/ui/display.js.map +1 -1
- package/dist/ui/streamingFormatter.d.ts +30 -0
- package/dist/ui/streamingFormatter.d.ts.map +1 -0
- package/dist/ui/streamingFormatter.js +91 -0
- package/dist/ui/streamingFormatter.js.map +1 -0
- package/dist/ui/unified/index.d.ts +1 -1
- package/dist/ui/unified/index.d.ts.map +1 -1
- package/dist/ui/unified/index.js +2 -0
- package/dist/ui/unified/index.js.map +1 -1
- package/dist/utils/errorUtils.d.ts +16 -0
- package/dist/utils/errorUtils.d.ts.map +1 -0
- package/dist/utils/errorUtils.js +66 -0
- package/dist/utils/errorUtils.js.map +1 -0
- package/package.json +2 -1
- package/dist/core/reliabilityPrompt.d.ts +0 -9
- package/dist/core/reliabilityPrompt.d.ts.map +0 -1
- package/dist/core/reliabilityPrompt.js +0 -31
- package/dist/core/reliabilityPrompt.js.map +0 -1
- package/dist/ui/animatedStatus.d.ts +0 -129
- package/dist/ui/animatedStatus.d.ts.map +0 -1
- package/dist/ui/animatedStatus.js +0 -384
- package/dist/ui/animatedStatus.js.map +0 -1
package/dist/core/agent.js
CHANGED
|
@@ -6,347 +6,62 @@ import { safeErrorMessage } from './secretStore.js';
|
|
|
6
6
|
const MAX_CONTEXT_RECOVERY_ATTEMPTS = 3;
|
|
7
7
|
/**
|
|
8
8
|
* Maximum number of auto-continuation attempts when model expresses intent but doesn't act
|
|
9
|
-
* Increased to allow more recovery attempts for complex tasks
|
|
10
9
|
*/
|
|
11
|
-
const MAX_AUTO_CONTINUE_ATTEMPTS =
|
|
10
|
+
const MAX_AUTO_CONTINUE_ATTEMPTS = 3;
|
|
12
11
|
/**
|
|
13
|
-
*
|
|
14
|
-
*
|
|
12
|
+
* Streaming safety timeouts (ms)
|
|
13
|
+
* - First chunk timeout: fail fast if the stream never starts
|
|
14
|
+
* - Inactivity timeout: abort if no chunks arrive for an extended period
|
|
15
15
|
*/
|
|
16
|
-
|
|
17
|
-
const
|
|
18
|
-
|
|
16
|
+
// Allow more headroom before declaring a streaming stall to avoid premature fallbacks.
|
|
17
|
+
const STREAM_FIRST_CHUNK_TIMEOUT_MS = 25000;
|
|
18
|
+
const STREAM_INACTIVITY_TIMEOUT_MS = 60000;
|
|
19
19
|
/**
|
|
20
20
|
* Patterns that indicate the model intends to take action but hasn't yet
|
|
21
21
|
* These suggest the model should be prompted to continue
|
|
22
22
|
*/
|
|
23
23
|
const INTENT_WITHOUT_ACTION_PATTERNS = [
|
|
24
|
-
// TEXT-FORMATTED TOOL CALLS: Model outputs tool call as text instead of using API
|
|
25
|
-
// These are CRITICAL to catch - model is trying to call tools but failing to do so properly
|
|
26
|
-
// Pattern: "_tool_call_" or similar markers followed by function-like syntax
|
|
27
|
-
/_tool_call_\s*\n?\s*\w+\(/im,
|
|
28
|
-
// Pattern: "tool_call:" or "Tool call:" followed by function name
|
|
29
|
-
/tool[_\s]?call:?\s*\n?\s*\w+\(/im,
|
|
30
|
-
// Pattern: function call syntax at start of line like "read_file(..." without being in code block
|
|
31
|
-
/^\s*(read_file|write_file|edit_file|execute_bash|list_files|search|grep|glob)\s*\(/im,
|
|
32
|
-
// Pattern: "Executing X..." or "Calling X..." without actual tool call
|
|
33
|
-
/\b(executing|calling)\s+(read_file|write_file|edit_file|execute_bash|list_files|search|grep|glob)\b/i,
|
|
34
|
-
// Pattern: indented tool call syntax like " read_file(package.json)"
|
|
35
|
-
/^\s{2,}(read_file|write_file|edit_file|execute_bash|list_files|Bash|Read|Write|Edit|Grep|Glob)\s*\([^)]*\)\s*$/im,
|
|
36
24
|
// "Let me X" patterns - model is stating what it will do
|
|
37
|
-
/\blet me\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|examine|review
|
|
38
|
-
// "I'll X" / "I will X" patterns
|
|
39
|
-
/\bi[''
|
|
40
|
-
/\bi will\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|start|begin|now
|
|
41
|
-
// "
|
|
42
|
-
/\
|
|
43
|
-
// "I'm going to X" patterns - include all apostrophe variants
|
|
44
|
-
/\bi['''\u2018\u2019]m going to\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|run)/i,
|
|
25
|
+
/\blet me\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|examine|review)/i,
|
|
26
|
+
// "I'll X" / "I will X" patterns
|
|
27
|
+
/\bi['']ll\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|start|begin|now)/i,
|
|
28
|
+
/\bi will\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|start|begin|now)/i,
|
|
29
|
+
// "I'm going to X" patterns
|
|
30
|
+
/\bi['']m going to\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze)/i,
|
|
45
31
|
// "Now I'll X" / "First, I'll X" patterns
|
|
46
|
-
/\b(now|first|next)\s*(,)?\s*i[''
|
|
32
|
+
/\b(now|first|next)\s*(,)?\s*i['']ll\s+/i,
|
|
47
33
|
// Explicit continuation signals
|
|
48
|
-
/\bhere[''
|
|
49
|
-
// "Approach:" header indicates planning without action
|
|
50
|
-
/\bapproach:/i,
|
|
34
|
+
/\bhere['']s (the|my) (plan|approach|solution|implementation)/i,
|
|
51
35
|
// Numbered steps suggesting action to come
|
|
52
36
|
/^\s*\d+\.\s+(create|write|implement|add|update|edit|modify|fix|show|read|check)/im,
|
|
53
37
|
// Bullet points suggesting planned actions
|
|
54
38
|
/^[\s•\-\*]+\s*(create|write|implement|add|update|edit|modify|fix|shows?|reads?|checks?)\s/im,
|
|
55
|
-
// Statements about needing to do something
|
|
56
|
-
/\bneed\s+(to\s+)?(identify|search|find|check|read|look|inspect|analyze|examine|review|list|remove)/i,
|
|
57
|
-
// "Should be X" - model is describing action but not taking it
|
|
58
|
-
/\bshould be\s+(deleted|removed|fixed|updated|changed|cleaned|cleared)/i,
|
|
59
|
-
// "Can be X" - same pattern
|
|
60
|
-
/\bcan be\s+(deleted|removed|fixed|updated|changed|cleaned|cleared)/i,
|
|
61
|
-
// Questions that should trigger investigation instead of asking user
|
|
62
|
-
/\bwhat (files?|areas?|code|patterns?)\s+(should|would you like|do you want)/i,
|
|
63
|
-
// GIVING UP PATTERNS - model is asking for clarification instead of investigating
|
|
64
|
-
/\bplease\s+(point me to|show me|tell me|specify|clarify|provide)/i,
|
|
65
|
-
/\bwithout\s+(more|additional|further)\s+(details?|info|information|context|guidance)/i,
|
|
66
|
-
/\bcan you\s+(clarify|specify|tell me|point me|show me)/i,
|
|
67
|
-
/\blet me know\s+(what|which|where|how)/i,
|
|
68
|
-
/\b(no|can['\u2018\u2019]t|cannot)\s+(obvious|clear|specific|find|identify|determine)/i,
|
|
69
|
-
// "I don't have enough" type responses
|
|
70
|
-
/\bdon['\u2018\u2019]t have\s+(enough|sufficient|the)\s+(info|information|context|details)/i,
|
|
71
|
-
// "Could you" requests for clarification
|
|
72
|
-
/\bcould you\s+(provide|specify|clarify|point|tell|show)/i,
|
|
73
39
|
];
|
|
74
|
-
/**
|
|
75
|
-
* Patterns that indicate the model gave a premature summary without concrete findings.
|
|
76
|
-
* These summaries wrap up too quickly without file:line citations or code snippets.
|
|
77
|
-
*/
|
|
78
|
-
const PREMATURE_CONCLUSION_PATTERNS = [
|
|
79
|
-
// "Performed/Did a quick X" - surface-level investigation claim
|
|
80
|
-
/\b(performed|did)\s+a\s+(quick|brief|fast)\s+(search|grep|scan|review|check)/i,
|
|
81
|
-
// "Validated by running" - claiming verification without showing details
|
|
82
|
-
/\bvalidated\s+by\s+running/i,
|
|
83
|
-
// Mentioning matches but not examining them: "found X matches" without file paths
|
|
84
|
-
/\bfound\s+\d+\s+(match|result|item|file|issue)/i,
|
|
85
|
-
// Summary without substance: "cleaned up", "removed", "fixed" but no specifics
|
|
86
|
-
// Allow any words between the article and the target noun (e.g., "removed the .tsbuildinfo build cache")
|
|
87
|
-
/\b(cleaned up|removed|fixed)\s+(the|a|some)?\s*\S*\s*(file|cache|build|artifact)/i,
|
|
88
|
-
];
|
|
89
|
-
/**
|
|
90
|
-
* Check if response looks like a premature conclusion without concrete findings.
|
|
91
|
-
* Looks for summary language without actual file:line citations.
|
|
92
|
-
*/
|
|
93
|
-
function isPrematureConclusion(content) {
|
|
94
|
-
// Must match a premature conclusion pattern
|
|
95
|
-
const matchesPremature = PREMATURE_CONCLUSION_PATTERNS.some(p => p.test(content));
|
|
96
|
-
if (!matchesPremature) {
|
|
97
|
-
return false;
|
|
98
|
-
}
|
|
99
|
-
// Check if there are actual file:line citations (e.g., "src/foo.ts:42")
|
|
100
|
-
const hasFileCitation = /\b\w+\.(ts|js|tsx|jsx|py|go|rs|java|c|cpp|h|md|json|yaml|yml):\d+\b/.test(content);
|
|
101
|
-
if (hasFileCitation) {
|
|
102
|
-
return false; // Has concrete findings, not premature
|
|
103
|
-
}
|
|
104
|
-
// Check for code blocks with actual code
|
|
105
|
-
const hasCodeBlock = /```[\s\S]{20,}```/.test(content);
|
|
106
|
-
if (hasCodeBlock) {
|
|
107
|
-
return false; // Has code snippets, not premature
|
|
108
|
-
}
|
|
109
|
-
// Short response with no concrete findings = premature conclusion
|
|
110
|
-
return content.length < 500;
|
|
111
|
-
}
|
|
112
|
-
// Short, forceful prompts - model should just act
|
|
113
40
|
const AUTO_CONTINUE_PROMPTS = [
|
|
114
|
-
'Continue.',
|
|
115
|
-
'
|
|
116
|
-
'
|
|
117
|
-
'Do not summarize. Act with tools or ask a specific question.',
|
|
118
|
-
'Use tools to complete the task.',
|
|
41
|
+
'Continue. Use tools now: start with Read/read_file to inspect the target file, then call Edit (or Write if available) with file_path/old_string/new_string to apply changes. Keep using tools until the task is done.',
|
|
42
|
+
'You MUST call tools immediately. Issue Read -> Edit/Write tool calls with explicit parameters; no more explaining or planning.',
|
|
43
|
+
'CRITICAL: Call a tool right now. Use Edit with file_path, old_string, new_string (or Write with file_path and content). Respond with tool calls only.',
|
|
119
44
|
];
|
|
120
|
-
// Specific prompt for when model outputs text-formatted tool calls instead of using the API
|
|
121
|
-
const TEXT_TOOL_CALL_PROMPT = 'You wrote a tool call as text. Use the actual tool API - call the function directly, do not write it as text.';
|
|
122
|
-
// Forceful prompt used when the model keeps narrating or stalling after several attempts
|
|
123
|
-
const AUTO_CONTINUE_FORCE_PROMPT = 'You are stuck narrating. Immediately call the necessary tools to finish the task. If truly done, respond with a concise final answer citing any file paths/lines touched. Do not ask for confirmation.';
|
|
124
|
-
const SHORT_RESPONSE_PROMPT = 'Based on the tool results above, provide your complete response. Summarize findings and suggest next steps if applicable.';
|
|
125
|
-
/**
|
|
126
|
-
* Select an auto-continue prompt and user-facing message based on attempt count and reason.
|
|
127
|
-
* Escalates to a forceful instruction after repeated stalls.
|
|
128
|
-
*/
|
|
129
|
-
function buildAutoContinueInstruction(attempt, reason) {
|
|
130
|
-
const promptIndex = Math.max(0, Math.min(attempt - 1, AUTO_CONTINUE_PROMPTS.length - 1));
|
|
131
|
-
let prompt;
|
|
132
|
-
switch (reason) {
|
|
133
|
-
case 'text_tool_call':
|
|
134
|
-
prompt = TEXT_TOOL_CALL_PROMPT;
|
|
135
|
-
break;
|
|
136
|
-
case 'short_response':
|
|
137
|
-
prompt = SHORT_RESPONSE_PROMPT;
|
|
138
|
-
break;
|
|
139
|
-
default:
|
|
140
|
-
prompt = AUTO_CONTINUE_PROMPTS[promptIndex];
|
|
141
|
-
break;
|
|
142
|
-
}
|
|
143
|
-
const isEscalated = attempt >= AUTO_CONTINUE_ESCALATION_ATTEMPT;
|
|
144
|
-
if (isEscalated) {
|
|
145
|
-
prompt = AUTO_CONTINUE_FORCE_PROMPT;
|
|
146
|
-
}
|
|
147
|
-
const baseMessage = {
|
|
148
|
-
after_tools_narration: 'Model narrated after tools instead of completing. Prompting to continue...',
|
|
149
|
-
intent_without_action: "Model expressed intent but didn't act. Prompting to continue...",
|
|
150
|
-
text_tool_call: 'Model wrote tool call as text instead of using API. Prompting to use actual tools...',
|
|
151
|
-
short_response: 'Model responded too briefly. Prompting for a complete answer...',
|
|
152
|
-
};
|
|
153
|
-
const message = isEscalated
|
|
154
|
-
? `${baseMessage[reason]} Escalating to force tool use and completion.`
|
|
155
|
-
: baseMessage[reason];
|
|
156
|
-
return { prompt, message };
|
|
157
|
-
}
|
|
158
|
-
/**
|
|
159
|
-
* Generate a short, UI-safe preview of planning/intent text.
|
|
160
|
-
* Keeps only the first line and truncates long content.
|
|
161
|
-
*/
|
|
162
|
-
function buildPlanningPreview(content, maxLength = 140) {
|
|
163
|
-
const trimmed = (content || '').trim();
|
|
164
|
-
if (!trimmed) {
|
|
165
|
-
return null;
|
|
166
|
-
}
|
|
167
|
-
const firstLine = trimmed.split('\n').find(line => line.trim()) ?? '';
|
|
168
|
-
const collapsed = firstLine.replace(/\s+/g, ' ').trim();
|
|
169
|
-
if (!collapsed) {
|
|
170
|
-
return null;
|
|
171
|
-
}
|
|
172
|
-
const needsEllipsis = collapsed.length > maxLength;
|
|
173
|
-
const preview = collapsed.slice(0, maxLength).trim();
|
|
174
|
-
return needsEllipsis ? `${preview}...` : preview;
|
|
175
|
-
}
|
|
176
|
-
/**
|
|
177
|
-
* Detect if content contains text-formatted tool calls that should have been actual API calls.
|
|
178
|
-
* This is a PROGRAMMATIC check - if the model outputs "read_file(package.json)" as text,
|
|
179
|
-
* it clearly intended to call a tool but failed to use the proper API.
|
|
180
|
-
*/
|
|
181
|
-
function hasTextFormattedToolCall(content) {
|
|
182
|
-
// Patterns that indicate the model wrote a tool call as text instead of using the API
|
|
183
|
-
const textToolCallPatterns = [
|
|
184
|
-
// "_tool_call_" marker with function syntax
|
|
185
|
-
/_tool_call_\s*\n?\s*\w+\(/im,
|
|
186
|
-
// "tool_call:" followed by function name
|
|
187
|
-
/tool[_\s]?call:?\s*\n?\s*\w+\(/im,
|
|
188
|
-
// Common tool function call syntax at line start (not in code block)
|
|
189
|
-
/^\s*(read_file|write_file|edit_file|execute_bash|list_files)\s*\([^)]+\)/im,
|
|
190
|
-
// Indented tool call like " read_file(package.json)"
|
|
191
|
-
/^\s{2,}(read_file|write_file|edit_file|execute_bash|list_files|Bash|Read|Write|Edit|Grep|Glob)\s*\([^)]*\)\s*$/im,
|
|
192
|
-
// "Executing/Calling tool_name..." without actual execution
|
|
193
|
-
/\b(executing|calling)\s+(read_file|write_file|edit_file|execute_bash)\s*\.{3}?\s*$/im,
|
|
194
|
-
];
|
|
195
|
-
// Skip if the content is inside a code block (```...```)
|
|
196
|
-
const withoutCodeBlocks = content.replace(/```[\s\S]*?```/g, '');
|
|
197
|
-
return textToolCallPatterns.some(p => p.test(withoutCodeBlocks));
|
|
198
|
-
}
|
|
199
45
|
/**
|
|
200
|
-
*
|
|
201
|
-
*
|
|
46
|
+
* Check if response indicates intent to act without actually acting
|
|
47
|
+
* This detects when the model says "let me do X" but doesn't call any tools
|
|
202
48
|
*/
|
|
203
|
-
|
|
204
|
-
//
|
|
205
|
-
|
|
206
|
-
// Short confirmations with specifics
|
|
207
|
-
/^(removed|deleted|fixed|created|updated|added)\s+.{1,50}[.!]?$/i,
|
|
208
|
-
// "X is now Y" short statements
|
|
209
|
-
/^.{1,30}\s+is\s+now\s+.{1,30}[.!]?$/i,
|
|
210
|
-
// Task completion with count
|
|
211
|
-
/^(cleaned|removed|fixed|updated)\s+\d+\s+.{1,30}[.!]?$/i,
|
|
212
|
-
];
|
|
213
|
-
/**
|
|
214
|
-
* Patterns that indicate errors, failures, or incomplete work.
|
|
215
|
-
* When these are present, auto-continue should kick in to fix the issues.
|
|
216
|
-
* This is a PROGRAMMATIC check - if errors are reported, the task is NOT complete.
|
|
217
|
-
*/
|
|
218
|
-
const ERROR_INDICATOR_PATTERNS = [
|
|
219
|
-
// Build/compile errors
|
|
220
|
-
/\b(error|errors|fail(s|ed|ure|ing)?|broken|crash(es|ed|ing)?)\b/i,
|
|
221
|
-
// Test failures
|
|
222
|
-
/\b(test(s)?\s+(fail|failing|failed)|failing\s+test)/i,
|
|
223
|
-
// TypeScript/compilation errors
|
|
224
|
-
/\b(typescript|ts|type)\s+error/i,
|
|
225
|
-
/\bts\(\d+,\d+\)/i, // TS error format like ts(700,45)
|
|
226
|
-
// Remaining/unresolved issues
|
|
227
|
-
/\b(persists?|remains?|still\s+(has|have|is|are|broken|failing))\b/i,
|
|
228
|
-
/\b(unresolved|outstanding|remaining)\s+(error|issue|problem)/i,
|
|
229
|
-
// Explicit incomplete signals
|
|
230
|
-
/\b(didn'?t|did\s+not|couldn'?t|could\s+not|wasn'?t|was\s+not)\s+(work|succeed|complete|finish|pass)/i,
|
|
231
|
-
/\b(skipped|blocked|cannot|unable\s+to)\b/i,
|
|
232
|
-
];
|
|
233
|
-
/**
|
|
234
|
-
* Check if response contains error indicators that mean work is NOT complete.
|
|
235
|
-
* This is a simple, programmatic check - no complex NLP needed.
|
|
236
|
-
*/
|
|
237
|
-
function containsErrorIndicators(content) {
|
|
238
|
-
return ERROR_INDICATOR_PATTERNS.some(p => p.test(content));
|
|
239
|
-
}
|
|
240
|
-
/**
|
|
241
|
-
* Check if response is a genuine completion signal (short, definitive statement).
|
|
242
|
-
* Returns true if the model is signaling it's actually done with the task.
|
|
243
|
-
* IMPORTANT: Returns false if error indicators are present - model should continue.
|
|
244
|
-
*/
|
|
245
|
-
function isCompletionSignal(content) {
|
|
246
|
-
const trimmed = content.trim();
|
|
247
|
-
// PROGRAMMATIC: If errors are reported, this is NOT a valid completion
|
|
248
|
-
if (containsErrorIndicators(trimmed)) {
|
|
249
|
-
return false;
|
|
250
|
-
}
|
|
251
|
-
// Very short responses (<50 chars) with completion patterns are genuine signals
|
|
252
|
-
if (trimmed.length < 50) {
|
|
253
|
-
return COMPLETION_SIGNAL_PATTERNS.some(p => p.test(trimmed));
|
|
254
|
-
}
|
|
255
|
-
// Responses asking user for direction are valid stops
|
|
256
|
-
if (/\b(would you like|shall I|want me to|anything else)\b/i.test(trimmed) && trimmed.length < 200) {
|
|
257
|
-
return true;
|
|
258
|
-
}
|
|
259
|
-
// File:line citations indicate concrete work was shown
|
|
260
|
-
const hasCitation = /\b\w+\.(ts|js|tsx|jsx|py|go|rs|java|c|cpp|h|md|json|yaml|yml):\d+\b/.test(trimmed);
|
|
261
|
-
if (hasCitation) {
|
|
262
|
-
return true;
|
|
263
|
-
}
|
|
264
|
-
return false;
|
|
265
|
-
}
|
|
266
|
-
/**
|
|
267
|
-
* PROGRAMMATIC CHECK: After tool calls, should we auto-continue?
|
|
268
|
-
*
|
|
269
|
-
* SIMPLE RULE: After tools, model should ONLY stop if:
|
|
270
|
-
* 1. It asks the user a question (ends with ?)
|
|
271
|
-
* 2. It gives a very short completion (< 80 chars) WITHOUT planning words
|
|
272
|
-
*
|
|
273
|
-
* Everything else = CONTINUE. This is intentionally aggressive.
|
|
274
|
-
* The model should either be DONE (short message) or ASKING (question).
|
|
275
|
-
* Long explanations after tool work = continue to force action.
|
|
276
|
-
*/
|
|
277
|
-
function shouldContinueAfterTools(content) {
|
|
278
|
-
const trimmed = content.trim();
|
|
279
|
-
// No content after tools = continue to get results
|
|
280
|
-
if (trimmed.length === 0) {
|
|
281
|
-
return true;
|
|
282
|
-
}
|
|
283
|
-
// ALWAYS CONTINUE: Error indicators mean work isn't done
|
|
284
|
-
if (containsErrorIndicators(trimmed)) {
|
|
285
|
-
return true;
|
|
286
|
-
}
|
|
287
|
-
// ALWAYS CONTINUE: Intent/planning patterns mean model wants to do more
|
|
288
|
-
if (INTENT_WITHOUT_ACTION_PATTERNS.some(p => p.test(trimmed))) {
|
|
289
|
-
return true;
|
|
290
|
-
}
|
|
291
|
-
// VALID STOP: Model asking user a question (ends with ?)
|
|
292
|
-
if (/\?\s*$/.test(trimmed)) {
|
|
49
|
+
function shouldAutoContinue(content, hasToolCalls) {
|
|
50
|
+
// If there are tool calls, no need to auto-continue
|
|
51
|
+
if (hasToolCalls) {
|
|
293
52
|
return false;
|
|
294
53
|
}
|
|
295
|
-
//
|
|
296
|
-
if (
|
|
54
|
+
// If content is very short, likely not an incomplete intent
|
|
55
|
+
if (content.length < 50) {
|
|
297
56
|
return false;
|
|
298
57
|
}
|
|
299
|
-
//
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
if (!hasPlanningWords) {
|
|
304
|
-
return false; // Short and not planning = valid completion
|
|
58
|
+
// Check for intent patterns
|
|
59
|
+
for (const pattern of INTENT_WITHOUT_ACTION_PATTERNS) {
|
|
60
|
+
if (pattern.test(content)) {
|
|
61
|
+
return true;
|
|
305
62
|
}
|
|
306
|
-
// Short but has planning words = continue
|
|
307
|
-
return true;
|
|
308
|
-
}
|
|
309
|
-
// CONTINUE: Long response after tools = likely narrating/summarizing
|
|
310
|
-
// Force model to either ask a question or give a short completion
|
|
311
|
-
return true;
|
|
312
|
-
}
|
|
313
|
-
function shouldAutoContinue(content, hasToolCalls, hasReasoningContent = false) {
|
|
314
|
-
// Strip <thinking> blocks to get actual response content
|
|
315
|
-
const withoutThinking = content.replace(/<thinking>[\s\S]*?<\/thinking>/gi, '').trim();
|
|
316
|
-
// Empty content shouldn't trigger auto-continue
|
|
317
|
-
if (withoutThinking.length === 0) {
|
|
318
|
-
return { shouldContinue: false };
|
|
319
|
-
}
|
|
320
|
-
// HIGHEST PRIORITY: Check for text-formatted tool calls
|
|
321
|
-
// This is when the model writes "read_file(package.json)" as text instead of calling the API
|
|
322
|
-
if (hasTextFormattedToolCall(withoutThinking)) {
|
|
323
|
-
return { shouldContinue: true, reason: 'text_tool_call' };
|
|
324
|
-
}
|
|
325
|
-
// PROGRAMMATIC: If response reports errors/failures, auto-continue to fix them
|
|
326
|
-
// This is critical - if model reports "error persists" or "build failed", it should continue
|
|
327
|
-
if (containsErrorIndicators(withoutThinking)) {
|
|
328
|
-
return { shouldContinue: true, reason: 'error_indicators' };
|
|
329
|
-
}
|
|
330
|
-
// If model output thinking/reasoning without much content, likely needs prompting
|
|
331
|
-
if (hasReasoningContent && withoutThinking.length < 30) {
|
|
332
|
-
return { shouldContinue: true, reason: 'short_reasoning' };
|
|
333
|
-
}
|
|
334
|
-
// Check for intent patterns - model wants to do more work
|
|
335
|
-
const hasMoreIntent = INTENT_WITHOUT_ACTION_PATTERNS.some(p => p.test(withoutThinking));
|
|
336
|
-
// Even if tools were called, continue if narrative expresses MORE intent
|
|
337
|
-
// e.g., model calls grep but then says "I need to remove..." - should continue
|
|
338
|
-
if (hasToolCalls) {
|
|
339
|
-
return { shouldContinue: hasMoreIntent, reason: hasMoreIntent ? 'intent_patterns' : undefined };
|
|
340
|
-
}
|
|
341
|
-
// No tool calls - check for intent or premature conclusion
|
|
342
|
-
if (hasMoreIntent) {
|
|
343
|
-
return { shouldContinue: true, reason: 'intent_patterns' };
|
|
344
63
|
}
|
|
345
|
-
|
|
346
|
-
if (isPrematureConclusion(withoutThinking)) {
|
|
347
|
-
return { shouldContinue: true, reason: 'premature_conclusion' };
|
|
348
|
-
}
|
|
349
|
-
return { shouldContinue: false };
|
|
64
|
+
return false;
|
|
350
65
|
}
|
|
351
66
|
/**
|
|
352
67
|
* Check if an error is a context overflow error
|
|
@@ -362,53 +77,15 @@ function isContextOverflowError(error) {
|
|
|
362
77
|
message.includes('max_tokens') ||
|
|
363
78
|
message.includes('context window'));
|
|
364
79
|
}
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
const networkPatterns = [
|
|
374
|
-
'econnrefused', 'econnreset', 'enotfound', 'etimedout', 'epipe',
|
|
375
|
-
'network error', 'connection error', 'fetch failed', 'socket hang up',
|
|
376
|
-
'network is unreachable', 'connection refused', 'connection reset',
|
|
377
|
-
];
|
|
378
|
-
if (networkPatterns.some(p => message.includes(p))) {
|
|
379
|
-
return true;
|
|
380
|
-
}
|
|
381
|
-
// Rate limit errors
|
|
382
|
-
if (message.includes('rate limit') || message.includes('429') || message.includes('too many requests')) {
|
|
383
|
-
return true;
|
|
384
|
-
}
|
|
385
|
-
// Server errors (5xx)
|
|
386
|
-
if (message.includes('500') || message.includes('502') || message.includes('503') || message.includes('504')) {
|
|
387
|
-
return true;
|
|
80
|
+
class StreamInterruptionError extends Error {
|
|
81
|
+
reason;
|
|
82
|
+
partialResponse;
|
|
83
|
+
constructor(reason, message, partialResponse) {
|
|
84
|
+
super(message);
|
|
85
|
+
this.name = 'StreamInterruptionError';
|
|
86
|
+
this.reason = reason;
|
|
87
|
+
this.partialResponse = partialResponse;
|
|
388
88
|
}
|
|
389
|
-
// Temporary service errors
|
|
390
|
-
if (message.includes('service unavailable') || message.includes('temporarily unavailable') ||
|
|
391
|
-
message.includes('overloaded') || message.includes('server error')) {
|
|
392
|
-
return true;
|
|
393
|
-
}
|
|
394
|
-
return false;
|
|
395
|
-
}
|
|
396
|
-
/**
|
|
397
|
-
* Maximum number of transient error retries
|
|
398
|
-
*/
|
|
399
|
-
const MAX_TRANSIENT_RETRIES = 3;
|
|
400
|
-
/**
|
|
401
|
-
* Delay before retry (in ms), with exponential backoff
|
|
402
|
-
*/
|
|
403
|
-
function getRetryDelay(attempt) {
|
|
404
|
-
// Base delay of 1 second, doubles each attempt: 1s, 2s, 4s
|
|
405
|
-
return Math.min(1000 * Math.pow(2, attempt - 1), 10000);
|
|
406
|
-
}
|
|
407
|
-
/**
|
|
408
|
-
* Sleep for the specified milliseconds
|
|
409
|
-
*/
|
|
410
|
-
function sleep(ms) {
|
|
411
|
-
return new Promise(resolve => setTimeout(resolve, ms));
|
|
412
89
|
}
|
|
413
90
|
export class AgentRuntime {
|
|
414
91
|
messages = [];
|
|
@@ -423,21 +100,6 @@ export class AgentRuntime {
|
|
|
423
100
|
workingDirectory;
|
|
424
101
|
cancellationRequested = false;
|
|
425
102
|
_autoContinueEnabled = false;
|
|
426
|
-
// Loop detection: track last tool calls to detect stuck loops
|
|
427
|
-
lastToolCallSignature = null;
|
|
428
|
-
repeatedToolCallCount = 0;
|
|
429
|
-
static MAX_REPEATED_TOOL_CALLS = 5; // Allow up to 4 identical calls before stopping
|
|
430
|
-
// Behavioral loop detection: track recent tool calls to catch repetitive patterns
|
|
431
|
-
// e.g., calling "execute_bash" with "git status" 5 times even if output differs slightly
|
|
432
|
-
recentToolCalls = [];
|
|
433
|
-
static TOOL_HISTORY_SIZE = 12;
|
|
434
|
-
static BEHAVIORAL_LOOP_THRESHOLD = 3; // Same tool+cmd 3+ times in last 12 = stuck
|
|
435
|
-
// Tool result cache: prevent duplicate identical tool calls by returning cached results
|
|
436
|
-
// Key: tool signature (name + JSON args), Value: result string
|
|
437
|
-
toolResultCache = new Map();
|
|
438
|
-
static TOOL_CACHE_MAX_SIZE = 50; // Keep last 50 tool results
|
|
439
|
-
// Track if first tool call callback has been fired this turn
|
|
440
|
-
firstToolCallFired = false;
|
|
441
103
|
constructor(options) {
|
|
442
104
|
this.provider = options.provider;
|
|
443
105
|
this.toolRuntime = options.toolRuntime;
|
|
@@ -489,20 +151,8 @@ export class AgentRuntime {
|
|
|
489
151
|
if (!prompt) {
|
|
490
152
|
return '';
|
|
491
153
|
}
|
|
492
|
-
//
|
|
493
|
-
// This guarantees the user sees feedback the moment their request is received
|
|
494
|
-
if (this.callbacks.onRequestReceived) {
|
|
495
|
-
const maxLength = 160;
|
|
496
|
-
const normalized = prompt.replace(/\s+/g, ' ');
|
|
497
|
-
const preview = normalized.length > maxLength
|
|
498
|
-
? `${normalized.slice(0, maxLength - 3)}...`
|
|
499
|
-
: normalized;
|
|
500
|
-
this.callbacks.onRequestReceived(preview);
|
|
501
|
-
}
|
|
502
|
-
// Reset cancellation flag, loop tracking, and first tool call flag at start of new request
|
|
154
|
+
// Reset cancellation flag at start of new request
|
|
503
155
|
this.cancellationRequested = false;
|
|
504
|
-
this.resetBehavioralLoopTracking();
|
|
505
|
-
this.firstToolCallFired = false;
|
|
506
156
|
// Handle multi-line paste: show summary to user, send full content to AI
|
|
507
157
|
if (isMultilinePaste(prompt)) {
|
|
508
158
|
const processed = processPaste(prompt);
|
|
@@ -518,9 +168,28 @@ export class AgentRuntime {
|
|
|
518
168
|
const run = { startedAt: Date.now() };
|
|
519
169
|
this.activeRun = run;
|
|
520
170
|
try {
|
|
521
|
-
// Always use streaming when available - no fallback
|
|
522
171
|
if (useStreaming && this.provider.generateStream) {
|
|
523
|
-
|
|
172
|
+
try {
|
|
173
|
+
return await this.processConversationStreaming();
|
|
174
|
+
}
|
|
175
|
+
catch (error) {
|
|
176
|
+
const message = safeErrorMessage(error);
|
|
177
|
+
const reason = error instanceof StreamInterruptionError ? error.reason : undefined;
|
|
178
|
+
const partialResponse = error instanceof StreamInterruptionError ? error.partialResponse : undefined;
|
|
179
|
+
console.warn(`[agent] Streaming failed, falling back to non-streaming: ${message}`);
|
|
180
|
+
// If we captured part of the response, seed it into history and ask the model to continue
|
|
181
|
+
// so we don't restart the answer from scratch during fallback.
|
|
182
|
+
if (partialResponse && partialResponse.trim()) {
|
|
183
|
+
const partial = partialResponse.trim();
|
|
184
|
+
this.messages.push({ role: 'assistant', content: partial });
|
|
185
|
+
this.messages.push({
|
|
186
|
+
role: 'user',
|
|
187
|
+
content: 'Continue your previous response from where it stopped. Do not repeat text you already provided.',
|
|
188
|
+
});
|
|
189
|
+
}
|
|
190
|
+
this.callbacks.onStreamFallback?.({ message, error, reason, partialResponse });
|
|
191
|
+
return await this.processConversation();
|
|
192
|
+
}
|
|
524
193
|
}
|
|
525
194
|
return await this.processConversation();
|
|
526
195
|
}
|
|
@@ -535,7 +204,6 @@ export class AgentRuntime {
|
|
|
535
204
|
async processConversation() {
|
|
536
205
|
let contextRecoveryAttempts = 0;
|
|
537
206
|
let autoContinueAttempts = 0;
|
|
538
|
-
let transientRetryAttempts = 0;
|
|
539
207
|
while (true) {
|
|
540
208
|
// Check for cancellation at start of each iteration
|
|
541
209
|
if (this.cancellationRequested) {
|
|
@@ -551,126 +219,43 @@ export class AgentRuntime {
|
|
|
551
219
|
// Reset recovery attempts on successful generation
|
|
552
220
|
contextRecoveryAttempts = 0;
|
|
553
221
|
if (response.type === 'tool_calls') {
|
|
554
|
-
|
|
555
|
-
const
|
|
556
|
-
if (behavioralLoopResult) {
|
|
557
|
-
this.emitAssistantMessage(behavioralLoopResult, { isFinal: true, usage, contextStats });
|
|
558
|
-
this.messages.push({ role: 'assistant', content: behavioralLoopResult });
|
|
559
|
-
return behavioralLoopResult;
|
|
560
|
-
}
|
|
561
|
-
// Loop detection: check if same tool calls are being repeated (exact signature match)
|
|
562
|
-
const toolSignature = response.toolCalls
|
|
563
|
-
.map((t) => `${t.name}:${JSON.stringify(t.arguments)}`)
|
|
564
|
-
.sort()
|
|
565
|
-
.join('|');
|
|
566
|
-
if (toolSignature === this.lastToolCallSignature) {
|
|
567
|
-
this.repeatedToolCallCount++;
|
|
568
|
-
if (this.repeatedToolCallCount >= AgentRuntime.MAX_REPEATED_TOOL_CALLS) {
|
|
569
|
-
// Break out of loop - model is stuck
|
|
570
|
-
const loopMsg = `Tool loop detected: same tools called ${this.repeatedToolCallCount} times. Please try a different approach or provide more specific instructions.`;
|
|
571
|
-
this.emitAssistantMessage(loopMsg, { isFinal: true, usage, contextStats });
|
|
572
|
-
this.messages.push({ role: 'assistant', content: loopMsg });
|
|
573
|
-
this.lastToolCallSignature = null;
|
|
574
|
-
this.repeatedToolCallCount = 0;
|
|
575
|
-
return loopMsg;
|
|
576
|
-
}
|
|
577
|
-
}
|
|
578
|
-
else {
|
|
579
|
-
this.lastToolCallSignature = toolSignature;
|
|
580
|
-
this.repeatedToolCallCount = 1;
|
|
581
|
-
}
|
|
582
|
-
// Always emit narration if present - it shows the AI's thought process before tools
|
|
583
|
-
const narration = response.content?.trim();
|
|
584
|
-
const hasNarration = !!narration;
|
|
585
|
-
const shouldPromptAfterTools = this._autoContinueEnabled &&
|
|
586
|
-
autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS &&
|
|
587
|
-
shouldContinueAfterTools(narration ?? '');
|
|
588
|
-
// Fire first tool call callback if not yet fired this turn
|
|
589
|
-
// ALWAYS emit acknowledgement before first tool - ensures user sees immediate feedback
|
|
590
|
-
if (!this.firstToolCallFired && this.callbacks.onBeforeFirstToolCall) {
|
|
591
|
-
this.firstToolCallFired = true;
|
|
592
|
-
const toolNames = response.toolCalls.map(t => t.name);
|
|
593
|
-
const injectedAck = this.callbacks.onBeforeFirstToolCall(toolNames, hasNarration);
|
|
594
|
-
// ALWAYS emit acknowledgement if returned - provides immediate user feedback
|
|
595
|
-
if (injectedAck) {
|
|
596
|
-
this.emitAssistantMessage(injectedAck, { isFinal: false, usage, contextStats });
|
|
597
|
-
}
|
|
598
|
-
}
|
|
222
|
+
const suppressNarration = this.shouldSuppressToolNarration();
|
|
223
|
+
const narration = suppressNarration ? '' : response.content?.trim();
|
|
599
224
|
if (narration) {
|
|
600
|
-
this.emitAssistantMessage(narration, {
|
|
601
|
-
isFinal: false,
|
|
602
|
-
usage,
|
|
603
|
-
contextStats,
|
|
604
|
-
suppressDisplay: shouldPromptAfterTools,
|
|
605
|
-
});
|
|
225
|
+
this.emitAssistantMessage(narration, { isFinal: false, usage, contextStats });
|
|
606
226
|
}
|
|
607
227
|
const assistantMessage = {
|
|
608
228
|
role: 'assistant',
|
|
609
|
-
content: response.content ?? '',
|
|
229
|
+
content: suppressNarration ? '' : (response.content ?? ''),
|
|
610
230
|
};
|
|
611
231
|
if (response.toolCalls?.length) {
|
|
612
232
|
assistantMessage.toolCalls = response.toolCalls;
|
|
613
233
|
}
|
|
614
234
|
this.messages.push(assistantMessage);
|
|
615
235
|
await this.resolveToolCalls(response.toolCalls);
|
|
616
|
-
// PROGRAMMATIC CONTINUATION: After tool work, model must either:
|
|
617
|
-
// 1. Call more tools (already handled by continue above)
|
|
618
|
-
// 2. Give a short completion signal
|
|
619
|
-
// 3. Ask user for direction
|
|
620
|
-
// PROGRAMMATIC: If model outputs narrative instead of concrete findings, continue
|
|
621
|
-
if (shouldPromptAfterTools) {
|
|
622
|
-
autoContinueAttempts++;
|
|
623
|
-
const instruction = buildAutoContinueInstruction(autoContinueAttempts, 'after_tools_narration');
|
|
624
|
-
this.messages.push({
|
|
625
|
-
role: 'user',
|
|
626
|
-
content: instruction.prompt,
|
|
627
|
-
});
|
|
628
|
-
this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, instruction.message);
|
|
629
|
-
continue;
|
|
630
|
-
}
|
|
631
236
|
// Reset auto-continue counter since model is actively working
|
|
632
237
|
autoContinueAttempts = 0;
|
|
633
238
|
continue;
|
|
634
239
|
}
|
|
635
240
|
const reply = response.content?.trim() ?? '';
|
|
636
|
-
// Reset loop detection when we get a text response (not just tool calls)
|
|
637
|
-
if (reply.length >= 10) {
|
|
638
|
-
this.lastToolCallSignature = null;
|
|
639
|
-
this.repeatedToolCallCount = 0;
|
|
640
|
-
}
|
|
641
|
-
// If model returned empty or very short AND auto-continue is enabled, prompt it to respond
|
|
642
|
-
// This is disabled by default to prevent loops
|
|
643
|
-
if (this._autoContinueEnabled && reply.length < 10 && autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS) {
|
|
644
|
-
autoContinueAttempts++;
|
|
645
|
-
const instruction = buildAutoContinueInstruction(autoContinueAttempts, 'short_response');
|
|
646
|
-
this.messages.push({ role: 'assistant', content: reply || '' });
|
|
647
|
-
this.messages.push({
|
|
648
|
-
role: 'user',
|
|
649
|
-
content: instruction.prompt,
|
|
650
|
-
});
|
|
651
|
-
this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, instruction.message);
|
|
652
|
-
continue;
|
|
653
|
-
}
|
|
654
241
|
// Check if model expressed intent to act but didn't call tools
|
|
655
242
|
// This catches "Let me create..." without actual tool calls
|
|
656
|
-
// Also catches text-formatted tool calls like "_tool_call_\nread_file(...)"
|
|
657
243
|
// Only auto-continue if the feature is enabled
|
|
658
|
-
|
|
659
|
-
if (this._autoContinueEnabled && continueResult.shouldContinue && autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS) {
|
|
244
|
+
if (this._autoContinueEnabled && shouldAutoContinue(reply, false) && autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS) {
|
|
660
245
|
autoContinueAttempts++;
|
|
661
|
-
|
|
246
|
+
// Emit the planning content but mark as non-final
|
|
247
|
+
if (reply) {
|
|
248
|
+
this.emitAssistantMessage(reply, { isFinal: false, usage, contextStats });
|
|
249
|
+
}
|
|
662
250
|
this.messages.push({ role: 'assistant', content: reply });
|
|
663
|
-
//
|
|
664
|
-
const
|
|
665
|
-
const instruction = buildAutoContinueInstruction(autoContinueAttempts, continueReason);
|
|
251
|
+
// Auto-prompt with increasingly direct instructions
|
|
252
|
+
const promptIndex = Math.min(autoContinueAttempts - 1, AUTO_CONTINUE_PROMPTS.length - 1);
|
|
666
253
|
this.messages.push({
|
|
667
254
|
role: 'user',
|
|
668
|
-
content:
|
|
255
|
+
content: AUTO_CONTINUE_PROMPTS[promptIndex],
|
|
669
256
|
});
|
|
670
|
-
const
|
|
671
|
-
|
|
672
|
-
: instruction.message;
|
|
673
|
-
this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, uiMessage);
|
|
257
|
+
const autoContinueMessage = `Model expressed intent but didn't use tools. Auto-prompting to continue...`;
|
|
258
|
+
this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, autoContinueMessage);
|
|
674
259
|
continue;
|
|
675
260
|
}
|
|
676
261
|
if (reply) {
|
|
@@ -693,14 +278,6 @@ export class AgentRuntime {
|
|
|
693
278
|
continue;
|
|
694
279
|
}
|
|
695
280
|
}
|
|
696
|
-
// Auto-retry transient errors (network issues, rate limits, server errors)
|
|
697
|
-
if (isTransientError(error) && transientRetryAttempts < MAX_TRANSIENT_RETRIES) {
|
|
698
|
-
transientRetryAttempts++;
|
|
699
|
-
const delayMs = getRetryDelay(transientRetryAttempts);
|
|
700
|
-
this.callbacks.onRetrying?.(transientRetryAttempts, MAX_TRANSIENT_RETRIES, error);
|
|
701
|
-
await sleep(delayMs);
|
|
702
|
-
continue;
|
|
703
|
-
}
|
|
704
281
|
// Re-throw if not recoverable or recovery failed
|
|
705
282
|
throw error;
|
|
706
283
|
}
|
|
@@ -712,7 +289,6 @@ export class AgentRuntime {
|
|
|
712
289
|
}
|
|
713
290
|
let contextRecoveryAttempts = 0;
|
|
714
291
|
let autoContinueAttempts = 0;
|
|
715
|
-
let transientRetryAttempts = 0;
|
|
716
292
|
while (true) {
|
|
717
293
|
// Check for cancellation at start of each iteration
|
|
718
294
|
if (this.cancellationRequested) {
|
|
@@ -745,10 +321,45 @@ export class AgentRuntime {
|
|
|
745
321
|
}
|
|
746
322
|
}
|
|
747
323
|
};
|
|
748
|
-
|
|
324
|
+
const buildTimeoutError = (reason) => {
|
|
325
|
+
const base = reason === 'startup-timeout'
|
|
326
|
+
? 'Streaming stalled before any content arrived.'
|
|
327
|
+
: 'Streaming stalled due to inactivity.';
|
|
328
|
+
return new StreamInterruptionError(reason, `${base} Falling back to non-streaming.`, fullContent || reasoningContent);
|
|
329
|
+
};
|
|
330
|
+
// Timer for first token arrival
|
|
331
|
+
let startupTimer = null;
|
|
332
|
+
const startupTimeoutPromise = new Promise((_, reject) => {
|
|
333
|
+
startupTimer = setTimeout(() => reject(buildTimeoutError('startup-timeout')), STREAM_FIRST_CHUNK_TIMEOUT_MS);
|
|
334
|
+
});
|
|
335
|
+
const createIdleTimeout = () => {
|
|
336
|
+
let idleTimer = null;
|
|
337
|
+
const promise = new Promise((_, reject) => {
|
|
338
|
+
idleTimer = setTimeout(() => reject(buildTimeoutError('idle-timeout')), STREAM_INACTIVITY_TIMEOUT_MS);
|
|
339
|
+
});
|
|
340
|
+
const cancel = () => {
|
|
341
|
+
if (idleTimer) {
|
|
342
|
+
clearTimeout(idleTimer);
|
|
343
|
+
idleTimer = null;
|
|
344
|
+
}
|
|
345
|
+
};
|
|
346
|
+
return { promise, cancel };
|
|
347
|
+
};
|
|
348
|
+
let idleTimeout = createIdleTimeout();
|
|
349
|
+
let firstChunkSeen = false;
|
|
749
350
|
try {
|
|
750
351
|
while (true) {
|
|
751
|
-
const
|
|
352
|
+
const races = [
|
|
353
|
+
iterator.next(),
|
|
354
|
+
idleTimeout.promise,
|
|
355
|
+
];
|
|
356
|
+
if (!firstChunkSeen) {
|
|
357
|
+
races.push(startupTimeoutPromise);
|
|
358
|
+
}
|
|
359
|
+
const result = (await Promise.race(races));
|
|
360
|
+
// Reset idle timer for the next iteration
|
|
361
|
+
idleTimeout.cancel();
|
|
362
|
+
idleTimeout = createIdleTimeout();
|
|
752
363
|
// Check for cancellation during streaming
|
|
753
364
|
if (this.cancellationRequested) {
|
|
754
365
|
await closeStream();
|
|
@@ -763,10 +374,17 @@ export class AgentRuntime {
|
|
|
763
374
|
break;
|
|
764
375
|
}
|
|
765
376
|
const chunk = result.value;
|
|
377
|
+
if (!firstChunkSeen) {
|
|
378
|
+
firstChunkSeen = true;
|
|
379
|
+
if (startupTimer) {
|
|
380
|
+
clearTimeout(startupTimer);
|
|
381
|
+
startupTimer = null;
|
|
382
|
+
}
|
|
383
|
+
}
|
|
766
384
|
if (chunk.type === 'reasoning' && chunk.content) {
|
|
767
|
-
// Buffer reasoning content - don't stream token-by-token
|
|
768
|
-
// It will be emitted as a complete block when ready
|
|
769
385
|
reasoningContent += chunk.content;
|
|
386
|
+
// Surface reasoning tokens to the UI so thought process is visible
|
|
387
|
+
this.callbacks.onStreamChunk?.(chunk.content, 'reasoning');
|
|
770
388
|
continue;
|
|
771
389
|
}
|
|
772
390
|
if (chunk.type === 'content' && chunk.content) {
|
|
@@ -779,31 +397,11 @@ export class AgentRuntime {
|
|
|
779
397
|
}
|
|
780
398
|
}
|
|
781
399
|
else if (chunk.type === 'tool_call' && chunk.toolCall) {
|
|
782
|
-
// FIRST TOOL CALL: ALWAYS inject acknowledgement for immediate user feedback
|
|
783
|
-
if (toolCalls.length === 0) {
|
|
784
|
-
const hasNarration = !!(fullContent.trim() || reasoningContent.trim());
|
|
785
|
-
// Fire callback and ALWAYS inject acknowledgement BEFORE anything else
|
|
786
|
-
if (!this.firstToolCallFired && this.callbacks.onBeforeFirstToolCall) {
|
|
787
|
-
this.firstToolCallFired = true;
|
|
788
|
-
const injectedAck = this.callbacks.onBeforeFirstToolCall([chunk.toolCall.name], hasNarration);
|
|
789
|
-
// ALWAYS inject acknowledgement if returned - ensures immediate user feedback
|
|
790
|
-
if (injectedAck) {
|
|
791
|
-
// Inject acknowledgement as the FIRST thing user sees
|
|
792
|
-
this.callbacks.onStreamChunk?.(injectedAck + '\n', 'content');
|
|
793
|
-
fullContent = injectedAck + '\n' + fullContent; // Add to content for context
|
|
794
|
-
}
|
|
795
|
-
}
|
|
796
|
-
// Emit complete reasoning block first
|
|
797
|
-
if (reasoningContent.trim()) {
|
|
798
|
-
this.callbacks.onStreamChunk?.(reasoningContent, 'reasoning');
|
|
799
|
-
}
|
|
800
|
-
// Then emit buffered narration content
|
|
801
|
-
if (suppressStreamNarration && bufferedContent) {
|
|
802
|
-
this.callbacks.onStreamChunk?.(bufferedContent, 'content');
|
|
803
|
-
bufferedContent = '';
|
|
804
|
-
}
|
|
805
|
-
}
|
|
806
400
|
toolCalls.push(chunk.toolCall);
|
|
401
|
+
// Drop any speculative narration once we know the model is actually calling tools
|
|
402
|
+
if (suppressStreamNarration) {
|
|
403
|
+
bufferedContent = '';
|
|
404
|
+
}
|
|
807
405
|
}
|
|
808
406
|
else if (chunk.type === 'usage' && chunk.usage) {
|
|
809
407
|
usage = chunk.usage;
|
|
@@ -811,94 +409,37 @@ export class AgentRuntime {
|
|
|
811
409
|
}
|
|
812
410
|
}
|
|
813
411
|
finally {
|
|
412
|
+
idleTimeout.cancel();
|
|
413
|
+
if (startupTimer) {
|
|
414
|
+
clearTimeout(startupTimer);
|
|
415
|
+
}
|
|
814
416
|
await closeStream();
|
|
815
417
|
}
|
|
816
418
|
// Reset recovery attempts on successful generation
|
|
817
419
|
contextRecoveryAttempts = 0;
|
|
818
420
|
const contextStats = this.getContextStats();
|
|
819
421
|
const combinedContent = fullContent || reasoningContent;
|
|
820
|
-
// If no tool calls were issued,
|
|
821
|
-
if (toolCalls.length === 0) {
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
this.callbacks.onStreamChunk?.(reasoningContent, 'reasoning');
|
|
825
|
-
}
|
|
826
|
-
// Emit buffered narration content
|
|
827
|
-
if (suppressStreamNarration && bufferedContent) {
|
|
828
|
-
this.callbacks.onStreamChunk?.(bufferedContent, 'content');
|
|
829
|
-
bufferedContent = '';
|
|
830
|
-
}
|
|
422
|
+
// If no tool calls were issued, flush any buffered narration now
|
|
423
|
+
if (suppressStreamNarration && toolCalls.length === 0 && bufferedContent) {
|
|
424
|
+
this.callbacks.onStreamChunk?.(bufferedContent, 'content');
|
|
425
|
+
bufferedContent = '';
|
|
831
426
|
}
|
|
832
427
|
// Check if we got tool calls
|
|
833
428
|
if (toolCalls.length > 0) {
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
const behavioralLoopResult = this.checkBehavioralLoop(toolCalls);
|
|
837
|
-
if (behavioralLoopResult) {
|
|
838
|
-
this.emitAssistantMessage(behavioralLoopResult, { isFinal: true, usage, contextStats, wasStreamed: true });
|
|
839
|
-
this.messages.push({ role: 'assistant', content: behavioralLoopResult });
|
|
840
|
-
return behavioralLoopResult;
|
|
841
|
-
}
|
|
842
|
-
// Loop detection: check if same tool calls are being repeated (exact signature match)
|
|
843
|
-
const toolSignature = toolCalls
|
|
844
|
-
.map((t) => `${t.name}:${JSON.stringify(t.arguments)}`)
|
|
845
|
-
.sort()
|
|
846
|
-
.join('|');
|
|
847
|
-
if (toolSignature === this.lastToolCallSignature) {
|
|
848
|
-
this.repeatedToolCallCount++;
|
|
849
|
-
if (this.repeatedToolCallCount >= AgentRuntime.MAX_REPEATED_TOOL_CALLS) {
|
|
850
|
-
// Break out of loop - model is stuck
|
|
851
|
-
const loopMsg = `Tool loop detected: same tools called ${this.repeatedToolCallCount} times. Please try a different approach or provide more specific instructions.`;
|
|
852
|
-
this.emitAssistantMessage(loopMsg, { isFinal: true, usage, contextStats, wasStreamed: true });
|
|
853
|
-
this.messages.push({ role: 'assistant', content: loopMsg });
|
|
854
|
-
this.lastToolCallSignature = null;
|
|
855
|
-
this.repeatedToolCallCount = 0;
|
|
856
|
-
return loopMsg;
|
|
857
|
-
}
|
|
858
|
-
}
|
|
859
|
-
else {
|
|
860
|
-
this.lastToolCallSignature = toolSignature;
|
|
861
|
-
this.repeatedToolCallCount = 1;
|
|
862
|
-
}
|
|
863
|
-
// Content was already streamed via onStreamChunk, just record it for context
|
|
864
|
-
// (wasStreamed=true prevents duplicate display)
|
|
865
|
-
// Note: Acknowledgement injection happens during streaming (when first tool_call chunk arrives)
|
|
866
|
-
const narration = combinedContent.trim();
|
|
867
|
-
const shouldPromptAfterTools = this._autoContinueEnabled &&
|
|
868
|
-
autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS &&
|
|
869
|
-
shouldContinueAfterTools(narration ?? '');
|
|
429
|
+
const suppressNarration = this.shouldSuppressToolNarration();
|
|
430
|
+
const narration = suppressNarration ? '' : combinedContent.trim();
|
|
870
431
|
if (narration) {
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
usage,
|
|
874
|
-
contextStats,
|
|
875
|
-
wasStreamed: true,
|
|
876
|
-
suppressDisplay: shouldPromptAfterTools,
|
|
877
|
-
});
|
|
432
|
+
// Mark as wasStreamed since content was already output via onStreamChunk
|
|
433
|
+
this.emitAssistantMessage(narration, { isFinal: false, usage, contextStats, wasStreamed: true });
|
|
878
434
|
}
|
|
879
435
|
const assistantMessage = {
|
|
880
436
|
role: 'assistant',
|
|
881
|
-
content: combinedContent,
|
|
437
|
+
content: suppressNarration ? '' : combinedContent,
|
|
882
438
|
toolCalls,
|
|
883
439
|
};
|
|
884
440
|
this.messages.push(assistantMessage);
|
|
885
441
|
await this.resolveToolCalls(toolCalls);
|
|
886
|
-
//
|
|
887
|
-
// 1. Call more tools (already handled by continue above)
|
|
888
|
-
// 2. Give a short completion signal
|
|
889
|
-
// 3. Ask user for direction
|
|
890
|
-
// PROGRAMMATIC: If model outputs narrative instead of concrete findings, continue
|
|
891
|
-
if (shouldPromptAfterTools) {
|
|
892
|
-
autoContinueAttempts++;
|
|
893
|
-
const instruction = buildAutoContinueInstruction(autoContinueAttempts, 'after_tools_narration');
|
|
894
|
-
this.messages.push({
|
|
895
|
-
role: 'user',
|
|
896
|
-
content: instruction.prompt,
|
|
897
|
-
});
|
|
898
|
-
this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, instruction.message);
|
|
899
|
-
continue;
|
|
900
|
-
}
|
|
901
|
-
// Reset auto-continue counter since model appears to be done
|
|
442
|
+
// Reset auto-continue counter since model is actively working
|
|
902
443
|
autoContinueAttempts = 0;
|
|
903
444
|
continue;
|
|
904
445
|
}
|
|
@@ -906,46 +447,24 @@ export class AgentRuntime {
|
|
|
906
447
|
// This catches "Let me create..." without actual tool calls
|
|
907
448
|
// Only auto-continue if the feature is enabled
|
|
908
449
|
const reply = combinedContent.trim();
|
|
909
|
-
|
|
910
|
-
if (reply.length >= 10) {
|
|
911
|
-
this.lastToolCallSignature = null;
|
|
912
|
-
this.repeatedToolCallCount = 0;
|
|
913
|
-
}
|
|
914
|
-
// If model returned empty or very short AND auto-continue is enabled, prompt it to respond
|
|
915
|
-
// This is disabled by default to prevent loops
|
|
916
|
-
if (this._autoContinueEnabled && reply.length < 10 && autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS) {
|
|
450
|
+
if (this._autoContinueEnabled && shouldAutoContinue(reply, false) && autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS) {
|
|
917
451
|
autoContinueAttempts++;
|
|
918
|
-
|
|
919
|
-
|
|
452
|
+
// Emit the planning content but mark as non-final
|
|
453
|
+
// Mark as wasStreamed since content was already output via onStreamChunk
|
|
454
|
+
if (reply) {
|
|
455
|
+
this.emitAssistantMessage(reply, { isFinal: false, usage, contextStats, wasStreamed: true });
|
|
456
|
+
}
|
|
457
|
+
this.messages.push({ role: 'assistant', content: reply });
|
|
458
|
+
// Auto-prompt with increasingly direct instructions
|
|
459
|
+
const promptIndex = Math.min(autoContinueAttempts - 1, AUTO_CONTINUE_PROMPTS.length - 1);
|
|
920
460
|
this.messages.push({
|
|
921
461
|
role: 'user',
|
|
922
|
-
content:
|
|
462
|
+
content: AUTO_CONTINUE_PROMPTS[promptIndex],
|
|
923
463
|
});
|
|
924
|
-
|
|
464
|
+
const autoContinueMessage = `Model expressed intent but didn't use tools. Auto-prompting to continue...`;
|
|
465
|
+
this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, autoContinueMessage);
|
|
925
466
|
continue;
|
|
926
467
|
}
|
|
927
|
-
// PROGRAMMATIC CHECK: Text-only responses after tool work need scrutiny
|
|
928
|
-
// If model outputs substantial narrative without tools, it's likely summarizing
|
|
929
|
-
// Check if this is a genuine completion or a premature summary
|
|
930
|
-
// Also catches text-formatted tool calls like "_tool_call_\nread_file(...)"
|
|
931
|
-
if (this._autoContinueEnabled && autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS) {
|
|
932
|
-
// Intent patterns still catch "let me X" without tools
|
|
933
|
-
const streamContinueResult = shouldAutoContinue(reply, false);
|
|
934
|
-
if (streamContinueResult.shouldContinue) {
|
|
935
|
-
autoContinueAttempts++;
|
|
936
|
-
const planningPreview = buildPlanningPreview(reply);
|
|
937
|
-
this.messages.push({ role: 'assistant', content: reply });
|
|
938
|
-
// Choose prompt based on reason - text tool calls get a specific, forceful prompt
|
|
939
|
-
const continueReason = streamContinueResult.reason === 'text_tool_call' ? 'text_tool_call' : 'intent_without_action';
|
|
940
|
-
const instruction = buildAutoContinueInstruction(autoContinueAttempts, continueReason);
|
|
941
|
-
this.messages.push({ role: 'user', content: instruction.prompt });
|
|
942
|
-
const uiMessage = planningPreview
|
|
943
|
-
? `${instruction.message} Next action: ${planningPreview}`
|
|
944
|
-
: instruction.message;
|
|
945
|
-
this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, uiMessage);
|
|
946
|
-
continue;
|
|
947
|
-
}
|
|
948
|
-
}
|
|
949
468
|
// Final message - mark as streamed to avoid double-display in UI
|
|
950
469
|
if (reply) {
|
|
951
470
|
this.emitAssistantMessage(reply, { isFinal: true, usage, contextStats, wasStreamed: true });
|
|
@@ -967,14 +486,6 @@ export class AgentRuntime {
|
|
|
967
486
|
continue;
|
|
968
487
|
}
|
|
969
488
|
}
|
|
970
|
-
// Auto-retry transient errors (network issues, rate limits, server errors)
|
|
971
|
-
if (isTransientError(error) && transientRetryAttempts < MAX_TRANSIENT_RETRIES) {
|
|
972
|
-
transientRetryAttempts++;
|
|
973
|
-
const delayMs = getRetryDelay(transientRetryAttempts);
|
|
974
|
-
this.callbacks.onRetrying?.(transientRetryAttempts, MAX_TRANSIENT_RETRIES, error);
|
|
975
|
-
await sleep(delayMs);
|
|
976
|
-
continue;
|
|
977
|
-
}
|
|
978
489
|
// Re-throw if not recoverable or recovery failed
|
|
979
490
|
throw error;
|
|
980
491
|
}
|
|
@@ -1006,23 +517,7 @@ export class AgentRuntime {
|
|
|
1006
517
|
// Fast path: single tool call
|
|
1007
518
|
if (numCalls === 1) {
|
|
1008
519
|
const call = toolCalls[0];
|
|
1009
|
-
// Check cache first - prevent duplicate identical tool calls
|
|
1010
|
-
const cached = this.getCachedToolResult(call);
|
|
1011
|
-
if (cached !== null) {
|
|
1012
|
-
// Return cached result with indicator that it was from cache
|
|
1013
|
-
this.messages.push({
|
|
1014
|
-
role: 'tool',
|
|
1015
|
-
name: call.name,
|
|
1016
|
-
toolCallId: call.id,
|
|
1017
|
-
content: `[Cached result - identical call already executed]\n\n${cached}`,
|
|
1018
|
-
});
|
|
1019
|
-
return;
|
|
1020
|
-
}
|
|
1021
|
-
this.callbacks.onToolExecution?.(call.name, true);
|
|
1022
520
|
const output = await this.toolRuntime.execute(call);
|
|
1023
|
-
this.callbacks.onToolExecution?.(call.name, false);
|
|
1024
|
-
// Cache the result for future identical calls
|
|
1025
|
-
this.cacheToolResult(call, output);
|
|
1026
521
|
this.messages.push({
|
|
1027
522
|
role: 'tool',
|
|
1028
523
|
name: call.name,
|
|
@@ -1032,103 +527,53 @@ export class AgentRuntime {
|
|
|
1032
527
|
return;
|
|
1033
528
|
}
|
|
1034
529
|
// PERF: For reasonable batch sizes, execute all in parallel
|
|
1035
|
-
// Check cache for each call and only execute non-cached ones
|
|
1036
530
|
if (numCalls <= 10) {
|
|
1037
|
-
const
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
// Execute non-cached calls in parallel
|
|
1050
|
-
if (toExecute.length > 0) {
|
|
1051
|
-
const toolNames = toExecute.map(c => c.name).join(', ');
|
|
1052
|
-
this.callbacks.onToolExecution?.(toolNames, true);
|
|
1053
|
-
const executed = await Promise.all(toExecute.map(async (call) => {
|
|
1054
|
-
const output = await this.toolRuntime.execute(call);
|
|
1055
|
-
this.cacheToolResult(call, output);
|
|
1056
|
-
return { call, output, fromCache: false };
|
|
1057
|
-
}));
|
|
1058
|
-
this.callbacks.onToolExecution?.(toolNames, false);
|
|
1059
|
-
cachedResults.push(...executed);
|
|
1060
|
-
}
|
|
1061
|
-
// Add all results to messages in the original order
|
|
1062
|
-
for (const originalCall of toolCalls) {
|
|
1063
|
-
const result = cachedResults.find(r => r.call.id === originalCall.id);
|
|
1064
|
-
if (result) {
|
|
1065
|
-
const content = result.fromCache
|
|
1066
|
-
? `[Cached result - identical call already executed]\n\n${result.output}`
|
|
1067
|
-
: result.output;
|
|
1068
|
-
this.messages.push({
|
|
1069
|
-
role: 'tool',
|
|
1070
|
-
name: result.call.name,
|
|
1071
|
-
toolCallId: result.call.id,
|
|
1072
|
-
content,
|
|
1073
|
-
});
|
|
1074
|
-
}
|
|
531
|
+
const results = await Promise.all(toolCalls.map(async (call) => ({
|
|
532
|
+
call,
|
|
533
|
+
output: await this.toolRuntime.execute(call),
|
|
534
|
+
})));
|
|
535
|
+
// Add results to messages in the same order as tool calls
|
|
536
|
+
for (const { call, output } of results) {
|
|
537
|
+
this.messages.push({
|
|
538
|
+
role: 'tool',
|
|
539
|
+
name: call.name,
|
|
540
|
+
toolCallId: call.id,
|
|
541
|
+
content: output,
|
|
542
|
+
});
|
|
1075
543
|
}
|
|
1076
544
|
return;
|
|
1077
545
|
}
|
|
1078
|
-
// PERF: For large batches, use chunked parallel execution
|
|
546
|
+
// PERF: For large batches, use chunked parallel execution
|
|
547
|
+
// This prevents memory pressure from too many concurrent operations
|
|
1079
548
|
const CHUNK_SIZE = 8;
|
|
1080
|
-
const
|
|
549
|
+
const results = [];
|
|
1081
550
|
for (let i = 0; i < numCalls; i += CHUNK_SIZE) {
|
|
1082
551
|
const chunk = toolCalls.slice(i, i + CHUNK_SIZE);
|
|
1083
|
-
const
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
const executed = await Promise.all(toExecuteInChunk.map(async (call) => {
|
|
1098
|
-
const output = await this.toolRuntime.execute(call);
|
|
1099
|
-
this.cacheToolResult(call, output);
|
|
1100
|
-
return { call, output, fromCache: false };
|
|
1101
|
-
}));
|
|
1102
|
-
this.callbacks.onToolExecution?.(chunkNames, false);
|
|
1103
|
-
cachedInChunk.push(...executed);
|
|
1104
|
-
}
|
|
1105
|
-
allResults.push(...cachedInChunk);
|
|
1106
|
-
}
|
|
1107
|
-
// Add results to messages in original order
|
|
1108
|
-
for (const originalCall of toolCalls) {
|
|
1109
|
-
const result = allResults.find(r => r.call.id === originalCall.id);
|
|
1110
|
-
if (result) {
|
|
1111
|
-
const content = result.fromCache
|
|
1112
|
-
? `[Cached result - identical call already executed]\n\n${result.output}`
|
|
1113
|
-
: result.output;
|
|
1114
|
-
this.messages.push({
|
|
1115
|
-
role: 'tool',
|
|
1116
|
-
name: result.call.name,
|
|
1117
|
-
toolCallId: result.call.id,
|
|
1118
|
-
content,
|
|
1119
|
-
});
|
|
1120
|
-
}
|
|
552
|
+
const chunkResults = await Promise.all(chunk.map(async (call) => ({
|
|
553
|
+
call,
|
|
554
|
+
output: await this.toolRuntime.execute(call),
|
|
555
|
+
})));
|
|
556
|
+
results.push(...chunkResults);
|
|
557
|
+
}
|
|
558
|
+
// Add results to messages in order
|
|
559
|
+
for (const { call, output } of results) {
|
|
560
|
+
this.messages.push({
|
|
561
|
+
role: 'tool',
|
|
562
|
+
name: call.name,
|
|
563
|
+
toolCallId: call.id,
|
|
564
|
+
content: output,
|
|
565
|
+
});
|
|
1121
566
|
}
|
|
1122
567
|
}
|
|
1123
568
|
get providerTools() {
|
|
1124
569
|
return this.toolRuntime.listProviderTools();
|
|
1125
570
|
}
|
|
1126
571
|
/**
|
|
1127
|
-
*
|
|
1128
|
-
*
|
|
572
|
+
* OpenAI models frequently add speculative tool narration in the content field.
|
|
573
|
+
* Suppress that text to avoid surfacing hallucinated tool usage in the UI.
|
|
1129
574
|
*/
|
|
1130
575
|
shouldSuppressToolNarration() {
|
|
1131
|
-
return
|
|
576
|
+
return this.providerId.toLowerCase().includes('openai');
|
|
1132
577
|
}
|
|
1133
578
|
emitAssistantMessage(content, metadata) {
|
|
1134
579
|
if (!content) {
|
|
@@ -1181,138 +626,6 @@ export class AgentRuntime {
|
|
|
1181
626
|
model: this.modelId,
|
|
1182
627
|
});
|
|
1183
628
|
}
|
|
1184
|
-
/**
|
|
1185
|
-
* Extract a "command hash" from tool arguments for behavioral loop detection.
|
|
1186
|
-
* For execute_bash, this is the actual command. For other tools, key identifying args.
|
|
1187
|
-
*/
|
|
1188
|
-
extractCmdHash(name, args) {
|
|
1189
|
-
// For bash/execute commands, extract the command itself
|
|
1190
|
-
if (name === 'execute_bash' || name === 'Bash') {
|
|
1191
|
-
const cmd = args['command'];
|
|
1192
|
-
if (cmd) {
|
|
1193
|
-
// Normalize: trim, take first 100 chars, remove variable parts like timestamps
|
|
1194
|
-
return cmd.trim().slice(0, 100).replace(/\d{10,}/g, 'N');
|
|
1195
|
-
}
|
|
1196
|
-
}
|
|
1197
|
-
// For file operations, use the path
|
|
1198
|
-
if (name === 'read_file' || name === 'Read' || name === 'read_files') {
|
|
1199
|
-
const path = args['path'] || args['file_path'] || args['paths'];
|
|
1200
|
-
if (path)
|
|
1201
|
-
return `path:${JSON.stringify(path).slice(0, 100)}`;
|
|
1202
|
-
}
|
|
1203
|
-
if (name === 'list_files' || name === 'Glob') {
|
|
1204
|
-
const path = args['path'] || args['pattern'];
|
|
1205
|
-
if (path)
|
|
1206
|
-
return `path:${JSON.stringify(path).slice(0, 100)}`;
|
|
1207
|
-
}
|
|
1208
|
-
// For search, use the query/pattern
|
|
1209
|
-
if (name === 'Grep' || name === 'grep' || name === 'search') {
|
|
1210
|
-
const pattern = args['pattern'] || args['query'];
|
|
1211
|
-
if (pattern)
|
|
1212
|
-
return `search:${String(pattern).slice(0, 100)}`;
|
|
1213
|
-
}
|
|
1214
|
-
// Default: use first significant arg value
|
|
1215
|
-
const firstArg = Object.values(args)[0];
|
|
1216
|
-
if (firstArg) {
|
|
1217
|
-
return String(firstArg).slice(0, 100);
|
|
1218
|
-
}
|
|
1219
|
-
return 'no-args';
|
|
1220
|
-
}
|
|
1221
|
-
/**
|
|
1222
|
-
* Check for behavioral loops - model calling the same tool with similar args repeatedly.
|
|
1223
|
-
* Returns an error message if a loop is detected, null otherwise.
|
|
1224
|
-
*
|
|
1225
|
-
* FUNDAMENTAL PREVENTION: Cached calls are excluded from loop detection since they
|
|
1226
|
-
* don't actually execute (the cache provides the result). This means:
|
|
1227
|
-
* - First call: executes and caches result
|
|
1228
|
-
* - Second identical call: returns cached result, NOT counted toward loop
|
|
1229
|
-
* - Only genuinely NEW (non-cached) repetitive calls trigger loop detection
|
|
1230
|
-
*
|
|
1231
|
-
* This catches patterns like:
|
|
1232
|
-
* - "git status -sb" called 3 times with DIFFERENT outputs (cache miss each time)
|
|
1233
|
-
* - Repeated file reads where file content changed
|
|
1234
|
-
* - Repeated searches with same pattern but new results
|
|
1235
|
-
*/
|
|
1236
|
-
checkBehavioralLoop(toolCalls) {
|
|
1237
|
-
// Filter out calls that will be served from cache - these don't count toward loops
|
|
1238
|
-
// since they're handled fundamentally by the caching mechanism
|
|
1239
|
-
const nonCachedCalls = toolCalls.filter(call => this.getCachedToolResult(call) === null);
|
|
1240
|
-
// If all calls are cached, no loop detection needed
|
|
1241
|
-
if (nonCachedCalls.length === 0) {
|
|
1242
|
-
return null;
|
|
1243
|
-
}
|
|
1244
|
-
// Count existing occurrences in recent history
|
|
1245
|
-
const existingCounts = new Map();
|
|
1246
|
-
for (const { name, cmdHash } of this.recentToolCalls) {
|
|
1247
|
-
const key = `${name}:${cmdHash}`;
|
|
1248
|
-
existingCounts.set(key, (existingCounts.get(key) ?? 0) + 1);
|
|
1249
|
-
}
|
|
1250
|
-
// Check if ANY incoming NON-CACHED call would exceed threshold
|
|
1251
|
-
for (const call of nonCachedCalls) {
|
|
1252
|
-
const cmdHash = this.extractCmdHash(call.name, call.arguments ?? {});
|
|
1253
|
-
const key = `${call.name}:${cmdHash}`;
|
|
1254
|
-
const currentCount = existingCounts.get(key) ?? 0;
|
|
1255
|
-
// If adding this call would reach or exceed threshold, block immediately
|
|
1256
|
-
if (currentCount + 1 >= AgentRuntime.BEHAVIORAL_LOOP_THRESHOLD) {
|
|
1257
|
-
// Reset history to prevent immediate re-trigger
|
|
1258
|
-
this.recentToolCalls = [];
|
|
1259
|
-
return `Behavioral loop detected: "${call.name}" called ${currentCount + 1} times with similar arguments. The task appears stuck. Please try a different approach or provide more specific instructions.`;
|
|
1260
|
-
}
|
|
1261
|
-
}
|
|
1262
|
-
// Track only non-cached tool calls (cached ones are handled by caching)
|
|
1263
|
-
for (const call of nonCachedCalls) {
|
|
1264
|
-
const cmdHash = this.extractCmdHash(call.name, call.arguments ?? {});
|
|
1265
|
-
this.recentToolCalls.push({ name: call.name, cmdHash });
|
|
1266
|
-
}
|
|
1267
|
-
// Keep only recent history
|
|
1268
|
-
while (this.recentToolCalls.length > AgentRuntime.TOOL_HISTORY_SIZE) {
|
|
1269
|
-
this.recentToolCalls.shift();
|
|
1270
|
-
}
|
|
1271
|
-
return null;
|
|
1272
|
-
}
|
|
1273
|
-
/**
|
|
1274
|
-
* Reset behavioral loop tracking (called when user provides new input or task completes)
|
|
1275
|
-
*/
|
|
1276
|
-
resetBehavioralLoopTracking() {
|
|
1277
|
-
this.recentToolCalls = [];
|
|
1278
|
-
this.lastToolCallSignature = null;
|
|
1279
|
-
this.repeatedToolCallCount = 0;
|
|
1280
|
-
// Note: we DON'T clear toolResultCache here - cached results remain valid across turns
|
|
1281
|
-
// to prevent re-executing identical tool calls within a session
|
|
1282
|
-
}
|
|
1283
|
-
/**
|
|
1284
|
-
* Create a stable cache key for a tool call based on name and arguments
|
|
1285
|
-
*/
|
|
1286
|
-
getToolCacheKey(call) {
|
|
1287
|
-
const args = call.arguments ?? {};
|
|
1288
|
-
// Sort keys for consistent ordering
|
|
1289
|
-
const sortedArgs = Object.keys(args).sort().reduce((acc, key) => {
|
|
1290
|
-
acc[key] = args[key];
|
|
1291
|
-
return acc;
|
|
1292
|
-
}, {});
|
|
1293
|
-
return `${call.name}:${JSON.stringify(sortedArgs)}`;
|
|
1294
|
-
}
|
|
1295
|
-
/**
|
|
1296
|
-
* Get cached result for a tool call, or null if not cached
|
|
1297
|
-
*/
|
|
1298
|
-
getCachedToolResult(call) {
|
|
1299
|
-
const key = this.getToolCacheKey(call);
|
|
1300
|
-
return this.toolResultCache.get(key) ?? null;
|
|
1301
|
-
}
|
|
1302
|
-
/**
|
|
1303
|
-
* Cache a tool result for future identical calls
|
|
1304
|
-
*/
|
|
1305
|
-
cacheToolResult(call, result) {
|
|
1306
|
-
const key = this.getToolCacheKey(call);
|
|
1307
|
-
// Evict oldest entries if cache is full
|
|
1308
|
-
if (this.toolResultCache.size >= AgentRuntime.TOOL_CACHE_MAX_SIZE) {
|
|
1309
|
-
const firstKey = this.toolResultCache.keys().next().value;
|
|
1310
|
-
if (firstKey) {
|
|
1311
|
-
this.toolResultCache.delete(firstKey);
|
|
1312
|
-
}
|
|
1313
|
-
}
|
|
1314
|
-
this.toolResultCache.set(key, result);
|
|
1315
|
-
}
|
|
1316
629
|
getHistory() {
|
|
1317
630
|
return this.messages.map(cloneMessage);
|
|
1318
631
|
}
|