erosolar-cli 2.1.167 → 2.1.169
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/contracts/agent-schemas.json +0 -5
- package/dist/core/agent.d.ts +3 -18
- package/dist/core/agent.d.ts.map +1 -1
- package/dist/core/agent.js +4 -515
- package/dist/core/agent.js.map +1 -1
- package/dist/core/preferences.js +2 -2
- package/dist/core/preferences.js.map +1 -1
- package/dist/runtime/agentController.d.ts.map +1 -1
- package/dist/runtime/agentController.js +3 -6
- package/dist/runtime/agentController.js.map +1 -1
- package/dist/runtime/agentSession.d.ts +0 -2
- package/dist/runtime/agentSession.d.ts.map +1 -1
- package/dist/runtime/agentSession.js +0 -1
- package/dist/runtime/agentSession.js.map +1 -1
- package/dist/shell/interactiveShell.d.ts +0 -9
- package/dist/shell/interactiveShell.d.ts.map +1 -1
- package/dist/shell/interactiveShell.js +21 -174
- package/dist/shell/interactiveShell.js.map +1 -1
- package/dist/ui/PromptController.d.ts +0 -3
- package/dist/ui/PromptController.d.ts.map +1 -1
- package/dist/ui/PromptController.js +0 -3
- package/dist/ui/PromptController.js.map +1 -1
- package/dist/ui/UnifiedUIRenderer.d.ts +0 -2
- package/dist/ui/UnifiedUIRenderer.d.ts.map +1 -1
- package/dist/ui/UnifiedUIRenderer.js +1 -13
- package/dist/ui/UnifiedUIRenderer.js.map +1 -1
- package/dist/ui/shortcutsHelp.d.ts.map +1 -1
- package/dist/ui/shortcutsHelp.js +0 -1
- package/dist/ui/shortcutsHelp.js.map +1 -1
- package/package.json +1 -1
package/dist/core/agent.js
CHANGED
|
@@ -4,350 +4,7 @@ import { safeErrorMessage } from './secretStore.js';
|
|
|
4
4
|
* Maximum number of context overflow recovery attempts
|
|
5
5
|
*/
|
|
6
6
|
const MAX_CONTEXT_RECOVERY_ATTEMPTS = 3;
|
|
7
|
-
/**
|
|
8
|
-
* Maximum number of auto-continuation attempts when model expresses intent but doesn't act
|
|
9
|
-
* Increased to allow more recovery attempts for complex tasks
|
|
10
|
-
*/
|
|
11
|
-
const MAX_AUTO_CONTINUE_ATTEMPTS = 8;
|
|
12
|
-
/**
|
|
13
|
-
* Threshold for short completion - responses shorter than this can be valid completions.
|
|
14
|
-
* Anything longer after tool work needs scrutiny.
|
|
15
|
-
*/
|
|
16
|
-
const SHORT_COMPLETION_THRESHOLD = 80;
|
|
17
|
-
const AUTO_CONTINUE_ESCALATION_ATTEMPT = 4;
|
|
18
7
|
// Streaming runs without timeouts - we let the model take as long as it needs
|
|
19
|
-
/**
|
|
20
|
-
* Patterns that indicate the model intends to take action but hasn't yet
|
|
21
|
-
* These suggest the model should be prompted to continue
|
|
22
|
-
*/
|
|
23
|
-
const INTENT_WITHOUT_ACTION_PATTERNS = [
|
|
24
|
-
// TEXT-FORMATTED TOOL CALLS: Model outputs tool call as text instead of using API
|
|
25
|
-
// These are CRITICAL to catch - model is trying to call tools but failing to do so properly
|
|
26
|
-
// Pattern: "_tool_call_" or similar markers followed by function-like syntax
|
|
27
|
-
/_tool_call_\s*\n?\s*\w+\(/im,
|
|
28
|
-
// Pattern: "tool_call:" or "Tool call:" followed by function name
|
|
29
|
-
/tool[_\s]?call:?\s*\n?\s*\w+\(/im,
|
|
30
|
-
// Pattern: function call syntax at start of line like "read_file(..." without being in code block
|
|
31
|
-
/^\s*(read_file|write_file|edit_file|execute_bash|list_files|search|grep|glob)\s*\(/im,
|
|
32
|
-
// Pattern: "Executing X..." or "Calling X..." without actual tool call
|
|
33
|
-
/\b(executing|calling)\s+(read_file|write_file|edit_file|execute_bash|list_files|search|grep|glob)\b/i,
|
|
34
|
-
// Pattern: indented tool call syntax like " read_file(package.json)"
|
|
35
|
-
/^\s{2,}(read_file|write_file|edit_file|execute_bash|list_files|Bash|Read|Write|Edit|Grep|Glob)\s*\([^)]*\)\s*$/im,
|
|
36
|
-
// "Let me X" patterns - model is stating what it will do
|
|
37
|
-
/\blet me\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|examine|review|run)/i,
|
|
38
|
-
// "I'll X" / "I will X" patterns - include all apostrophe variants (straight ', left ', right ')
|
|
39
|
-
/\bi['''\u2018\u2019]ll\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|start|begin|now|use|inspect|examine|run)/i,
|
|
40
|
-
/\bi will\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|start|begin|now|use|inspect|examine|run)/i,
|
|
41
|
-
// "Will X" without "I" (common pattern)
|
|
42
|
-
/\bwill\s+(run|use|read|search|check|inspect|examine|list|find|analyze|create|write|look)\s/i,
|
|
43
|
-
// "I'm going to X" patterns - include all apostrophe variants
|
|
44
|
-
/\bi['''\u2018\u2019]m going to\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|run)/i,
|
|
45
|
-
// "Now I'll X" / "First, I'll X" patterns
|
|
46
|
-
/\b(now|first|next)\s*(,)?\s*i['''\u2018\u2019]ll\s+/i,
|
|
47
|
-
// Explicit continuation signals
|
|
48
|
-
/\bhere['''\u2018\u2019]s (the|my) (plan|approach|solution|implementation)/i,
|
|
49
|
-
// "Approach:" header indicates planning without action
|
|
50
|
-
/\bapproach:/i,
|
|
51
|
-
// Numbered steps suggesting action to come
|
|
52
|
-
/^\s*\d+\.\s+(create|write|implement|add|update|edit|modify|fix|show|read|check)/im,
|
|
53
|
-
// Bullet points suggesting planned actions
|
|
54
|
-
/^[\s•\-\*]+\s*(create|write|implement|add|update|edit|modify|fix|shows?|reads?|checks?)\s/im,
|
|
55
|
-
// Statements about needing to do something
|
|
56
|
-
/\bneed\s+(to\s+)?(identify|search|find|check|read|look|inspect|analyze|examine|review|list|remove)/i,
|
|
57
|
-
// "Should be X" - model is describing action but not taking it
|
|
58
|
-
/\bshould be\s+(deleted|removed|fixed|updated|changed|cleaned|cleared)/i,
|
|
59
|
-
// "Can be X" - same pattern
|
|
60
|
-
/\bcan be\s+(deleted|removed|fixed|updated|changed|cleaned|cleared)/i,
|
|
61
|
-
// Questions that should trigger investigation instead of asking user
|
|
62
|
-
/\bwhat (files?|areas?|code|patterns?)\s+(should|would you like|do you want)/i,
|
|
63
|
-
// GIVING UP PATTERNS - model is asking for clarification instead of investigating
|
|
64
|
-
/\bplease\s+(point me to|show me|tell me|specify|clarify|provide)/i,
|
|
65
|
-
/\bwithout\s+(more|additional|further)\s+(details?|info|information|context|guidance)/i,
|
|
66
|
-
/\bcan you\s+(clarify|specify|tell me|point me|show me)/i,
|
|
67
|
-
/\blet me know\s+(what|which|where|how)/i,
|
|
68
|
-
/\b(no|can['\u2018\u2019]t|cannot)\s+(obvious|clear|specific|find|identify|determine)/i,
|
|
69
|
-
// "I don't have enough" type responses
|
|
70
|
-
/\bdon['\u2018\u2019]t have\s+(enough|sufficient|the)\s+(info|information|context|details)/i,
|
|
71
|
-
// "Could you" requests for clarification
|
|
72
|
-
/\bcould you\s+(provide|specify|clarify|point|tell|show)/i,
|
|
73
|
-
];
|
|
74
|
-
/**
|
|
75
|
-
* Patterns that indicate the model gave a premature summary without concrete findings.
|
|
76
|
-
* These summaries wrap up too quickly without file:line citations or code snippets.
|
|
77
|
-
*/
|
|
78
|
-
const PREMATURE_CONCLUSION_PATTERNS = [
|
|
79
|
-
// "Performed/Did a quick X" - surface-level investigation claim
|
|
80
|
-
/\b(performed|did)\s+a\s+(quick|brief|fast)\s+(search|grep|scan|review|check)/i,
|
|
81
|
-
// "Validated by running" - claiming verification without showing details
|
|
82
|
-
/\bvalidated\s+by\s+running/i,
|
|
83
|
-
// Mentioning matches but not examining them: "found X matches" without file paths
|
|
84
|
-
/\bfound\s+\d+\s+(match|result|item|file|issue)/i,
|
|
85
|
-
// Summary without substance: "cleaned up", "removed", "fixed" but no specifics
|
|
86
|
-
// Allow any words between the article and the target noun (e.g., "removed the .tsbuildinfo build cache")
|
|
87
|
-
/\b(cleaned up|removed|fixed)\s+(the|a|some)?\s*\S*\s*(file|cache|build|artifact)/i,
|
|
88
|
-
];
|
|
89
|
-
/**
|
|
90
|
-
* Check if response looks like a premature conclusion without concrete findings.
|
|
91
|
-
* Looks for summary language without actual file:line citations.
|
|
92
|
-
*/
|
|
93
|
-
function isPrematureConclusion(content) {
|
|
94
|
-
// Must match a premature conclusion pattern
|
|
95
|
-
const matchesPremature = PREMATURE_CONCLUSION_PATTERNS.some(p => p.test(content));
|
|
96
|
-
if (!matchesPremature) {
|
|
97
|
-
return false;
|
|
98
|
-
}
|
|
99
|
-
// Check if there are actual file:line citations (e.g., "src/foo.ts:42")
|
|
100
|
-
const hasFileCitation = /\b\w+\.(ts|js|tsx|jsx|py|go|rs|java|c|cpp|h|md|json|yaml|yml):\d+\b/.test(content);
|
|
101
|
-
if (hasFileCitation) {
|
|
102
|
-
return false; // Has concrete findings, not premature
|
|
103
|
-
}
|
|
104
|
-
// Check for code blocks with actual code
|
|
105
|
-
const hasCodeBlock = /```[\s\S]{20,}```/.test(content);
|
|
106
|
-
if (hasCodeBlock) {
|
|
107
|
-
return false; // Has code snippets, not premature
|
|
108
|
-
}
|
|
109
|
-
// Short response with no concrete findings = premature conclusion
|
|
110
|
-
return content.length < 500;
|
|
111
|
-
}
|
|
112
|
-
// Short, forceful prompts - model should just act
|
|
113
|
-
const AUTO_CONTINUE_PROMPTS = [
|
|
114
|
-
'Continue.',
|
|
115
|
-
'Keep going. Call tools.',
|
|
116
|
-
'You stopped mid-task. Continue with tools.',
|
|
117
|
-
'Do not summarize. Act with tools or ask a specific question.',
|
|
118
|
-
'Use tools to complete the task.',
|
|
119
|
-
];
|
|
120
|
-
// Specific prompt for when model outputs text-formatted tool calls instead of using the API
|
|
121
|
-
const TEXT_TOOL_CALL_PROMPT = 'You wrote a tool call as text. Use the actual tool API - call the function directly, do not write it as text.';
|
|
122
|
-
// Forceful prompt used when the model keeps narrating or stalling after several attempts
|
|
123
|
-
const AUTO_CONTINUE_FORCE_PROMPT = 'You are stuck narrating. Immediately call the necessary tools to finish the task. If truly done, respond with a concise final answer citing any file paths/lines touched. Do not ask for confirmation.';
|
|
124
|
-
const SHORT_RESPONSE_PROMPT = 'Based on the tool results above, provide your complete response. Summarize findings and suggest next steps if applicable.';
|
|
125
|
-
/**
|
|
126
|
-
* Select an auto-continue prompt and user-facing message based on attempt count and reason.
|
|
127
|
-
* Escalates to a forceful instruction after repeated stalls.
|
|
128
|
-
*/
|
|
129
|
-
function buildAutoContinueInstruction(attempt, reason) {
|
|
130
|
-
const promptIndex = Math.max(0, Math.min(attempt - 1, AUTO_CONTINUE_PROMPTS.length - 1));
|
|
131
|
-
let prompt;
|
|
132
|
-
switch (reason) {
|
|
133
|
-
case 'text_tool_call':
|
|
134
|
-
prompt = TEXT_TOOL_CALL_PROMPT;
|
|
135
|
-
break;
|
|
136
|
-
case 'short_response':
|
|
137
|
-
prompt = SHORT_RESPONSE_PROMPT;
|
|
138
|
-
break;
|
|
139
|
-
default:
|
|
140
|
-
prompt = AUTO_CONTINUE_PROMPTS[promptIndex];
|
|
141
|
-
break;
|
|
142
|
-
}
|
|
143
|
-
const isEscalated = attempt >= AUTO_CONTINUE_ESCALATION_ATTEMPT;
|
|
144
|
-
if (isEscalated) {
|
|
145
|
-
prompt = AUTO_CONTINUE_FORCE_PROMPT;
|
|
146
|
-
}
|
|
147
|
-
const baseMessage = {
|
|
148
|
-
after_tools_narration: 'Model narrated after tools instead of completing. Prompting to continue...',
|
|
149
|
-
intent_without_action: "Model expressed intent but didn't act. Prompting to continue...",
|
|
150
|
-
text_tool_call: 'Model wrote tool call as text instead of using API. Prompting to use actual tools...',
|
|
151
|
-
short_response: 'Model responded too briefly. Prompting for a complete answer...',
|
|
152
|
-
};
|
|
153
|
-
const message = isEscalated
|
|
154
|
-
? `${baseMessage[reason]} Escalating to force tool use and completion.`
|
|
155
|
-
: baseMessage[reason];
|
|
156
|
-
return { prompt, message };
|
|
157
|
-
}
|
|
158
|
-
/**
|
|
159
|
-
* Generate a short, UI-safe preview of planning/intent text.
|
|
160
|
-
* Keeps only the first line and truncates long content.
|
|
161
|
-
*/
|
|
162
|
-
function buildPlanningPreview(content, maxLength = 140) {
|
|
163
|
-
const trimmed = (content || '').trim();
|
|
164
|
-
if (!trimmed) {
|
|
165
|
-
return null;
|
|
166
|
-
}
|
|
167
|
-
const firstLine = trimmed.split('\n').find(line => line.trim()) ?? '';
|
|
168
|
-
const collapsed = firstLine.replace(/\s+/g, ' ').trim();
|
|
169
|
-
if (!collapsed) {
|
|
170
|
-
return null;
|
|
171
|
-
}
|
|
172
|
-
const needsEllipsis = collapsed.length > maxLength;
|
|
173
|
-
const preview = collapsed.slice(0, maxLength).trim();
|
|
174
|
-
return needsEllipsis ? `${preview}...` : preview;
|
|
175
|
-
}
|
|
176
|
-
/**
|
|
177
|
-
* Detect if content contains text-formatted tool calls that should have been actual API calls.
|
|
178
|
-
* This is a PROGRAMMATIC check - if the model outputs "read_file(package.json)" as text,
|
|
179
|
-
* it clearly intended to call a tool but failed to use the proper API.
|
|
180
|
-
*/
|
|
181
|
-
function hasTextFormattedToolCall(content) {
|
|
182
|
-
// Patterns that indicate the model wrote a tool call as text instead of using the API
|
|
183
|
-
const textToolCallPatterns = [
|
|
184
|
-
// "_tool_call_" marker with function syntax
|
|
185
|
-
/_tool_call_\s*\n?\s*\w+\(/im,
|
|
186
|
-
// "tool_call:" followed by function name
|
|
187
|
-
/tool[_\s]?call:?\s*\n?\s*\w+\(/im,
|
|
188
|
-
// Common tool function call syntax at line start (not in code block)
|
|
189
|
-
/^\s*(read_file|write_file|edit_file|execute_bash|list_files)\s*\([^)]+\)/im,
|
|
190
|
-
// Indented tool call like " read_file(package.json)"
|
|
191
|
-
/^\s{2,}(read_file|write_file|edit_file|execute_bash|list_files|Bash|Read|Write|Edit|Grep|Glob)\s*\([^)]*\)\s*$/im,
|
|
192
|
-
// "Executing/Calling tool_name..." without actual execution
|
|
193
|
-
/\b(executing|calling)\s+(read_file|write_file|edit_file|execute_bash)\s*\.{3}?\s*$/im,
|
|
194
|
-
];
|
|
195
|
-
// Skip if the content is inside a code block (```...```)
|
|
196
|
-
const withoutCodeBlocks = content.replace(/```[\s\S]*?```/g, '');
|
|
197
|
-
return textToolCallPatterns.some(p => p.test(withoutCodeBlocks));
|
|
198
|
-
}
|
|
199
|
-
/**
|
|
200
|
-
* Patterns that indicate genuine completion of a task.
|
|
201
|
-
* These are short, definitive statements that signal work is done.
|
|
202
|
-
*/
|
|
203
|
-
const COMPLETION_SIGNAL_PATTERNS = [
|
|
204
|
-
// Explicit done signals
|
|
205
|
-
/^(done|complete|finished|all set)[.!]?$/i,
|
|
206
|
-
// Short confirmations with specifics
|
|
207
|
-
/^(removed|deleted|fixed|created|updated|added)\s+.{1,50}[.!]?$/i,
|
|
208
|
-
// "X is now Y" short statements
|
|
209
|
-
/^.{1,30}\s+is\s+now\s+.{1,30}[.!]?$/i,
|
|
210
|
-
// Task completion with count
|
|
211
|
-
/^(cleaned|removed|fixed|updated)\s+\d+\s+.{1,30}[.!]?$/i,
|
|
212
|
-
];
|
|
213
|
-
/**
|
|
214
|
-
* Patterns that indicate errors, failures, or incomplete work.
|
|
215
|
-
* When these are present, auto-continue should kick in to fix the issues.
|
|
216
|
-
* This is a PROGRAMMATIC check - if errors are reported, the task is NOT complete.
|
|
217
|
-
*/
|
|
218
|
-
const ERROR_INDICATOR_PATTERNS = [
|
|
219
|
-
// Build/compile errors
|
|
220
|
-
/\b(error|errors|fail(s|ed|ure|ing)?|broken|crash(es|ed|ing)?)\b/i,
|
|
221
|
-
// Test failures
|
|
222
|
-
/\b(test(s)?\s+(fail|failing|failed)|failing\s+test)/i,
|
|
223
|
-
// TypeScript/compilation errors
|
|
224
|
-
/\b(typescript|ts|type)\s+error/i,
|
|
225
|
-
/\bts\(\d+,\d+\)/i, // TS error format like ts(700,45)
|
|
226
|
-
// Remaining/unresolved issues
|
|
227
|
-
/\b(persists?|remains?|still\s+(has|have|is|are|broken|failing))\b/i,
|
|
228
|
-
/\b(unresolved|outstanding|remaining)\s+(error|issue|problem)/i,
|
|
229
|
-
// Explicit incomplete signals
|
|
230
|
-
/\b(didn'?t|did\s+not|couldn'?t|could\s+not|wasn'?t|was\s+not)\s+(work|succeed|complete|finish|pass)/i,
|
|
231
|
-
/\b(skipped|blocked|cannot|unable\s+to)\b/i,
|
|
232
|
-
];
|
|
233
|
-
/**
|
|
234
|
-
* Check if response contains error indicators that mean work is NOT complete.
|
|
235
|
-
* This is a simple, programmatic check - no complex NLP needed.
|
|
236
|
-
*/
|
|
237
|
-
function containsErrorIndicators(content) {
|
|
238
|
-
return ERROR_INDICATOR_PATTERNS.some(p => p.test(content));
|
|
239
|
-
}
|
|
240
|
-
/**
|
|
241
|
-
* Check if response is a genuine completion signal (short, definitive statement).
|
|
242
|
-
* Returns true if the model is signaling it's actually done with the task.
|
|
243
|
-
* IMPORTANT: Returns false if error indicators are present - model should continue.
|
|
244
|
-
*/
|
|
245
|
-
function isCompletionSignal(content) {
|
|
246
|
-
const trimmed = content.trim();
|
|
247
|
-
// PROGRAMMATIC: If errors are reported, this is NOT a valid completion
|
|
248
|
-
if (containsErrorIndicators(trimmed)) {
|
|
249
|
-
return false;
|
|
250
|
-
}
|
|
251
|
-
// Very short responses (<50 chars) with completion patterns are genuine signals
|
|
252
|
-
if (trimmed.length < 50) {
|
|
253
|
-
return COMPLETION_SIGNAL_PATTERNS.some(p => p.test(trimmed));
|
|
254
|
-
}
|
|
255
|
-
// Responses asking user for direction are valid stops
|
|
256
|
-
if (/\b(would you like|shall I|want me to|anything else)\b/i.test(trimmed) && trimmed.length < 200) {
|
|
257
|
-
return true;
|
|
258
|
-
}
|
|
259
|
-
// File:line citations indicate concrete work was shown
|
|
260
|
-
const hasCitation = /\b\w+\.(ts|js|tsx|jsx|py|go|rs|java|c|cpp|h|md|json|yaml|yml):\d+\b/.test(trimmed);
|
|
261
|
-
if (hasCitation) {
|
|
262
|
-
return true;
|
|
263
|
-
}
|
|
264
|
-
return false;
|
|
265
|
-
}
|
|
266
|
-
/**
|
|
267
|
-
* PROGRAMMATIC CHECK: After tool calls, should we auto-continue?
|
|
268
|
-
*
|
|
269
|
-
* SIMPLE RULE: After tools, model should ONLY stop if:
|
|
270
|
-
* 1. It asks the user a question (ends with ?)
|
|
271
|
-
* 2. It gives a very short completion (< 80 chars) WITHOUT planning words
|
|
272
|
-
*
|
|
273
|
-
* Everything else = CONTINUE. This is intentionally aggressive.
|
|
274
|
-
* The model should either be DONE (short message) or ASKING (question).
|
|
275
|
-
* Long explanations after tool work = continue to force action.
|
|
276
|
-
*/
|
|
277
|
-
function shouldContinueAfterTools(content) {
|
|
278
|
-
const trimmed = content.trim();
|
|
279
|
-
// No content after tools = continue to get results
|
|
280
|
-
if (trimmed.length === 0) {
|
|
281
|
-
return true;
|
|
282
|
-
}
|
|
283
|
-
// ALWAYS CONTINUE: Error indicators mean work isn't done
|
|
284
|
-
if (containsErrorIndicators(trimmed)) {
|
|
285
|
-
return true;
|
|
286
|
-
}
|
|
287
|
-
// ALWAYS CONTINUE: Intent/planning patterns mean model wants to do more
|
|
288
|
-
if (INTENT_WITHOUT_ACTION_PATTERNS.some(p => p.test(trimmed))) {
|
|
289
|
-
return true;
|
|
290
|
-
}
|
|
291
|
-
// VALID STOP: Model asking user a question (ends with ?)
|
|
292
|
-
if (/\?\s*$/.test(trimmed)) {
|
|
293
|
-
return false;
|
|
294
|
-
}
|
|
295
|
-
// VALID STOP: Explicit user-direction phrases
|
|
296
|
-
if (/\b(would you like|shall I|want me to|do you want|should I|what would you prefer)\b/i.test(trimmed)) {
|
|
297
|
-
return false;
|
|
298
|
-
}
|
|
299
|
-
// VALID STOP: Very short completion (< 80 chars) - likely "Done." or similar
|
|
300
|
-
// But ONLY if it doesn't have planning words
|
|
301
|
-
if (trimmed.length < SHORT_COMPLETION_THRESHOLD) {
|
|
302
|
-
const hasPlanningWords = /\b(i'll|i will|let me|i need|i'm going|will now|going to|first|next|now i)\b/i.test(trimmed);
|
|
303
|
-
if (!hasPlanningWords) {
|
|
304
|
-
return false; // Short and not planning = valid completion
|
|
305
|
-
}
|
|
306
|
-
// Short but has planning words = continue
|
|
307
|
-
return true;
|
|
308
|
-
}
|
|
309
|
-
// CONTINUE: Long response after tools = likely narrating/summarizing
|
|
310
|
-
// Force model to either ask a question or give a short completion
|
|
311
|
-
return true;
|
|
312
|
-
}
|
|
313
|
-
function shouldAutoContinue(content, hasToolCalls, hasReasoningContent = false) {
|
|
314
|
-
// Strip <thinking> blocks to get actual response content
|
|
315
|
-
const withoutThinking = content.replace(/<thinking>[\s\S]*?<\/thinking>/gi, '').trim();
|
|
316
|
-
// Empty content shouldn't trigger auto-continue
|
|
317
|
-
if (withoutThinking.length === 0) {
|
|
318
|
-
return { shouldContinue: false };
|
|
319
|
-
}
|
|
320
|
-
// HIGHEST PRIORITY: Check for text-formatted tool calls
|
|
321
|
-
// This is when the model writes "read_file(package.json)" as text instead of calling the API
|
|
322
|
-
if (hasTextFormattedToolCall(withoutThinking)) {
|
|
323
|
-
return { shouldContinue: true, reason: 'text_tool_call' };
|
|
324
|
-
}
|
|
325
|
-
// PROGRAMMATIC: If response reports errors/failures, auto-continue to fix them
|
|
326
|
-
// This is critical - if model reports "error persists" or "build failed", it should continue
|
|
327
|
-
if (containsErrorIndicators(withoutThinking)) {
|
|
328
|
-
return { shouldContinue: true, reason: 'error_indicators' };
|
|
329
|
-
}
|
|
330
|
-
// If model output thinking/reasoning without much content, likely needs prompting
|
|
331
|
-
if (hasReasoningContent && withoutThinking.length < 30) {
|
|
332
|
-
return { shouldContinue: true, reason: 'short_reasoning' };
|
|
333
|
-
}
|
|
334
|
-
// Check for intent patterns - model wants to do more work
|
|
335
|
-
const hasMoreIntent = INTENT_WITHOUT_ACTION_PATTERNS.some(p => p.test(withoutThinking));
|
|
336
|
-
// Even if tools were called, continue if narrative expresses MORE intent
|
|
337
|
-
// e.g., model calls grep but then says "I need to remove..." - should continue
|
|
338
|
-
if (hasToolCalls) {
|
|
339
|
-
return { shouldContinue: hasMoreIntent, reason: hasMoreIntent ? 'intent_patterns' : undefined };
|
|
340
|
-
}
|
|
341
|
-
// No tool calls - check for intent or premature conclusion
|
|
342
|
-
if (hasMoreIntent) {
|
|
343
|
-
return { shouldContinue: true, reason: 'intent_patterns' };
|
|
344
|
-
}
|
|
345
|
-
// Check for premature conclusions - model summarized without concrete findings
|
|
346
|
-
if (isPrematureConclusion(withoutThinking)) {
|
|
347
|
-
return { shouldContinue: true, reason: 'premature_conclusion' };
|
|
348
|
-
}
|
|
349
|
-
return { shouldContinue: false };
|
|
350
|
-
}
|
|
351
8
|
/**
|
|
352
9
|
* Check if an error is a context overflow error
|
|
353
10
|
*/
|
|
@@ -422,7 +79,6 @@ export class AgentRuntime {
|
|
|
422
79
|
modelId;
|
|
423
80
|
workingDirectory;
|
|
424
81
|
cancellationRequested = false;
|
|
425
|
-
_autoContinueEnabled = false;
|
|
426
82
|
// Loop detection: track last tool calls to detect stuck loops
|
|
427
83
|
lastToolCallSignature = null;
|
|
428
84
|
repeatedToolCallCount = 0;
|
|
@@ -436,8 +92,6 @@ export class AgentRuntime {
|
|
|
436
92
|
// Key: tool signature (name + JSON args), Value: result string
|
|
437
93
|
toolResultCache = new Map();
|
|
438
94
|
static TOOL_CACHE_MAX_SIZE = 50; // Keep last 50 tool results
|
|
439
|
-
// Track if first tool call callback has been fired this turn
|
|
440
|
-
firstToolCallFired = false;
|
|
441
95
|
constructor(options) {
|
|
442
96
|
this.provider = options.provider;
|
|
443
97
|
this.toolRuntime = options.toolRuntime;
|
|
@@ -446,7 +100,6 @@ export class AgentRuntime {
|
|
|
446
100
|
this.providerId = options.providerId ?? 'unknown';
|
|
447
101
|
this.modelId = options.modelId ?? 'unknown';
|
|
448
102
|
this.workingDirectory = options.workingDirectory ?? process.cwd();
|
|
449
|
-
this._autoContinueEnabled = options.autoContinue ?? false;
|
|
450
103
|
const trimmedPrompt = options.systemPrompt.trim();
|
|
451
104
|
this.baseSystemPrompt = trimmedPrompt || null;
|
|
452
105
|
if (trimmedPrompt) {
|
|
@@ -472,37 +125,14 @@ export class AgentRuntime {
|
|
|
472
125
|
isRunning() {
|
|
473
126
|
return this.activeRun !== null;
|
|
474
127
|
}
|
|
475
|
-
/**
|
|
476
|
-
* Check if auto-continuation is enabled.
|
|
477
|
-
*/
|
|
478
|
-
isAutoContinueEnabled() {
|
|
479
|
-
return this._autoContinueEnabled;
|
|
480
|
-
}
|
|
481
|
-
/**
|
|
482
|
-
* Enable or disable auto-continuation.
|
|
483
|
-
*/
|
|
484
|
-
setAutoContinue(enabled) {
|
|
485
|
-
this._autoContinueEnabled = enabled;
|
|
486
|
-
}
|
|
487
128
|
async send(text, useStreaming = false) {
|
|
488
129
|
const prompt = text.trim();
|
|
489
130
|
if (!prompt) {
|
|
490
131
|
return '';
|
|
491
132
|
}
|
|
492
|
-
//
|
|
493
|
-
// This guarantees the user sees feedback the moment their request is received
|
|
494
|
-
if (this.callbacks.onRequestReceived) {
|
|
495
|
-
const maxLength = 160;
|
|
496
|
-
const normalized = prompt.replace(/\s+/g, ' ');
|
|
497
|
-
const preview = normalized.length > maxLength
|
|
498
|
-
? `${normalized.slice(0, maxLength - 3)}...`
|
|
499
|
-
: normalized;
|
|
500
|
-
this.callbacks.onRequestReceived(preview);
|
|
501
|
-
}
|
|
502
|
-
// Reset cancellation flag, loop tracking, and first tool call flag at start of new request
|
|
133
|
+
// Reset cancellation flag and loop tracking at start of new request
|
|
503
134
|
this.cancellationRequested = false;
|
|
504
135
|
this.resetBehavioralLoopTracking();
|
|
505
|
-
this.firstToolCallFired = false;
|
|
506
136
|
// Handle multi-line paste: show summary to user, send full content to AI
|
|
507
137
|
if (isMultilinePaste(prompt)) {
|
|
508
138
|
const processed = processPaste(prompt);
|
|
@@ -534,7 +164,6 @@ export class AgentRuntime {
|
|
|
534
164
|
}
|
|
535
165
|
async processConversation() {
|
|
536
166
|
let contextRecoveryAttempts = 0;
|
|
537
|
-
let autoContinueAttempts = 0;
|
|
538
167
|
let transientRetryAttempts = 0;
|
|
539
168
|
while (true) {
|
|
540
169
|
// Check for cancellation at start of each iteration
|
|
@@ -579,29 +208,13 @@ export class AgentRuntime {
|
|
|
579
208
|
this.lastToolCallSignature = toolSignature;
|
|
580
209
|
this.repeatedToolCallCount = 1;
|
|
581
210
|
}
|
|
582
|
-
//
|
|
211
|
+
// Emit narration if present - it shows the AI's thought process before tools
|
|
583
212
|
const narration = response.content?.trim();
|
|
584
|
-
const hasNarration = !!narration;
|
|
585
|
-
const shouldPromptAfterTools = this._autoContinueEnabled &&
|
|
586
|
-
autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS &&
|
|
587
|
-
shouldContinueAfterTools(narration ?? '');
|
|
588
|
-
// Fire first tool call callback if not yet fired this turn
|
|
589
|
-
// ALWAYS emit acknowledgement before first tool - ensures user sees immediate feedback
|
|
590
|
-
if (!this.firstToolCallFired && this.callbacks.onBeforeFirstToolCall) {
|
|
591
|
-
this.firstToolCallFired = true;
|
|
592
|
-
const toolNames = response.toolCalls.map(t => t.name);
|
|
593
|
-
const injectedAck = this.callbacks.onBeforeFirstToolCall(toolNames, hasNarration);
|
|
594
|
-
// ALWAYS emit acknowledgement if returned - provides immediate user feedback
|
|
595
|
-
if (injectedAck) {
|
|
596
|
-
this.emitAssistantMessage(injectedAck, { isFinal: false, usage, contextStats });
|
|
597
|
-
}
|
|
598
|
-
}
|
|
599
213
|
if (narration) {
|
|
600
214
|
this.emitAssistantMessage(narration, {
|
|
601
215
|
isFinal: false,
|
|
602
216
|
usage,
|
|
603
217
|
contextStats,
|
|
604
|
-
suppressDisplay: shouldPromptAfterTools,
|
|
605
218
|
});
|
|
606
219
|
}
|
|
607
220
|
const assistantMessage = {
|
|
@@ -613,23 +226,6 @@ export class AgentRuntime {
|
|
|
613
226
|
}
|
|
614
227
|
this.messages.push(assistantMessage);
|
|
615
228
|
await this.resolveToolCalls(response.toolCalls);
|
|
616
|
-
// PROGRAMMATIC CONTINUATION: After tool work, model must either:
|
|
617
|
-
// 1. Call more tools (already handled by continue above)
|
|
618
|
-
// 2. Give a short completion signal
|
|
619
|
-
// 3. Ask user for direction
|
|
620
|
-
// PROGRAMMATIC: If model outputs narrative instead of concrete findings, continue
|
|
621
|
-
if (shouldPromptAfterTools) {
|
|
622
|
-
autoContinueAttempts++;
|
|
623
|
-
const instruction = buildAutoContinueInstruction(autoContinueAttempts, 'after_tools_narration');
|
|
624
|
-
this.messages.push({
|
|
625
|
-
role: 'user',
|
|
626
|
-
content: instruction.prompt,
|
|
627
|
-
});
|
|
628
|
-
this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, instruction.message);
|
|
629
|
-
continue;
|
|
630
|
-
}
|
|
631
|
-
// Reset auto-continue counter since model is actively working
|
|
632
|
-
autoContinueAttempts = 0;
|
|
633
229
|
continue;
|
|
634
230
|
}
|
|
635
231
|
const reply = response.content?.trim() ?? '';
|
|
@@ -638,41 +234,6 @@ export class AgentRuntime {
|
|
|
638
234
|
this.lastToolCallSignature = null;
|
|
639
235
|
this.repeatedToolCallCount = 0;
|
|
640
236
|
}
|
|
641
|
-
// If model returned empty or very short AND auto-continue is enabled, prompt it to respond
|
|
642
|
-
// This is disabled by default to prevent loops
|
|
643
|
-
if (this._autoContinueEnabled && reply.length < 10 && autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS) {
|
|
644
|
-
autoContinueAttempts++;
|
|
645
|
-
const instruction = buildAutoContinueInstruction(autoContinueAttempts, 'short_response');
|
|
646
|
-
this.messages.push({ role: 'assistant', content: reply || '' });
|
|
647
|
-
this.messages.push({
|
|
648
|
-
role: 'user',
|
|
649
|
-
content: instruction.prompt,
|
|
650
|
-
});
|
|
651
|
-
this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, instruction.message);
|
|
652
|
-
continue;
|
|
653
|
-
}
|
|
654
|
-
// Check if model expressed intent to act but didn't call tools
|
|
655
|
-
// This catches "Let me create..." without actual tool calls
|
|
656
|
-
// Also catches text-formatted tool calls like "_tool_call_\nread_file(...)"
|
|
657
|
-
// Only auto-continue if the feature is enabled
|
|
658
|
-
const continueResult = shouldAutoContinue(reply, false);
|
|
659
|
-
if (this._autoContinueEnabled && continueResult.shouldContinue && autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS) {
|
|
660
|
-
autoContinueAttempts++;
|
|
661
|
-
const planningPreview = buildPlanningPreview(reply);
|
|
662
|
-
this.messages.push({ role: 'assistant', content: reply });
|
|
663
|
-
// Choose prompt based on reason - text tool calls get a specific, forceful prompt
|
|
664
|
-
const continueReason = continueResult.reason === 'text_tool_call' ? 'text_tool_call' : 'intent_without_action';
|
|
665
|
-
const instruction = buildAutoContinueInstruction(autoContinueAttempts, continueReason);
|
|
666
|
-
this.messages.push({
|
|
667
|
-
role: 'user',
|
|
668
|
-
content: instruction.prompt,
|
|
669
|
-
});
|
|
670
|
-
const uiMessage = planningPreview
|
|
671
|
-
? `${instruction.message} Next action: ${planningPreview}`
|
|
672
|
-
: instruction.message;
|
|
673
|
-
this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, uiMessage);
|
|
674
|
-
continue;
|
|
675
|
-
}
|
|
676
237
|
if (reply) {
|
|
677
238
|
this.emitAssistantMessage(reply, { isFinal: true, usage, contextStats });
|
|
678
239
|
}
|
|
@@ -711,7 +272,6 @@ export class AgentRuntime {
|
|
|
711
272
|
return this.processConversation();
|
|
712
273
|
}
|
|
713
274
|
let contextRecoveryAttempts = 0;
|
|
714
|
-
let autoContinueAttempts = 0;
|
|
715
275
|
let transientRetryAttempts = 0;
|
|
716
276
|
while (true) {
|
|
717
277
|
// Check for cancellation at start of each iteration
|
|
@@ -779,20 +339,8 @@ export class AgentRuntime {
|
|
|
779
339
|
}
|
|
780
340
|
}
|
|
781
341
|
else if (chunk.type === 'tool_call' && chunk.toolCall) {
|
|
782
|
-
//
|
|
342
|
+
// On first tool call, flush any buffered content
|
|
783
343
|
if (toolCalls.length === 0) {
|
|
784
|
-
const hasNarration = !!(fullContent.trim() || reasoningContent.trim());
|
|
785
|
-
// Fire callback and ALWAYS inject acknowledgement BEFORE anything else
|
|
786
|
-
if (!this.firstToolCallFired && this.callbacks.onBeforeFirstToolCall) {
|
|
787
|
-
this.firstToolCallFired = true;
|
|
788
|
-
const injectedAck = this.callbacks.onBeforeFirstToolCall([chunk.toolCall.name], hasNarration);
|
|
789
|
-
// ALWAYS inject acknowledgement if returned - ensures immediate user feedback
|
|
790
|
-
if (injectedAck) {
|
|
791
|
-
// Inject acknowledgement as the FIRST thing user sees
|
|
792
|
-
this.callbacks.onStreamChunk?.(injectedAck + '\n', 'content');
|
|
793
|
-
fullContent = injectedAck + '\n' + fullContent; // Add to content for context
|
|
794
|
-
}
|
|
795
|
-
}
|
|
796
344
|
// Emit complete reasoning block first
|
|
797
345
|
if (reasoningContent.trim()) {
|
|
798
346
|
this.callbacks.onStreamChunk?.(reasoningContent, 'reasoning');
|
|
@@ -864,16 +412,12 @@ export class AgentRuntime {
|
|
|
864
412
|
// (wasStreamed=true prevents duplicate display)
|
|
865
413
|
// Note: Acknowledgement injection happens during streaming (when first tool_call chunk arrives)
|
|
866
414
|
const narration = combinedContent.trim();
|
|
867
|
-
const shouldPromptAfterTools = this._autoContinueEnabled &&
|
|
868
|
-
autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS &&
|
|
869
|
-
shouldContinueAfterTools(narration ?? '');
|
|
870
415
|
if (narration) {
|
|
871
416
|
this.emitAssistantMessage(narration, {
|
|
872
417
|
isFinal: false,
|
|
873
418
|
usage,
|
|
874
419
|
contextStats,
|
|
875
420
|
wasStreamed: true,
|
|
876
|
-
suppressDisplay: shouldPromptAfterTools,
|
|
877
421
|
});
|
|
878
422
|
}
|
|
879
423
|
const assistantMessage = {
|
|
@@ -883,69 +427,14 @@ export class AgentRuntime {
|
|
|
883
427
|
};
|
|
884
428
|
this.messages.push(assistantMessage);
|
|
885
429
|
await this.resolveToolCalls(toolCalls);
|
|
886
|
-
// PROGRAMMATIC CONTINUATION: After tool work, model must either:
|
|
887
|
-
// 1. Call more tools (already handled by continue above)
|
|
888
|
-
// 2. Give a short completion signal
|
|
889
|
-
// 3. Ask user for direction
|
|
890
|
-
// PROGRAMMATIC: If model outputs narrative instead of concrete findings, continue
|
|
891
|
-
if (shouldPromptAfterTools) {
|
|
892
|
-
autoContinueAttempts++;
|
|
893
|
-
const instruction = buildAutoContinueInstruction(autoContinueAttempts, 'after_tools_narration');
|
|
894
|
-
this.messages.push({
|
|
895
|
-
role: 'user',
|
|
896
|
-
content: instruction.prompt,
|
|
897
|
-
});
|
|
898
|
-
this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, instruction.message);
|
|
899
|
-
continue;
|
|
900
|
-
}
|
|
901
|
-
// Reset auto-continue counter since model appears to be done
|
|
902
|
-
autoContinueAttempts = 0;
|
|
903
430
|
continue;
|
|
904
431
|
}
|
|
905
|
-
// Check if model expressed intent to act but didn't call tools
|
|
906
|
-
// This catches "Let me create..." without actual tool calls
|
|
907
|
-
// Only auto-continue if the feature is enabled
|
|
908
432
|
const reply = combinedContent.trim();
|
|
909
433
|
// Reset loop detection when we get a text response (not just tool calls)
|
|
910
434
|
if (reply.length >= 10) {
|
|
911
435
|
this.lastToolCallSignature = null;
|
|
912
436
|
this.repeatedToolCallCount = 0;
|
|
913
437
|
}
|
|
914
|
-
// If model returned empty or very short AND auto-continue is enabled, prompt it to respond
|
|
915
|
-
// This is disabled by default to prevent loops
|
|
916
|
-
if (this._autoContinueEnabled && reply.length < 10 && autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS) {
|
|
917
|
-
autoContinueAttempts++;
|
|
918
|
-
const instruction = buildAutoContinueInstruction(autoContinueAttempts, 'short_response');
|
|
919
|
-
this.messages.push({ role: 'assistant', content: reply || '' });
|
|
920
|
-
this.messages.push({
|
|
921
|
-
role: 'user',
|
|
922
|
-
content: instruction.prompt,
|
|
923
|
-
});
|
|
924
|
-
this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, instruction.message);
|
|
925
|
-
continue;
|
|
926
|
-
}
|
|
927
|
-
// PROGRAMMATIC CHECK: Text-only responses after tool work need scrutiny
|
|
928
|
-
// If model outputs substantial narrative without tools, it's likely summarizing
|
|
929
|
-
// Check if this is a genuine completion or a premature summary
|
|
930
|
-
// Also catches text-formatted tool calls like "_tool_call_\nread_file(...)"
|
|
931
|
-
if (this._autoContinueEnabled && autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS) {
|
|
932
|
-
// Intent patterns still catch "let me X" without tools
|
|
933
|
-
const streamContinueResult = shouldAutoContinue(reply, false);
|
|
934
|
-
if (streamContinueResult.shouldContinue) {
|
|
935
|
-
autoContinueAttempts++;
|
|
936
|
-
const planningPreview = buildPlanningPreview(reply);
|
|
937
|
-
this.messages.push({ role: 'assistant', content: reply });
|
|
938
|
-
// Choose prompt based on reason - text tool calls get a specific, forceful prompt
|
|
939
|
-
const continueReason = streamContinueResult.reason === 'text_tool_call' ? 'text_tool_call' : 'intent_without_action';
|
|
940
|
-
const instruction = buildAutoContinueInstruction(autoContinueAttempts, continueReason);
|
|
941
|
-
this.messages.push({ role: 'user', content: instruction.prompt });
|
|
942
|
-
const uiMessage = planningPreview
|
|
943
|
-
? `${instruction.message} Next action: ${planningPreview}`
|
|
944
|
-
: instruction.message;
|
|
945
|
-
this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, uiMessage);
|
|
946
|
-
continue;
|
|
947
|
-
}
|
|
948
|
-
}
|
|
949
438
|
// Final message - mark as streamed to avoid double-display in UI
|
|
950
439
|
if (reply) {
|
|
951
440
|
this.emitAssistantMessage(reply, { isFinal: true, usage, contextStats, wasStreamed: true });
|
|
@@ -1131,7 +620,7 @@ export class AgentRuntime {
|
|
|
1131
620
|
return false; // Always show thinking/narration
|
|
1132
621
|
}
|
|
1133
622
|
emitAssistantMessage(content, metadata) {
|
|
1134
|
-
if (!content) {
|
|
623
|
+
if (!content || !content.trim()) {
|
|
1135
624
|
return;
|
|
1136
625
|
}
|
|
1137
626
|
const elapsedMs = this.activeRun ? Date.now() - this.activeRun.startedAt : undefined;
|