erosolar-cli 2.1.168 → 2.1.169

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133)
  1. package/README.md +1 -1
  2. package/agents/erosolar-code.rules.json +2 -2
  3. package/agents/general.rules.json +21 -3
  4. package/dist/capabilities/statusCapability.js +2 -2
  5. package/dist/capabilities/statusCapability.js.map +1 -1
  6. package/dist/contracts/agent-schemas.json +5 -5
  7. package/dist/core/agent.d.ts +70 -24
  8. package/dist/core/agent.d.ts.map +1 -1
  9. package/dist/core/agent.js +424 -248
  10. package/dist/core/agent.js.map +1 -1
  11. package/dist/core/preferences.d.ts +1 -0
  12. package/dist/core/preferences.d.ts.map +1 -1
  13. package/dist/core/preferences.js +8 -1
  14. package/dist/core/preferences.js.map +1 -1
  15. package/dist/core/reliabilityPrompt.d.ts +9 -0
  16. package/dist/core/reliabilityPrompt.d.ts.map +1 -0
  17. package/dist/core/reliabilityPrompt.js +31 -0
  18. package/dist/core/reliabilityPrompt.js.map +1 -0
  19. package/dist/core/schemaValidator.js +3 -3
  20. package/dist/core/schemaValidator.js.map +1 -1
  21. package/dist/core/toolPreconditions.d.ts +0 -11
  22. package/dist/core/toolPreconditions.d.ts.map +1 -1
  23. package/dist/core/toolPreconditions.js +33 -164
  24. package/dist/core/toolPreconditions.js.map +1 -1
  25. package/dist/core/toolRuntime.d.ts.map +1 -1
  26. package/dist/core/toolRuntime.js +9 -114
  27. package/dist/core/toolRuntime.js.map +1 -1
  28. package/dist/core/updateChecker.d.ts +61 -1
  29. package/dist/core/updateChecker.d.ts.map +1 -1
  30. package/dist/core/updateChecker.js +147 -3
  31. package/dist/core/updateChecker.js.map +1 -1
  32. package/dist/headless/headlessApp.d.ts.map +1 -1
  33. package/dist/headless/headlessApp.js +0 -39
  34. package/dist/headless/headlessApp.js.map +1 -1
  35. package/dist/plugins/tools/nodeDefaults.d.ts.map +1 -1
  36. package/dist/plugins/tools/nodeDefaults.js +0 -2
  37. package/dist/plugins/tools/nodeDefaults.js.map +1 -1
  38. package/dist/providers/openaiResponsesProvider.d.ts.map +1 -1
  39. package/dist/providers/openaiResponsesProvider.js +79 -74
  40. package/dist/providers/openaiResponsesProvider.js.map +1 -1
  41. package/dist/runtime/agentController.d.ts.map +1 -1
  42. package/dist/runtime/agentController.js +6 -3
  43. package/dist/runtime/agentController.js.map +1 -1
  44. package/dist/runtime/agentSession.d.ts +0 -2
  45. package/dist/runtime/agentSession.d.ts.map +1 -1
  46. package/dist/runtime/agentSession.js +2 -2
  47. package/dist/runtime/agentSession.js.map +1 -1
  48. package/dist/shell/interactiveShell.d.ts +11 -12
  49. package/dist/shell/interactiveShell.d.ts.map +1 -1
  50. package/dist/shell/interactiveShell.js +269 -193
  51. package/dist/shell/interactiveShell.js.map +1 -1
  52. package/dist/shell/systemPrompt.d.ts.map +1 -1
  53. package/dist/shell/systemPrompt.js +4 -15
  54. package/dist/shell/systemPrompt.js.map +1 -1
  55. package/dist/subagents/taskRunner.js +2 -1
  56. package/dist/subagents/taskRunner.js.map +1 -1
  57. package/dist/tools/bashTools.d.ts.map +1 -1
  58. package/dist/tools/bashTools.js +101 -8
  59. package/dist/tools/bashTools.js.map +1 -1
  60. package/dist/tools/diffUtils.d.ts +8 -2
  61. package/dist/tools/diffUtils.d.ts.map +1 -1
  62. package/dist/tools/diffUtils.js +72 -13
  63. package/dist/tools/diffUtils.js.map +1 -1
  64. package/dist/tools/grepTools.d.ts.map +1 -1
  65. package/dist/tools/grepTools.js +10 -2
  66. package/dist/tools/grepTools.js.map +1 -1
  67. package/dist/tools/searchTools.d.ts.map +1 -1
  68. package/dist/tools/searchTools.js +4 -2
  69. package/dist/tools/searchTools.js.map +1 -1
  70. package/dist/ui/PromptController.d.ts +2 -3
  71. package/dist/ui/PromptController.d.ts.map +1 -1
  72. package/dist/ui/PromptController.js +2 -3
  73. package/dist/ui/PromptController.js.map +1 -1
  74. package/dist/ui/ShellUIAdapter.d.ts +71 -18
  75. package/dist/ui/ShellUIAdapter.d.ts.map +1 -1
  76. package/dist/ui/ShellUIAdapter.js +237 -139
  77. package/dist/ui/ShellUIAdapter.js.map +1 -1
  78. package/dist/ui/UnifiedUIController.d.ts +0 -1
  79. package/dist/ui/UnifiedUIController.d.ts.map +1 -1
  80. package/dist/ui/UnifiedUIController.js +0 -1
  81. package/dist/ui/UnifiedUIController.js.map +1 -1
  82. package/dist/ui/UnifiedUIRenderer.d.ts +122 -7
  83. package/dist/ui/UnifiedUIRenderer.d.ts.map +1 -1
  84. package/dist/ui/UnifiedUIRenderer.js +823 -130
  85. package/dist/ui/UnifiedUIRenderer.js.map +1 -1
  86. package/dist/ui/animatedStatus.d.ts +129 -0
  87. package/dist/ui/animatedStatus.d.ts.map +1 -0
  88. package/dist/ui/animatedStatus.js +384 -0
  89. package/dist/ui/animatedStatus.js.map +1 -0
  90. package/dist/ui/display.d.ts +13 -48
  91. package/dist/ui/display.d.ts.map +1 -1
  92. package/dist/ui/display.js +22 -105
  93. package/dist/ui/display.js.map +1 -1
  94. package/dist/ui/shortcutsHelp.d.ts.map +1 -1
  95. package/dist/ui/shortcutsHelp.js +0 -1
  96. package/dist/ui/shortcutsHelp.js.map +1 -1
  97. package/dist/ui/unified/index.d.ts +1 -1
  98. package/dist/ui/unified/index.d.ts.map +1 -1
  99. package/dist/ui/unified/index.js +0 -2
  100. package/dist/ui/unified/index.js.map +1 -1
  101. package/package.json +1 -2
  102. package/dist/StringUtils.d.ts +0 -8
  103. package/dist/StringUtils.d.ts.map +0 -1
  104. package/dist/StringUtils.js +0 -11
  105. package/dist/StringUtils.js.map +0 -1
  106. package/dist/core/aiFlowSupervisor.d.ts +0 -44
  107. package/dist/core/aiFlowSupervisor.d.ts.map +0 -1
  108. package/dist/core/aiFlowSupervisor.js +0 -299
  109. package/dist/core/aiFlowSupervisor.js.map +0 -1
  110. package/dist/core/cliTestHarness.d.ts +0 -200
  111. package/dist/core/cliTestHarness.d.ts.map +0 -1
  112. package/dist/core/cliTestHarness.js +0 -549
  113. package/dist/core/cliTestHarness.js.map +0 -1
  114. package/dist/core/testUtils.d.ts +0 -121
  115. package/dist/core/testUtils.d.ts.map +0 -1
  116. package/dist/core/testUtils.js +0 -235
  117. package/dist/core/testUtils.js.map +0 -1
  118. package/dist/core/toolValidation.d.ts +0 -116
  119. package/dist/core/toolValidation.d.ts.map +0 -1
  120. package/dist/core/toolValidation.js +0 -282
  121. package/dist/core/toolValidation.js.map +0 -1
  122. package/dist/ui/compactRenderer.d.ts +0 -139
  123. package/dist/ui/compactRenderer.d.ts.map +0 -1
  124. package/dist/ui/compactRenderer.js +0 -398
  125. package/dist/ui/compactRenderer.js.map +0 -1
  126. package/dist/ui/streamingFormatter.d.ts +0 -30
  127. package/dist/ui/streamingFormatter.d.ts.map +0 -1
  128. package/dist/ui/streamingFormatter.js +0 -91
  129. package/dist/ui/streamingFormatter.js.map +0 -1
  130. package/dist/utils/errorUtils.d.ts +0 -16
  131. package/dist/utils/errorUtils.d.ts.map +0 -1
  132. package/dist/utils/errorUtils.js +0 -66
  133. package/dist/utils/errorUtils.js.map +0 -1
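The diff hunks below are from package/dist/core/agent.js (entry 9 above, +424 -248).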
@@ -4,65 +4,7 @@ import { safeErrorMessage } from './secretStore.js';
  * Maximum number of context overflow recovery attempts
  */
  const MAX_CONTEXT_RECOVERY_ATTEMPTS = 3;
- /**
- * Maximum number of auto-continuation attempts when model expresses intent but doesn't act
- */
- const MAX_AUTO_CONTINUE_ATTEMPTS = 3;
- /**
- * Streaming safety timeouts (ms)
- * - First chunk timeout: fail fast if the stream never starts
- * - Inactivity timeout: abort if no chunks arrive for an extended period
- */
- // Allow more headroom before declaring a streaming stall to avoid premature fallbacks.
- const STREAM_FIRST_CHUNK_TIMEOUT_MS = 25000;
- const STREAM_INACTIVITY_TIMEOUT_MS = 60000;
- /**
- * Patterns that indicate the model intends to take action but hasn't yet
- * These suggest the model should be prompted to continue
- */
- const INTENT_WITHOUT_ACTION_PATTERNS = [
- // "Let me X" patterns - model is stating what it will do
- /\blet me\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|examine|review)/i,
- // "I'll X" / "I will X" patterns
- /\bi['']ll\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|start|begin|now)/i,
- /\bi will\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|start|begin|now)/i,
- // "I'm going to X" patterns
- /\bi['']m going to\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze)/i,
- // "Now I'll X" / "First, I'll X" patterns
- /\b(now|first|next)\s*(,)?\s*i['']ll\s+/i,
- // Explicit continuation signals
- /\bhere['']s (the|my) (plan|approach|solution|implementation)/i,
- // Numbered steps suggesting action to come
- /^\s*\d+\.\s+(create|write|implement|add|update|edit|modify|fix|show|read|check)/im,
- // Bullet points suggesting planned actions
- /^[\s•\-\*]+\s*(create|write|implement|add|update|edit|modify|fix|shows?|reads?|checks?)\s/im,
- ];
- const AUTO_CONTINUE_PROMPTS = [
- 'Continue. Use tools now: start with Read/read_file to inspect the target file, then call Edit (or Write if available) with file_path/old_string/new_string to apply changes. Keep using tools until the task is done.',
- 'You MUST call tools immediately. Issue Read -> Edit/Write tool calls with explicit parameters; no more explaining or planning.',
- 'CRITICAL: Call a tool right now. Use Edit with file_path, old_string, new_string (or Write with file_path and content). Respond with tool calls only.',
- ];
- /**
- * Check if response indicates intent to act without actually acting
- * This detects when the model says "let me do X" but doesn't call any tools
- */
- function shouldAutoContinue(content, hasToolCalls) {
- // If there are tool calls, no need to auto-continue
- if (hasToolCalls) {
- return false;
- }
- // If content is very short, likely not an incomplete intent
- if (content.length < 50) {
- return false;
- }
- // Check for intent patterns
- for (const pattern of INTENT_WITHOUT_ACTION_PATTERNS) {
- if (pattern.test(content)) {
- return true;
- }
- }
- return false;
- }
+ // Streaming runs without timeouts - we let the model take as long as it needs
  /**
  * Check if an error is a context overflow error
  */
@@ -77,15 +19,53 @@ function isContextOverflowError(error) {
  message.includes('max_tokens') ||
  message.includes('context window'));
  }
- class StreamInterruptionError extends Error {
- reason;
- partialResponse;
- constructor(reason, message, partialResponse) {
- super(message);
- this.name = 'StreamInterruptionError';
- this.reason = reason;
- this.partialResponse = partialResponse;
+ /**
+ * Check if an error is a transient/retryable error (network issues, rate limits, server errors)
+ */
+ function isTransientError(error) {
+ if (!(error instanceof Error))
+ return false;
+ const message = error.message.toLowerCase();
+ // Network errors
+ const networkPatterns = [
+ 'econnrefused', 'econnreset', 'enotfound', 'etimedout', 'epipe',
+ 'network error', 'connection error', 'fetch failed', 'socket hang up',
+ 'network is unreachable', 'connection refused', 'connection reset',
+ ];
+ if (networkPatterns.some(p => message.includes(p))) {
+ return true;
+ }
+ // Rate limit errors
+ if (message.includes('rate limit') || message.includes('429') || message.includes('too many requests')) {
+ return true;
+ }
+ // Server errors (5xx)
+ if (message.includes('500') || message.includes('502') || message.includes('503') || message.includes('504')) {
+ return true;
+ }
+ // Temporary service errors
+ if (message.includes('service unavailable') || message.includes('temporarily unavailable') ||
+ message.includes('overloaded') || message.includes('server error')) {
+ return true;
  }
+ return false;
+ }
+ /**
+ * Maximum number of transient error retries
+ */
+ const MAX_TRANSIENT_RETRIES = 3;
+ /**
+ * Delay before retry (in ms), with exponential backoff
+ */
+ function getRetryDelay(attempt) {
+ // Base delay of 1 second, doubles each attempt: 1s, 2s, 4s
+ return Math.min(1000 * Math.pow(2, attempt - 1), 10000);
+ }
+ /**
+ * Sleep for the specified milliseconds
+ */
+ function sleep(ms) {
+ return new Promise(resolve => setTimeout(resolve, ms));
  }
  export class AgentRuntime {
  messages = [];
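The new isTransientError/getRetryDelay/sleep helpers give the runtime a bounded retry policy: at most MAX_TRANSIENT_RETRIES attempts, with exponential backoff capped at 10 seconds. A minimal standalone sketch of the schedule this produces (getRetryDelay is copied from the hunk above; the constant and logging loop are illustrative only):

    // Sketch: backoff schedule implied by getRetryDelay - 1s, 2s, 4s, capped at 10s.
    const MAX_TRANSIENT_RETRIES = 3;
    function getRetryDelay(attempt) {
        // Base delay of 1 second, doubles each attempt: 1s, 2s, 4s
        return Math.min(1000 * Math.pow(2, attempt - 1), 10000);
    }
    for (let attempt = 1; attempt <= MAX_TRANSIENT_RETRIES; attempt++) {
        console.log(`retry ${attempt}: wait ${getRetryDelay(attempt)} ms`);
    }
    // retry 1: wait 1000 ms
    // retry 2: wait 2000 ms
    // retry 3: wait 4000 ms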
@@ -99,7 +79,19 @@ export class AgentRuntime {
  modelId;
  workingDirectory;
  cancellationRequested = false;
- _autoContinueEnabled = false;
+ // Loop detection: track last tool calls to detect stuck loops
+ lastToolCallSignature = null;
+ repeatedToolCallCount = 0;
+ static MAX_REPEATED_TOOL_CALLS = 5; // Allow up to 4 identical calls before stopping
+ // Behavioral loop detection: track recent tool calls to catch repetitive patterns
+ // e.g., calling "execute_bash" with "git status" 5 times even if output differs slightly
+ recentToolCalls = [];
+ static TOOL_HISTORY_SIZE = 12;
+ static BEHAVIORAL_LOOP_THRESHOLD = 3; // Same tool+cmd 3+ times in last 12 = stuck
+ // Tool result cache: prevent duplicate identical tool calls by returning cached results
+ // Key: tool signature (name + JSON args), Value: result string
+ toolResultCache = new Map();
+ static TOOL_CACHE_MAX_SIZE = 50; // Keep last 50 tool results
  constructor(options) {
  this.provider = options.provider;
  this.toolRuntime = options.toolRuntime;
@@ -108,7 +100,6 @@
  this.providerId = options.providerId ?? 'unknown';
  this.modelId = options.modelId ?? 'unknown';
  this.workingDirectory = options.workingDirectory ?? process.cwd();
- this._autoContinueEnabled = options.autoContinue ?? false;
  const trimmedPrompt = options.systemPrompt.trim();
  this.baseSystemPrompt = trimmedPrompt || null;
  if (trimmedPrompt) {
@@ -134,25 +125,14 @@
  isRunning() {
  return this.activeRun !== null;
  }
- /**
- * Check if auto-continuation is enabled.
- */
- isAutoContinueEnabled() {
- return this._autoContinueEnabled;
- }
- /**
- * Enable or disable auto-continuation.
- */
- setAutoContinue(enabled) {
- this._autoContinueEnabled = enabled;
- }
  async send(text, useStreaming = false) {
  const prompt = text.trim();
  if (!prompt) {
  return '';
  }
- // Reset cancellation flag at start of new request
+ // Reset cancellation flag and loop tracking at start of new request
  this.cancellationRequested = false;
+ this.resetBehavioralLoopTracking();
  // Handle multi-line paste: show summary to user, send full content to AI
  if (isMultilinePaste(prompt)) {
  const processed = processPaste(prompt);
@@ -168,28 +148,9 @@
  const run = { startedAt: Date.now() };
  this.activeRun = run;
  try {
+ // Always use streaming when available - no fallback
  if (useStreaming && this.provider.generateStream) {
- try {
- return await this.processConversationStreaming();
- }
- catch (error) {
- const message = safeErrorMessage(error);
- const reason = error instanceof StreamInterruptionError ? error.reason : undefined;
- const partialResponse = error instanceof StreamInterruptionError ? error.partialResponse : undefined;
- console.warn(`[agent] Streaming failed, falling back to non-streaming: ${message}`);
- // If we captured part of the response, seed it into history and ask the model to continue
- // so we don't restart the answer from scratch during fallback.
- if (partialResponse && partialResponse.trim()) {
- const partial = partialResponse.trim();
- this.messages.push({ role: 'assistant', content: partial });
- this.messages.push({
- role: 'user',
- content: 'Continue your previous response from where it stopped. Do not repeat text you already provided.',
- });
- }
- this.callbacks.onStreamFallback?.({ message, error, reason, partialResponse });
- return await this.processConversation();
- }
+ return await this.processConversationStreaming();
  }
  return await this.processConversation();
  }
@@ -203,7 +164,7 @@
  }
  async processConversation() {
  let contextRecoveryAttempts = 0;
- let autoContinueAttempts = 0;
+ let transientRetryAttempts = 0;
  while (true) {
  // Check for cancellation at start of each iteration
  if (this.cancellationRequested) {
@@ -219,44 +180,59 @@
  // Reset recovery attempts on successful generation
  contextRecoveryAttempts = 0;
  if (response.type === 'tool_calls') {
- const suppressNarration = this.shouldSuppressToolNarration();
- const narration = suppressNarration ? '' : response.content?.trim();
+ // BEHAVIORAL LOOP DETECTION: Check if model is stuck calling same tool repeatedly
+ const behavioralLoopResult = this.checkBehavioralLoop(response.toolCalls);
+ if (behavioralLoopResult) {
+ this.emitAssistantMessage(behavioralLoopResult, { isFinal: true, usage, contextStats });
+ this.messages.push({ role: 'assistant', content: behavioralLoopResult });
+ return behavioralLoopResult;
+ }
+ // Loop detection: check if same tool calls are being repeated (exact signature match)
+ const toolSignature = response.toolCalls
+ .map((t) => `${t.name}:${JSON.stringify(t.arguments)}`)
+ .sort()
+ .join('|');
+ if (toolSignature === this.lastToolCallSignature) {
+ this.repeatedToolCallCount++;
+ if (this.repeatedToolCallCount >= AgentRuntime.MAX_REPEATED_TOOL_CALLS) {
+ // Break out of loop - model is stuck
+ const loopMsg = `Tool loop detected: same tools called ${this.repeatedToolCallCount} times. Please try a different approach or provide more specific instructions.`;
+ this.emitAssistantMessage(loopMsg, { isFinal: true, usage, contextStats });
+ this.messages.push({ role: 'assistant', content: loopMsg });
+ this.lastToolCallSignature = null;
+ this.repeatedToolCallCount = 0;
+ return loopMsg;
+ }
+ }
+ else {
+ this.lastToolCallSignature = toolSignature;
+ this.repeatedToolCallCount = 1;
+ }
+ // Emit narration if present - it shows the AI's thought process before tools
+ const narration = response.content?.trim();
  if (narration) {
- this.emitAssistantMessage(narration, { isFinal: false, usage, contextStats });
+ this.emitAssistantMessage(narration, {
+ isFinal: false,
+ usage,
+ contextStats,
+ });
  }
  const assistantMessage = {
  role: 'assistant',
- content: suppressNarration ? '' : (response.content ?? ''),
+ content: response.content ?? '',
  };
  if (response.toolCalls?.length) {
  assistantMessage.toolCalls = response.toolCalls;
  }
  this.messages.push(assistantMessage);
  await this.resolveToolCalls(response.toolCalls);
- // Reset auto-continue counter since model is actively working
- autoContinueAttempts = 0;
  continue;
  }
  const reply = response.content?.trim() ?? '';
- // Check if model expressed intent to act but didn't call tools
- // This catches "Let me create..." without actual tool calls
- // Only auto-continue if the feature is enabled
- if (this._autoContinueEnabled && shouldAutoContinue(reply, false) && autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS) {
- autoContinueAttempts++;
- // Emit the planning content but mark as non-final
- if (reply) {
- this.emitAssistantMessage(reply, { isFinal: false, usage, contextStats });
- }
- this.messages.push({ role: 'assistant', content: reply });
- // Auto-prompt with increasingly direct instructions
- const promptIndex = Math.min(autoContinueAttempts - 1, AUTO_CONTINUE_PROMPTS.length - 1);
- this.messages.push({
- role: 'user',
- content: AUTO_CONTINUE_PROMPTS[promptIndex],
- });
- const autoContinueMessage = `Model expressed intent but didn't use tools. Auto-prompting to continue...`;
- this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, autoContinueMessage);
- continue;
+ // Reset loop detection when we get a text response (not just tool calls)
+ if (reply.length >= 10) {
+ this.lastToolCallSignature = null;
+ this.repeatedToolCallCount = 0;
  }
  if (reply) {
  this.emitAssistantMessage(reply, { isFinal: true, usage, contextStats });
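Both the streaming and non-streaming paths now build an exact-match signature for each batch of tool calls (name plus JSON-serialized arguments, sorted and joined) and stop once the same signature has repeated MAX_REPEATED_TOOL_CALLS times. A small illustrative sketch of that signature, with made-up tool calls that are not taken from the package:

    // Sketch: the order-insensitive signature compared across consecutive turns.
    const toolCalls = [
        { name: 'read_file', arguments: { path: 'src/index.ts' } },
        { name: 'execute_bash', arguments: { command: 'git status' } },
    ];
    const toolSignature = toolCalls
        .map((t) => `${t.name}:${JSON.stringify(t.arguments)}`)
        .sort()
        .join('|');
    console.log(toolSignature);
    // execute_bash:{"command":"git status"}|read_file:{"path":"src/index.ts"}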
@@ -278,6 +254,14 @@
  continue;
  }
  }
+ // Auto-retry transient errors (network issues, rate limits, server errors)
+ if (isTransientError(error) && transientRetryAttempts < MAX_TRANSIENT_RETRIES) {
+ transientRetryAttempts++;
+ const delayMs = getRetryDelay(transientRetryAttempts);
+ this.callbacks.onRetrying?.(transientRetryAttempts, MAX_TRANSIENT_RETRIES, error);
+ await sleep(delayMs);
+ continue;
+ }
  // Re-throw if not recoverable or recovery failed
  throw error;
  }
@@ -288,7 +272,7 @@
  return this.processConversation();
  }
  let contextRecoveryAttempts = 0;
- let autoContinueAttempts = 0;
+ let transientRetryAttempts = 0;
  while (true) {
  // Check for cancellation at start of each iteration
  if (this.cancellationRequested) {
@@ -321,45 +305,10 @@
  }
  }
  };
- const buildTimeoutError = (reason) => {
- const base = reason === 'startup-timeout'
- ? 'Streaming stalled before any content arrived.'
- : 'Streaming stalled due to inactivity.';
- return new StreamInterruptionError(reason, `${base} Falling back to non-streaming.`, fullContent || reasoningContent);
- };
- // Timer for first token arrival
- let startupTimer = null;
- const startupTimeoutPromise = new Promise((_, reject) => {
- startupTimer = setTimeout(() => reject(buildTimeoutError('startup-timeout')), STREAM_FIRST_CHUNK_TIMEOUT_MS);
- });
- const createIdleTimeout = () => {
- let idleTimer = null;
- const promise = new Promise((_, reject) => {
- idleTimer = setTimeout(() => reject(buildTimeoutError('idle-timeout')), STREAM_INACTIVITY_TIMEOUT_MS);
- });
- const cancel = () => {
- if (idleTimer) {
- clearTimeout(idleTimer);
- idleTimer = null;
- }
- };
- return { promise, cancel };
- };
- let idleTimeout = createIdleTimeout();
- let firstChunkSeen = false;
+ // Simple streaming loop - no timeouts, let the stream run until done
  try {
  while (true) {
- const races = [
- iterator.next(),
- idleTimeout.promise,
- ];
- if (!firstChunkSeen) {
- races.push(startupTimeoutPromise);
- }
- const result = (await Promise.race(races));
- // Reset idle timer for the next iteration
- idleTimeout.cancel();
- idleTimeout = createIdleTimeout();
+ const result = await iterator.next();
  // Check for cancellation during streaming
  if (this.cancellationRequested) {
  await closeStream();
@@ -374,17 +323,10 @@
  break;
  }
  const chunk = result.value;
- if (!firstChunkSeen) {
- firstChunkSeen = true;
- if (startupTimer) {
- clearTimeout(startupTimer);
- startupTimer = null;
- }
- }
  if (chunk.type === 'reasoning' && chunk.content) {
+ // Buffer reasoning content - don't stream token-by-token
+ // It will be emitted as a complete block when ready
  reasoningContent += chunk.content;
- // Surface reasoning tokens to the UI so thought process is visible
- this.callbacks.onStreamChunk?.(chunk.content, 'reasoning');
  continue;
  }
  if (chunk.type === 'content' && chunk.content) {
@@ -397,11 +339,19 @@
  }
  }
  else if (chunk.type === 'tool_call' && chunk.toolCall) {
- toolCalls.push(chunk.toolCall);
- // Drop any speculative narration once we know the model is actually calling tools
- if (suppressStreamNarration) {
- bufferedContent = '';
+ // On first tool call, flush any buffered content
+ if (toolCalls.length === 0) {
+ // Emit complete reasoning block first
+ if (reasoningContent.trim()) {
+ this.callbacks.onStreamChunk?.(reasoningContent, 'reasoning');
+ }
+ // Then emit buffered narration content
+ if (suppressStreamNarration && bufferedContent) {
+ this.callbacks.onStreamChunk?.(bufferedContent, 'content');
+ bufferedContent = '';
+ }
  }
+ toolCalls.push(chunk.toolCall);
  }
  else if (chunk.type === 'usage' && chunk.usage) {
  usage = chunk.usage;
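Reasoning chunks are now accumulated instead of being streamed token-by-token, and are flushed as one block either when the first tool_call chunk arrives or after the stream finishes. A minimal sketch of that buffer-then-flush pattern (the helper and sample chunks below are hypothetical, not part of the package):

    // Sketch: accumulate reasoning chunks, emit them once as a complete block.
    function createReasoningBuffer(onStreamChunk) {
        let reasoningContent = '';
        return {
            add(text) { reasoningContent += text; },
            flush() {
                if (reasoningContent.trim()) {
                    onStreamChunk(reasoningContent, 'reasoning');
                    reasoningContent = '';
                }
            },
        };
    }
    const reasoning = createReasoningBuffer((text, kind) => console.log(`[${kind}]`, text));
    reasoning.add('Looking for the failing test... ');
    reasoning.add('it is in the agent test file.');
    reasoning.flush(); // emitted once, not token-by-token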
@@ -409,61 +359,81 @@
  }
  }
  finally {
- idleTimeout.cancel();
- if (startupTimer) {
- clearTimeout(startupTimer);
- }
  await closeStream();
  }
  // Reset recovery attempts on successful generation
  contextRecoveryAttempts = 0;
  const contextStats = this.getContextStats();
  const combinedContent = fullContent || reasoningContent;
- // If no tool calls were issued, flush any buffered narration now
- if (suppressStreamNarration && toolCalls.length === 0 && bufferedContent) {
- this.callbacks.onStreamChunk?.(bufferedContent, 'content');
- bufferedContent = '';
+ // If no tool calls were issued, emit reasoning and buffered content as complete blocks
+ if (toolCalls.length === 0) {
+ // Emit complete reasoning block if we have one
+ if (reasoningContent.trim()) {
+ this.callbacks.onStreamChunk?.(reasoningContent, 'reasoning');
+ }
+ // Emit buffered narration content
+ if (suppressStreamNarration && bufferedContent) {
+ this.callbacks.onStreamChunk?.(bufferedContent, 'content');
+ bufferedContent = '';
+ }
  }
  // Check if we got tool calls
  if (toolCalls.length > 0) {
- const suppressNarration = this.shouldSuppressToolNarration();
- const narration = suppressNarration ? '' : combinedContent.trim();
+ // BEHAVIORAL LOOP DETECTION: Check if model is stuck calling same tool repeatedly
+ // This catches patterns like "git status" called 5 times even with slightly different outputs
+ const behavioralLoopResult = this.checkBehavioralLoop(toolCalls);
+ if (behavioralLoopResult) {
+ this.emitAssistantMessage(behavioralLoopResult, { isFinal: true, usage, contextStats, wasStreamed: true });
+ this.messages.push({ role: 'assistant', content: behavioralLoopResult });
+ return behavioralLoopResult;
+ }
+ // Loop detection: check if same tool calls are being repeated (exact signature match)
+ const toolSignature = toolCalls
+ .map((t) => `${t.name}:${JSON.stringify(t.arguments)}`)
+ .sort()
+ .join('|');
+ if (toolSignature === this.lastToolCallSignature) {
+ this.repeatedToolCallCount++;
+ if (this.repeatedToolCallCount >= AgentRuntime.MAX_REPEATED_TOOL_CALLS) {
+ // Break out of loop - model is stuck
+ const loopMsg = `Tool loop detected: same tools called ${this.repeatedToolCallCount} times. Please try a different approach or provide more specific instructions.`;
+ this.emitAssistantMessage(loopMsg, { isFinal: true, usage, contextStats, wasStreamed: true });
+ this.messages.push({ role: 'assistant', content: loopMsg });
+ this.lastToolCallSignature = null;
+ this.repeatedToolCallCount = 0;
+ return loopMsg;
+ }
+ }
+ else {
+ this.lastToolCallSignature = toolSignature;
+ this.repeatedToolCallCount = 1;
+ }
+ // Content was already streamed via onStreamChunk, just record it for context
+ // (wasStreamed=true prevents duplicate display)
+ // Note: Acknowledgement injection happens during streaming (when first tool_call chunk arrives)
+ const narration = combinedContent.trim();
  if (narration) {
- // Mark as wasStreamed since content was already output via onStreamChunk
- this.emitAssistantMessage(narration, { isFinal: false, usage, contextStats, wasStreamed: true });
+ this.emitAssistantMessage(narration, {
+ isFinal: false,
+ usage,
+ contextStats,
+ wasStreamed: true,
+ });
  }
  const assistantMessage = {
  role: 'assistant',
- content: suppressNarration ? '' : combinedContent,
+ content: combinedContent,
  toolCalls,
  };
  this.messages.push(assistantMessage);
  await this.resolveToolCalls(toolCalls);
- // Reset auto-continue counter since model is actively working
- autoContinueAttempts = 0;
  continue;
  }
- // Check if model expressed intent to act but didn't call tools
- // This catches "Let me create..." without actual tool calls
- // Only auto-continue if the feature is enabled
  const reply = combinedContent.trim();
- if (this._autoContinueEnabled && shouldAutoContinue(reply, false) && autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS) {
- autoContinueAttempts++;
- // Emit the planning content but mark as non-final
- // Mark as wasStreamed since content was already output via onStreamChunk
- if (reply) {
- this.emitAssistantMessage(reply, { isFinal: false, usage, contextStats, wasStreamed: true });
- }
- this.messages.push({ role: 'assistant', content: reply });
- // Auto-prompt with increasingly direct instructions
- const promptIndex = Math.min(autoContinueAttempts - 1, AUTO_CONTINUE_PROMPTS.length - 1);
- this.messages.push({
- role: 'user',
- content: AUTO_CONTINUE_PROMPTS[promptIndex],
- });
- const autoContinueMessage = `Model expressed intent but didn't use tools. Auto-prompting to continue...`;
- this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, autoContinueMessage);
- continue;
+ // Reset loop detection when we get a text response (not just tool calls)
+ if (reply.length >= 10) {
+ this.lastToolCallSignature = null;
+ this.repeatedToolCallCount = 0;
  }
  // Final message - mark as streamed to avoid double-display in UI
  if (reply) {
@@ -486,6 +456,14 @@
  continue;
  }
  }
+ // Auto-retry transient errors (network issues, rate limits, server errors)
+ if (isTransientError(error) && transientRetryAttempts < MAX_TRANSIENT_RETRIES) {
+ transientRetryAttempts++;
+ const delayMs = getRetryDelay(transientRetryAttempts);
+ this.callbacks.onRetrying?.(transientRetryAttempts, MAX_TRANSIENT_RETRIES, error);
+ await sleep(delayMs);
+ continue;
+ }
  // Re-throw if not recoverable or recovery failed
  throw error;
  }
@@ -517,7 +495,23 @@
  // Fast path: single tool call
  if (numCalls === 1) {
  const call = toolCalls[0];
+ // Check cache first - prevent duplicate identical tool calls
+ const cached = this.getCachedToolResult(call);
+ if (cached !== null) {
+ // Return cached result with indicator that it was from cache
+ this.messages.push({
+ role: 'tool',
+ name: call.name,
+ toolCallId: call.id,
+ content: `[Cached result - identical call already executed]\n\n${cached}`,
+ });
+ return;
+ }
+ this.callbacks.onToolExecution?.(call.name, true);
  const output = await this.toolRuntime.execute(call);
+ this.callbacks.onToolExecution?.(call.name, false);
+ // Cache the result for future identical calls
+ this.cacheToolResult(call, output);
  this.messages.push({
  role: 'tool',
  name: call.name,
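resolveToolCalls now consults the tool-result cache before executing anything and brackets real executions with onToolExecution start/finish callbacks. A simplified sketch of the single-call fast path (the helper below is illustrative only and inlines a simpler cache key than the package's getToolCacheKey):

    // Sketch: serve an identical call from cache, otherwise execute, report, and cache.
    async function runSingleCall(call, cache, execute, onToolExecution) {
        const key = `${call.name}:${JSON.stringify(call.arguments ?? {})}`;
        const cached = cache.get(key);
        if (cached !== undefined) {
            return `[Cached result - identical call already executed]\n\n${cached}`;
        }
        onToolExecution(call.name, true);  // execution started
        const output = await execute(call);
        onToolExecution(call.name, false); // execution finished
        cache.set(key, output);
        return output;
    }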
@@ -527,56 +521,106 @@
  return;
  }
  // PERF: For reasonable batch sizes, execute all in parallel
+ // Check cache for each call and only execute non-cached ones
  if (numCalls <= 10) {
- const results = await Promise.all(toolCalls.map(async (call) => ({
- call,
- output: await this.toolRuntime.execute(call),
- })));
- // Add results to messages in the same order as tool calls
- for (const { call, output } of results) {
- this.messages.push({
- role: 'tool',
- name: call.name,
- toolCallId: call.id,
- content: output,
- });
+ const cachedResults = [];
+ const toExecute = [];
+ // Separate cached from non-cached calls
+ for (const call of toolCalls) {
+ const cached = this.getCachedToolResult(call);
+ if (cached !== null) {
+ cachedResults.push({ call, output: cached, fromCache: true });
+ }
+ else {
+ toExecute.push(call);
+ }
+ }
+ // Execute non-cached calls in parallel
+ if (toExecute.length > 0) {
+ const toolNames = toExecute.map(c => c.name).join(', ');
+ this.callbacks.onToolExecution?.(toolNames, true);
+ const executed = await Promise.all(toExecute.map(async (call) => {
+ const output = await this.toolRuntime.execute(call);
+ this.cacheToolResult(call, output);
+ return { call, output, fromCache: false };
+ }));
+ this.callbacks.onToolExecution?.(toolNames, false);
+ cachedResults.push(...executed);
+ }
+ // Add all results to messages in the original order
+ for (const originalCall of toolCalls) {
+ const result = cachedResults.find(r => r.call.id === originalCall.id);
+ if (result) {
+ const content = result.fromCache
+ ? `[Cached result - identical call already executed]\n\n${result.output}`
+ : result.output;
+ this.messages.push({
+ role: 'tool',
+ name: result.call.name,
+ toolCallId: result.call.id,
+ content,
+ });
+ }
  }
  return;
  }
- // PERF: For large batches, use chunked parallel execution
- // This prevents memory pressure from too many concurrent operations
+ // PERF: For large batches, use chunked parallel execution with caching
  const CHUNK_SIZE = 8;
- const results = [];
+ const allResults = [];
  for (let i = 0; i < numCalls; i += CHUNK_SIZE) {
  const chunk = toolCalls.slice(i, i + CHUNK_SIZE);
- const chunkResults = await Promise.all(chunk.map(async (call) => ({
- call,
- output: await this.toolRuntime.execute(call),
- })));
- results.push(...chunkResults);
- }
- // Add results to messages in order
- for (const { call, output } of results) {
- this.messages.push({
- role: 'tool',
- name: call.name,
- toolCallId: call.id,
- content: output,
- });
+ const cachedInChunk = [];
+ const toExecuteInChunk = [];
+ for (const call of chunk) {
+ const cached = this.getCachedToolResult(call);
+ if (cached !== null) {
+ cachedInChunk.push({ call, output: cached, fromCache: true });
+ }
+ else {
+ toExecuteInChunk.push(call);
+ }
+ }
+ if (toExecuteInChunk.length > 0) {
+ const chunkNames = toExecuteInChunk.map(c => c.name).join(', ');
+ this.callbacks.onToolExecution?.(chunkNames, true);
+ const executed = await Promise.all(toExecuteInChunk.map(async (call) => {
+ const output = await this.toolRuntime.execute(call);
+ this.cacheToolResult(call, output);
+ return { call, output, fromCache: false };
+ }));
+ this.callbacks.onToolExecution?.(chunkNames, false);
+ cachedInChunk.push(...executed);
+ }
+ allResults.push(...cachedInChunk);
+ }
+ // Add results to messages in original order
+ for (const originalCall of toolCalls) {
+ const result = allResults.find(r => r.call.id === originalCall.id);
+ if (result) {
+ const content = result.fromCache
+ ? `[Cached result - identical call already executed]\n\n${result.output}`
+ : result.output;
+ this.messages.push({
+ role: 'tool',
+ name: result.call.name,
+ toolCallId: result.call.id,
+ content,
+ });
+ }
  }
  }
  get providerTools() {
  return this.toolRuntime.listProviderTools();
  }
  /**
- * OpenAI models frequently add speculative tool narration in the content field.
- * Suppress that text to avoid surfacing hallucinated tool usage in the UI.
+ * Whether to suppress tool narration in the content field.
+ * Previously suppressed for OpenAI but now we show all thinking/narration.
  */
  shouldSuppressToolNarration() {
- return this.providerId.toLowerCase().includes('openai');
+ return false; // Always show thinking/narration
  }
  emitAssistantMessage(content, metadata) {
- if (!content) {
+ if (!content || !content.trim()) {
  return;
  }
  const elapsedMs = this.activeRun ? Date.now() - this.activeRun.startedAt : undefined;
@@ -626,6 +670,138 @@
  model: this.modelId,
  });
  }
+ /**
+ * Extract a "command hash" from tool arguments for behavioral loop detection.
+ * For execute_bash, this is the actual command. For other tools, key identifying args.
+ */
+ extractCmdHash(name, args) {
+ // For bash/execute commands, extract the command itself
+ if (name === 'execute_bash' || name === 'Bash') {
+ const cmd = args['command'];
+ if (cmd) {
+ // Normalize: trim, take first 100 chars, remove variable parts like timestamps
+ return cmd.trim().slice(0, 100).replace(/\d{10,}/g, 'N');
+ }
+ }
+ // For file operations, use the path
+ if (name === 'read_file' || name === 'Read' || name === 'read_files') {
+ const path = args['path'] || args['file_path'] || args['paths'];
+ if (path)
+ return `path:${JSON.stringify(path).slice(0, 100)}`;
+ }
+ if (name === 'list_files' || name === 'Glob') {
+ const path = args['path'] || args['pattern'];
+ if (path)
+ return `path:${JSON.stringify(path).slice(0, 100)}`;
+ }
+ // For search, use the query/pattern
+ if (name === 'Grep' || name === 'grep' || name === 'search') {
+ const pattern = args['pattern'] || args['query'];
+ if (pattern)
+ return `search:${String(pattern).slice(0, 100)}`;
+ }
+ // Default: use first significant arg value
+ const firstArg = Object.values(args)[0];
+ if (firstArg) {
+ return String(firstArg).slice(0, 100);
+ }
+ return 'no-args';
+ }
+ /**
+ * Check for behavioral loops - model calling the same tool with similar args repeatedly.
+ * Returns an error message if a loop is detected, null otherwise.
+ *
+ * FUNDAMENTAL PREVENTION: Cached calls are excluded from loop detection since they
+ * don't actually execute (the cache provides the result). This means:
+ * - First call: executes and caches result
+ * - Second identical call: returns cached result, NOT counted toward loop
+ * - Only genuinely NEW (non-cached) repetitive calls trigger loop detection
+ *
+ * This catches patterns like:
+ * - "git status -sb" called 3 times with DIFFERENT outputs (cache miss each time)
+ * - Repeated file reads where file content changed
+ * - Repeated searches with same pattern but new results
+ */
+ checkBehavioralLoop(toolCalls) {
+ // Filter out calls that will be served from cache - these don't count toward loops
+ // since they're handled fundamentally by the caching mechanism
+ const nonCachedCalls = toolCalls.filter(call => this.getCachedToolResult(call) === null);
+ // If all calls are cached, no loop detection needed
+ if (nonCachedCalls.length === 0) {
+ return null;
+ }
+ // Count existing occurrences in recent history
+ const existingCounts = new Map();
+ for (const { name, cmdHash } of this.recentToolCalls) {
+ const key = `${name}:${cmdHash}`;
+ existingCounts.set(key, (existingCounts.get(key) ?? 0) + 1);
+ }
+ // Check if ANY incoming NON-CACHED call would exceed threshold
+ for (const call of nonCachedCalls) {
+ const cmdHash = this.extractCmdHash(call.name, call.arguments ?? {});
+ const key = `${call.name}:${cmdHash}`;
+ const currentCount = existingCounts.get(key) ?? 0;
+ // If adding this call would reach or exceed threshold, block immediately
+ if (currentCount + 1 >= AgentRuntime.BEHAVIORAL_LOOP_THRESHOLD) {
+ // Reset history to prevent immediate re-trigger
+ this.recentToolCalls = [];
+ return `Behavioral loop detected: "${call.name}" called ${currentCount + 1} times with similar arguments. The task appears stuck. Please try a different approach or provide more specific instructions.`;
+ }
+ }
+ // Track only non-cached tool calls (cached ones are handled by caching)
+ for (const call of nonCachedCalls) {
+ const cmdHash = this.extractCmdHash(call.name, call.arguments ?? {});
+ this.recentToolCalls.push({ name: call.name, cmdHash });
+ }
+ // Keep only recent history
+ while (this.recentToolCalls.length > AgentRuntime.TOOL_HISTORY_SIZE) {
+ this.recentToolCalls.shift();
+ }
+ return null;
+ }
+ /**
+ * Reset behavioral loop tracking (called when user provides new input or task completes)
+ */
+ resetBehavioralLoopTracking() {
+ this.recentToolCalls = [];
+ this.lastToolCallSignature = null;
+ this.repeatedToolCallCount = 0;
+ // Note: we DON'T clear toolResultCache here - cached results remain valid across turns
+ // to prevent re-executing identical tool calls within a session
+ }
+ /**
+ * Create a stable cache key for a tool call based on name and arguments
+ */
+ getToolCacheKey(call) {
+ const args = call.arguments ?? {};
+ // Sort keys for consistent ordering
+ const sortedArgs = Object.keys(args).sort().reduce((acc, key) => {
+ acc[key] = args[key];
+ return acc;
+ }, {});
+ return `${call.name}:${JSON.stringify(sortedArgs)}`;
+ }
+ /**
+ * Get cached result for a tool call, or null if not cached
+ */
+ getCachedToolResult(call) {
+ const key = this.getToolCacheKey(call);
+ return this.toolResultCache.get(key) ?? null;
+ }
+ /**
+ * Cache a tool result for future identical calls
+ */
+ cacheToolResult(call, result) {
+ const key = this.getToolCacheKey(call);
+ // Evict oldest entries if cache is full
+ if (this.toolResultCache.size >= AgentRuntime.TOOL_CACHE_MAX_SIZE) {
+ const firstKey = this.toolResultCache.keys().next().value;
+ if (firstKey) {
+ this.toolResultCache.delete(firstKey);
+ }
+ }
+ this.toolResultCache.set(key, result);
+ }
  getHistory() {
  return this.messages.map(cloneMessage);
  }
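Because getToolCacheKey sorts argument keys before serializing, two calls that differ only in argument order map to the same cache entry. A quick illustrative check (the function body is copied from the hunk above; the sample Grep calls are made up):

    function getToolCacheKey(call) {
        const args = call.arguments ?? {};
        // Sort keys for consistent ordering
        const sortedArgs = Object.keys(args).sort().reduce((acc, key) => {
            acc[key] = args[key];
            return acc;
        }, {});
        return `${call.name}:${JSON.stringify(sortedArgs)}`;
    }
    const a = getToolCacheKey({ name: 'Grep', arguments: { pattern: 'TODO', path: 'src' } });
    const b = getToolCacheKey({ name: 'Grep', arguments: { path: 'src', pattern: 'TODO' } });
    console.log(a === b); // true - both serialize as Grep:{"path":"src","pattern":"TODO"}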