erosolar-cli 2.1.171 → 2.1.172

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
Files changed (209) hide show
  1. package/README.md +1 -1
  2. package/agents/erosolar-code.rules.json +2 -2
  3. package/agents/general.rules.json +3 -21
  4. package/dist/StringUtils.d.ts +8 -0
  5. package/dist/StringUtils.d.ts.map +1 -0
  6. package/dist/StringUtils.js +11 -0
  7. package/dist/StringUtils.js.map +1 -0
  8. package/dist/capabilities/statusCapability.js +2 -2
  9. package/dist/capabilities/statusCapability.js.map +1 -1
  10. package/dist/contracts/agent-schemas.json +5 -5
  11. package/dist/core/agent.d.ts +24 -83
  12. package/dist/core/agent.d.ts.map +1 -1
  13. package/dist/core/agent.js +248 -499
  14. package/dist/core/agent.js.map +1 -1
  15. package/dist/core/aiFlowSupervisor.d.ts +44 -0
  16. package/dist/core/aiFlowSupervisor.d.ts.map +1 -0
  17. package/dist/core/aiFlowSupervisor.js +299 -0
  18. package/dist/core/aiFlowSupervisor.js.map +1 -0
  19. package/dist/core/cliTestHarness.d.ts +200 -0
  20. package/dist/core/cliTestHarness.d.ts.map +1 -0
  21. package/dist/core/cliTestHarness.js +549 -0
  22. package/dist/core/cliTestHarness.js.map +1 -0
  23. package/dist/core/preferences.d.ts +0 -1
  24. package/dist/core/preferences.d.ts.map +1 -1
  25. package/dist/core/preferences.js +1 -8
  26. package/dist/core/preferences.js.map +1 -1
  27. package/dist/core/schemaValidator.js +3 -3
  28. package/dist/core/schemaValidator.js.map +1 -1
  29. package/dist/core/testUtils.d.ts +121 -0
  30. package/dist/core/testUtils.d.ts.map +1 -0
  31. package/dist/core/testUtils.js +235 -0
  32. package/dist/core/testUtils.js.map +1 -0
  33. package/dist/core/toolPreconditions.d.ts +11 -0
  34. package/dist/core/toolPreconditions.d.ts.map +1 -1
  35. package/dist/core/toolPreconditions.js +164 -33
  36. package/dist/core/toolPreconditions.js.map +1 -1
  37. package/dist/core/toolRuntime.d.ts.map +1 -1
  38. package/dist/core/toolRuntime.js +114 -9
  39. package/dist/core/toolRuntime.js.map +1 -1
  40. package/dist/core/toolValidation.d.ts +116 -0
  41. package/dist/core/toolValidation.d.ts.map +1 -0
  42. package/dist/core/toolValidation.js +282 -0
  43. package/dist/core/toolValidation.js.map +1 -0
  44. package/dist/core/updateChecker.d.ts +1 -61
  45. package/dist/core/updateChecker.d.ts.map +1 -1
  46. package/dist/core/updateChecker.js +3 -147
  47. package/dist/core/updateChecker.js.map +1 -1
  48. package/dist/headless/evalMode.d.ts.map +1 -1
  49. package/dist/headless/evalMode.js +0 -6
  50. package/dist/headless/evalMode.js.map +1 -1
  51. package/dist/headless/headlessApp.d.ts.map +1 -1
  52. package/dist/headless/headlessApp.js +39 -6
  53. package/dist/headless/headlessApp.js.map +1 -1
  54. package/dist/mcp/sseClient.d.ts +1 -4
  55. package/dist/mcp/sseClient.d.ts.map +1 -1
  56. package/dist/mcp/sseClient.js +2 -36
  57. package/dist/mcp/sseClient.js.map +1 -1
  58. package/dist/mcp/stdioClient.d.ts +1 -4
  59. package/dist/mcp/stdioClient.d.ts.map +1 -1
  60. package/dist/mcp/stdioClient.js +1 -41
  61. package/dist/mcp/stdioClient.js.map +1 -1
  62. package/dist/mcp/toolBridge.d.ts +0 -3
  63. package/dist/mcp/toolBridge.d.ts.map +1 -1
  64. package/dist/mcp/toolBridge.js +2 -2
  65. package/dist/mcp/toolBridge.js.map +1 -1
  66. package/dist/mcp/types.d.ts +0 -18
  67. package/dist/mcp/types.d.ts.map +1 -1
  68. package/dist/plugins/tools/nodeDefaults.d.ts.map +1 -1
  69. package/dist/plugins/tools/nodeDefaults.js +2 -0
  70. package/dist/plugins/tools/nodeDefaults.js.map +1 -1
  71. package/dist/providers/openaiResponsesProvider.d.ts.map +1 -1
  72. package/dist/providers/openaiResponsesProvider.js +74 -79
  73. package/dist/providers/openaiResponsesProvider.js.map +1 -1
  74. package/dist/runtime/agentController.d.ts.map +1 -1
  75. package/dist/runtime/agentController.js +3 -6
  76. package/dist/runtime/agentController.js.map +1 -1
  77. package/dist/runtime/agentSession.d.ts +2 -0
  78. package/dist/runtime/agentSession.d.ts.map +1 -1
  79. package/dist/runtime/agentSession.js +2 -2
  80. package/dist/runtime/agentSession.js.map +1 -1
  81. package/dist/shell/interactiveShell.d.ts +18 -20
  82. package/dist/shell/interactiveShell.d.ts.map +1 -1
  83. package/dist/shell/interactiveShell.js +291 -329
  84. package/dist/shell/interactiveShell.js.map +1 -1
  85. package/dist/shell/shellApp.d.ts.map +1 -1
  86. package/dist/shell/shellApp.js +8 -16
  87. package/dist/shell/shellApp.js.map +1 -1
  88. package/dist/shell/systemPrompt.d.ts.map +1 -1
  89. package/dist/shell/systemPrompt.js +15 -4
  90. package/dist/shell/systemPrompt.js.map +1 -1
  91. package/dist/subagents/taskRunner.js +1 -2
  92. package/dist/subagents/taskRunner.js.map +1 -1
  93. package/dist/tools/bashTools.d.ts.map +1 -1
  94. package/dist/tools/bashTools.js +8 -101
  95. package/dist/tools/bashTools.js.map +1 -1
  96. package/dist/tools/diffUtils.d.ts +2 -8
  97. package/dist/tools/diffUtils.d.ts.map +1 -1
  98. package/dist/tools/diffUtils.js +13 -72
  99. package/dist/tools/diffUtils.js.map +1 -1
  100. package/dist/tools/grepTools.d.ts.map +1 -1
  101. package/dist/tools/grepTools.js +2 -10
  102. package/dist/tools/grepTools.js.map +1 -1
  103. package/dist/tools/planningTools.d.ts +10 -0
  104. package/dist/tools/planningTools.d.ts.map +1 -1
  105. package/dist/tools/planningTools.js +16 -0
  106. package/dist/tools/planningTools.js.map +1 -1
  107. package/dist/tools/searchTools.d.ts.map +1 -1
  108. package/dist/tools/searchTools.js +2 -4
  109. package/dist/tools/searchTools.js.map +1 -1
  110. package/dist/ui/PromptController.d.ts +4 -4
  111. package/dist/ui/PromptController.d.ts.map +1 -1
  112. package/dist/ui/PromptController.js +7 -1
  113. package/dist/ui/PromptController.js.map +1 -1
  114. package/dist/ui/ShellUIAdapter.d.ts +28 -292
  115. package/dist/ui/ShellUIAdapter.d.ts.map +1 -1
  116. package/dist/ui/ShellUIAdapter.js +121 -1513
  117. package/dist/ui/ShellUIAdapter.js.map +1 -1
  118. package/dist/ui/UnifiedUIRenderer.d.ts +30 -136
  119. package/dist/ui/UnifiedUIRenderer.d.ts.map +1 -1
  120. package/dist/ui/UnifiedUIRenderer.js +370 -955
  121. package/dist/ui/UnifiedUIRenderer.js.map +1 -1
  122. package/dist/ui/animatedStatus.d.ts +6 -128
  123. package/dist/ui/animatedStatus.d.ts.map +1 -1
  124. package/dist/ui/animatedStatus.js +50 -383
  125. package/dist/ui/animatedStatus.js.map +1 -1
  126. package/dist/ui/display.d.ts +26 -182
  127. package/dist/ui/display.d.ts.map +1 -1
  128. package/dist/ui/display.js +97 -678
  129. package/dist/ui/display.js.map +1 -1
  130. package/dist/ui/layout.d.ts +1 -0
  131. package/dist/ui/layout.d.ts.map +1 -1
  132. package/dist/ui/layout.js +12 -0
  133. package/dist/ui/layout.js.map +1 -1
  134. package/dist/ui/orchestration/UIUpdateCoordinator.d.ts +7 -61
  135. package/dist/ui/orchestration/UIUpdateCoordinator.d.ts.map +1 -1
  136. package/dist/ui/orchestration/UIUpdateCoordinator.js +20 -232
  137. package/dist/ui/orchestration/UIUpdateCoordinator.js.map +1 -1
  138. package/dist/ui/planOverlay.d.ts +28 -0
  139. package/dist/ui/planOverlay.d.ts.map +1 -0
  140. package/dist/ui/planOverlay.js +156 -0
  141. package/dist/ui/planOverlay.js.map +1 -0
  142. package/dist/ui/shortcutsHelp.d.ts.map +1 -1
  143. package/dist/ui/shortcutsHelp.js +1 -0
  144. package/dist/ui/shortcutsHelp.js.map +1 -1
  145. package/dist/ui/streamingFormatter.d.ts +30 -0
  146. package/dist/ui/streamingFormatter.d.ts.map +1 -0
  147. package/dist/ui/streamingFormatter.js +91 -0
  148. package/dist/ui/streamingFormatter.js.map +1 -0
  149. package/dist/ui/unified/index.d.ts +1 -30
  150. package/dist/ui/unified/index.d.ts.map +1 -1
  151. package/dist/ui/unified/index.js +2 -45
  152. package/dist/ui/unified/index.js.map +1 -1
  153. package/dist/utils/errorUtils.d.ts +16 -0
  154. package/dist/utils/errorUtils.d.ts.map +1 -0
  155. package/dist/utils/errorUtils.js +66 -0
  156. package/dist/utils/errorUtils.js.map +1 -0
  157. package/package.json +2 -1
  158. package/dist/codex/capabilities/codexCoreCapability.d.ts +0 -6
  159. package/dist/codex/capabilities/codexCoreCapability.d.ts.map +0 -1
  160. package/dist/codex/capabilities/codexCoreCapability.js +0 -516
  161. package/dist/codex/capabilities/codexCoreCapability.js.map +0 -1
  162. package/dist/codex/fs.d.ts +0 -4
  163. package/dist/codex/fs.d.ts.map +0 -1
  164. package/dist/codex/fs.js +0 -25
  165. package/dist/codex/fs.js.map +0 -1
  166. package/dist/codex/persistence/planStore.d.ts +0 -4
  167. package/dist/codex/persistence/planStore.d.ts.map +0 -1
  168. package/dist/codex/persistence/planStore.js +0 -59
  169. package/dist/codex/persistence/planStore.js.map +0 -1
  170. package/dist/codex/pluginAllowlist.d.ts +0 -4
  171. package/dist/codex/pluginAllowlist.d.ts.map +0 -1
  172. package/dist/codex/pluginAllowlist.js +0 -14
  173. package/dist/codex/pluginAllowlist.js.map +0 -1
  174. package/dist/codex/types.d.ts +0 -21
  175. package/dist/codex/types.d.ts.map +0 -1
  176. package/dist/codex/types.js +0 -62
  177. package/dist/codex/types.js.map +0 -1
  178. package/dist/core/reliabilityPrompt.d.ts +0 -9
  179. package/dist/core/reliabilityPrompt.d.ts.map +0 -1
  180. package/dist/core/reliabilityPrompt.js +0 -31
  181. package/dist/core/reliabilityPrompt.js.map +0 -1
  182. package/dist/ui/UnifiedUIController.d.ts +0 -81
  183. package/dist/ui/UnifiedUIController.d.ts.map +0 -1
  184. package/dist/ui/UnifiedUIController.js +0 -212
  185. package/dist/ui/UnifiedUIController.js.map +0 -1
  186. package/dist/ui/animation/AnimationScheduler.d.ts +0 -192
  187. package/dist/ui/animation/AnimationScheduler.d.ts.map +0 -1
  188. package/dist/ui/animation/AnimationScheduler.js +0 -432
  189. package/dist/ui/animation/AnimationScheduler.js.map +0 -1
  190. package/dist/ui/inPlaceUpdater.d.ts +0 -181
  191. package/dist/ui/inPlaceUpdater.d.ts.map +0 -1
  192. package/dist/ui/inPlaceUpdater.js +0 -515
  193. package/dist/ui/inPlaceUpdater.js.map +0 -1
  194. package/dist/ui/interrupts/InterruptManager.d.ts +0 -142
  195. package/dist/ui/interrupts/InterruptManager.d.ts.map +0 -1
  196. package/dist/ui/interrupts/InterruptManager.js +0 -439
  197. package/dist/ui/interrupts/InterruptManager.js.map +0 -1
  198. package/dist/ui/telemetry/ResponseTracker.d.ts +0 -22
  199. package/dist/ui/telemetry/ResponseTracker.d.ts.map +0 -1
  200. package/dist/ui/telemetry/ResponseTracker.js +0 -60
  201. package/dist/ui/telemetry/ResponseTracker.js.map +0 -1
  202. package/dist/ui/telemetry/UITelemetry.d.ts +0 -181
  203. package/dist/ui/telemetry/UITelemetry.d.ts.map +0 -1
  204. package/dist/ui/telemetry/UITelemetry.js +0 -446
  205. package/dist/ui/telemetry/UITelemetry.js.map +0 -1
  206. package/dist/ui/unified/layout.d.ts +0 -12
  207. package/dist/ui/unified/layout.d.ts.map +0 -1
  208. package/dist/ui/unified/layout.js +0 -96
  209. package/dist/ui/unified/layout.js.map +0 -1
@@ -4,7 +4,65 @@ import { safeErrorMessage } from './secretStore.js';
4
4
  * Maximum number of context overflow recovery attempts
5
5
  */
6
6
  const MAX_CONTEXT_RECOVERY_ATTEMPTS = 3;
7
- // Streaming runs without timeouts - we let the model take as long as it needs
7
+ /**
8
+ * Maximum number of auto-continuation attempts when model expresses intent but doesn't act
9
+ */
10
+ const MAX_AUTO_CONTINUE_ATTEMPTS = 3;
11
+ /**
12
+ * Streaming safety timeouts (ms)
13
+ * - First chunk timeout: fail fast if the stream never starts
14
+ * - Inactivity timeout: abort if no chunks arrive for an extended period
15
+ */
16
+ // Allow more headroom before declaring a streaming stall to avoid premature fallbacks.
17
+ const STREAM_FIRST_CHUNK_TIMEOUT_MS = 25000;
18
+ const STREAM_INACTIVITY_TIMEOUT_MS = 60000;
19
+ /**
20
+ * Patterns that indicate the model intends to take action but hasn't yet
21
+ * These suggest the model should be prompted to continue
22
+ */
23
+ const INTENT_WITHOUT_ACTION_PATTERNS = [
24
+ // "Let me X" patterns - model is stating what it will do
25
+ /\blet me\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|examine|review)/i,
26
+ // "I'll X" / "I will X" patterns
27
+ /\bi['']ll\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|start|begin|now)/i,
28
+ /\bi will\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|start|begin|now)/i,
29
+ // "I'm going to X" patterns
30
+ /\bi['']m going to\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze)/i,
31
+ // "Now I'll X" / "First, I'll X" patterns
32
+ /\b(now|first|next)\s*(,)?\s*i['']ll\s+/i,
33
+ // Explicit continuation signals
34
+ /\bhere['']s (the|my) (plan|approach|solution|implementation)/i,
35
+ // Numbered steps suggesting action to come
36
+ /^\s*\d+\.\s+(create|write|implement|add|update|edit|modify|fix|show|read|check)/im,
37
+ // Bullet points suggesting planned actions
38
+ /^[\s•\-\*]+\s*(create|write|implement|add|update|edit|modify|fix|shows?|reads?|checks?)\s/im,
39
+ ];
40
+ const AUTO_CONTINUE_PROMPTS = [
41
+ 'Continue. Use tools now: start with Read/read_file to inspect the target file, then call Edit (or Write if available) with file_path/old_string/new_string to apply changes. Keep using tools until the task is done.',
42
+ 'You MUST call tools immediately. Issue Read -> Edit/Write tool calls with explicit parameters; no more explaining or planning.',
43
+ 'CRITICAL: Call a tool right now. Use Edit with file_path, old_string, new_string (or Write with file_path and content). Respond with tool calls only.',
44
+ ];
45
+ /**
46
+ * Check if response indicates intent to act without actually acting
47
+ * This detects when the model says "let me do X" but doesn't call any tools
48
+ */
49
+ function shouldAutoContinue(content, hasToolCalls) {
50
+ // If there are tool calls, no need to auto-continue
51
+ if (hasToolCalls) {
52
+ return false;
53
+ }
54
+ // If content is very short, likely not an incomplete intent
55
+ if (content.length < 50) {
56
+ return false;
57
+ }
58
+ // Check for intent patterns
59
+ for (const pattern of INTENT_WITHOUT_ACTION_PATTERNS) {
60
+ if (pattern.test(content)) {
61
+ return true;
62
+ }
63
+ }
64
+ return false;
65
+ }
8
66
  /**
9
67
  * Check if an error is a context overflow error
10
68
  */
@@ -19,53 +77,15 @@ function isContextOverflowError(error) {
19
77
  message.includes('max_tokens') ||
20
78
  message.includes('context window'));
21
79
  }
22
- /**
23
- * Check if an error is a transient/retryable error (network issues, rate limits, server errors)
24
- */
25
- function isTransientError(error) {
26
- if (!(error instanceof Error))
27
- return false;
28
- const message = error.message.toLowerCase();
29
- // Network errors
30
- const networkPatterns = [
31
- 'econnrefused', 'econnreset', 'enotfound', 'etimedout', 'epipe',
32
- 'network error', 'connection error', 'fetch failed', 'socket hang up',
33
- 'network is unreachable', 'connection refused', 'connection reset',
34
- ];
35
- if (networkPatterns.some(p => message.includes(p))) {
36
- return true;
80
+ class StreamInterruptionError extends Error {
81
+ reason;
82
+ partialResponse;
83
+ constructor(reason, message, partialResponse) {
84
+ super(message);
85
+ this.name = 'StreamInterruptionError';
86
+ this.reason = reason;
87
+ this.partialResponse = partialResponse;
37
88
  }
38
- // Rate limit errors
39
- if (message.includes('rate limit') || message.includes('429') || message.includes('too many requests')) {
40
- return true;
41
- }
42
- // Server errors (5xx)
43
- if (message.includes('500') || message.includes('502') || message.includes('503') || message.includes('504')) {
44
- return true;
45
- }
46
- // Temporary service errors
47
- if (message.includes('service unavailable') || message.includes('temporarily unavailable') ||
48
- message.includes('overloaded') || message.includes('server error')) {
49
- return true;
50
- }
51
- return false;
52
- }
53
- /**
54
- * Maximum number of transient error retries
55
- */
56
- const MAX_TRANSIENT_RETRIES = 3;
57
- /**
58
- * Delay before retry (in ms), with exponential backoff
59
- */
60
- function getRetryDelay(attempt) {
61
- // Base delay of 1 second, doubles each attempt: 1s, 2s, 4s
62
- return Math.min(1000 * Math.pow(2, attempt - 1), 10000);
63
- }
64
- /**
65
- * Sleep for the specified milliseconds
66
- */
67
- function sleep(ms) {
68
- return new Promise(resolve => setTimeout(resolve, ms));
69
89
  }
70
90
  export class AgentRuntime {
71
91
  messages = [];
@@ -79,53 +99,7 @@ export class AgentRuntime {
79
99
  modelId;
80
100
  workingDirectory;
81
101
  cancellationRequested = false;
82
- // Loop detection: track last tool calls to detect stuck loops
83
- lastToolCallSignature = null;
84
- repeatedToolCallCount = 0;
85
- static MAX_REPEATED_TOOL_CALLS = 5; // Allow up to 4 identical calls before stopping
86
- // Behavioral loop detection: track recent tool calls to catch repetitive patterns
87
- // e.g., calling "execute_bash" with "git status" 5 times even if output differs slightly
88
- recentToolCalls = [];
89
- static TOOL_HISTORY_SIZE = 12;
90
- static BEHAVIORAL_LOOP_THRESHOLD = 3; // Same tool+cmd 3+ times in last 12 = stuck
91
- // Never cache stateful tools - they must always execute to reflect current system state
92
- static NON_CACHEABLE_TOOL_NAMES = new Set([
93
- 'bash',
94
- 'execute_bash',
95
- 'execute_command',
96
- 'run_command',
97
- 'edit',
98
- 'edit_file',
99
- 'notebookedit',
100
- 'read',
101
- 'read_file',
102
- 'read_files',
103
- 'list_files',
104
- 'list_dir',
105
- 'glob',
106
- 'grep',
107
- 'search',
108
- 'search_text',
109
- 'git_status',
110
- 'git_diff',
111
- 'git_log',
112
- 'git_commit',
113
- 'git_push',
114
- ]);
115
- // Skip loop short-circuiting for direct execution tools to avoid blocking user commands
116
- static LOOP_EXEMPT_TOOL_NAMES = new Set([
117
- 'bash',
118
- 'execute_bash',
119
- 'execute_command',
120
- 'run_command',
121
- 'edit',
122
- 'edit_file',
123
- 'notebookedit',
124
- ]);
125
- // Tool result cache: prevent duplicate identical tool calls by returning cached results
126
- // Key: tool signature (name + JSON args), Value: result string
127
- toolResultCache = new Map();
128
- static TOOL_CACHE_MAX_SIZE = 50; // Keep last 50 tool results
102
+ _autoContinueEnabled = false;
129
103
  constructor(options) {
130
104
  this.provider = options.provider;
131
105
  this.toolRuntime = options.toolRuntime;
@@ -134,6 +108,7 @@ export class AgentRuntime {
134
108
  this.providerId = options.providerId ?? 'unknown';
135
109
  this.modelId = options.modelId ?? 'unknown';
136
110
  this.workingDirectory = options.workingDirectory ?? process.cwd();
111
+ this._autoContinueEnabled = options.autoContinue ?? false;
137
112
  const trimmedPrompt = options.systemPrompt.trim();
138
113
  this.baseSystemPrompt = trimmedPrompt || null;
139
114
  if (trimmedPrompt) {
@@ -159,14 +134,25 @@ export class AgentRuntime {
159
134
  isRunning() {
160
135
  return this.activeRun !== null;
161
136
  }
137
+ /**
138
+ * Check if auto-continuation is enabled.
139
+ */
140
+ isAutoContinueEnabled() {
141
+ return this._autoContinueEnabled;
142
+ }
143
+ /**
144
+ * Enable or disable auto-continuation.
145
+ */
146
+ setAutoContinue(enabled) {
147
+ this._autoContinueEnabled = enabled;
148
+ }
162
149
  async send(text, useStreaming = false) {
163
150
  const prompt = text.trim();
164
151
  if (!prompt) {
165
152
  return '';
166
153
  }
167
- // Reset cancellation flag and loop tracking at start of new request
154
+ // Reset cancellation flag at start of new request
168
155
  this.cancellationRequested = false;
169
- this.resetBehavioralLoopTracking();
170
156
  // Handle multi-line paste: show summary to user, send full content to AI
171
157
  if (isMultilinePaste(prompt)) {
172
158
  const processed = processPaste(prompt);
@@ -182,9 +168,28 @@ export class AgentRuntime {
182
168
  const run = { startedAt: Date.now() };
183
169
  this.activeRun = run;
184
170
  try {
185
- // Always use streaming when available - no fallback
186
171
  if (useStreaming && this.provider.generateStream) {
187
- return await this.processConversationStreaming();
172
+ try {
173
+ return await this.processConversationStreaming();
174
+ }
175
+ catch (error) {
176
+ const message = safeErrorMessage(error);
177
+ const reason = error instanceof StreamInterruptionError ? error.reason : undefined;
178
+ const partialResponse = error instanceof StreamInterruptionError ? error.partialResponse : undefined;
179
+ console.warn(`[agent] Streaming failed, falling back to non-streaming: ${message}`);
180
+ // If we captured part of the response, seed it into history and ask the model to continue
181
+ // so we don't restart the answer from scratch during fallback.
182
+ if (partialResponse && partialResponse.trim()) {
183
+ const partial = partialResponse.trim();
184
+ this.messages.push({ role: 'assistant', content: partial });
185
+ this.messages.push({
186
+ role: 'user',
187
+ content: 'Continue your previous response from where it stopped. Do not repeat text you already provided.',
188
+ });
189
+ }
190
+ this.callbacks.onStreamFallback?.({ message, error, reason, partialResponse });
191
+ return await this.processConversation();
192
+ }
188
193
  }
189
194
  return await this.processConversation();
190
195
  }
@@ -198,7 +203,7 @@ export class AgentRuntime {
198
203
  }
199
204
  async processConversation() {
200
205
  let contextRecoveryAttempts = 0;
201
- let transientRetryAttempts = 0;
206
+ let autoContinueAttempts = 0;
202
207
  while (true) {
203
208
  // Check for cancellation at start of each iteration
204
209
  if (this.cancellationRequested) {
@@ -214,66 +219,44 @@ export class AgentRuntime {
214
219
  // Reset recovery attempts on successful generation
215
220
  contextRecoveryAttempts = 0;
216
221
  if (response.type === 'tool_calls') {
217
- // BEHAVIORAL LOOP DETECTION: Check if model is stuck calling same tool repeatedly
218
- const behavioralLoopResult = this.checkBehavioralLoop(response.toolCalls);
219
- if (behavioralLoopResult) {
220
- this.emitAssistantMessage(behavioralLoopResult, { isFinal: true, usage, contextStats });
221
- this.messages.push({ role: 'assistant', content: behavioralLoopResult });
222
- return behavioralLoopResult;
223
- }
224
- // Loop detection: check if same tool calls are being repeated (exact signature match)
225
- const signatureCalls = response.toolCalls.filter(call => !this.shouldSkipLoopDetection(call));
226
- const toolSignature = signatureCalls.length
227
- ? signatureCalls
228
- .map((t) => `${t.name}:${JSON.stringify(t.arguments)}`)
229
- .sort()
230
- .join('|')
231
- : null;
232
- if (toolSignature && toolSignature === this.lastToolCallSignature) {
233
- this.repeatedToolCallCount++;
234
- if (this.repeatedToolCallCount >= AgentRuntime.MAX_REPEATED_TOOL_CALLS) {
235
- // Break out of loop - model is stuck
236
- const loopMsg = `Tool loop detected: same tools called ${this.repeatedToolCallCount} times. Please try a different approach or provide more specific instructions.`;
237
- this.emitAssistantMessage(loopMsg, { isFinal: true, usage, contextStats });
238
- this.messages.push({ role: 'assistant', content: loopMsg });
239
- this.lastToolCallSignature = null;
240
- this.repeatedToolCallCount = 0;
241
- return loopMsg;
242
- }
243
- }
244
- else if (toolSignature) {
245
- this.lastToolCallSignature = toolSignature;
246
- this.repeatedToolCallCount = 1;
247
- }
248
- else {
249
- this.lastToolCallSignature = null;
250
- this.repeatedToolCallCount = 0;
251
- }
252
- // Emit narration if present - it shows the AI's thought process before tools
253
- const narration = response.content?.trim();
222
+ const suppressNarration = this.shouldSuppressToolNarration();
223
+ const narration = suppressNarration ? '' : response.content?.trim();
254
224
  if (narration) {
255
- this.emitAssistantMessage(narration, {
256
- isFinal: false,
257
- usage,
258
- contextStats,
259
- });
225
+ this.emitAssistantMessage(narration, { isFinal: false, usage, contextStats });
260
226
  }
261
227
  const assistantMessage = {
262
228
  role: 'assistant',
263
- content: response.content ?? '',
229
+ content: suppressNarration ? '' : (response.content ?? ''),
264
230
  };
265
231
  if (response.toolCalls?.length) {
266
232
  assistantMessage.toolCalls = response.toolCalls;
267
233
  }
268
234
  this.messages.push(assistantMessage);
269
235
  await this.resolveToolCalls(response.toolCalls);
236
+ // Reset auto-continue counter since model is actively working
237
+ autoContinueAttempts = 0;
270
238
  continue;
271
239
  }
272
240
  const reply = response.content?.trim() ?? '';
273
- // Reset loop detection when we get a text response (not just tool calls)
274
- if (reply.length >= 10) {
275
- this.lastToolCallSignature = null;
276
- this.repeatedToolCallCount = 0;
241
+ // Check if model expressed intent to act but didn't call tools
242
+ // This catches "Let me create..." without actual tool calls
243
+ // Only auto-continue if the feature is enabled
244
+ if (this._autoContinueEnabled && shouldAutoContinue(reply, false) && autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS) {
245
+ autoContinueAttempts++;
246
+ // Emit the planning content but mark as non-final
247
+ if (reply) {
248
+ this.emitAssistantMessage(reply, { isFinal: false, usage, contextStats });
249
+ }
250
+ this.messages.push({ role: 'assistant', content: reply });
251
+ // Auto-prompt with increasingly direct instructions
252
+ const promptIndex = Math.min(autoContinueAttempts - 1, AUTO_CONTINUE_PROMPTS.length - 1);
253
+ this.messages.push({
254
+ role: 'user',
255
+ content: AUTO_CONTINUE_PROMPTS[promptIndex],
256
+ });
257
+ const autoContinueMessage = `Model expressed intent but didn't use tools. Auto-prompting to continue...`;
258
+ this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, autoContinueMessage);
259
+ continue;
277
260
  }
278
261
  if (reply) {
279
262
  this.emitAssistantMessage(reply, { isFinal: true, usage, contextStats });
@@ -295,14 +278,6 @@ export class AgentRuntime {
295
278
  continue;
296
279
  }
297
280
  }
298
- // Auto-retry transient errors (network issues, rate limits, server errors)
299
- if (isTransientError(error) && transientRetryAttempts < MAX_TRANSIENT_RETRIES) {
300
- transientRetryAttempts++;
301
- const delayMs = getRetryDelay(transientRetryAttempts);
302
- this.callbacks.onRetrying?.(transientRetryAttempts, MAX_TRANSIENT_RETRIES, error);
303
- await sleep(delayMs);
304
- continue;
305
- }
306
281
  // Re-throw if not recoverable or recovery failed
307
282
  throw error;
308
283
  }
@@ -313,7 +288,7 @@ export class AgentRuntime {
313
288
  return this.processConversation();
314
289
  }
315
290
  let contextRecoveryAttempts = 0;
316
- let transientRetryAttempts = 0;
291
+ let autoContinueAttempts = 0;
317
292
  while (true) {
318
293
  // Check for cancellation at start of each iteration
319
294
  if (this.cancellationRequested) {
@@ -346,10 +321,45 @@ export class AgentRuntime {
346
321
  }
347
322
  }
348
323
  };
349
- // Simple streaming loop - no timeouts, let the stream run until done
324
+ const buildTimeoutError = (reason) => {
325
+ const base = reason === 'startup-timeout'
326
+ ? 'Streaming stalled before any content arrived.'
327
+ : 'Streaming stalled due to inactivity.';
328
+ return new StreamInterruptionError(reason, `${base} Falling back to non-streaming.`, fullContent || reasoningContent);
329
+ };
330
+ // Timer for first token arrival
331
+ let startupTimer = null;
332
+ const startupTimeoutPromise = new Promise((_, reject) => {
333
+ startupTimer = setTimeout(() => reject(buildTimeoutError('startup-timeout')), STREAM_FIRST_CHUNK_TIMEOUT_MS);
334
+ });
335
+ const createIdleTimeout = () => {
336
+ let idleTimer = null;
337
+ const promise = new Promise((_, reject) => {
338
+ idleTimer = setTimeout(() => reject(buildTimeoutError('idle-timeout')), STREAM_INACTIVITY_TIMEOUT_MS);
339
+ });
340
+ const cancel = () => {
341
+ if (idleTimer) {
342
+ clearTimeout(idleTimer);
343
+ idleTimer = null;
344
+ }
345
+ };
346
+ return { promise, cancel };
347
+ };
348
+ let idleTimeout = createIdleTimeout();
349
+ let firstChunkSeen = false;
350
350
  try {
351
351
  while (true) {
352
- const result = await iterator.next();
352
+ const races = [
353
+ iterator.next(),
354
+ idleTimeout.promise,
355
+ ];
356
+ if (!firstChunkSeen) {
357
+ races.push(startupTimeoutPromise);
358
+ }
359
+ const result = (await Promise.race(races));
360
+ // Reset idle timer for the next iteration
361
+ idleTimeout.cancel();
362
+ idleTimeout = createIdleTimeout();
353
363
  // Check for cancellation during streaming
354
364
  if (this.cancellationRequested) {
355
365
  await closeStream();
@@ -364,10 +374,17 @@ export class AgentRuntime {
364
374
  break;
365
375
  }
366
376
  const chunk = result.value;
377
+ if (!firstChunkSeen) {
378
+ firstChunkSeen = true;
379
+ if (startupTimer) {
380
+ clearTimeout(startupTimer);
381
+ startupTimer = null;
382
+ }
383
+ }
367
384
  if (chunk.type === 'reasoning' && chunk.content) {
368
- // Buffer reasoning content - don't stream token-by-token
369
- // It will be emitted as a complete block when ready
370
385
  reasoningContent += chunk.content;
386
+ // Surface reasoning tokens to the UI so thought process is visible
387
+ this.callbacks.onStreamChunk?.(chunk.content, 'reasoning');
371
388
  continue;
372
389
  }
373
390
  if (chunk.type === 'content' && chunk.content) {
@@ -380,19 +397,11 @@ export class AgentRuntime {
380
397
  }
381
398
  }
382
399
  else if (chunk.type === 'tool_call' && chunk.toolCall) {
383
- // On first tool call, flush any buffered content
384
- if (toolCalls.length === 0) {
385
- // Emit complete reasoning block first
386
- if (reasoningContent.trim()) {
387
- this.callbacks.onStreamChunk?.(reasoningContent, 'reasoning');
388
- }
389
- // Then emit buffered narration content
390
- if (suppressStreamNarration && bufferedContent) {
391
- this.callbacks.onStreamChunk?.(bufferedContent, 'content');
392
- bufferedContent = '';
393
- }
394
- }
395
400
  toolCalls.push(chunk.toolCall);
401
+ // Drop any speculative narration once we know the model is actually calling tools
402
+ if (suppressStreamNarration) {
403
+ bufferedContent = '';
404
+ }
396
405
  }
397
406
  else if (chunk.type === 'usage' && chunk.usage) {
398
407
  usage = chunk.usage;
@@ -400,88 +409,61 @@ export class AgentRuntime {
400
409
  }
401
410
  }
402
411
  finally {
412
+ idleTimeout.cancel();
413
+ if (startupTimer) {
414
+ clearTimeout(startupTimer);
415
+ }
403
416
  await closeStream();
404
417
  }
405
418
  // Reset recovery attempts on successful generation
406
419
  contextRecoveryAttempts = 0;
407
420
  const contextStats = this.getContextStats();
408
421
  const combinedContent = fullContent || reasoningContent;
409
- // If no tool calls were issued, emit reasoning and buffered content as complete blocks
410
- if (toolCalls.length === 0) {
411
- // Emit complete reasoning block if we have one
412
- if (reasoningContent.trim()) {
413
- this.callbacks.onStreamChunk?.(reasoningContent, 'reasoning');
414
- }
415
- // Emit buffered narration content
416
- if (suppressStreamNarration && bufferedContent) {
417
- this.callbacks.onStreamChunk?.(bufferedContent, 'content');
418
- bufferedContent = '';
419
- }
422
+ // If no tool calls were issued, flush any buffered narration now
423
+ if (suppressStreamNarration && toolCalls.length === 0 && bufferedContent) {
424
+ this.callbacks.onStreamChunk?.(bufferedContent, 'content');
425
+ bufferedContent = '';
420
426
  }
421
427
  // Check if we got tool calls
422
428
  if (toolCalls.length > 0) {
423
- // BEHAVIORAL LOOP DETECTION: Check if model is stuck calling same tool repeatedly
424
- // This catches patterns like "git status" called 5 times even with slightly different outputs
425
- const behavioralLoopResult = this.checkBehavioralLoop(toolCalls);
426
- if (behavioralLoopResult) {
427
- this.emitAssistantMessage(behavioralLoopResult, { isFinal: true, usage, contextStats, wasStreamed: true });
428
- this.messages.push({ role: 'assistant', content: behavioralLoopResult });
429
- return behavioralLoopResult;
430
- }
431
- // Loop detection: check if same tool calls are being repeated (exact signature match)
432
- const signatureCalls = toolCalls.filter(call => !this.shouldSkipLoopDetection(call));
433
- const toolSignature = signatureCalls.length
434
- ? signatureCalls
435
- .map((t) => `${t.name}:${JSON.stringify(t.arguments)}`)
436
- .sort()
437
- .join('|')
438
- : null;
439
- if (toolSignature && toolSignature === this.lastToolCallSignature) {
440
- this.repeatedToolCallCount++;
441
- if (this.repeatedToolCallCount >= AgentRuntime.MAX_REPEATED_TOOL_CALLS) {
442
- // Break out of loop - model is stuck
443
- const loopMsg = `Tool loop detected: same tools called ${this.repeatedToolCallCount} times. Please try a different approach or provide more specific instructions.`;
444
- this.emitAssistantMessage(loopMsg, { isFinal: true, usage, contextStats, wasStreamed: true });
445
- this.messages.push({ role: 'assistant', content: loopMsg });
446
- this.lastToolCallSignature = null;
447
- this.repeatedToolCallCount = 0;
448
- return loopMsg;
449
- }
450
- }
451
- else if (toolSignature) {
452
- this.lastToolCallSignature = toolSignature;
453
- this.repeatedToolCallCount = 1;
454
- }
455
- else {
456
- this.lastToolCallSignature = null;
457
- this.repeatedToolCallCount = 0;
458
- }
459
- // Content was already streamed via onStreamChunk, just record it for context
460
- // (wasStreamed=true prevents duplicate display)
461
- // Note: Acknowledgement injection happens during streaming (when first tool_call chunk arrives)
462
- const narration = combinedContent.trim();
429
+ const suppressNarration = this.shouldSuppressToolNarration();
430
+ const narration = suppressNarration ? '' : combinedContent.trim();
463
431
  if (narration) {
464
- this.emitAssistantMessage(narration, {
465
- isFinal: false,
466
- usage,
467
- contextStats,
468
- wasStreamed: true,
469
- });
432
+ // Mark as wasStreamed since content was already output via onStreamChunk
433
+ this.emitAssistantMessage(narration, { isFinal: false, usage, contextStats, wasStreamed: true });
470
434
  }
471
435
  const assistantMessage = {
472
436
  role: 'assistant',
473
- content: combinedContent,
437
+ content: suppressNarration ? '' : combinedContent,
474
438
  toolCalls,
475
439
  };
476
440
  this.messages.push(assistantMessage);
477
441
  await this.resolveToolCalls(toolCalls);
442
+ // Reset auto-continue counter since model is actively working
443
+ autoContinueAttempts = 0;
478
444
  continue;
479
445
  }
446
+ // Check if model expressed intent to act but didn't call tools
447
+ // This catches "Let me create..." without actual tool calls
448
+ // Only auto-continue if the feature is enabled
480
449
  const reply = combinedContent.trim();
481
- // Reset loop detection when we get a text response (not just tool calls)
482
- if (reply.length >= 10) {
483
- this.lastToolCallSignature = null;
484
- this.repeatedToolCallCount = 0;
450
+ if (this._autoContinueEnabled && shouldAutoContinue(reply, false) && autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS) {
451
+ autoContinueAttempts++;
452
+ // Emit the planning content but mark as non-final
453
+ // Mark as wasStreamed since content was already output via onStreamChunk
454
+ if (reply) {
455
+ this.emitAssistantMessage(reply, { isFinal: false, usage, contextStats, wasStreamed: true });
456
+ }
457
+ this.messages.push({ role: 'assistant', content: reply });
458
+ // Auto-prompt with increasingly direct instructions
459
+ const promptIndex = Math.min(autoContinueAttempts - 1, AUTO_CONTINUE_PROMPTS.length - 1);
460
+ this.messages.push({
461
+ role: 'user',
462
+ content: AUTO_CONTINUE_PROMPTS[promptIndex],
463
+ });
464
+ const autoContinueMessage = `Model expressed intent but didn't use tools. Auto-prompting to continue...`;
465
+ this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, autoContinueMessage);
466
+ continue;
485
467
  }
486
468
  // Final message - mark as streamed to avoid double-display in UI
487
469
  if (reply) {
@@ -504,14 +486,6 @@ export class AgentRuntime {
504
486
  continue;
505
487
  }
506
488
  }
507
- // Auto-retry transient errors (network issues, rate limits, server errors)
508
- if (isTransientError(error) && transientRetryAttempts < MAX_TRANSIENT_RETRIES) {
509
- transientRetryAttempts++;
510
- const delayMs = getRetryDelay(transientRetryAttempts);
511
- this.callbacks.onRetrying?.(transientRetryAttempts, MAX_TRANSIENT_RETRIES, error);
512
- await sleep(delayMs);
513
- continue;
514
- }
515
489
  // Re-throw if not recoverable or recovery failed
516
490
  throw error;
517
491
  }
@@ -543,23 +517,7 @@ export class AgentRuntime {
543
517
  // Fast path: single tool call
544
518
  if (numCalls === 1) {
545
519
  const call = toolCalls[0];
546
- // Check cache first - prevent duplicate identical tool calls
547
- const cached = this.getCachedToolResult(call);
548
- if (cached !== null) {
549
- // Return cached result with indicator that it was from cache
550
- this.messages.push({
551
- role: 'tool',
552
- name: call.name,
553
- toolCallId: call.id,
554
- content: `[Cached result - identical call already executed]\n\n${cached}`,
555
- });
556
- return;
557
- }
558
- this.callbacks.onToolExecution?.(call.name, true);
559
520
  const output = await this.toolRuntime.execute(call);
560
- this.callbacks.onToolExecution?.(call.name, false);
561
- // Cache the result for future identical calls
562
- this.cacheToolResult(call, output);
563
521
  this.messages.push({
564
522
  role: 'tool',
565
523
  name: call.name,
@@ -569,106 +527,56 @@ export class AgentRuntime {
569
527
  return;
570
528
  }
571
529
  // PERF: For reasonable batch sizes, execute all in parallel
572
- // Check cache for each call and only execute non-cached ones
573
530
  if (numCalls <= 10) {
574
- const cachedResults = [];
575
- const toExecute = [];
576
- // Separate cached from non-cached calls
577
- for (const call of toolCalls) {
578
- const cached = this.getCachedToolResult(call);
579
- if (cached !== null) {
580
- cachedResults.push({ call, output: cached, fromCache: true });
581
- }
582
- else {
583
- toExecute.push(call);
584
- }
585
- }
586
- // Execute non-cached calls in parallel
587
- if (toExecute.length > 0) {
588
- const toolNames = toExecute.map(c => c.name).join(', ');
589
- this.callbacks.onToolExecution?.(toolNames, true);
590
- const executed = await Promise.all(toExecute.map(async (call) => {
591
- const output = await this.toolRuntime.execute(call);
592
- this.cacheToolResult(call, output);
593
- return { call, output, fromCache: false };
594
- }));
595
- this.callbacks.onToolExecution?.(toolNames, false);
596
- cachedResults.push(...executed);
597
- }
598
- // Add all results to messages in the original order
599
- for (const originalCall of toolCalls) {
600
- const result = cachedResults.find(r => r.call.id === originalCall.id);
601
- if (result) {
602
- const content = result.fromCache
603
- ? `[Cached result - identical call already executed]\n\n${result.output}`
604
- : result.output;
605
- this.messages.push({
606
- role: 'tool',
607
- name: result.call.name,
608
- toolCallId: result.call.id,
609
- content,
610
- });
611
- }
531
+ const results = await Promise.all(toolCalls.map(async (call) => ({
532
+ call,
533
+ output: await this.toolRuntime.execute(call),
534
+ })));
535
+ // Add results to messages in the same order as tool calls
536
+ for (const { call, output } of results) {
537
+ this.messages.push({
538
+ role: 'tool',
539
+ name: call.name,
540
+ toolCallId: call.id,
541
+ content: output,
542
+ });
612
543
  }
613
544
  return;
614
545
  }
615
- // PERF: For large batches, use chunked parallel execution with caching
546
+ // PERF: For large batches, use chunked parallel execution
547
+ // This prevents memory pressure from too many concurrent operations
616
548
  const CHUNK_SIZE = 8;
617
- const allResults = [];
549
+ const results = [];
618
550
  for (let i = 0; i < numCalls; i += CHUNK_SIZE) {
619
551
  const chunk = toolCalls.slice(i, i + CHUNK_SIZE);
620
- const cachedInChunk = [];
621
- const toExecuteInChunk = [];
622
- for (const call of chunk) {
623
- const cached = this.getCachedToolResult(call);
624
- if (cached !== null) {
625
- cachedInChunk.push({ call, output: cached, fromCache: true });
626
- }
627
- else {
628
- toExecuteInChunk.push(call);
629
- }
630
- }
631
- if (toExecuteInChunk.length > 0) {
632
- const chunkNames = toExecuteInChunk.map(c => c.name).join(', ');
633
- this.callbacks.onToolExecution?.(chunkNames, true);
634
- const executed = await Promise.all(toExecuteInChunk.map(async (call) => {
635
- const output = await this.toolRuntime.execute(call);
636
- this.cacheToolResult(call, output);
637
- return { call, output, fromCache: false };
638
- }));
639
- this.callbacks.onToolExecution?.(chunkNames, false);
640
- cachedInChunk.push(...executed);
641
- }
642
- allResults.push(...cachedInChunk);
643
- }
644
- // Add results to messages in original order
645
- for (const originalCall of toolCalls) {
646
- const result = allResults.find(r => r.call.id === originalCall.id);
647
- if (result) {
648
- const content = result.fromCache
649
- ? `[Cached result - identical call already executed]\n\n${result.output}`
650
- : result.output;
651
- this.messages.push({
652
- role: 'tool',
653
- name: result.call.name,
654
- toolCallId: result.call.id,
655
- content,
656
- });
657
- }
552
+ const chunkResults = await Promise.all(chunk.map(async (call) => ({
553
+ call,
554
+ output: await this.toolRuntime.execute(call),
555
+ })));
556
+ results.push(...chunkResults);
557
+ }
558
+ // Add results to messages in order
559
+ for (const { call, output } of results) {
560
+ this.messages.push({
561
+ role: 'tool',
562
+ name: call.name,
563
+ toolCallId: call.id,
564
+ content: output,
565
+ });
658
566
  }
659
567
  }
660
568
  get providerTools() {
661
569
  return this.toolRuntime.listProviderTools();
662
570
  }
663
571
  /**
664
- * Whether to suppress tool narration in the content field.
665
- * Previously suppressed for OpenAI but now we show all thinking/narration.
572
+ * OpenAI models frequently add speculative tool narration in the content field.
573
+ * Suppress that text to avoid surfacing hallucinated tool usage in the UI.
666
574
  */
667
575
  shouldSuppressToolNarration() {
668
- return false; // Always show thinking/narration
576
+ return this.providerId.toLowerCase().includes('openai');
669
577
  }
670
578
  emitAssistantMessage(content, metadata) {
671
- if (!content || !content.trim()) {
579
+ if (!content) {
672
580
  return;
673
581
  }
674
582
  const elapsedMs = this.activeRun ? Date.now() - this.activeRun.startedAt : undefined;
@@ -718,165 +626,6 @@ export class AgentRuntime {
718
626
  model: this.modelId,
719
627
  });
720
628
  }
721
- /**
722
- * Extract a "command hash" from tool arguments for behavioral loop detection.
723
- * For execute_bash, this is the actual command. For other tools, key identifying args.
724
- */
725
- extractCmdHash(name, args) {
726
- // For bash/execute commands, extract the command itself
727
- if (name === 'execute_bash' || name === 'Bash') {
728
- const cmd = args['command'];
729
- if (cmd) {
730
- // Normalize: trim, take first 100 chars, remove variable parts like timestamps
731
- return cmd.trim().slice(0, 100).replace(/\d{10,}/g, 'N');
732
- }
733
- }
734
- // For file operations, use the path
735
- if (name === 'read_file' || name === 'Read' || name === 'read_files') {
736
- const path = args['path'] || args['file_path'] || args['paths'];
737
- if (path)
738
- return `path:${JSON.stringify(path).slice(0, 100)}`;
739
- }
740
- if (name === 'list_files' || name === 'Glob') {
741
- const path = args['path'] || args['pattern'];
742
- if (path)
743
- return `path:${JSON.stringify(path).slice(0, 100)}`;
744
- }
745
- // For search, use the query/pattern
746
- if (name === 'Grep' || name === 'grep' || name === 'search') {
747
- const pattern = args['pattern'] || args['query'];
748
- if (pattern)
749
- return `search:${String(pattern).slice(0, 100)}`;
750
- }
751
- // Default: use first significant arg value
752
- const firstArg = Object.values(args)[0];
753
- if (firstArg) {
754
- return String(firstArg).slice(0, 100);
755
- }
756
- return 'no-args';
757
- }
758
- /**
759
- * Check for behavioral loops - model calling the same tool with similar args repeatedly.
760
- * Returns an error message if a loop is detected, null otherwise.
761
- *
762
- * FUNDAMENTAL PREVENTION: Cached calls are excluded from loop detection since they
763
- * don't actually execute (the cache provides the result). This means:
764
- * - First call: executes and caches result
765
- * - Second identical call: returns cached result, NOT counted toward loop
766
- * - Only genuinely NEW (non-cached) repetitive calls trigger loop detection
767
- *
768
- * Direct execution tools (bash/edit) are also exempt to avoid short-circuiting
769
- * legitimate repeated user commands.
770
- *
771
- * This catches patterns like:
772
- * - "git status -sb" called 3 times with DIFFERENT outputs (cache miss each time)
773
- * - Repeated file reads where file content changed
774
- * - Repeated searches with same pattern but new results
775
- */
776
- checkBehavioralLoop(toolCalls) {
777
- // Skip loop detection for direct execution tools (bash/edit) to avoid false positives
778
- const loopEligibleCalls = toolCalls.filter(call => !this.shouldSkipLoopDetection(call));
779
- if (loopEligibleCalls.length === 0) {
780
- return null;
781
- }
782
- // Filter out calls that will be served from cache - these don't count toward loops
783
- // since they're handled fundamentally by the caching mechanism
784
- const nonCachedCalls = loopEligibleCalls.filter(call => this.getCachedToolResult(call) === null);
785
- // If all calls are cached, no loop detection needed
786
- if (nonCachedCalls.length === 0) {
787
- return null;
788
- }
789
- // Count existing occurrences in recent history
790
- const existingCounts = new Map();
791
- for (const { name, cmdHash } of this.recentToolCalls) {
792
- const key = `${name}:${cmdHash}`;
793
- existingCounts.set(key, (existingCounts.get(key) ?? 0) + 1);
794
- }
795
- // Check if ANY incoming NON-CACHED call would exceed threshold
796
- for (const call of nonCachedCalls) {
797
- const cmdHash = this.extractCmdHash(call.name, call.arguments ?? {});
798
- const key = `${call.name}:${cmdHash}`;
799
- const currentCount = existingCounts.get(key) ?? 0;
800
- // If adding this call would reach or exceed threshold, block immediately
801
- if (currentCount + 1 >= AgentRuntime.BEHAVIORAL_LOOP_THRESHOLD) {
802
- // Reset history to prevent immediate re-trigger
803
- this.recentToolCalls = [];
804
- return `Behavioral loop detected: "${call.name}" called ${currentCount + 1} times with similar arguments. The task appears stuck. Please try a different approach or provide more specific instructions.`;
805
- }
806
- }
807
- // Track only non-cached tool calls (cached ones are handled by caching)
808
- for (const call of nonCachedCalls) {
809
- const cmdHash = this.extractCmdHash(call.name, call.arguments ?? {});
810
- this.recentToolCalls.push({ name: call.name, cmdHash });
811
- }
812
- // Keep only recent history
813
- while (this.recentToolCalls.length > AgentRuntime.TOOL_HISTORY_SIZE) {
814
- this.recentToolCalls.shift();
815
- }
816
- return null;
817
- }
818
- /**
819
- * Reset behavioral loop tracking (called when user provides new input or task completes)
820
- */
821
- resetBehavioralLoopTracking() {
822
- this.recentToolCalls = [];
823
- this.lastToolCallSignature = null;
824
- this.repeatedToolCallCount = 0;
825
- // Note: we DON'T clear toolResultCache here for cacheable tools; stateful tools bypass caching
826
- }
827
- /**
828
- * Create a stable cache key for a tool call based on name and arguments
829
- */
830
- getToolCacheKey(call) {
831
- const args = call.arguments ?? {};
832
- // Sort keys for consistent ordering
833
- const sortedArgs = Object.keys(args).sort().reduce((acc, key) => {
834
- acc[key] = args[key];
835
- return acc;
836
- }, {});
837
- return `${call.name}:${JSON.stringify(sortedArgs)}`;
838
- }
839
- /**
840
- * Only cache tools that are safe to reuse; stateful commands must always execute.
841
- */
842
- isCacheableTool(call) {
843
- const nameLower = call.name.toLowerCase();
844
- return !AgentRuntime.NON_CACHEABLE_TOOL_NAMES.has(nameLower);
845
- }
846
- /**
847
- * Direct execution tools should not trigger behavioral loop short-circuiting.
848
- */
849
- shouldSkipLoopDetection(call) {
850
- const nameLower = call.name.toLowerCase();
851
- return AgentRuntime.LOOP_EXEMPT_TOOL_NAMES.has(nameLower);
852
- }
853
- /**
854
- * Get cached result for a tool call, or null if not cached
855
- */
856
- getCachedToolResult(call) {
857
- if (!this.isCacheableTool(call)) {
858
- return null;
859
- }
860
- const key = this.getToolCacheKey(call);
861
- return this.toolResultCache.get(key) ?? null;
862
- }
863
- /**
864
- * Cache a tool result for future identical calls
865
- */
866
- cacheToolResult(call, result) {
867
- if (!this.isCacheableTool(call)) {
868
- return;
869
- }
870
- const key = this.getToolCacheKey(call);
871
- // Evict oldest entries if cache is full
872
- if (this.toolResultCache.size >= AgentRuntime.TOOL_CACHE_MAX_SIZE) {
873
- const firstKey = this.toolResultCache.keys().next().value;
874
- if (firstKey) {
875
- this.toolResultCache.delete(firstKey);
876
- }
877
- }
878
- this.toolResultCache.set(key, result);
879
- }
880
629
  getHistory() {
881
630
  return this.messages.map(cloneMessage);
882
631
  }