erosolar-cli 2.1.167 → 2.1.168

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (128):
  1. package/agents/erosolar-code.rules.json +2 -2
  2. package/agents/general.rules.json +3 -21
  3. package/dist/StringUtils.d.ts +8 -0
  4. package/dist/StringUtils.d.ts.map +1 -0
  5. package/dist/StringUtils.js +11 -0
  6. package/dist/StringUtils.js.map +1 -0
  7. package/dist/capabilities/statusCapability.js +2 -2
  8. package/dist/capabilities/statusCapability.js.map +1 -1
  9. package/dist/contracts/agent-schemas.json +0 -5
  10. package/dist/core/agent.d.ts +11 -72
  11. package/dist/core/agent.d.ts.map +1 -1
  12. package/dist/core/agent.js +182 -869
  13. package/dist/core/agent.js.map +1 -1
  14. package/dist/core/aiFlowSupervisor.d.ts +44 -0
  15. package/dist/core/aiFlowSupervisor.d.ts.map +1 -0
  16. package/dist/core/aiFlowSupervisor.js +299 -0
  17. package/dist/core/aiFlowSupervisor.js.map +1 -0
  18. package/dist/core/cliTestHarness.d.ts +200 -0
  19. package/dist/core/cliTestHarness.d.ts.map +1 -0
  20. package/dist/core/cliTestHarness.js +549 -0
  21. package/dist/core/cliTestHarness.js.map +1 -0
  22. package/dist/core/preferences.d.ts +0 -1
  23. package/dist/core/preferences.d.ts.map +1 -1
  24. package/dist/core/preferences.js +2 -9
  25. package/dist/core/preferences.js.map +1 -1
  26. package/dist/core/schemaValidator.js +3 -3
  27. package/dist/core/schemaValidator.js.map +1 -1
  28. package/dist/core/testUtils.d.ts +121 -0
  29. package/dist/core/testUtils.d.ts.map +1 -0
  30. package/dist/core/testUtils.js +235 -0
  31. package/dist/core/testUtils.js.map +1 -0
  32. package/dist/core/toolPreconditions.d.ts +11 -0
  33. package/dist/core/toolPreconditions.d.ts.map +1 -1
  34. package/dist/core/toolPreconditions.js +164 -33
  35. package/dist/core/toolPreconditions.js.map +1 -1
  36. package/dist/core/toolRuntime.d.ts.map +1 -1
  37. package/dist/core/toolRuntime.js +114 -9
  38. package/dist/core/toolRuntime.js.map +1 -1
  39. package/dist/core/toolValidation.d.ts +116 -0
  40. package/dist/core/toolValidation.d.ts.map +1 -0
  41. package/dist/core/toolValidation.js +282 -0
  42. package/dist/core/toolValidation.js.map +1 -0
  43. package/dist/core/updateChecker.d.ts +1 -61
  44. package/dist/core/updateChecker.d.ts.map +1 -1
  45. package/dist/core/updateChecker.js +3 -147
  46. package/dist/core/updateChecker.js.map +1 -1
  47. package/dist/headless/headlessApp.d.ts.map +1 -1
  48. package/dist/headless/headlessApp.js +39 -0
  49. package/dist/headless/headlessApp.js.map +1 -1
  50. package/dist/plugins/tools/nodeDefaults.d.ts.map +1 -1
  51. package/dist/plugins/tools/nodeDefaults.js +2 -0
  52. package/dist/plugins/tools/nodeDefaults.js.map +1 -1
  53. package/dist/providers/openaiResponsesProvider.d.ts.map +1 -1
  54. package/dist/providers/openaiResponsesProvider.js +74 -79
  55. package/dist/providers/openaiResponsesProvider.js.map +1 -1
  56. package/dist/runtime/agentController.d.ts.map +1 -1
  57. package/dist/runtime/agentController.js +0 -6
  58. package/dist/runtime/agentController.js.map +1 -1
  59. package/dist/runtime/agentSession.d.ts.map +1 -1
  60. package/dist/runtime/agentSession.js +2 -3
  61. package/dist/runtime/agentSession.js.map +1 -1
  62. package/dist/shell/interactiveShell.d.ts +8 -16
  63. package/dist/shell/interactiveShell.d.ts.map +1 -1
  64. package/dist/shell/interactiveShell.js +159 -388
  65. package/dist/shell/interactiveShell.js.map +1 -1
  66. package/dist/shell/systemPrompt.d.ts.map +1 -1
  67. package/dist/shell/systemPrompt.js +15 -4
  68. package/dist/shell/systemPrompt.js.map +1 -1
  69. package/dist/subagents/taskRunner.js +1 -2
  70. package/dist/subagents/taskRunner.js.map +1 -1
  71. package/dist/tools/bashTools.d.ts.map +1 -1
  72. package/dist/tools/bashTools.js +8 -101
  73. package/dist/tools/bashTools.js.map +1 -1
  74. package/dist/tools/diffUtils.d.ts +2 -8
  75. package/dist/tools/diffUtils.d.ts.map +1 -1
  76. package/dist/tools/diffUtils.js +13 -72
  77. package/dist/tools/diffUtils.js.map +1 -1
  78. package/dist/tools/grepTools.d.ts.map +1 -1
  79. package/dist/tools/grepTools.js +2 -10
  80. package/dist/tools/grepTools.js.map +1 -1
  81. package/dist/tools/searchTools.d.ts.map +1 -1
  82. package/dist/tools/searchTools.js +2 -4
  83. package/dist/tools/searchTools.js.map +1 -1
  84. package/dist/ui/PromptController.d.ts +0 -2
  85. package/dist/ui/PromptController.d.ts.map +1 -1
  86. package/dist/ui/PromptController.js +0 -2
  87. package/dist/ui/PromptController.js.map +1 -1
  88. package/dist/ui/ShellUIAdapter.d.ts +18 -71
  89. package/dist/ui/ShellUIAdapter.d.ts.map +1 -1
  90. package/dist/ui/ShellUIAdapter.js +139 -237
  91. package/dist/ui/ShellUIAdapter.js.map +1 -1
  92. package/dist/ui/UnifiedUIController.d.ts +1 -0
  93. package/dist/ui/UnifiedUIController.d.ts.map +1 -1
  94. package/dist/ui/UnifiedUIController.js +1 -0
  95. package/dist/ui/UnifiedUIController.js.map +1 -1
  96. package/dist/ui/UnifiedUIRenderer.d.ts +5 -122
  97. package/dist/ui/UnifiedUIRenderer.d.ts.map +1 -1
  98. package/dist/ui/UnifiedUIRenderer.js +125 -830
  99. package/dist/ui/UnifiedUIRenderer.js.map +1 -1
  100. package/dist/ui/compactRenderer.d.ts +139 -0
  101. package/dist/ui/compactRenderer.d.ts.map +1 -0
  102. package/dist/ui/compactRenderer.js +398 -0
  103. package/dist/ui/compactRenderer.js.map +1 -0
  104. package/dist/ui/display.d.ts +48 -13
  105. package/dist/ui/display.d.ts.map +1 -1
  106. package/dist/ui/display.js +105 -22
  107. package/dist/ui/display.js.map +1 -1
  108. package/dist/ui/streamingFormatter.d.ts +30 -0
  109. package/dist/ui/streamingFormatter.d.ts.map +1 -0
  110. package/dist/ui/streamingFormatter.js +91 -0
  111. package/dist/ui/streamingFormatter.js.map +1 -0
  112. package/dist/ui/unified/index.d.ts +1 -1
  113. package/dist/ui/unified/index.d.ts.map +1 -1
  114. package/dist/ui/unified/index.js +2 -0
  115. package/dist/ui/unified/index.js.map +1 -1
  116. package/dist/utils/errorUtils.d.ts +16 -0
  117. package/dist/utils/errorUtils.d.ts.map +1 -0
  118. package/dist/utils/errorUtils.js +66 -0
  119. package/dist/utils/errorUtils.js.map +1 -0
  120. package/package.json +2 -1
  121. package/dist/core/reliabilityPrompt.d.ts +0 -9
  122. package/dist/core/reliabilityPrompt.d.ts.map +0 -1
  123. package/dist/core/reliabilityPrompt.js +0 -31
  124. package/dist/core/reliabilityPrompt.js.map +0 -1
  125. package/dist/ui/animatedStatus.d.ts +0 -129
  126. package/dist/ui/animatedStatus.d.ts.map +0 -1
  127. package/dist/ui/animatedStatus.js +0 -384
  128. package/dist/ui/animatedStatus.js.map +0 -1
@@ -6,347 +6,62 @@ import { safeErrorMessage } from './secretStore.js';
6
6
  const MAX_CONTEXT_RECOVERY_ATTEMPTS = 3;
7
7
  /**
8
8
  * Maximum number of auto-continuation attempts when model expresses intent but doesn't act
9
- * Increased to allow more recovery attempts for complex tasks
10
9
  */
11
- const MAX_AUTO_CONTINUE_ATTEMPTS = 8;
10
+ const MAX_AUTO_CONTINUE_ATTEMPTS = 3;
12
11
  /**
13
- * Threshold for short completion - responses shorter than this can be valid completions.
14
- * Anything longer after tool work needs scrutiny.
12
+ * Streaming safety timeouts (ms)
13
+ * - First chunk timeout: fail fast if the stream never starts
14
+ * - Inactivity timeout: abort if no chunks arrive for an extended period
15
15
  */
16
- const SHORT_COMPLETION_THRESHOLD = 80;
17
- const AUTO_CONTINUE_ESCALATION_ATTEMPT = 4;
18
- // Streaming runs without timeouts - we let the model take as long as it needs
16
+ // Allow more headroom before declaring a streaming stall to avoid premature fallbacks.
17
+ const STREAM_FIRST_CHUNK_TIMEOUT_MS = 25000;
18
+ const STREAM_INACTIVITY_TIMEOUT_MS = 60000;
19
19
  /**
20
20
  * Patterns that indicate the model intends to take action but hasn't yet
21
21
  * These suggest the model should be prompted to continue
22
22
  */
23
23
  const INTENT_WITHOUT_ACTION_PATTERNS = [
24
- // TEXT-FORMATTED TOOL CALLS: Model outputs tool call as text instead of using API
25
- // These are CRITICAL to catch - model is trying to call tools but failing to do so properly
26
- // Pattern: "_tool_call_" or similar markers followed by function-like syntax
27
- /_tool_call_\s*\n?\s*\w+\(/im,
28
- // Pattern: "tool_call:" or "Tool call:" followed by function name
29
- /tool[_\s]?call:?\s*\n?\s*\w+\(/im,
30
- // Pattern: function call syntax at start of line like "read_file(..." without being in code block
31
- /^\s*(read_file|write_file|edit_file|execute_bash|list_files|search|grep|glob)\s*\(/im,
32
- // Pattern: "Executing X..." or "Calling X..." without actual tool call
33
- /\b(executing|calling)\s+(read_file|write_file|edit_file|execute_bash|list_files|search|grep|glob)\b/i,
34
- // Pattern: indented tool call syntax like " read_file(package.json)"
35
- /^\s{2,}(read_file|write_file|edit_file|execute_bash|list_files|Bash|Read|Write|Edit|Grep|Glob)\s*\([^)]*\)\s*$/im,
36
24
  // "Let me X" patterns - model is stating what it will do
37
- /\blet me\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|examine|review|run)/i,
38
- // "I'll X" / "I will X" patterns - include all apostrophe variants (straight ', left ', right ')
39
- /\bi['''\u2018\u2019]ll\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|start|begin|now|use|inspect|examine|run)/i,
40
- /\bi will\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|start|begin|now|use|inspect|examine|run)/i,
41
- // "Will X" without "I" (common pattern)
42
- /\bwill\s+(run|use|read|search|check|inspect|examine|list|find|analyze|create|write|look)\s/i,
43
- // "I'm going to X" patterns - include all apostrophe variants
44
- /\bi['''\u2018\u2019]m going to\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|run)/i,
25
+ /\blet me\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|examine|review)/i,
26
+ // "I'll X" / "I will X" patterns
27
+ /\bi['']ll\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|start|begin|now)/i,
28
+ /\bi will\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze|start|begin|now)/i,
29
+ // "I'm going to X" patterns
30
+ /\bi['']m going to\s+(create|write|implement|add|update|edit|modify|fix|build|make|refactor|read|check|look|search|find|analyze)/i,
45
31
  // "Now I'll X" / "First, I'll X" patterns
46
- /\b(now|first|next)\s*(,)?\s*i['''\u2018\u2019]ll\s+/i,
32
+ /\b(now|first|next)\s*(,)?\s*i['']ll\s+/i,
47
33
  // Explicit continuation signals
48
- /\bhere['''\u2018\u2019]s (the|my) (plan|approach|solution|implementation)/i,
49
- // "Approach:" header indicates planning without action
50
- /\bapproach:/i,
34
+ /\bhere['']s (the|my) (plan|approach|solution|implementation)/i,
51
35
  // Numbered steps suggesting action to come
52
36
  /^\s*\d+\.\s+(create|write|implement|add|update|edit|modify|fix|show|read|check)/im,
53
37
  // Bullet points suggesting planned actions
54
38
  /^[\s•\-\*]+\s*(create|write|implement|add|update|edit|modify|fix|shows?|reads?|checks?)\s/im,
55
- // Statements about needing to do something
56
- /\bneed\s+(to\s+)?(identify|search|find|check|read|look|inspect|analyze|examine|review|list|remove)/i,
57
- // "Should be X" - model is describing action but not taking it
58
- /\bshould be\s+(deleted|removed|fixed|updated|changed|cleaned|cleared)/i,
59
- // "Can be X" - same pattern
60
- /\bcan be\s+(deleted|removed|fixed|updated|changed|cleaned|cleared)/i,
61
- // Questions that should trigger investigation instead of asking user
62
- /\bwhat (files?|areas?|code|patterns?)\s+(should|would you like|do you want)/i,
63
- // GIVING UP PATTERNS - model is asking for clarification instead of investigating
64
- /\bplease\s+(point me to|show me|tell me|specify|clarify|provide)/i,
65
- /\bwithout\s+(more|additional|further)\s+(details?|info|information|context|guidance)/i,
66
- /\bcan you\s+(clarify|specify|tell me|point me|show me)/i,
67
- /\blet me know\s+(what|which|where|how)/i,
68
- /\b(no|can['\u2018\u2019]t|cannot)\s+(obvious|clear|specific|find|identify|determine)/i,
69
- // "I don't have enough" type responses
70
- /\bdon['\u2018\u2019]t have\s+(enough|sufficient|the)\s+(info|information|context|details)/i,
71
- // "Could you" requests for clarification
72
- /\bcould you\s+(provide|specify|clarify|point|tell|show)/i,
73
39
  ];
74
- /**
75
- * Patterns that indicate the model gave a premature summary without concrete findings.
76
- * These summaries wrap up too quickly without file:line citations or code snippets.
77
- */
78
- const PREMATURE_CONCLUSION_PATTERNS = [
79
- // "Performed/Did a quick X" - surface-level investigation claim
80
- /\b(performed|did)\s+a\s+(quick|brief|fast)\s+(search|grep|scan|review|check)/i,
81
- // "Validated by running" - claiming verification without showing details
82
- /\bvalidated\s+by\s+running/i,
83
- // Mentioning matches but not examining them: "found X matches" without file paths
84
- /\bfound\s+\d+\s+(match|result|item|file|issue)/i,
85
- // Summary without substance: "cleaned up", "removed", "fixed" but no specifics
86
- // Allow any words between the article and the target noun (e.g., "removed the .tsbuildinfo build cache")
87
- /\b(cleaned up|removed|fixed)\s+(the|a|some)?\s*\S*\s*(file|cache|build|artifact)/i,
88
- ];
89
- /**
90
- * Check if response looks like a premature conclusion without concrete findings.
91
- * Looks for summary language without actual file:line citations.
92
- */
93
- function isPrematureConclusion(content) {
94
- // Must match a premature conclusion pattern
95
- const matchesPremature = PREMATURE_CONCLUSION_PATTERNS.some(p => p.test(content));
96
- if (!matchesPremature) {
97
- return false;
98
- }
99
- // Check if there are actual file:line citations (e.g., "src/foo.ts:42")
100
- const hasFileCitation = /\b\w+\.(ts|js|tsx|jsx|py|go|rs|java|c|cpp|h|md|json|yaml|yml):\d+\b/.test(content);
101
- if (hasFileCitation) {
102
- return false; // Has concrete findings, not premature
103
- }
104
- // Check for code blocks with actual code
105
- const hasCodeBlock = /```[\s\S]{20,}```/.test(content);
106
- if (hasCodeBlock) {
107
- return false; // Has code snippets, not premature
108
- }
109
- // Short response with no concrete findings = premature conclusion
110
- return content.length < 500;
111
- }
112
- // Short, forceful prompts - model should just act
113
40
  const AUTO_CONTINUE_PROMPTS = [
114
- 'Continue.',
115
- 'Keep going. Call tools.',
116
- 'You stopped mid-task. Continue with tools.',
117
- 'Do not summarize. Act with tools or ask a specific question.',
118
- 'Use tools to complete the task.',
41
+ 'Continue. Use tools now: start with Read/read_file to inspect the target file, then call Edit (or Write if available) with file_path/old_string/new_string to apply changes. Keep using tools until the task is done.',
42
+ 'You MUST call tools immediately. Issue Read -> Edit/Write tool calls with explicit parameters; no more explaining or planning.',
43
+ 'CRITICAL: Call a tool right now. Use Edit with file_path, old_string, new_string (or Write with file_path and content). Respond with tool calls only.',
119
44
  ];
120
- // Specific prompt for when model outputs text-formatted tool calls instead of using the API
121
- const TEXT_TOOL_CALL_PROMPT = 'You wrote a tool call as text. Use the actual tool API - call the function directly, do not write it as text.';
122
- // Forceful prompt used when the model keeps narrating or stalling after several attempts
123
- const AUTO_CONTINUE_FORCE_PROMPT = 'You are stuck narrating. Immediately call the necessary tools to finish the task. If truly done, respond with a concise final answer citing any file paths/lines touched. Do not ask for confirmation.';
124
- const SHORT_RESPONSE_PROMPT = 'Based on the tool results above, provide your complete response. Summarize findings and suggest next steps if applicable.';
125
- /**
126
- * Select an auto-continue prompt and user-facing message based on attempt count and reason.
127
- * Escalates to a forceful instruction after repeated stalls.
128
- */
129
- function buildAutoContinueInstruction(attempt, reason) {
130
- const promptIndex = Math.max(0, Math.min(attempt - 1, AUTO_CONTINUE_PROMPTS.length - 1));
131
- let prompt;
132
- switch (reason) {
133
- case 'text_tool_call':
134
- prompt = TEXT_TOOL_CALL_PROMPT;
135
- break;
136
- case 'short_response':
137
- prompt = SHORT_RESPONSE_PROMPT;
138
- break;
139
- default:
140
- prompt = AUTO_CONTINUE_PROMPTS[promptIndex];
141
- break;
142
- }
143
- const isEscalated = attempt >= AUTO_CONTINUE_ESCALATION_ATTEMPT;
144
- if (isEscalated) {
145
- prompt = AUTO_CONTINUE_FORCE_PROMPT;
146
- }
147
- const baseMessage = {
148
- after_tools_narration: 'Model narrated after tools instead of completing. Prompting to continue...',
149
- intent_without_action: "Model expressed intent but didn't act. Prompting to continue...",
150
- text_tool_call: 'Model wrote tool call as text instead of using API. Prompting to use actual tools...',
151
- short_response: 'Model responded too briefly. Prompting for a complete answer...',
152
- };
153
- const message = isEscalated
154
- ? `${baseMessage[reason]} Escalating to force tool use and completion.`
155
- : baseMessage[reason];
156
- return { prompt, message };
157
- }
158
- /**
159
- * Generate a short, UI-safe preview of planning/intent text.
160
- * Keeps only the first line and truncates long content.
161
- */
162
- function buildPlanningPreview(content, maxLength = 140) {
163
- const trimmed = (content || '').trim();
164
- if (!trimmed) {
165
- return null;
166
- }
167
- const firstLine = trimmed.split('\n').find(line => line.trim()) ?? '';
168
- const collapsed = firstLine.replace(/\s+/g, ' ').trim();
169
- if (!collapsed) {
170
- return null;
171
- }
172
- const needsEllipsis = collapsed.length > maxLength;
173
- const preview = collapsed.slice(0, maxLength).trim();
174
- return needsEllipsis ? `${preview}...` : preview;
175
- }
176
- /**
177
- * Detect if content contains text-formatted tool calls that should have been actual API calls.
178
- * This is a PROGRAMMATIC check - if the model outputs "read_file(package.json)" as text,
179
- * it clearly intended to call a tool but failed to use the proper API.
180
- */
181
- function hasTextFormattedToolCall(content) {
182
- // Patterns that indicate the model wrote a tool call as text instead of using the API
183
- const textToolCallPatterns = [
184
- // "_tool_call_" marker with function syntax
185
- /_tool_call_\s*\n?\s*\w+\(/im,
186
- // "tool_call:" followed by function name
187
- /tool[_\s]?call:?\s*\n?\s*\w+\(/im,
188
- // Common tool function call syntax at line start (not in code block)
189
- /^\s*(read_file|write_file|edit_file|execute_bash|list_files)\s*\([^)]+\)/im,
190
- // Indented tool call like " read_file(package.json)"
191
- /^\s{2,}(read_file|write_file|edit_file|execute_bash|list_files|Bash|Read|Write|Edit|Grep|Glob)\s*\([^)]*\)\s*$/im,
192
- // "Executing/Calling tool_name..." without actual execution
193
- /\b(executing|calling)\s+(read_file|write_file|edit_file|execute_bash)\s*\.{3}?\s*$/im,
194
- ];
195
- // Skip if the content is inside a code block (```...```)
196
- const withoutCodeBlocks = content.replace(/```[\s\S]*?```/g, '');
197
- return textToolCallPatterns.some(p => p.test(withoutCodeBlocks));
198
- }
199
45
  /**
200
- * Patterns that indicate genuine completion of a task.
201
- * These are short, definitive statements that signal work is done.
46
+ * Check if response indicates intent to act without actually acting
47
+ * This detects when the model says "let me do X" but doesn't call any tools
202
48
  */
203
- const COMPLETION_SIGNAL_PATTERNS = [
204
- // Explicit done signals
205
- /^(done|complete|finished|all set)[.!]?$/i,
206
- // Short confirmations with specifics
207
- /^(removed|deleted|fixed|created|updated|added)\s+.{1,50}[.!]?$/i,
208
- // "X is now Y" short statements
209
- /^.{1,30}\s+is\s+now\s+.{1,30}[.!]?$/i,
210
- // Task completion with count
211
- /^(cleaned|removed|fixed|updated)\s+\d+\s+.{1,30}[.!]?$/i,
212
- ];
213
- /**
214
- * Patterns that indicate errors, failures, or incomplete work.
215
- * When these are present, auto-continue should kick in to fix the issues.
216
- * This is a PROGRAMMATIC check - if errors are reported, the task is NOT complete.
217
- */
218
- const ERROR_INDICATOR_PATTERNS = [
219
- // Build/compile errors
220
- /\b(error|errors|fail(s|ed|ure|ing)?|broken|crash(es|ed|ing)?)\b/i,
221
- // Test failures
222
- /\b(test(s)?\s+(fail|failing|failed)|failing\s+test)/i,
223
- // TypeScript/compilation errors
224
- /\b(typescript|ts|type)\s+error/i,
225
- /\bts\(\d+,\d+\)/i, // TS error format like ts(700,45)
226
- // Remaining/unresolved issues
227
- /\b(persists?|remains?|still\s+(has|have|is|are|broken|failing))\b/i,
228
- /\b(unresolved|outstanding|remaining)\s+(error|issue|problem)/i,
229
- // Explicit incomplete signals
230
- /\b(didn'?t|did\s+not|couldn'?t|could\s+not|wasn'?t|was\s+not)\s+(work|succeed|complete|finish|pass)/i,
231
- /\b(skipped|blocked|cannot|unable\s+to)\b/i,
232
- ];
233
- /**
234
- * Check if response contains error indicators that mean work is NOT complete.
235
- * This is a simple, programmatic check - no complex NLP needed.
236
- */
237
- function containsErrorIndicators(content) {
238
- return ERROR_INDICATOR_PATTERNS.some(p => p.test(content));
239
- }
240
- /**
241
- * Check if response is a genuine completion signal (short, definitive statement).
242
- * Returns true if the model is signaling it's actually done with the task.
243
- * IMPORTANT: Returns false if error indicators are present - model should continue.
244
- */
245
- function isCompletionSignal(content) {
246
- const trimmed = content.trim();
247
- // PROGRAMMATIC: If errors are reported, this is NOT a valid completion
248
- if (containsErrorIndicators(trimmed)) {
249
- return false;
250
- }
251
- // Very short responses (<50 chars) with completion patterns are genuine signals
252
- if (trimmed.length < 50) {
253
- return COMPLETION_SIGNAL_PATTERNS.some(p => p.test(trimmed));
254
- }
255
- // Responses asking user for direction are valid stops
256
- if (/\b(would you like|shall I|want me to|anything else)\b/i.test(trimmed) && trimmed.length < 200) {
257
- return true;
258
- }
259
- // File:line citations indicate concrete work was shown
260
- const hasCitation = /\b\w+\.(ts|js|tsx|jsx|py|go|rs|java|c|cpp|h|md|json|yaml|yml):\d+\b/.test(trimmed);
261
- if (hasCitation) {
262
- return true;
263
- }
264
- return false;
265
- }
266
- /**
267
- * PROGRAMMATIC CHECK: After tool calls, should we auto-continue?
268
- *
269
- * SIMPLE RULE: After tools, model should ONLY stop if:
270
- * 1. It asks the user a question (ends with ?)
271
- * 2. It gives a very short completion (< 80 chars) WITHOUT planning words
272
- *
273
- * Everything else = CONTINUE. This is intentionally aggressive.
274
- * The model should either be DONE (short message) or ASKING (question).
275
- * Long explanations after tool work = continue to force action.
276
- */
277
- function shouldContinueAfterTools(content) {
278
- const trimmed = content.trim();
279
- // No content after tools = continue to get results
280
- if (trimmed.length === 0) {
281
- return true;
282
- }
283
- // ALWAYS CONTINUE: Error indicators mean work isn't done
284
- if (containsErrorIndicators(trimmed)) {
285
- return true;
286
- }
287
- // ALWAYS CONTINUE: Intent/planning patterns mean model wants to do more
288
- if (INTENT_WITHOUT_ACTION_PATTERNS.some(p => p.test(trimmed))) {
289
- return true;
290
- }
291
- // VALID STOP: Model asking user a question (ends with ?)
292
- if (/\?\s*$/.test(trimmed)) {
49
+ function shouldAutoContinue(content, hasToolCalls) {
50
+ // If there are tool calls, no need to auto-continue
51
+ if (hasToolCalls) {
293
52
  return false;
294
53
  }
295
- // VALID STOP: Explicit user-direction phrases
296
- if (/\b(would you like|shall I|want me to|do you want|should I|what would you prefer)\b/i.test(trimmed)) {
54
+ // If content is very short, likely not an incomplete intent
55
+ if (content.length < 50) {
297
56
  return false;
298
57
  }
299
- // VALID STOP: Very short completion (< 80 chars) - likely "Done." or similar
300
- // But ONLY if it doesn't have planning words
301
- if (trimmed.length < SHORT_COMPLETION_THRESHOLD) {
302
- const hasPlanningWords = /\b(i'll|i will|let me|i need|i'm going|will now|going to|first|next|now i)\b/i.test(trimmed);
303
- if (!hasPlanningWords) {
304
- return false; // Short and not planning = valid completion
58
+ // Check for intent patterns
59
+ for (const pattern of INTENT_WITHOUT_ACTION_PATTERNS) {
60
+ if (pattern.test(content)) {
61
+ return true;
305
62
  }
306
- // Short but has planning words = continue
307
- return true;
308
- }
309
- // CONTINUE: Long response after tools = likely narrating/summarizing
310
- // Force model to either ask a question or give a short completion
311
- return true;
312
- }
313
- function shouldAutoContinue(content, hasToolCalls, hasReasoningContent = false) {
314
- // Strip <thinking> blocks to get actual response content
315
- const withoutThinking = content.replace(/<thinking>[\s\S]*?<\/thinking>/gi, '').trim();
316
- // Empty content shouldn't trigger auto-continue
317
- if (withoutThinking.length === 0) {
318
- return { shouldContinue: false };
319
- }
320
- // HIGHEST PRIORITY: Check for text-formatted tool calls
321
- // This is when the model writes "read_file(package.json)" as text instead of calling the API
322
- if (hasTextFormattedToolCall(withoutThinking)) {
323
- return { shouldContinue: true, reason: 'text_tool_call' };
324
- }
325
- // PROGRAMMATIC: If response reports errors/failures, auto-continue to fix them
326
- // This is critical - if model reports "error persists" or "build failed", it should continue
327
- if (containsErrorIndicators(withoutThinking)) {
328
- return { shouldContinue: true, reason: 'error_indicators' };
329
- }
330
- // If model output thinking/reasoning without much content, likely needs prompting
331
- if (hasReasoningContent && withoutThinking.length < 30) {
332
- return { shouldContinue: true, reason: 'short_reasoning' };
333
- }
334
- // Check for intent patterns - model wants to do more work
335
- const hasMoreIntent = INTENT_WITHOUT_ACTION_PATTERNS.some(p => p.test(withoutThinking));
336
- // Even if tools were called, continue if narrative expresses MORE intent
337
- // e.g., model calls grep but then says "I need to remove..." - should continue
338
- if (hasToolCalls) {
339
- return { shouldContinue: hasMoreIntent, reason: hasMoreIntent ? 'intent_patterns' : undefined };
340
- }
341
- // No tool calls - check for intent or premature conclusion
342
- if (hasMoreIntent) {
343
- return { shouldContinue: true, reason: 'intent_patterns' };
344
63
  }
345
- // Check for premature conclusions - model summarized without concrete findings
346
- if (isPrematureConclusion(withoutThinking)) {
347
- return { shouldContinue: true, reason: 'premature_conclusion' };
348
- }
349
- return { shouldContinue: false };
64
+ return false;
350
65
  }
351
66
  /**
352
67
  * Check if an error is a context overflow error
@@ -362,53 +77,15 @@ function isContextOverflowError(error) {
362
77
  message.includes('max_tokens') ||
363
78
  message.includes('context window'));
364
79
  }
365
- /**
366
- * Check if an error is a transient/retryable error (network issues, rate limits, server errors)
367
- */
368
- function isTransientError(error) {
369
- if (!(error instanceof Error))
370
- return false;
371
- const message = error.message.toLowerCase();
372
- // Network errors
373
- const networkPatterns = [
374
- 'econnrefused', 'econnreset', 'enotfound', 'etimedout', 'epipe',
375
- 'network error', 'connection error', 'fetch failed', 'socket hang up',
376
- 'network is unreachable', 'connection refused', 'connection reset',
377
- ];
378
- if (networkPatterns.some(p => message.includes(p))) {
379
- return true;
380
- }
381
- // Rate limit errors
382
- if (message.includes('rate limit') || message.includes('429') || message.includes('too many requests')) {
383
- return true;
384
- }
385
- // Server errors (5xx)
386
- if (message.includes('500') || message.includes('502') || message.includes('503') || message.includes('504')) {
387
- return true;
80
+ class StreamInterruptionError extends Error {
81
+ reason;
82
+ partialResponse;
83
+ constructor(reason, message, partialResponse) {
84
+ super(message);
85
+ this.name = 'StreamInterruptionError';
86
+ this.reason = reason;
87
+ this.partialResponse = partialResponse;
388
88
  }
389
- // Temporary service errors
390
- if (message.includes('service unavailable') || message.includes('temporarily unavailable') ||
391
- message.includes('overloaded') || message.includes('server error')) {
392
- return true;
393
- }
394
- return false;
395
- }
396
- /**
397
- * Maximum number of transient error retries
398
- */
399
- const MAX_TRANSIENT_RETRIES = 3;
400
- /**
401
- * Delay before retry (in ms), with exponential backoff
402
- */
403
- function getRetryDelay(attempt) {
404
- // Base delay of 1 second, doubles each attempt: 1s, 2s, 4s
405
- return Math.min(1000 * Math.pow(2, attempt - 1), 10000);
406
- }
407
- /**
408
- * Sleep for the specified milliseconds
409
- */
410
- function sleep(ms) {
411
- return new Promise(resolve => setTimeout(resolve, ms));
412
89
  }
413
90
  export class AgentRuntime {
414
91
  messages = [];
@@ -423,21 +100,6 @@ export class AgentRuntime {
423
100
  workingDirectory;
424
101
  cancellationRequested = false;
425
102
  _autoContinueEnabled = false;
426
- // Loop detection: track last tool calls to detect stuck loops
427
- lastToolCallSignature = null;
428
- repeatedToolCallCount = 0;
429
- static MAX_REPEATED_TOOL_CALLS = 5; // Allow up to 4 identical calls before stopping
430
- // Behavioral loop detection: track recent tool calls to catch repetitive patterns
431
- // e.g., calling "execute_bash" with "git status" 5 times even if output differs slightly
432
- recentToolCalls = [];
433
- static TOOL_HISTORY_SIZE = 12;
434
- static BEHAVIORAL_LOOP_THRESHOLD = 3; // Same tool+cmd 3+ times in last 12 = stuck
435
- // Tool result cache: prevent duplicate identical tool calls by returning cached results
436
- // Key: tool signature (name + JSON args), Value: result string
437
- toolResultCache = new Map();
438
- static TOOL_CACHE_MAX_SIZE = 50; // Keep last 50 tool results
439
- // Track if first tool call callback has been fired this turn
440
- firstToolCallFired = false;
441
103
  constructor(options) {
442
104
  this.provider = options.provider;
443
105
  this.toolRuntime = options.toolRuntime;
@@ -489,20 +151,8 @@ export class AgentRuntime {
489
151
  if (!prompt) {
490
152
  return '';
491
153
  }
492
- // CRITICAL: Fire acknowledgement callback IMMEDIATELY - before ANY other processing
493
- // This guarantees the user sees feedback the moment their request is received
494
- if (this.callbacks.onRequestReceived) {
495
- const maxLength = 160;
496
- const normalized = prompt.replace(/\s+/g, ' ');
497
- const preview = normalized.length > maxLength
498
- ? `${normalized.slice(0, maxLength - 3)}...`
499
- : normalized;
500
- this.callbacks.onRequestReceived(preview);
501
- }
502
- // Reset cancellation flag, loop tracking, and first tool call flag at start of new request
154
+ // Reset cancellation flag at start of new request
503
155
  this.cancellationRequested = false;
504
- this.resetBehavioralLoopTracking();
505
- this.firstToolCallFired = false;
506
156
  // Handle multi-line paste: show summary to user, send full content to AI
507
157
  if (isMultilinePaste(prompt)) {
508
158
  const processed = processPaste(prompt);
@@ -518,9 +168,28 @@ export class AgentRuntime {
518
168
  const run = { startedAt: Date.now() };
519
169
  this.activeRun = run;
520
170
  try {
521
- // Always use streaming when available - no fallback
522
171
  if (useStreaming && this.provider.generateStream) {
523
- return await this.processConversationStreaming();
172
+ try {
173
+ return await this.processConversationStreaming();
174
+ }
175
+ catch (error) {
176
+ const message = safeErrorMessage(error);
177
+ const reason = error instanceof StreamInterruptionError ? error.reason : undefined;
178
+ const partialResponse = error instanceof StreamInterruptionError ? error.partialResponse : undefined;
179
+ console.warn(`[agent] Streaming failed, falling back to non-streaming: ${message}`);
180
+ // If we captured part of the response, seed it into history and ask the model to continue
181
+ // so we don't restart the answer from scratch during fallback.
182
+ if (partialResponse && partialResponse.trim()) {
183
+ const partial = partialResponse.trim();
184
+ this.messages.push({ role: 'assistant', content: partial });
185
+ this.messages.push({
186
+ role: 'user',
187
+ content: 'Continue your previous response from where it stopped. Do not repeat text you already provided.',
188
+ });
189
+ }
190
+ this.callbacks.onStreamFallback?.({ message, error, reason, partialResponse });
191
+ return await this.processConversation();
192
+ }
524
193
  }
525
194
  return await this.processConversation();
526
195
  }
@@ -535,7 +204,6 @@ export class AgentRuntime {
535
204
  async processConversation() {
536
205
  let contextRecoveryAttempts = 0;
537
206
  let autoContinueAttempts = 0;
538
- let transientRetryAttempts = 0;
539
207
  while (true) {
540
208
  // Check for cancellation at start of each iteration
541
209
  if (this.cancellationRequested) {
@@ -551,126 +219,43 @@ export class AgentRuntime {
551
219
  // Reset recovery attempts on successful generation
552
220
  contextRecoveryAttempts = 0;
553
221
  if (response.type === 'tool_calls') {
554
- // BEHAVIORAL LOOP DETECTION: Check if model is stuck calling same tool repeatedly
555
- const behavioralLoopResult = this.checkBehavioralLoop(response.toolCalls);
556
- if (behavioralLoopResult) {
557
- this.emitAssistantMessage(behavioralLoopResult, { isFinal: true, usage, contextStats });
558
- this.messages.push({ role: 'assistant', content: behavioralLoopResult });
559
- return behavioralLoopResult;
560
- }
561
- // Loop detection: check if same tool calls are being repeated (exact signature match)
562
- const toolSignature = response.toolCalls
563
- .map((t) => `${t.name}:${JSON.stringify(t.arguments)}`)
564
- .sort()
565
- .join('|');
566
- if (toolSignature === this.lastToolCallSignature) {
567
- this.repeatedToolCallCount++;
568
- if (this.repeatedToolCallCount >= AgentRuntime.MAX_REPEATED_TOOL_CALLS) {
569
- // Break out of loop - model is stuck
570
- const loopMsg = `Tool loop detected: same tools called ${this.repeatedToolCallCount} times. Please try a different approach or provide more specific instructions.`;
571
- this.emitAssistantMessage(loopMsg, { isFinal: true, usage, contextStats });
572
- this.messages.push({ role: 'assistant', content: loopMsg });
573
- this.lastToolCallSignature = null;
574
- this.repeatedToolCallCount = 0;
575
- return loopMsg;
576
- }
577
- }
578
- else {
579
- this.lastToolCallSignature = toolSignature;
580
- this.repeatedToolCallCount = 1;
581
- }
582
- // Always emit narration if present - it shows the AI's thought process before tools
583
- const narration = response.content?.trim();
584
- const hasNarration = !!narration;
585
- const shouldPromptAfterTools = this._autoContinueEnabled &&
586
- autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS &&
587
- shouldContinueAfterTools(narration ?? '');
588
- // Fire first tool call callback if not yet fired this turn
589
- // ALWAYS emit acknowledgement before first tool - ensures user sees immediate feedback
590
- if (!this.firstToolCallFired && this.callbacks.onBeforeFirstToolCall) {
591
- this.firstToolCallFired = true;
592
- const toolNames = response.toolCalls.map(t => t.name);
593
- const injectedAck = this.callbacks.onBeforeFirstToolCall(toolNames, hasNarration);
594
- // ALWAYS emit acknowledgement if returned - provides immediate user feedback
595
- if (injectedAck) {
596
- this.emitAssistantMessage(injectedAck, { isFinal: false, usage, contextStats });
597
- }
598
- }
222
+ const suppressNarration = this.shouldSuppressToolNarration();
223
+ const narration = suppressNarration ? '' : response.content?.trim();
599
224
  if (narration) {
600
- this.emitAssistantMessage(narration, {
601
- isFinal: false,
602
- usage,
603
- contextStats,
604
- suppressDisplay: shouldPromptAfterTools,
605
- });
225
+ this.emitAssistantMessage(narration, { isFinal: false, usage, contextStats });
606
226
  }
607
227
  const assistantMessage = {
608
228
  role: 'assistant',
609
- content: response.content ?? '',
229
+ content: suppressNarration ? '' : (response.content ?? ''),
610
230
  };
611
231
  if (response.toolCalls?.length) {
612
232
  assistantMessage.toolCalls = response.toolCalls;
613
233
  }
614
234
  this.messages.push(assistantMessage);
615
235
  await this.resolveToolCalls(response.toolCalls);
616
- // PROGRAMMATIC CONTINUATION: After tool work, model must either:
617
- // 1. Call more tools (already handled by continue above)
618
- // 2. Give a short completion signal
619
- // 3. Ask user for direction
620
- // PROGRAMMATIC: If model outputs narrative instead of concrete findings, continue
621
- if (shouldPromptAfterTools) {
622
- autoContinueAttempts++;
623
- const instruction = buildAutoContinueInstruction(autoContinueAttempts, 'after_tools_narration');
624
- this.messages.push({
625
- role: 'user',
626
- content: instruction.prompt,
627
- });
628
- this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, instruction.message);
629
- continue;
630
- }
631
236
  // Reset auto-continue counter since model is actively working
632
237
  autoContinueAttempts = 0;
633
238
  continue;
634
239
  }
635
240
  const reply = response.content?.trim() ?? '';
636
- // Reset loop detection when we get a text response (not just tool calls)
637
- if (reply.length >= 10) {
638
- this.lastToolCallSignature = null;
639
- this.repeatedToolCallCount = 0;
640
- }
641
- // If model returned empty or very short AND auto-continue is enabled, prompt it to respond
642
- // This is disabled by default to prevent loops
643
- if (this._autoContinueEnabled && reply.length < 10 && autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS) {
644
- autoContinueAttempts++;
645
- const instruction = buildAutoContinueInstruction(autoContinueAttempts, 'short_response');
646
- this.messages.push({ role: 'assistant', content: reply || '' });
647
- this.messages.push({
648
- role: 'user',
649
- content: instruction.prompt,
650
- });
651
- this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, instruction.message);
652
- continue;
653
- }
654
241
  // Check if model expressed intent to act but didn't call tools
655
242
  // This catches "Let me create..." without actual tool calls
656
- // Also catches text-formatted tool calls like "_tool_call_\nread_file(...)"
657
243
  // Only auto-continue if the feature is enabled
658
- const continueResult = shouldAutoContinue(reply, false);
659
- if (this._autoContinueEnabled && continueResult.shouldContinue && autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS) {
244
+ if (this._autoContinueEnabled && shouldAutoContinue(reply, false) && autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS) {
660
245
  autoContinueAttempts++;
661
- const planningPreview = buildPlanningPreview(reply);
246
+ // Emit the planning content but mark as non-final
247
+ if (reply) {
248
+ this.emitAssistantMessage(reply, { isFinal: false, usage, contextStats });
249
+ }
662
250
  this.messages.push({ role: 'assistant', content: reply });
663
- // Choose prompt based on reason - text tool calls get a specific, forceful prompt
664
- const continueReason = continueResult.reason === 'text_tool_call' ? 'text_tool_call' : 'intent_without_action';
665
- const instruction = buildAutoContinueInstruction(autoContinueAttempts, continueReason);
251
+ // Auto-prompt with increasingly direct instructions
252
+ const promptIndex = Math.min(autoContinueAttempts - 1, AUTO_CONTINUE_PROMPTS.length - 1);
666
253
  this.messages.push({
667
254
  role: 'user',
668
- content: instruction.prompt,
255
+ content: AUTO_CONTINUE_PROMPTS[promptIndex],
669
256
  });
670
- const uiMessage = planningPreview
671
- ? `${instruction.message} Next action: ${planningPreview}`
672
- : instruction.message;
673
- this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, uiMessage);
257
+ const autoContinueMessage = `Model expressed intent but didn't use tools. Auto-prompting to continue...`;
258
+ this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, autoContinueMessage);
674
259
  continue;
675
260
  }
676
261
  if (reply) {
@@ -693,14 +278,6 @@ export class AgentRuntime {
693
278
  continue;
694
279
  }
695
280
  }
696
- // Auto-retry transient errors (network issues, rate limits, server errors)
697
- if (isTransientError(error) && transientRetryAttempts < MAX_TRANSIENT_RETRIES) {
698
- transientRetryAttempts++;
699
- const delayMs = getRetryDelay(transientRetryAttempts);
700
- this.callbacks.onRetrying?.(transientRetryAttempts, MAX_TRANSIENT_RETRIES, error);
701
- await sleep(delayMs);
702
- continue;
703
- }
704
281
  // Re-throw if not recoverable or recovery failed
705
282
  throw error;
706
283
  }
@@ -712,7 +289,6 @@ export class AgentRuntime {
712
289
  }
713
290
  let contextRecoveryAttempts = 0;
714
291
  let autoContinueAttempts = 0;
715
- let transientRetryAttempts = 0;
716
292
  while (true) {
717
293
  // Check for cancellation at start of each iteration
718
294
  if (this.cancellationRequested) {
@@ -745,10 +321,45 @@ export class AgentRuntime {
745
321
  }
746
322
  }
747
323
  };
748
- // Simple streaming loop - no timeouts, let the stream run until done
324
+ const buildTimeoutError = (reason) => {
325
+ const base = reason === 'startup-timeout'
326
+ ? 'Streaming stalled before any content arrived.'
327
+ : 'Streaming stalled due to inactivity.';
328
+ return new StreamInterruptionError(reason, `${base} Falling back to non-streaming.`, fullContent || reasoningContent);
329
+ };
330
+ // Timer for first token arrival
331
+ let startupTimer = null;
332
+ const startupTimeoutPromise = new Promise((_, reject) => {
333
+ startupTimer = setTimeout(() => reject(buildTimeoutError('startup-timeout')), STREAM_FIRST_CHUNK_TIMEOUT_MS);
334
+ });
335
+ const createIdleTimeout = () => {
336
+ let idleTimer = null;
337
+ const promise = new Promise((_, reject) => {
338
+ idleTimer = setTimeout(() => reject(buildTimeoutError('idle-timeout')), STREAM_INACTIVITY_TIMEOUT_MS);
339
+ });
340
+ const cancel = () => {
341
+ if (idleTimer) {
342
+ clearTimeout(idleTimer);
343
+ idleTimer = null;
344
+ }
345
+ };
346
+ return { promise, cancel };
347
+ };
348
+ let idleTimeout = createIdleTimeout();
349
+ let firstChunkSeen = false;
749
350
  try {
750
351
  while (true) {
751
- const result = await iterator.next();
352
+ const races = [
353
+ iterator.next(),
354
+ idleTimeout.promise,
355
+ ];
356
+ if (!firstChunkSeen) {
357
+ races.push(startupTimeoutPromise);
358
+ }
359
+ const result = (await Promise.race(races));
360
+ // Reset idle timer for the next iteration
361
+ idleTimeout.cancel();
362
+ idleTimeout = createIdleTimeout();
752
363
  // Check for cancellation during streaming
753
364
  if (this.cancellationRequested) {
754
365
  await closeStream();
@@ -763,10 +374,17 @@ export class AgentRuntime {
763
374
  break;
764
375
  }
765
376
  const chunk = result.value;
377
+ if (!firstChunkSeen) {
378
+ firstChunkSeen = true;
379
+ if (startupTimer) {
380
+ clearTimeout(startupTimer);
381
+ startupTimer = null;
382
+ }
383
+ }
766
384
  if (chunk.type === 'reasoning' && chunk.content) {
767
- // Buffer reasoning content - don't stream token-by-token
768
- // It will be emitted as a complete block when ready
769
385
  reasoningContent += chunk.content;
386
+ // Surface reasoning tokens to the UI so thought process is visible
387
+ this.callbacks.onStreamChunk?.(chunk.content, 'reasoning');
770
388
  continue;
771
389
  }
772
390
  if (chunk.type === 'content' && chunk.content) {
@@ -779,31 +397,11 @@ export class AgentRuntime {
779
397
  }
780
398
  }
781
399
  else if (chunk.type === 'tool_call' && chunk.toolCall) {
782
- // FIRST TOOL CALL: ALWAYS inject acknowledgement for immediate user feedback
783
- if (toolCalls.length === 0) {
784
- const hasNarration = !!(fullContent.trim() || reasoningContent.trim());
785
- // Fire callback and ALWAYS inject acknowledgement BEFORE anything else
786
- if (!this.firstToolCallFired && this.callbacks.onBeforeFirstToolCall) {
787
- this.firstToolCallFired = true;
788
- const injectedAck = this.callbacks.onBeforeFirstToolCall([chunk.toolCall.name], hasNarration);
789
- // ALWAYS inject acknowledgement if returned - ensures immediate user feedback
790
- if (injectedAck) {
791
- // Inject acknowledgement as the FIRST thing user sees
792
- this.callbacks.onStreamChunk?.(injectedAck + '\n', 'content');
793
- fullContent = injectedAck + '\n' + fullContent; // Add to content for context
794
- }
795
- }
796
- // Emit complete reasoning block first
797
- if (reasoningContent.trim()) {
798
- this.callbacks.onStreamChunk?.(reasoningContent, 'reasoning');
799
- }
800
- // Then emit buffered narration content
801
- if (suppressStreamNarration && bufferedContent) {
802
- this.callbacks.onStreamChunk?.(bufferedContent, 'content');
803
- bufferedContent = '';
804
- }
805
- }
806
400
  toolCalls.push(chunk.toolCall);
401
+ // Drop any speculative narration once we know the model is actually calling tools
402
+ if (suppressStreamNarration) {
403
+ bufferedContent = '';
404
+ }
807
405
  }
808
406
  else if (chunk.type === 'usage' && chunk.usage) {
809
407
  usage = chunk.usage;
@@ -811,94 +409,37 @@ export class AgentRuntime {
811
409
  }
812
410
  }
813
411
  finally {
412
+ idleTimeout.cancel();
413
+ if (startupTimer) {
414
+ clearTimeout(startupTimer);
415
+ }
814
416
  await closeStream();
815
417
  }
816
418
  // Reset recovery attempts on successful generation
817
419
  contextRecoveryAttempts = 0;
818
420
  const contextStats = this.getContextStats();
819
421
  const combinedContent = fullContent || reasoningContent;
820
- // If no tool calls were issued, emit reasoning and buffered content as complete blocks
821
- if (toolCalls.length === 0) {
822
- // Emit complete reasoning block if we have one
823
- if (reasoningContent.trim()) {
824
- this.callbacks.onStreamChunk?.(reasoningContent, 'reasoning');
825
- }
826
- // Emit buffered narration content
827
- if (suppressStreamNarration && bufferedContent) {
828
- this.callbacks.onStreamChunk?.(bufferedContent, 'content');
829
- bufferedContent = '';
830
- }
422
+ // If no tool calls were issued, flush any buffered narration now
423
+ if (suppressStreamNarration && toolCalls.length === 0 && bufferedContent) {
424
+ this.callbacks.onStreamChunk?.(bufferedContent, 'content');
425
+ bufferedContent = '';
831
426
  }
832
427
  // Check if we got tool calls
833
428
  if (toolCalls.length > 0) {
834
- // BEHAVIORAL LOOP DETECTION: Check if model is stuck calling same tool repeatedly
835
- // This catches patterns like "git status" called 5 times even with slightly different outputs
836
- const behavioralLoopResult = this.checkBehavioralLoop(toolCalls);
837
- if (behavioralLoopResult) {
838
- this.emitAssistantMessage(behavioralLoopResult, { isFinal: true, usage, contextStats, wasStreamed: true });
839
- this.messages.push({ role: 'assistant', content: behavioralLoopResult });
840
- return behavioralLoopResult;
841
- }
842
- // Loop detection: check if same tool calls are being repeated (exact signature match)
843
- const toolSignature = toolCalls
844
- .map((t) => `${t.name}:${JSON.stringify(t.arguments)}`)
845
- .sort()
846
- .join('|');
847
- if (toolSignature === this.lastToolCallSignature) {
848
- this.repeatedToolCallCount++;
849
- if (this.repeatedToolCallCount >= AgentRuntime.MAX_REPEATED_TOOL_CALLS) {
850
- // Break out of loop - model is stuck
851
- const loopMsg = `Tool loop detected: same tools called ${this.repeatedToolCallCount} times. Please try a different approach or provide more specific instructions.`;
852
- this.emitAssistantMessage(loopMsg, { isFinal: true, usage, contextStats, wasStreamed: true });
853
- this.messages.push({ role: 'assistant', content: loopMsg });
854
- this.lastToolCallSignature = null;
855
- this.repeatedToolCallCount = 0;
856
- return loopMsg;
857
- }
858
- }
859
- else {
860
- this.lastToolCallSignature = toolSignature;
861
- this.repeatedToolCallCount = 1;
862
- }
863
- // Content was already streamed via onStreamChunk, just record it for context
864
- // (wasStreamed=true prevents duplicate display)
865
- // Note: Acknowledgement injection happens during streaming (when first tool_call chunk arrives)
866
- const narration = combinedContent.trim();
867
- const shouldPromptAfterTools = this._autoContinueEnabled &&
868
- autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS &&
869
- shouldContinueAfterTools(narration ?? '');
429
+ const suppressNarration = this.shouldSuppressToolNarration();
430
+ const narration = suppressNarration ? '' : combinedContent.trim();
870
431
  if (narration) {
871
- this.emitAssistantMessage(narration, {
872
- isFinal: false,
873
- usage,
874
- contextStats,
875
- wasStreamed: true,
876
- suppressDisplay: shouldPromptAfterTools,
877
- });
432
+ // Mark as wasStreamed since content was already output via onStreamChunk
433
+ this.emitAssistantMessage(narration, { isFinal: false, usage, contextStats, wasStreamed: true });
878
434
  }
879
435
  const assistantMessage = {
880
436
  role: 'assistant',
881
- content: combinedContent,
437
+ content: suppressNarration ? '' : combinedContent,
882
438
  toolCalls,
883
439
  };
884
440
  this.messages.push(assistantMessage);
885
441
  await this.resolveToolCalls(toolCalls);
886
- // PROGRAMMATIC CONTINUATION: After tool work, model must either:
887
- // 1. Call more tools (already handled by continue above)
888
- // 2. Give a short completion signal
889
- // 3. Ask user for direction
890
- // PROGRAMMATIC: If model outputs narrative instead of concrete findings, continue
891
- if (shouldPromptAfterTools) {
892
- autoContinueAttempts++;
893
- const instruction = buildAutoContinueInstruction(autoContinueAttempts, 'after_tools_narration');
894
- this.messages.push({
895
- role: 'user',
896
- content: instruction.prompt,
897
- });
898
- this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, instruction.message);
899
- continue;
900
- }
901
- // Reset auto-continue counter since model appears to be done
442
+ // Reset auto-continue counter since model is actively working
902
443
  autoContinueAttempts = 0;
903
444
  continue;
904
445
  }
@@ -906,46 +447,24 @@ export class AgentRuntime {
906
447
  // This catches "Let me create..." without actual tool calls
907
448
  // Only auto-continue if the feature is enabled
908
449
  const reply = combinedContent.trim();
909
- // Reset loop detection when we get a text response (not just tool calls)
910
- if (reply.length >= 10) {
911
- this.lastToolCallSignature = null;
912
- this.repeatedToolCallCount = 0;
913
- }
914
- // If model returned empty or very short AND auto-continue is enabled, prompt it to respond
915
- // This is disabled by default to prevent loops
916
- if (this._autoContinueEnabled && reply.length < 10 && autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS) {
450
+ if (this._autoContinueEnabled && shouldAutoContinue(reply, false) && autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS) {
917
451
  autoContinueAttempts++;
918
- const instruction = buildAutoContinueInstruction(autoContinueAttempts, 'short_response');
919
- this.messages.push({ role: 'assistant', content: reply || '' });
452
+ // Emit the planning content but mark as non-final
453
+ // Mark as wasStreamed since content was already output via onStreamChunk
454
+ if (reply) {
455
+ this.emitAssistantMessage(reply, { isFinal: false, usage, contextStats, wasStreamed: true });
456
+ }
457
+ this.messages.push({ role: 'assistant', content: reply });
458
+ // Auto-prompt with increasingly direct instructions
459
+ const promptIndex = Math.min(autoContinueAttempts - 1, AUTO_CONTINUE_PROMPTS.length - 1);
920
460
  this.messages.push({
921
461
  role: 'user',
922
- content: instruction.prompt,
462
+ content: AUTO_CONTINUE_PROMPTS[promptIndex],
923
463
  });
924
- this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, instruction.message);
464
+ const autoContinueMessage = `Model expressed intent but didn't use tools. Auto-prompting to continue...`;
465
+ this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, autoContinueMessage);
925
466
  continue;
926
467
  }
927
- // PROGRAMMATIC CHECK: Text-only responses after tool work need scrutiny
928
- // If model outputs substantial narrative without tools, it's likely summarizing
929
- // Check if this is a genuine completion or a premature summary
930
- // Also catches text-formatted tool calls like "_tool_call_\nread_file(...)"
931
- if (this._autoContinueEnabled && autoContinueAttempts < MAX_AUTO_CONTINUE_ATTEMPTS) {
932
- // Intent patterns still catch "let me X" without tools
933
- const streamContinueResult = shouldAutoContinue(reply, false);
934
- if (streamContinueResult.shouldContinue) {
935
- autoContinueAttempts++;
936
- const planningPreview = buildPlanningPreview(reply);
937
- this.messages.push({ role: 'assistant', content: reply });
938
- // Choose prompt based on reason - text tool calls get a specific, forceful prompt
939
- const continueReason = streamContinueResult.reason === 'text_tool_call' ? 'text_tool_call' : 'intent_without_action';
940
- const instruction = buildAutoContinueInstruction(autoContinueAttempts, continueReason);
941
- this.messages.push({ role: 'user', content: instruction.prompt });
942
- const uiMessage = planningPreview
943
- ? `${instruction.message} Next action: ${planningPreview}`
944
- : instruction.message;
945
- this.callbacks.onAutoContinue?.(autoContinueAttempts, MAX_AUTO_CONTINUE_ATTEMPTS, uiMessage);
946
- continue;
947
- }
948
- }
949
468
  // Final message - mark as streamed to avoid double-display in UI
950
469
  if (reply) {
951
470
  this.emitAssistantMessage(reply, { isFinal: true, usage, contextStats, wasStreamed: true });
@@ -967,14 +486,6 @@ export class AgentRuntime {
967
486
  continue;
968
487
  }
969
488
  }
970
- // Auto-retry transient errors (network issues, rate limits, server errors)
971
- if (isTransientError(error) && transientRetryAttempts < MAX_TRANSIENT_RETRIES) {
972
- transientRetryAttempts++;
973
- const delayMs = getRetryDelay(transientRetryAttempts);
974
- this.callbacks.onRetrying?.(transientRetryAttempts, MAX_TRANSIENT_RETRIES, error);
975
- await sleep(delayMs);
976
- continue;
977
- }
978
489
  // Re-throw if not recoverable or recovery failed
979
490
  throw error;
980
491
  }
@@ -1006,23 +517,7 @@ export class AgentRuntime {
1006
517
  // Fast path: single tool call
1007
518
  if (numCalls === 1) {
1008
519
  const call = toolCalls[0];
1009
- // Check cache first - prevent duplicate identical tool calls
1010
- const cached = this.getCachedToolResult(call);
1011
- if (cached !== null) {
1012
- // Return cached result with indicator that it was from cache
1013
- this.messages.push({
1014
- role: 'tool',
1015
- name: call.name,
1016
- toolCallId: call.id,
1017
- content: `[Cached result - identical call already executed]\n\n${cached}`,
1018
- });
1019
- return;
1020
- }
1021
- this.callbacks.onToolExecution?.(call.name, true);
1022
520
  const output = await this.toolRuntime.execute(call);
1023
- this.callbacks.onToolExecution?.(call.name, false);
1024
- // Cache the result for future identical calls
1025
- this.cacheToolResult(call, output);
1026
521
  this.messages.push({
1027
522
  role: 'tool',
1028
523
  name: call.name,
@@ -1032,103 +527,53 @@ export class AgentRuntime {
1032
527
  return;
1033
528
  }
1034
529
  // PERF: For reasonable batch sizes, execute all in parallel
1035
- // Check cache for each call and only execute non-cached ones
1036
530
  if (numCalls <= 10) {
1037
- const cachedResults = [];
1038
- const toExecute = [];
1039
- // Separate cached from non-cached calls
1040
- for (const call of toolCalls) {
1041
- const cached = this.getCachedToolResult(call);
1042
- if (cached !== null) {
1043
- cachedResults.push({ call, output: cached, fromCache: true });
1044
- }
1045
- else {
1046
- toExecute.push(call);
1047
- }
1048
- }
1049
- // Execute non-cached calls in parallel
1050
- if (toExecute.length > 0) {
1051
- const toolNames = toExecute.map(c => c.name).join(', ');
1052
- this.callbacks.onToolExecution?.(toolNames, true);
1053
- const executed = await Promise.all(toExecute.map(async (call) => {
1054
- const output = await this.toolRuntime.execute(call);
1055
- this.cacheToolResult(call, output);
1056
- return { call, output, fromCache: false };
1057
- }));
1058
- this.callbacks.onToolExecution?.(toolNames, false);
1059
- cachedResults.push(...executed);
1060
- }
1061
- // Add all results to messages in the original order
1062
- for (const originalCall of toolCalls) {
1063
- const result = cachedResults.find(r => r.call.id === originalCall.id);
1064
- if (result) {
1065
- const content = result.fromCache
1066
- ? `[Cached result - identical call already executed]\n\n${result.output}`
1067
- : result.output;
1068
- this.messages.push({
1069
- role: 'tool',
1070
- name: result.call.name,
1071
- toolCallId: result.call.id,
1072
- content,
1073
- });
1074
- }
531
+ const results = await Promise.all(toolCalls.map(async (call) => ({
532
+ call,
533
+ output: await this.toolRuntime.execute(call),
534
+ })));
535
+ // Add results to messages in the same order as tool calls
536
+ for (const { call, output } of results) {
537
+ this.messages.push({
538
+ role: 'tool',
539
+ name: call.name,
540
+ toolCallId: call.id,
541
+ content: output,
542
+ });
1075
543
  }
1076
544
  return;
1077
545
  }
1078
- // PERF: For large batches, use chunked parallel execution with caching
546
+ // PERF: For large batches, use chunked parallel execution
547
+ // This prevents memory pressure from too many concurrent operations
1079
548
  const CHUNK_SIZE = 8;
1080
- const allResults = [];
549
+ const results = [];
1081
550
  for (let i = 0; i < numCalls; i += CHUNK_SIZE) {
1082
551
  const chunk = toolCalls.slice(i, i + CHUNK_SIZE);
1083
- const cachedInChunk = [];
1084
- const toExecuteInChunk = [];
1085
- for (const call of chunk) {
1086
- const cached = this.getCachedToolResult(call);
1087
- if (cached !== null) {
1088
- cachedInChunk.push({ call, output: cached, fromCache: true });
1089
- }
1090
- else {
1091
- toExecuteInChunk.push(call);
1092
- }
1093
- }
1094
- if (toExecuteInChunk.length > 0) {
1095
- const chunkNames = toExecuteInChunk.map(c => c.name).join(', ');
1096
- this.callbacks.onToolExecution?.(chunkNames, true);
1097
- const executed = await Promise.all(toExecuteInChunk.map(async (call) => {
1098
- const output = await this.toolRuntime.execute(call);
1099
- this.cacheToolResult(call, output);
1100
- return { call, output, fromCache: false };
1101
- }));
1102
- this.callbacks.onToolExecution?.(chunkNames, false);
1103
- cachedInChunk.push(...executed);
1104
- }
1105
- allResults.push(...cachedInChunk);
1106
- }
1107
- // Add results to messages in original order
1108
- for (const originalCall of toolCalls) {
1109
- const result = allResults.find(r => r.call.id === originalCall.id);
1110
- if (result) {
1111
- const content = result.fromCache
1112
- ? `[Cached result - identical call already executed]\n\n${result.output}`
1113
- : result.output;
1114
- this.messages.push({
1115
- role: 'tool',
1116
- name: result.call.name,
1117
- toolCallId: result.call.id,
1118
- content,
1119
- });
1120
- }
552
+ const chunkResults = await Promise.all(chunk.map(async (call) => ({
553
+ call,
554
+ output: await this.toolRuntime.execute(call),
555
+ })));
556
+ results.push(...chunkResults);
557
+ }
558
+ // Add results to messages in order
559
+ for (const { call, output } of results) {
560
+ this.messages.push({
561
+ role: 'tool',
562
+ name: call.name,
563
+ toolCallId: call.id,
564
+ content: output,
565
+ });
1121
566
  }
1122
567
  }
1123
568
  get providerTools() {
1124
569
  return this.toolRuntime.listProviderTools();
1125
570
  }
1126
571
  /**
1127
- * Whether to suppress tool narration in the content field.
1128
- * Previously suppressed for OpenAI but now we show all thinking/narration.
572
+ * OpenAI models frequently add speculative tool narration in the content field.
573
+ * Suppress that text to avoid surfacing hallucinated tool usage in the UI.
1129
574
  */
1130
575
  shouldSuppressToolNarration() {
1131
- return false; // Always show thinking/narration
576
+ return this.providerId.toLowerCase().includes('openai');
1132
577
  }
1133
578
  emitAssistantMessage(content, metadata) {
1134
579
  if (!content) {
@@ -1181,138 +626,6 @@ export class AgentRuntime {
1181
626
  model: this.modelId,
1182
627
  });
1183
628
  }
1184
- /**
1185
- * Extract a "command hash" from tool arguments for behavioral loop detection.
1186
- * For execute_bash, this is the actual command. For other tools, key identifying args.
1187
- */
1188
- extractCmdHash(name, args) {
1189
- // For bash/execute commands, extract the command itself
1190
- if (name === 'execute_bash' || name === 'Bash') {
1191
- const cmd = args['command'];
1192
- if (cmd) {
1193
- // Normalize: trim, take first 100 chars, remove variable parts like timestamps
1194
- return cmd.trim().slice(0, 100).replace(/\d{10,}/g, 'N');
1195
- }
1196
- }
1197
- // For file operations, use the path
1198
- if (name === 'read_file' || name === 'Read' || name === 'read_files') {
1199
- const path = args['path'] || args['file_path'] || args['paths'];
1200
- if (path)
1201
- return `path:${JSON.stringify(path).slice(0, 100)}`;
1202
- }
1203
- if (name === 'list_files' || name === 'Glob') {
1204
- const path = args['path'] || args['pattern'];
1205
- if (path)
1206
- return `path:${JSON.stringify(path).slice(0, 100)}`;
1207
- }
1208
- // For search, use the query/pattern
1209
- if (name === 'Grep' || name === 'grep' || name === 'search') {
1210
- const pattern = args['pattern'] || args['query'];
1211
- if (pattern)
1212
- return `search:${String(pattern).slice(0, 100)}`;
1213
- }
1214
- // Default: use first significant arg value
1215
- const firstArg = Object.values(args)[0];
1216
- if (firstArg) {
1217
- return String(firstArg).slice(0, 100);
1218
- }
1219
- return 'no-args';
1220
- }
1221
- /**
1222
- * Check for behavioral loops - model calling the same tool with similar args repeatedly.
1223
- * Returns an error message if a loop is detected, null otherwise.
1224
- *
1225
- * FUNDAMENTAL PREVENTION: Cached calls are excluded from loop detection since they
1226
- * don't actually execute (the cache provides the result). This means:
1227
- * - First call: executes and caches result
1228
- * - Second identical call: returns cached result, NOT counted toward loop
1229
- * - Only genuinely NEW (non-cached) repetitive calls trigger loop detection
1230
- *
1231
- * This catches patterns like:
1232
- * - "git status -sb" called 3 times with DIFFERENT outputs (cache miss each time)
1233
- * - Repeated file reads where file content changed
1234
- * - Repeated searches with same pattern but new results
1235
- */
1236
- checkBehavioralLoop(toolCalls) {
1237
- // Filter out calls that will be served from cache - these don't count toward loops
1238
- // since they're handled fundamentally by the caching mechanism
1239
- const nonCachedCalls = toolCalls.filter(call => this.getCachedToolResult(call) === null);
1240
- // If all calls are cached, no loop detection needed
1241
- if (nonCachedCalls.length === 0) {
1242
- return null;
1243
- }
1244
- // Count existing occurrences in recent history
1245
- const existingCounts = new Map();
1246
- for (const { name, cmdHash } of this.recentToolCalls) {
1247
- const key = `${name}:${cmdHash}`;
1248
- existingCounts.set(key, (existingCounts.get(key) ?? 0) + 1);
1249
- }
1250
- // Check if ANY incoming NON-CACHED call would exceed threshold
1251
- for (const call of nonCachedCalls) {
1252
- const cmdHash = this.extractCmdHash(call.name, call.arguments ?? {});
1253
- const key = `${call.name}:${cmdHash}`;
1254
- const currentCount = existingCounts.get(key) ?? 0;
1255
- // If adding this call would reach or exceed threshold, block immediately
1256
- if (currentCount + 1 >= AgentRuntime.BEHAVIORAL_LOOP_THRESHOLD) {
1257
- // Reset history to prevent immediate re-trigger
1258
- this.recentToolCalls = [];
1259
- return `Behavioral loop detected: "${call.name}" called ${currentCount + 1} times with similar arguments. The task appears stuck. Please try a different approach or provide more specific instructions.`;
1260
- }
1261
- }
1262
- // Track only non-cached tool calls (cached ones are handled by caching)
1263
- for (const call of nonCachedCalls) {
1264
- const cmdHash = this.extractCmdHash(call.name, call.arguments ?? {});
1265
- this.recentToolCalls.push({ name: call.name, cmdHash });
1266
- }
1267
- // Keep only recent history
1268
- while (this.recentToolCalls.length > AgentRuntime.TOOL_HISTORY_SIZE) {
1269
- this.recentToolCalls.shift();
1270
- }
1271
- return null;
1272
- }
1273
- /**
1274
- * Reset behavioral loop tracking (called when user provides new input or task completes)
1275
- */
1276
- resetBehavioralLoopTracking() {
1277
- this.recentToolCalls = [];
1278
- this.lastToolCallSignature = null;
1279
- this.repeatedToolCallCount = 0;
1280
- // Note: we DON'T clear toolResultCache here - cached results remain valid across turns
1281
- // to prevent re-executing identical tool calls within a session
1282
- }
1283
- /**
1284
- * Create a stable cache key for a tool call based on name and arguments
1285
- */
1286
- getToolCacheKey(call) {
1287
- const args = call.arguments ?? {};
1288
- // Sort keys for consistent ordering
1289
- const sortedArgs = Object.keys(args).sort().reduce((acc, key) => {
1290
- acc[key] = args[key];
1291
- return acc;
1292
- }, {});
1293
- return `${call.name}:${JSON.stringify(sortedArgs)}`;
1294
- }
1295
- /**
1296
- * Get cached result for a tool call, or null if not cached
1297
- */
1298
- getCachedToolResult(call) {
1299
- const key = this.getToolCacheKey(call);
1300
- return this.toolResultCache.get(key) ?? null;
1301
- }
1302
- /**
1303
- * Cache a tool result for future identical calls
1304
- */
1305
- cacheToolResult(call, result) {
1306
- const key = this.getToolCacheKey(call);
1307
- // Evict oldest entries if cache is full
1308
- if (this.toolResultCache.size >= AgentRuntime.TOOL_CACHE_MAX_SIZE) {
1309
- const firstKey = this.toolResultCache.keys().next().value;
1310
- if (firstKey) {
1311
- this.toolResultCache.delete(firstKey);
1312
- }
1313
- }
1314
- this.toolResultCache.set(key, result);
1315
- }
1316
629
  getHistory() {
1317
630
  return this.messages.map(cloneMessage);
1318
631
  }