erosolar-cli 2.1.174 → 2.1.176

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,12 +12,13 @@ import { ensureSecretForProvider, getSecretDefinitionForProvider, getSecretValue
12
12
  import { saveActiveProfilePreference, saveModelPreference, loadToolSettings, saveToolSettings, clearToolSettings, clearActiveProfilePreference, loadSessionPreferences, saveSessionPreferences, loadFeatureFlags, saveFeatureFlags, toggleFeatureFlag, FEATURE_FLAG_INFO, } from '../core/preferences.js';
13
13
  import { getLearningSummary, getRecentLearning, commitLearning, exportAllLearning, getLearningDir, } from '../core/learningPersistence.js';
14
14
  import { buildEnabledToolSet, evaluateToolPermissions, getToolToggleOptions, } from '../capabilities/toolRegistry.js';
15
+ import { FlowOrchestrator } from '../runtime/flowOrchestrator.js';
15
16
  import { detectApiKeyError } from '../core/errors/apiKeyErrors.js';
16
17
  import { detectPromptBlockError, } from '../core/errors/promptBlockErrors.js';
17
18
  import { detectNetworkError } from '../core/errors/networkErrors.js';
18
19
  import { buildWorkspaceContext } from '../workspace.js';
19
20
  import { buildInteractiveSystemPrompt } from './systemPrompt.js';
20
- import { getTaskCompletionDetector, resetTaskCompletionDetector, } from './taskCompletionDetector.js';
21
+ import { getTaskCompletionDetector, resetTaskCompletionDetector, WRITE_TOOLS, } from './taskCompletionDetector.js';
21
22
  import { discoverAllModels, quickCheckProviders, getCachedDiscoveredModels, sortModelsByPriority } from '../core/modelDiscovery.js';
22
23
  import { getModels, getSlashCommands, getProviders } from '../core/agentSchemaLoader.js';
23
24
  import { loadMcpServers } from '../mcp/config.js';
@@ -85,6 +86,10 @@ const CONTEXT_CLEANUP_SYSTEM_PROMPT = `Summarize earlier IDE collaboration so th
85
86
  - Respond in plain Markdown only (no tool or shell calls).`;
86
87
  const MAX_ATTACHMENT_BYTES = 200 * 1024; // 200KB per attachment
87
88
  const MAX_ATTACHMENT_CHARS = 16000; // Guardrail to avoid flooding context
89
// Lower-cased names of the tools treated as unambiguously mutating.
// Bash/execute commands can be read-only, so they are left out and only
// clearly mutating tools are guarded on.
const WRITE_TOOL_NAMES = new Set();
for (const toolName of Array.from(WRITE_TOOLS)) {
    const lowered = toolName.toLowerCase();
    const mayBeReadOnly = lowered.includes('bash') || lowered.startsWith('execute');
    if (!mayBeReadOnly) {
        WRITE_TOOL_NAMES.add(lowered);
    }
}
88
93
  export class InteractiveShell {
89
94
  agent = null;
90
95
  profile;
@@ -119,6 +124,7 @@ export class InteractiveShell {
119
124
  uiUpdates;
120
125
  _fileChangeTracker = new FileChangeTracker(); // Reserved for future file tracking features
121
126
  alphaZeroMetrics; // Alpha Zero 2 performance tracking
127
+ flowOrchestrator = new FlowOrchestrator();
122
128
  maxNetworkRetries = 2;
123
129
  statusSubscription = null;
124
130
  followUpQueue = [];
@@ -157,6 +163,7 @@ export class InteractiveShell {
157
163
  // AlphaZero learning tracking
158
164
  currentTaskType = 'general';
159
165
  currentToolCalls = [];
166
+ toolsUsedThisRun = [];
160
167
  lastUserQuery = '';
161
168
  lastAssistantResponse = null;
162
169
  responseRendered = false;
@@ -2266,8 +2273,13 @@ export class InteractiveShell {
2266
2273
  await this.processSlashCommand(trimmed);
2267
2274
  return;
2268
2275
  }
2269
- // Check for continuous/infinite loop commands
2270
- if (this.isContinuousCommand(trimmed)) {
2276
+ // Check for continuous/infinite loop commands or auto-escalation to completion mode
2277
+ const explicitContinuous = this.isContinuousCommand(trimmed);
2278
+ const autoContinuous = this.shouldAutoRunToCompletion(trimmed);
2279
+ if (explicitContinuous || autoContinuous) {
2280
+ if (autoContinuous && !explicitContinuous) {
2281
+ display.showSystemMessage('⚡ Actionable request detected; running continuously until complete (Ctrl+C to stop).');
2282
+ }
2271
2283
  await this.processContinuousRequest(trimmed);
2272
2284
  this.syncRendererInput();
2273
2285
  return;
@@ -2289,6 +2301,90 @@ export class InteractiveShell {
2289
2301
  ];
2290
2302
  return patterns.some(pattern => pattern.test(lower));
2291
2303
  }
2304
+ /**
2305
+ * Detects actionable requests that should auto-run until completion even without explicit cues.
2306
+ * Filters out obvious questions and informational prompts to avoid over-triggering.
2307
+ */
2308
+ shouldAutoRunToCompletion(input) {
2309
+ const normalized = input.trim().toLowerCase();
2310
+ if (!normalized) {
2311
+ return false;
2312
+ }
2313
+ const isQuestion = /\?$/.test(normalized);
2314
+ const startsWithInfoWord = /^(what|why|how|when|where|who|which|can|could|would|should|is|are|do|does|did)\b/.test(normalized);
2315
+ // Multi-step maintenance/cleanup intents (order-agnostic to catch "junk to remove?" cases)
2316
+ const maintenancePatterns = [
2317
+ /\b(clean\s*up|cleanup|tidy|prune|trim|housekeep|housekeeping|sanitize)\b/,
2318
+ /\b(junk|unused|dead|legacy|obsolete|temp|tmp|artifact|artifacts|cache|trash)\b.*\b(remove|delete|strip|drop|purge|clear|clean)\b/,
2319
+ /\b(remove|delete|strip|drop|purge|clear|clean)\b.*\b(junk|unused|dead|legacy|obsolete|temp|tmp|artifact|artifacts|cache|trash)\b/,
2320
+ ];
2321
+ const strongMaintenanceIntent = maintenancePatterns.some((pattern) => pattern.test(normalized));
2322
+ // General action intent in a code context
2323
+ const actionVerb = /\b(fix|resolve|address|refactor|implement|upgrade|migrate|optimi[sz]e|modernize|stabilize|harden|ship|complete|finish|clean|remove|delete|prune|tidy|audit|cleanup)\b/;
2324
+ const codeContext = /\b(repo|codebase|project|app|service|package|module|component|workspace|cli|tests?|source|files?|artifacts?)\b/;
2325
+ const strongActionIntent = actionVerb.test(normalized) && codeContext.test(normalized) && normalized.length > 20;
2326
+ // Allow maintenance/refactor tasks to auto-run even if phrased as questions ("got any junk to remove?")
2327
+ if (strongMaintenanceIntent || strongActionIntent) {
2328
+ // But still avoid purely informational questions like "what is this repo?"
2329
+ if (startsWithInfoWord && !strongMaintenanceIntent) {
2330
+ return false;
2331
+ }
2332
+ return true;
2333
+ }
2334
+ return false;
2335
+ }
2336
+ /**
2337
+ * Decide whether to automatically continue execution when the model stops after a plan/summary
2338
+ * without taking actions. This keeps flows moving toward completion instead of stalling on planning.
2339
+ */
2340
+ shouldAutoEscalateToExecution(request, response, toolsUsed) {
2341
+ // Only auto-escalate when the original request itself was action-oriented
2342
+ if (!this.shouldAutoRunToCompletion(request)) {
2343
+ return false;
2344
+ }
2345
+ if (!response || response.includes('TASK_FULLY_COMPLETE')) {
2346
+ return false;
2347
+ }
2348
+ const lower = response.toLowerCase();
2349
+ // Skip if the model explicitly says it is blocked
2350
+ const blockerPatterns = [
2351
+ /\bneed(s)?\s+(more\s+)?(info|information|details|clarification)\b/,
2352
+ /\bmissing\s+(api\s*key|credentials?|access|permission|token)\b/,
2353
+ /\b(no|not)\s+authorized\b/,
2354
+ /\bcannot\s+(proceed|continue|start|run)\b/,
2355
+ /\bblocked\b/,
2356
+ ];
2357
+ if (blockerPatterns.some((pattern) => pattern.test(lower))) {
2358
+ return false;
2359
+ }
2360
+ const mutatingToolUsed = toolsUsed.some((tool) => WRITE_TOOL_NAMES.has(tool.toLowerCase()));
2361
+ const planOnly = this.flowOrchestrator.isPlanOnlyResponse(response);
2362
+ const lowActionDensity = response.split(/\s+/).length < 80;
2363
+ // If it's clearly a plan-only response, continue regardless of prior tool usage
2364
+ if (planOnly) {
2365
+ return true;
2366
+ }
2367
+ // Thin responses without any mutating work should keep pushing toward completion
2368
+ if (!mutatingToolUsed && lowActionDensity) {
2369
+ return true;
2370
+ }
2371
+ return false;
2372
+ }
2373
+ /**
2374
+ * Build a follow-through request when the model stops after planning instead of executing.
2375
+ */
2376
+ maybeAutoFollowThrough(request, response, toolsUsed) {
2377
+ if (!this.shouldAutoEscalateToExecution(request, response, toolsUsed)) {
2378
+ return null;
2379
+ }
2380
+ const prompt = `${request.trim()}
2381
+
2382
+ Execute the plan you outlined. Use the available tools (bash, edits, git) to take the next actions and keep going until the request is fully accomplished. Do not restate the plan—perform the steps and report progress. Reply with TASK_FULLY_COMPLETE only when the work is truly done.`;
2383
+ return {
2384
+ prompt,
2385
+ reason: 'Plan-only or low-action response detected; auto-continuing to complete the task.',
2386
+ };
2387
+ }
2292
2388
  isExitCommand(input) {
2293
2389
  const lower = input.trim().toLowerCase();
2294
2390
  return (lower === 'exit' ||
@@ -5751,6 +5847,8 @@ export class InteractiveShell {
5751
5847
  if (!agent) {
5752
5848
  return;
5753
5849
  }
5850
+ this.toolsUsedThisRun = [];
5851
+ this.currentToolCalls = [];
5754
5852
  this.runtimeSession.toolRuntime.clearDiffSnapshots?.();
5755
5853
  if (this.suppressNextNetworkReset) {
5756
5854
  this.suppressNextNetworkReset = false;
@@ -5785,6 +5883,7 @@ export class InteractiveShell {
5785
5883
  this.setProcessingStatus();
5786
5884
  this.beginAiRuntime();
5787
5885
  let responseText = '';
5886
+ let autoFollowThrough = null;
5788
5887
  try {
5789
5888
  // Start streaming - no header needed, the input area already provides context
5790
5889
  this.startStreamingHeartbeat('Streaming response');
@@ -5800,13 +5899,14 @@ export class InteractiveShell {
5800
5899
  display.showWarning('The provider returned an empty response. Check your API key/provider selection or retry the prompt.');
5801
5900
  }
5802
5901
  // AlphaZero: Extract and track tool calls from response
5803
- const toolsUsed = this.extractToolsFromResponse(responseText);
5902
+ const toolsUsed = this.getExecutedTools(responseText);
5804
5903
  this.currentToolCalls = toolsUsed.map(name => ({
5805
5904
  name,
5806
5905
  arguments: {},
5807
5906
  success: true, // Assume success if we got here
5808
5907
  duration: 0,
5809
5908
  }));
5909
+ autoFollowThrough = this.maybeAutoFollowThrough(request, responseText, toolsUsed);
5810
5910
  // AlphaZero: Check for failure in response
5811
5911
  const failure = detectFailure(responseText, {
5812
5912
  toolCalls: this.currentToolCalls,
@@ -5865,6 +5965,11 @@ export class InteractiveShell {
5865
5965
  this.updateToolUsageMeta(this.uiAdapter.getToolUsageSummary({ plain: true }));
5866
5966
  this.setIdleStatus();
5867
5967
  this.updateStatusMessage(null);
5968
+ if (autoFollowThrough) {
5969
+ display.showSystemMessage(`🔁 Auto-follow-through: ${autoFollowThrough.reason}`);
5970
+ this.enqueueFollowUpAction({ type: 'continuous', text: autoFollowThrough.prompt });
5971
+ }
5972
+ this.toolsUsedThisRun = [];
5868
5973
  queueMicrotask(() => this.uiUpdates.setMode('idle'));
5869
5974
  // CRITICAL: Ensure readline prompt is active for user input
5870
5975
  // Erosolar-CLI style: New prompt naturally appears at bottom
@@ -5902,6 +6007,8 @@ export class InteractiveShell {
5902
6007
  if (!agent) {
5903
6008
  return;
5904
6009
  }
6010
+ this.toolsUsedThisRun = [];
6011
+ this.currentToolCalls = [];
5905
6012
  this.lastUserQuery = initialRequest;
5906
6013
  this.clearToolUsageMeta();
5907
6014
  this.isProcessing = true;
@@ -5930,13 +6037,8 @@ export class InteractiveShell {
5930
6037
  this.beginAiRuntime();
5931
6038
  // No streaming header - just start streaming directly
5932
6039
  this.startStreamingHeartbeat('Streaming');
6040
+ this.flowOrchestrator.start(initialRequest);
5933
6041
  let iteration = 0;
5934
- let lastResponse = '';
5935
- let consecutiveNoProgress = 0;
5936
- const MAX_NO_PROGRESS = 5; // Increased to allow more attempts before giving up
5937
- let pendingVerification = false;
5938
- let verificationAttempts = 0;
5939
- const MAX_VERIFICATION_ATTEMPTS = 2;
5940
6042
  try {
5941
6043
  // Enhance initial prompt with git context for self-improvement tasks
5942
6044
  let currentPrompt = initialRequest;
@@ -5954,6 +6056,7 @@ When truly finished with ALL tasks, explicitly state "TASK_FULLY_COMPLETE".`;
5954
6056
  }
5955
6057
  while (iteration < MAX_ITERATIONS) {
5956
6058
  iteration++;
6059
+ this.toolsUsedThisRun = [];
5957
6060
  display.showSystemMessage(`\n📍 Iteration ${iteration}/${MAX_ITERATIONS}`);
5958
6061
  this.updateStatusMessage(`Working on iteration ${iteration}...`);
5959
6062
  try {
@@ -5970,104 +6073,52 @@ When truly finished with ALL tasks, explicitly state "TASK_FULLY_COMPLETE".`;
5970
6073
  this.alphaZeroMetrics.recordMessage(elapsedMs);
5971
6074
  if (!response?.trim()) {
5972
6075
  display.showWarning('Model returned an empty response. Retrying this iteration...');
5973
- consecutiveNoProgress++;
5974
6076
  currentPrompt = `${initialRequest}
5975
6077
 
5976
6078
  The previous reply was empty. Resume the task now: take the next action, call the necessary tools, and report progress.`;
5977
6079
  continue;
5978
6080
  }
5979
6081
  // Extract tools used from the response (look for tool call patterns)
5980
- const toolsUsed = this.extractToolsFromResponse(response);
5981
- completionDetector.recordToolCall.bind(completionDetector);
6082
+ const toolsUsed = this.getExecutedTools(response);
5982
6083
  toolsUsed.forEach(tool => completionDetector.recordToolCall(tool, true, true));
5983
6084
  // Use intelligent completion detection
5984
6085
  const completionAnalysis = completionDetector.analyzeCompletion(response, toolsUsed);
5985
6086
  display.showSystemMessage(`📈 Completion confidence: ${(completionAnalysis.confidence * 100).toFixed(0)}%`);
5986
- // Check for explicit TASK_FULLY_COMPLETE marker (highest priority)
5987
- // BUT: Don't terminate if the response also indicates work is incomplete
5988
- if (response.includes('TASK_FULLY_COMPLETE')) {
5989
- const hasContradiction = this.responseIndicatesIncompleteWork(response);
5990
- if (hasContradiction) {
5991
- display.showSystemMessage(`\n⚠️ TASK_FULLY_COMPLETE detected but response indicates incomplete work. Continuing...`);
5992
- // Override the completion signal - the AI is contradicting itself
5993
- currentPrompt = `You marked the task as TASK_FULLY_COMPLETE but also indicated that work is still pending or not integrated. Please clarify:
5994
-
5995
- 1. Is ALL the originally requested work actually complete and functional?
5996
- 2. If there are parts that are "ready but not integrated" or "implemented but not connected", those are NOT complete.
5997
- 3. Only say TASK_FULLY_COMPLETE when the user's original request is 100% fulfilled.
5998
-
5999
- What remains to be done? Continue with the next step.`;
6000
- await new Promise(resolve => setTimeout(resolve, 500));
6001
- continue;
6002
- }
6003
- display.showSystemMessage(`\n✅ Task explicitly marked complete after ${iteration} iteration(s).`);
6087
+ const decision = this.flowOrchestrator.decide({
6088
+ iteration,
6089
+ response,
6090
+ toolsUsed,
6091
+ completionAnalysis,
6092
+ verificationConfirmed: this.flowOrchestrator.isVerificationPending()
6093
+ ? completionDetector.isVerificationConfirmed(response)
6094
+ : false,
6095
+ });
6096
+ if (decision.type === 'stop') {
6097
+ display.showSystemMessage(decision.message);
6004
6098
  break;
6005
6099
  }
6006
- // High confidence completion without verification needed
6007
- if (completionAnalysis.isComplete && completionAnalysis.confidence >= 0.85) {
6008
- display.showSystemMessage(`\n✅ Task completed with high confidence after ${iteration} iteration(s).`);
6009
- display.showSystemMessage(` Reason: ${completionAnalysis.reason}`);
6100
+ if (decision.type === 'stagnation-stop') {
6101
+ display.showWarning(decision.message);
6010
6102
  break;
6011
6103
  }
6012
- // Medium confidence - run verification round
6013
- if (completionAnalysis.shouldVerify && completionAnalysis.verificationPrompt && !pendingVerification) {
6014
- if (verificationAttempts < MAX_VERIFICATION_ATTEMPTS) {
6015
- display.showSystemMessage(`\n🔍 Running verification round (confidence: ${(completionAnalysis.confidence * 100).toFixed(0)}%)...`);
6016
- pendingVerification = true;
6017
- verificationAttempts++;
6018
- currentPrompt = completionAnalysis.verificationPrompt;
6019
- await new Promise(resolve => setTimeout(resolve, 500));
6020
- continue;
6021
- }
6104
+ if (decision.type === 'execute-plan') {
6105
+ display.showSystemMessage(decision.message);
6106
+ currentPrompt = decision.prompt;
6107
+ await new Promise(resolve => setTimeout(resolve, 500));
6108
+ continue;
6022
6109
  }
6023
- // If we were in verification mode, check the result
6024
- if (pendingVerification) {
6025
- pendingVerification = false;
6026
- if (completionDetector.isVerificationConfirmed(response)) {
6027
- display.showSystemMessage(`\n✅ Task completion verified by AI after ${iteration} iteration(s).`);
6028
- break;
6029
- }
6030
- else {
6031
- display.showSystemMessage(`🔄 Verification indicates more work needed. Continuing...`);
6032
- }
6110
+ if (decision.type === 'verify') {
6111
+ display.showSystemMessage(decision.message);
6112
+ currentPrompt = decision.prompt;
6113
+ await new Promise(resolve => setTimeout(resolve, 500));
6114
+ continue;
6033
6115
  }
6034
- // Check for no progress (same response multiple times)
6035
- const responseChanged = response !== lastResponse;
6036
- if (!responseChanged) {
6037
- consecutiveNoProgress++;
6038
- if (consecutiveNoProgress >= MAX_NO_PROGRESS) {
6039
- // Before giving up, ask one final verification
6040
- if (verificationAttempts < MAX_VERIFICATION_ATTEMPTS) {
6041
- display.showSystemMessage(`\n⚠️ No progress for ${MAX_NO_PROGRESS} iterations. Running final verification...`);
6042
- currentPrompt = `I notice you may be stuck or finished. Please confirm:
6043
-
6044
- 1. Is the original task FULLY complete?
6045
- 2. If yes, respond with exactly: "TASK_FULLY_COMPLETE"
6046
- 3. If no, what specific action should be taken next?
6047
-
6048
- Be explicit about the current state.`;
6049
- verificationAttempts++;
6050
- consecutiveNoProgress = 0;
6051
- await new Promise(resolve => setTimeout(resolve, 500));
6052
- continue;
6053
- }
6054
- display.showSystemMessage(`\n⚠️ No progress detected for ${MAX_NO_PROGRESS} iterations and verification exhausted. Stopping.`);
6055
- break;
6116
+ if (decision.type === 'continue') {
6117
+ if (decision.message) {
6118
+ display.showSystemMessage(decision.message);
6056
6119
  }
6120
+ currentPrompt = decision.prompt;
6057
6121
  }
6058
- else {
6059
- consecutiveNoProgress = 0;
6060
- }
6061
- lastResponse = response;
6062
- // Prepare next iteration prompt - explicitly encourage progress reporting
6063
- currentPrompt = `Continue with the next step. Remember:
6064
- - Use bash to run git commands (git status, git add, git commit, git push)
6065
- - Commit your changes with descriptive messages after completing improvements
6066
- - Push changes when a logical milestone is reached
6067
- - If all tasks are complete, respond with exactly: "TASK_FULLY_COMPLETE"
6068
- - If there are errors or blockers, explain what's preventing progress
6069
-
6070
- What's the next action?`;
6071
6122
  // Small delay between iterations to prevent rate limiting
6072
6123
  await new Promise(resolve => setTimeout(resolve, 500));
6073
6124
  }
@@ -6110,6 +6161,7 @@ What's the next action?`;
6110
6161
  this.updateToolUsageMeta(this.uiAdapter.getToolUsageSummary({ plain: true }));
6111
6162
  this.setIdleStatus();
6112
6163
  this.updateStatusMessage(null);
6164
+ this.toolsUsedThisRun = [];
6113
6165
  queueMicrotask(() => this.uiUpdates.setMode('idle'));
6114
6166
  // CRITICAL: Ensure readline prompt is active for user input
6115
6167
  // Erosolar-CLI style: New prompt naturally appears at bottom
@@ -6120,94 +6172,24 @@ What's the next action?`;
6120
6172
  }
6121
6173
  }
6122
6174
  /**
6123
- * Check if a response contains indicators that work is actually incomplete,
6124
- * even if it also contains TASK_FULLY_COMPLETE marker.
6125
- * This catches contradictory responses where the AI says "done" but also "not integrated yet".
6175
+ * Resolve executed tools for the current turn. Prefer the actual tool
6176
+ * execution callbacks; fall back to parsing the response when unavailable.
6126
6177
  */
6127
- responseIndicatesIncompleteWork(response) {
6128
- // Patterns that indicate work isn't actually complete
6129
- // Organized by category for maintainability
6130
- const incompletePatterns = [
6131
- // === INTEGRATION/DEPLOYMENT STATE ===
6132
- // "hasn't been integrated/implemented/connected yet"
6133
- /hasn'?t\s+been\s+(integrated|implemented|connected|deployed|added|completed|tested|verified)\s*(yet|still)?/i,
6134
- // "not yet integrated/implemented" or "not integrated"
6135
- /not\s+(yet\s+)?(integrated|implemented|connected|deployed|functional|working|complete|tested|verified)/i,
6136
- // "ready for integration" = NOT integrated
6137
- /ready\s+(for|to\s+be)\s+(integration|integrated|connected|deployed|testing|review)/i,
6138
- // "needs to be integrated"
6139
- /needs?\s+to\s+be\s+(integrated|connected|deployed|added|hooked|wired|tested|reviewed|merged)/i,
6140
- // Passive voice: "was not performed/completed"
6141
- /was\s+not\s+(performed|completed|implemented|deployed|integrated|tested)/i,
6142
- // "the [X] service hasn't been"
6143
- /the\s+\w+\s+(service|module|component|feature)\s+hasn'?t\s+been/i,
6144
- // === PARTIAL/INCOMPLETE STATE ===
6145
- // "still stores/uses/has" (current bad state persists)
6146
- /still\s+(stores?|uses?|has|contains?|needs?|requires?|missing|lacks?|broken)/i,
6147
- // Partial completion: "partially", "mostly", "almost"
6148
- /\b(partially|mostly|almost|nearly|not\s+fully)\s+(complete|done|finished|implemented|working)/i,
6149
- // Explicit partial: "part of", "some of", "half of"
6150
- /\b(only\s+)?(part|some|half|portion)\s+of\s+(the\s+)?(task|work|feature|implementation)/i,
6151
- // === QUALIFIER WORDS (uncertain completion) ===
6152
- // "should be complete", "appears complete", "theoretically"
6153
- /\b(should|might|may|could|appears?\s+to)\s+be\s+(complete|done|working|functional)/i,
6154
- /\btheoretically\s+(complete|done|working|functional)/i,
6155
- // "assuming", "if everything works"
6156
- /\b(assuming|provided|if)\s+(everything|it|this|that)\s+(works?|is\s+correct)/i,
6157
- // === SELF-CONTRADICTION PHRASES ===
6158
- // "done but...", "complete except...", "finished however..."
6159
- /\b(done|complete|finished)\s+(but|except|however|although|though)/i,
6160
- // "however" followed by incomplete indicator
6161
- /however[,\s].{0,50}?(hasn'?t|not\s+yet|still\s+needs?|pending|remains?|missing|broken|failing)/i,
6162
- // "but" followed by negative state
6163
- /\bbut\s+.{0,30}?(not|hasn'?t|won'?t|can'?t|doesn'?t|isn'?t|wasn'?t)/i,
6164
- // === FUTURE TENSE / DEFERRED WORK ===
6165
- // "will need to", "will require"
6166
- /will\s+(need\s+to|require|have\s+to)\s+(integrate|connect|deploy|complete|implement|test|fix)/i,
6167
- // Deferred: "left as", "deferred", "postponed", "out of scope"
6168
- /\b(left\s+as|deferred|postponed|out\s+of\s+scope|for\s+later|in\s+a\s+future)/i,
6169
- // Time-dependent: "after restart", "takes effect after", "once you"
6170
- /\b(after\s+(restart|reboot|redeploy)|takes?\s+effect\s+after|once\s+you)/i,
6171
- // === REMAINING WORK INDICATORS ===
6172
- // "remaining tasks", "outstanding items"
6173
- /\b(remaining|outstanding|pending|leftover)\s+(tasks?|items?|work|issues?|steps?)/i,
6174
- // "X more to do", "still have to"
6175
- /\b(more\s+to\s+do|still\s+have\s+to|yet\s+to\s+be\s+done)/i,
6176
- // Explicit blockers
6177
- /\b(blocker|blocked\s+by|waiting\s+(for|on)|depends?\s+on)/i,
6178
- // === ERROR/FAILURE STATE ===
6179
- // "failing tests", "build errors"
6180
- /\b(failing|broken|erroring)\s+(tests?|builds?|checks?|validations?)/i,
6181
- // "tests? (are )?(still )?failing"
6182
- /\btests?\s+(are\s+)?(still\s+)?failing/i,
6183
- // "errors? to (address|fix)"
6184
- /\b(errors?|warnings?|issues?)\s+to\s+(address|fix|resolve)/i,
6185
- // "doesn't work", "isn't working", "not working"
6186
- /\b(doesn'?t|isn'?t|not)\s+(work|working|functional|functioning)/i,
6187
- // === MANUAL STEPS REQUIRED ===
6188
- // "you'll need to", "manually run", "requires user"
6189
- /\b(you('ll|\s+will)\s+need\s+to|manually\s+(run|configure|set|update)|requires?\s+user)/i,
6190
- // "run this command", "execute the following"
6191
- /\b(run\s+this|execute\s+the\s+following|apply\s+the\s+migration)/i,
6192
- // === TODO/FIXME IN PROSE ===
6193
- // TODO or FIXME mentioned as remaining work (not in code blocks)
6194
- /\b(todo|fixme|hack|xxx):\s/i,
6195
- // "need to add", "should implement"
6196
- /\b(need\s+to|should|must)\s+(add|implement|create|write|build|fix)\b/i,
6197
- // === SCOPE LIMITATIONS ===
6198
- // "didn't have time", "ran out of time"
6199
- /\b(didn'?t|did\s+not)\s+have\s+(time|chance|opportunity)/i,
6200
- // "beyond scope", "outside scope"
6201
- /\b(beyond|outside)\s+(the\s+)?scope/i,
6202
- // "for now" (temporary state)
6203
- /\b(for\s+now|at\s+this\s+point|currently)\s*.{0,20}?(not|without|lacks?|missing)/i,
6204
- ];
6205
- for (const pattern of incompletePatterns) {
6206
- if (pattern.test(response)) {
6207
- return true;
6178
+ getExecutedTools(responseText) {
6179
+ if (this.toolsUsedThisRun.length) {
6180
+ const seen = new Set();
6181
+ const ordered = [];
6182
+ for (const tool of this.toolsUsedThisRun) {
6183
+ const normalized = tool?.trim();
6184
+ if (!normalized || seen.has(normalized)) {
6185
+ continue;
6186
+ }
6187
+ seen.add(normalized);
6188
+ ordered.push(normalized);
6208
6189
  }
6190
+ return ordered;
6209
6191
  }
6210
- return false;
6192
+ return this.extractToolsFromResponse(responseText);
6211
6193
  }
6212
6194
  /**
6213
6195
  * Extract tool names from a response by looking for tool call patterns
@@ -6842,6 +6824,7 @@ Return ONLY JSON array:
6842
6824
  onToolExecution: (toolName, isStart, args) => {
6843
6825
  // Update activity status to show what tool is being executed
6844
6826
  if (isStart) {
6827
+ this.toolsUsedThisRun.push(toolName);
6845
6828
  // Show more specific activity for long-running tools
6846
6829
  let activity = `Running ${toolName}`;
6847
6830
  if (toolName === 'execute_bash' && args?.['command']) {