erosolar-cli 2.1.189 → 2.1.191

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/dist/core/agent.d.ts +11 -11
  2. package/dist/core/agent.d.ts.map +1 -1
  3. package/dist/core/agent.js +84 -31
  4. package/dist/core/agent.js.map +1 -1
  5. package/dist/core/schemaValidator.d.ts +0 -5
  6. package/dist/core/schemaValidator.d.ts.map +1 -1
  7. package/dist/core/schemaValidator.js +0 -65
  8. package/dist/core/schemaValidator.js.map +1 -1
  9. package/dist/core/toolRuntime.d.ts +0 -4
  10. package/dist/core/toolRuntime.d.ts.map +1 -1
  11. package/dist/core/toolRuntime.js +64 -40
  12. package/dist/core/toolRuntime.js.map +1 -1
  13. package/dist/runtime/flowOrchestrator.d.ts +72 -0
  14. package/dist/runtime/flowOrchestrator.d.ts.map +1 -0
  15. package/dist/runtime/flowOrchestrator.js +486 -0
  16. package/dist/runtime/flowOrchestrator.js.map +1 -0
  17. package/dist/shell/interactiveShell.d.ts +4 -19
  18. package/dist/shell/interactiveShell.d.ts.map +1 -1
  19. package/dist/shell/interactiveShell.js +218 -397
  20. package/dist/shell/interactiveShell.js.map +1 -1
  21. package/dist/shell/shellApp.js +1 -8
  22. package/dist/shell/shellApp.js.map +1 -1
  23. package/dist/shell/taskCompletionDetector.d.ts.map +1 -0
  24. package/dist/{core → shell}/taskCompletionDetector.js +1 -1
  25. package/dist/shell/taskCompletionDetector.js.map +1 -0
  26. package/dist/tools/bashTools.d.ts +1 -0
  27. package/dist/tools/bashTools.d.ts.map +1 -1
  28. package/dist/tools/bashTools.js +47 -3
  29. package/dist/tools/bashTools.js.map +1 -1
  30. package/dist/tools/buildTools.js +1 -1
  31. package/dist/tools/buildTools.js.map +1 -1
  32. package/dist/tools/grepTools.js +4 -6
  33. package/dist/tools/grepTools.js.map +1 -1
  34. package/dist/tools/repoChecksTools.d.ts.map +1 -1
  35. package/dist/tools/repoChecksTools.js +7 -5
  36. package/dist/tools/repoChecksTools.js.map +1 -1
  37. package/dist/ui/ShellUIAdapter.d.ts +2 -3
  38. package/dist/ui/ShellUIAdapter.d.ts.map +1 -1
  39. package/dist/ui/ShellUIAdapter.js +7 -9
  40. package/dist/ui/ShellUIAdapter.js.map +1 -1
  41. package/package.json +1 -1
  42. package/README.md +0 -54
  43. package/dist/contracts/models.schema.json +0 -9
  44. package/dist/core/agentOrchestrator.d.ts +0 -49
  45. package/dist/core/agentOrchestrator.d.ts.map +0 -1
  46. package/dist/core/agentOrchestrator.js +0 -313
  47. package/dist/core/agentOrchestrator.js.map +0 -1
  48. package/dist/core/taskCompletionDetector.d.ts.map +0 -1
  49. package/dist/core/taskCompletionDetector.js.map +0 -1
  50. /package/dist/{core → shell}/taskCompletionDetector.d.ts +0 -0
@@ -4,7 +4,6 @@ import { exec } from 'node:child_process';
4
4
  import { promisify } from 'node:util';
5
5
  import { existsSync, readFileSync, statSync, writeFileSync } from 'node:fs';
6
6
  import { join, resolve } from 'node:path';
7
- import { AgentOrchestrator } from '../core/agentOrchestrator.js';
8
7
  import { display } from '../ui/display.js';
9
8
  import { theme } from '../ui/theme.js';
10
9
  import { getTerminalColumns } from '../ui/layout.js';
@@ -13,11 +12,13 @@ import { ensureSecretForProvider, getSecretDefinitionForProvider, getSecretValue
13
12
  import { saveActiveProfilePreference, saveModelPreference, loadToolSettings, saveToolSettings, clearToolSettings, clearActiveProfilePreference, loadSessionPreferences, saveSessionPreferences, loadFeatureFlags, saveFeatureFlags, toggleFeatureFlag, FEATURE_FLAG_INFO, } from '../core/preferences.js';
14
13
  import { getLearningSummary, getRecentLearning, commitLearning, exportAllLearning, getLearningDir, } from '../core/learningPersistence.js';
15
14
  import { buildEnabledToolSet, evaluateToolPermissions, getToolToggleOptions, } from '../capabilities/toolRegistry.js';
15
+ import { FlowOrchestrator } from '../runtime/flowOrchestrator.js';
16
16
  import { detectApiKeyError } from '../core/errors/apiKeyErrors.js';
17
17
  import { detectPromptBlockError, } from '../core/errors/promptBlockErrors.js';
18
18
  import { detectNetworkError } from '../core/errors/networkErrors.js';
19
19
  import { buildWorkspaceContext } from '../workspace.js';
20
20
  import { buildInteractiveSystemPrompt } from './systemPrompt.js';
21
+ import { getTaskCompletionDetector, resetTaskCompletionDetector, WRITE_TOOLS, } from './taskCompletionDetector.js';
21
22
  import { discoverAllModels, quickCheckProviders, getCachedDiscoveredModels, sortModelsByPriority } from '../core/modelDiscovery.js';
22
23
  import { getModels, getSlashCommands, getProviders } from '../core/agentSchemaLoader.js';
23
24
  import { loadMcpServers } from '../mcp/config.js';
@@ -85,6 +86,10 @@ const CONTEXT_CLEANUP_SYSTEM_PROMPT = `Summarize earlier IDE collaboration so th
85
86
  - Respond in plain Markdown only (no tool or shell calls).`;
86
87
  const MAX_ATTACHMENT_BYTES = 200 * 1024; // 200KB per attachment
87
88
  const MAX_ATTACHMENT_CHARS = 16000; // Guardrail to avoid flooding context
89
+ const WRITE_TOOL_NAMES = new Set(Array.from(WRITE_TOOLS)
90
+ .map((tool) => tool.toLowerCase())
91
+ // Bash/execute commands can be read-only; guard only on clear mutating tools
92
+ .filter((tool) => !tool.includes('bash') && !tool.startsWith('execute')));
88
93
  export class InteractiveShell {
89
94
  agent = null;
90
95
  profile;
@@ -119,6 +124,7 @@ export class InteractiveShell {
119
124
  uiUpdates;
120
125
  _fileChangeTracker = new FileChangeTracker(); // Reserved for future file tracking features
121
126
  alphaZeroMetrics; // Alpha Zero 2 performance tracking
127
+ flowOrchestrator = new FlowOrchestrator();
122
128
  maxNetworkRetries = 2;
123
129
  statusSubscription = null;
124
130
  followUpQueue = [];
@@ -134,7 +140,6 @@ export class InteractiveShell {
134
140
  lastContextWarningLevel = null;
135
141
  sessionPreferences;
136
142
  autosaveEnabled;
137
- orchestrationNoticeShown = false;
138
143
  verificationEnabled = false;
139
144
  criticalApprovalMode = 'auto';
140
145
  editGuardMode = 'display-edits';
@@ -2271,17 +2276,14 @@ export class InteractiveShell {
2271
2276
  // Check for continuous/infinite loop commands or auto-escalation to completion mode
2272
2277
  const explicitContinuous = this.isContinuousCommand(trimmed);
2273
2278
  const autoContinuous = this.shouldAutoRunToCompletion(trimmed);
2274
- if (explicitContinuous) {
2279
+ if (explicitContinuous || autoContinuous) {
2280
+ if (autoContinuous && !explicitContinuous) {
2281
+ display.showSystemMessage('⚡ Actionable request detected; running continuously until complete (Ctrl+C to stop).');
2282
+ }
2275
2283
  await this.processContinuousRequest(trimmed);
2276
2284
  this.syncRendererInput();
2277
2285
  return;
2278
2286
  }
2279
- if (autoContinuous) {
2280
- display.showSystemMessage('⚡ Actionable request detected; orchestrating until complete (Ctrl+C to stop).');
2281
- await this.processRequest(trimmed, { orchestrate: true });
2282
- this.syncRendererInput();
2283
- return;
2284
- }
2285
2287
  // Direct execution for all inputs, including multi-line pastes
2286
2288
  await this.processRequest(trimmed);
2287
2289
  this.syncRendererInput();
@@ -2318,113 +2320,19 @@ export class InteractiveShell {
2318
2320
  ];
2319
2321
  const strongMaintenanceIntent = maintenancePatterns.some((pattern) => pattern.test(normalized));
2320
2322
  // General action intent in a code context
2321
- const actionVerb = /\b(fix|resolve|address|refactor|implement|upgrade|migrate|optimi[sz]e|modernize|stabilize|harden|ship|complete|finish|clean|remove|delete|prune|tidy|audit|cleanup|debug|investigate|triage|scan|check|diagnos(e|is))\b/;
2323
+ const actionVerb = /\b(fix|resolve|address|refactor|implement|upgrade|migrate|optimi[sz]e|modernize|stabilize|harden|ship|complete|finish|clean|remove|delete|prune|tidy|audit|cleanup)\b/;
2322
2324
  const codeContext = /\b(repo|codebase|project|app|service|package|module|component|workspace|cli|tests?|source|files?|artifacts?)\b/;
2323
- const hasActionVerb = actionVerb.test(normalized);
2324
- const hasCodeContext = codeContext.test(normalized);
2325
- // Bug/issue hunt or health-check intents, even when phrased as questions
2326
- const bugSweepIntent = /\b(got\s+any|any|open|known)\s+(bugs?|issues?|defects?)\b/;
2327
- const triageIntent = /\b(find|check|hunt|triage|detect|scan|review|look\s+for)\s+(bugs?|issues?|errors?|failures?)\b/;
2328
- const failingTestsIntent = /\b(failing|broken|red)\s+tests?\b|\btests?\s+(are\s+)?failing\b/;
2329
- const qualitySweepIntent = /\b(audit|health\s+check|stability\s+check|bug\s+scan|issue\s+scan|regression\s+pass)\b/;
2330
- const bugOrHealthIntent = [bugSweepIntent, triageIntent, failingTestsIntent, qualitySweepIntent].some((pattern) => pattern.test(normalized));
2331
- // Score-based intent detection keeps questions like "what is this repo?" out
2332
- let actionScore = 0;
2333
- if (strongMaintenanceIntent)
2334
- actionScore += 2;
2335
- if (bugOrHealthIntent)
2336
- actionScore += 2;
2337
- if (hasActionVerb)
2338
- actionScore += 1;
2339
- if (hasCodeContext)
2340
- actionScore += 1;
2341
- const decisiveAction = actionScore >= 2 || (actionScore === 1 && !isQuestion && normalized.length > 10);
2342
- if (!decisiveAction) {
2343
- return false;
2344
- }
2345
- // Respect informational openers unless intent is clearly actionable
2346
- if (startsWithInfoWord && actionScore < 3 && !bugOrHealthIntent && !strongMaintenanceIntent) {
2347
- return false;
2348
- }
2349
- return true;
2350
- }
2351
- shouldContinueOrchestrating(originalRequest, result) {
2352
- if (!result) {
2353
- return { shouldContinue: false, reason: null };
2354
- }
2355
- const actionable = this.shouldAutoRunToCompletion(originalRequest);
2356
- if (!actionable) {
2357
- const incomplete = result.exitReason !== 'complete';
2358
- return { shouldContinue: incomplete, reason: incomplete ? result.exitReason : null };
2359
- }
2360
- if (result.exitReason !== 'complete') {
2361
- return { shouldContinue: true, reason: result.exitReason };
2362
- }
2363
- const executedTools = result.passes.some(pass => (pass.toolsUsed?.length ?? 0) > 0);
2364
- const lastPass = result.passes[result.passes.length - 1];
2365
- const lastPlanOnly = Boolean(lastPass?.planOnly) && !lastPass?.tookAction && (!lastPass?.toolsUsed?.length);
2366
- const lastEmpty = !lastPass?.response?.trim();
2367
- if (!executedTools) {
2368
- return { shouldContinue: true, reason: 'no-action' };
2369
- }
2370
- if (lastPlanOnly) {
2371
- return { shouldContinue: true, reason: 'plan-only' };
2372
- }
2373
- if (lastEmpty) {
2374
- return { shouldContinue: true, reason: 'empty-response' };
2375
- }
2376
- return { shouldContinue: false, reason: null };
2377
- }
2378
- describeContinuationReason(reason) {
2379
- switch (reason) {
2380
- case 'no-action':
2381
- return 'No tools or concrete actions were executed; continuing until real work is done.';
2382
- case 'plan-only':
2383
- return 'Last pass was just planning; executing the next concrete step now.';
2384
- case 'empty-response':
2385
- case 'empty':
2386
- return 'Previous pass returned nothing; resuming with a concrete action.';
2387
- case 'stalled':
2388
- return 'Previous orchestration stalled; forcing continuation.';
2389
- case 'max-passes':
2390
- return 'Reached pass limit; extending the run to finish the task.';
2391
- default:
2392
- return 'Continuing orchestration until the task is actually finished.';
2325
+ const strongActionIntent = actionVerb.test(normalized) && codeContext.test(normalized) && normalized.length > 20;
2326
+ // Allow maintenance/refactor tasks to auto-run even if phrased as questions ("got any junk to remove?")
2327
+ if (strongMaintenanceIntent || strongActionIntent) {
2328
+ // But still avoid purely informational questions like "what is this repo?"
2329
+ if (startsWithInfoWord && !strongMaintenanceIntent) {
2330
+ return false;
2331
+ }
2332
+ return true;
2393
2333
  }
2334
+ return false;
2394
2335
  }
2395
- buildForcedContinuationPrompt(originalRequest, lastResult, continuationReason) {
2396
- const lastPass = lastResult.passes[lastResult.passes.length - 1];
2397
- const rawResponse = lastPass?.response?.trim() ?? '';
2398
- const truncatedResponse = rawResponse ? rawResponse.slice(0, 1200) : '';
2399
- const responseNote = rawResponse && rawResponse.length > truncatedResponse.length
2400
- ? '\n\n[Last response truncated]'
2401
- : '';
2402
- const usedTools = lastResult.passes.flatMap(pass => pass.toolsUsed ?? []);
2403
- const hasToolUsage = usedTools.length > 0;
2404
- const toolsUsed = hasToolUsage
2405
- ? `Tools used so far: ${usedTools.join(', ')}.`
2406
- : 'No tools have been used yet - start with a quick workspace scan (list_files + glob/grep/search) and then take concrete actions now.';
2407
- const responseSection = truncatedResponse
2408
- ? `Last response snapshot:\n${truncatedResponse}${responseNote}\n`
2409
- : '';
2410
- const exitReason = continuationReason ?? lastResult.exitReason ?? 'incomplete';
2411
- const reasonLine = this.describeContinuationReason(exitReason);
2412
- const missingActionLine = hasToolUsage
2413
- ? ''
2414
- : 'No tool calls or file edits have been made yet. Start with list_files + glob/grep/search to sweep the repo, then run a command (read/search/test/edit) and show the output before summarizing.';
2415
- return `${originalRequest.trim()}
2416
-
2417
- The previous orchestration stopped early (reason: ${exitReason}). Continue working until the task is truly finished. Use tools, run checks, and only declare completion when nothing remains.
2418
-
2419
- ${reasonLine}
2420
- ${toolsUsed}
2421
- ${responseSection}
2422
- ${missingActionLine ? `${missingActionLine}\n` : ''}Resume with the next concrete action now.`;
2423
- }
2424
- /**
2425
- * Decide whether to automatically continue execution when the model stops after a plan/summary
2426
- * without taking actions. This keeps flows moving toward completion instead of stalling on planning.
2427
- */
2428
2336
  isExitCommand(input) {
2429
2337
  const lower = input.trim().toLowerCase();
2430
2338
  return (lower === 'exit' ||
@@ -5872,20 +5780,22 @@ ${missingActionLine ? `${missingActionLine}\n` : ''}Resume with the next concret
5872
5780
  this.clearInlinePanel();
5873
5781
  this.syncRendererInput();
5874
5782
  }
5875
- async processRequest(request, options) {
5783
+ async runFlowControlledTask(initialRequest, options) {
5784
+ const { mode } = options;
5785
+ const followUpType = mode === 'continuous' ? 'continuous' : 'request';
5876
5786
  if (this.isProcessing) {
5877
- this.enqueueFollowUpAction({ type: 'request', text: request });
5878
- return;
5787
+ this.enqueueFollowUpAction({ type: followUpType, text: initialRequest });
5788
+ return null;
5879
5789
  }
5880
5790
  if (!this.agent && !this.rebuildAgent()) {
5881
5791
  display.showWarning('Configure an API key via /secrets before sending requests.');
5882
- return;
5792
+ return null;
5883
5793
  }
5884
5794
  this.inlinePanelScopeActive = false;
5885
5795
  this.clearInlinePanel();
5886
5796
  const agent = this.agent;
5887
5797
  if (!agent) {
5888
- return;
5798
+ return null;
5889
5799
  }
5890
5800
  this.toolsUsedThisRun = [];
5891
5801
  this.currentToolCalls = [];
@@ -5896,166 +5806,169 @@ ${missingActionLine ? `${missingActionLine}\n` : ''}Resume with the next concret
5896
5806
  else {
5897
5807
  this.resetNetworkRetryState();
5898
5808
  }
5899
- // Reset per-request render tracking
5900
5809
  this.responseRendered = false;
5901
- if (this.shouldLogPrompt(request)) {
5902
- this.logUserPrompt(request);
5810
+ if (this.shouldLogPrompt(initialRequest)) {
5811
+ this.logUserPrompt(initialRequest);
5903
5812
  }
5904
5813
  this.isProcessing = true;
5905
5814
  this.uiUpdates.setMode('processing');
5906
- this.streamingTokenCount = 0; // Reset token counter for new request
5815
+ this.streamingTokenCount = 0;
5907
5816
  this.terminalInput.setStreaming(true);
5908
- // Keep the persistent input/control bar active as we transition into streaming.
5909
5817
  this.syncRendererInput();
5910
5818
  this.renderer?.render();
5911
- const requestStartTime = Date.now(); // Alpha Zero 2 timing
5912
- // Clear previous parallel agents and start fresh for new request
5819
+ const overallStartTime = Date.now();
5913
5820
  const parallelManager = getParallelAgentManager();
5914
5821
  parallelManager.clear();
5915
5822
  parallelManager.startBatch();
5916
- // AlphaZero: Track task for learning
5917
- this.lastUserQuery = request;
5918
- this.currentTaskType = classifyTaskType(request);
5823
+ this.lastUserQuery = initialRequest;
5824
+ this.currentTaskType = classifyTaskType(initialRequest);
5919
5825
  this.currentToolCalls = [];
5920
5826
  this.clearToolUsageMeta();
5921
5827
  this.renderer?.setActivity('Starting...');
5922
- this.uiAdapter.startProcessing('Working on your request');
5828
+ this.uiAdapter.startProcessing(mode === 'continuous' ? 'Continuous execution mode' : 'Working on your request');
5923
5829
  this.setProcessingStatus();
5924
5830
  this.beginAiRuntime();
5925
- let responseText = '';
5926
- let orchestratorResult = null;
5927
- const orchestrate = options?.orchestrate ?? true;
5928
- const orchestratorPassLimit = options?.maxPasses ?? 10;
5929
- if (orchestrate && !this.orchestrationNoticeShown) {
5930
- display.showSystemMessage('⚡ Orchestrating every prompt until completion. Press Ctrl+C to stop a run early.');
5931
- this.orchestrationNoticeShown = true;
5831
+ this.startStreamingHeartbeat(mode === 'continuous' ? 'Streaming' : 'Streaming response');
5832
+ const completionDetector = getTaskCompletionDetector();
5833
+ completionDetector.reset();
5834
+ this.flowOrchestrator.start(initialRequest);
5835
+ this.updateStatusMessage('Orchestrating request...');
5836
+ if (mode === 'continuous') {
5837
+ display.showSystemMessage('Continuous mode active. Ctrl+C to stop.');
5838
+ }
5839
+ else {
5840
+ display.showSystemMessage('Flow orchestrator engaged; running until the request is satisfied.');
5932
5841
  }
5842
+ const informationalGuardrail = this.flowOrchestrator.isInformationalRequest(initialRequest)
5843
+ ? 'This is an informational request. Keep actions lightweight: avoid running full test/build/lint suites unless explicitly required to answer the question or to confirm a change you made. Prefer quick reads/searches and summarize findings instead of long validations.'
5844
+ : '';
5845
+ const applyInformationalGuardrail = (prompt) => {
5846
+ if (!informationalGuardrail) {
5847
+ return prompt;
5848
+ }
5849
+ return prompt.includes(informationalGuardrail)
5850
+ ? prompt
5851
+ : `${prompt}\n\n${informationalGuardrail}`;
5852
+ };
5853
+ let currentPrompt = applyInformationalGuardrail(initialRequest);
5854
+ if (this.isSelfImprovementRequest(initialRequest)) {
5855
+ currentPrompt = applyInformationalGuardrail(`${initialRequest}
5856
+
5857
+ IMPORTANT: You have full git access. After making improvements:
5858
+ 1. Use bash to run: git status (see changes)
5859
+ 2. Use bash to run: git add -A (stage changes)
5860
+ 3. Use bash to run: git commit -m "descriptive message" (commit)
5861
+ 4. Use bash to run: git push (when milestone reached)
5862
+
5863
+ Commit frequently with descriptive messages. Push when ready.
5864
+ When truly finished with ALL tasks, explicitly state "TASK_FULLY_COMPLETE".`);
5865
+ }
5866
+ let iteration = 0;
5867
+ let lastResponseText = '';
5868
+ let lastToolsUsed = [];
5869
+ let result = null;
5933
5870
  try {
5934
- // Start streaming - no header needed, the input area already provides context
5935
- this.startStreamingHeartbeat('Streaming response');
5936
- if (orchestrate) {
5937
- const orchestrator = new AgentOrchestrator(agent);
5938
- orchestratorResult = await orchestrator.runToCompletion(request, {
5939
- streaming: true,
5940
- maxPasses: orchestratorPassLimit,
5941
- maxStagnantPasses: 3,
5942
- verificationMode: 'auto',
5943
- enforceActions: true,
5944
- });
5945
- const MAX_CONTINUATIONS = 2;
5946
- let continuationRuns = 0;
5947
- while (orchestratorResult && continuationRuns < MAX_CONTINUATIONS) {
5948
- const continuationDecision = this.shouldContinueOrchestrating(request, orchestratorResult);
5949
- if (!continuationDecision.shouldContinue) {
5950
- break;
5871
+ while (true) {
5872
+ iteration++;
5873
+ this.toolsUsedThisRun = [];
5874
+ this.updateStatusMessage('Orchestrating request...');
5875
+ try {
5876
+ display.showThinking('Responding...');
5877
+ this.refreshStatusLine(true);
5878
+ const response = await agent.send(currentPrompt, true);
5879
+ lastResponseText = response ?? '';
5880
+ this.finishStreamingFormatter(undefined, { refreshPrompt: false, mode: 'complete' });
5881
+ await this.awaitPendingCleanup();
5882
+ this.captureHistorySnapshot();
5883
+ this.autosaveIfEnabled();
5884
+ const elapsedMs = Date.now() - overallStartTime;
5885
+ this.alphaZeroMetrics.recordMessage(elapsedMs);
5886
+ if (!response?.trim()) {
5887
+ display.showWarning('Model returned an empty response. Continuing orchestration...');
5888
+ currentPrompt = applyInformationalGuardrail(`${initialRequest}
5889
+
5890
+ The previous reply was empty. Resume the task now: take the next action, call the necessary tools, and report progress.`);
5891
+ continue;
5951
5892
  }
5952
- const reasonMessage = this.describeContinuationReason(continuationDecision.reason);
5953
- display.showSystemMessage(`🔁 ${reasonMessage}`);
5954
- const continuationPrompt = this.buildForcedContinuationPrompt(request, orchestratorResult, continuationDecision.reason ?? undefined);
5955
- const continuationResult = await orchestrator.runToCompletion(continuationPrompt, {
5956
- streaming: true,
5957
- maxPasses: Math.max(orchestratorPassLimit, 12),
5958
- maxStagnantPasses: 3,
5959
- verificationMode: 'auto',
5960
- enforceActions: true,
5893
+ const toolsUsed = this.getExecutedTools(response);
5894
+ lastToolsUsed = toolsUsed;
5895
+ toolsUsed.forEach(tool => completionDetector.recordToolCall(tool, true, true));
5896
+ const completionAnalysis = completionDetector.analyzeCompletion(response, toolsUsed);
5897
+ display.showSystemMessage(`📈 Completion confidence: ${(completionAnalysis.confidence * 100).toFixed(0)}%`);
5898
+ const decision = this.flowOrchestrator.decide({
5899
+ iteration,
5900
+ response,
5901
+ toolsUsed,
5902
+ completionAnalysis,
5903
+ verificationConfirmed: this.flowOrchestrator.isVerificationPending()
5904
+ ? completionDetector.isVerificationConfirmed(response)
5905
+ : false,
5961
5906
  });
5962
- orchestratorResult = {
5963
- finalResponse: continuationResult.finalResponse,
5964
- passes: [...orchestratorResult.passes, ...continuationResult.passes],
5965
- exitReason: continuationResult.exitReason,
5966
- };
5967
- continuationRuns++;
5968
- }
5969
- responseText = orchestratorResult.finalResponse;
5970
- }
5971
- else {
5972
- responseText = await agent.send(request, true);
5973
- }
5974
- this.finishStreamingFormatter(undefined, { refreshPrompt: false, mode: 'complete' });
5975
- await this.awaitPendingCleanup();
5976
- this.captureHistorySnapshot();
5977
- this.autosaveIfEnabled();
5978
- // Track metrics with Alpha Zero 2
5979
- const elapsedMs = Date.now() - requestStartTime;
5980
- this.alphaZeroMetrics.recordMessage(elapsedMs);
5981
- if (!responseText?.trim()) {
5982
- display.showWarning('The provider returned an empty response. Check your API key/provider selection or retry the prompt.');
5983
- }
5984
- // AlphaZero: Extract and track tool calls from response
5985
- const toolsUsed = orchestratorResult
5986
- ? orchestratorResult.passes.flatMap(pass => pass.toolsUsed)
5987
- : this.getExecutedTools(responseText);
5988
- this.currentToolCalls = toolsUsed.map(name => ({
5989
- name,
5990
- arguments: {},
5991
- success: true, // Assume success if we got here
5992
- duration: 0,
5993
- }));
5994
- if (orchestratorResult && orchestratorResult.exitReason !== 'complete') {
5995
- const exitDetail = (() => {
5996
- switch (orchestratorResult?.exitReason) {
5997
- case 'max-passes':
5998
- return 'Reached orchestrator pass limit; showing last response.';
5999
- case 'empty-response':
6000
- return 'Received empty replies while orchestrating; showing last response.';
6001
- case 'stalled':
6002
- return 'Orchestrator detected stagnation; showing last response.';
6003
- default:
6004
- return null;
5907
+ if (decision.type === 'stop') {
5908
+ display.showSystemMessage(decision.message);
5909
+ break;
6005
5910
  }
6006
- })();
6007
- if (exitDetail) {
6008
- display.showSystemMessage(`⚠️ ${exitDetail}`);
6009
- }
6010
- }
6011
- // AlphaZero: Check for failure in response
6012
- const failure = detectFailure(responseText, {
6013
- toolCalls: this.currentToolCalls,
6014
- userMessage: request,
6015
- });
6016
- if (failure) {
6017
- this.lastFailure = failure;
6018
- // Check if we have a recovery strategy
6019
- const strategy = findRecoveryStrategy(failure);
6020
- if (strategy) {
6021
- display.showSystemMessage(`🔄 Found recovery strategy for this type of issue (success rate: ${Math.round(strategy.successRate * 100)}%)`);
5911
+ if (decision.type === 'stagnation-stop') {
5912
+ display.showWarning(decision.message);
5913
+ break;
5914
+ }
5915
+ if (decision.type === 'execute-plan') {
5916
+ display.showSystemMessage(decision.message);
5917
+ currentPrompt = applyInformationalGuardrail(decision.prompt);
5918
+ await new Promise(resolve => setTimeout(resolve, 500));
5919
+ continue;
5920
+ }
5921
+ if (decision.type === 'verify') {
5922
+ display.showSystemMessage(decision.message);
5923
+ currentPrompt = applyInformationalGuardrail(decision.prompt);
5924
+ await new Promise(resolve => setTimeout(resolve, 500));
5925
+ continue;
5926
+ }
5927
+ if (decision.type === 'continue') {
5928
+ if (decision.message) {
5929
+ display.showSystemMessage(decision.message);
5930
+ }
5931
+ currentPrompt = applyInformationalGuardrail(decision.prompt);
5932
+ }
5933
+ await new Promise(resolve => setTimeout(resolve, 500));
6022
5934
  }
6023
- }
6024
- else {
6025
- // Success - record the tool pattern for this task type
6026
- if (this.currentToolCalls.length > 0) {
6027
- const toolPattern = {
6028
- taskType: this.currentTaskType,
6029
- toolSequence: this.currentToolCalls.map(t => t.name),
6030
- successRate: 1.0,
6031
- avgDuration: elapsedMs,
6032
- occurrences: 1,
6033
- };
6034
- addToolPattern(this.currentTaskType, toolPattern);
5935
+ catch (error) {
5936
+ display.stopThinking(false);
5937
+ if (this.isContextOverflowError(error)) {
5938
+ display.showSystemMessage(`⚡ Context overflow handled. Continuing with reduced context...`);
5939
+ continue;
5940
+ }
5941
+ const handled = this.handleProviderError(error, () => this.runFlowControlledTask(initialRequest, options));
5942
+ if (!handled) {
5943
+ display.showError(error instanceof Error ? error.message : String(error), error);
5944
+ break;
5945
+ }
6035
5946
  }
6036
- // Clear action history on success
6037
- clearActionHistory();
6038
- this.lastFailure = null;
6039
5947
  }
6040
- }
6041
- catch (error) {
6042
- const handled = this.handleProviderError(error, () => this.processRequest(request, options));
6043
- if (!handled) {
6044
- // Pass full error object for enhanced formatting with stack trace
6045
- display.showError(error instanceof Error ? error.message : String(error), error);
6046
- }
6047
- }
6048
- finally {
6049
- // Fallback: if no assistant message was rendered (e.g., streaming hiccup), show the full response
6050
- if (!this.responseRendered && responseText.trim()) {
6051
- const finalText = responseText.trim();
5948
+ if (!this.responseRendered && lastResponseText.trim()) {
5949
+ const finalText = lastResponseText.trim();
6052
5950
  display.showAssistantMessage(finalText, { isFinal: true });
6053
5951
  this.ui.controller.recordAssistantResponse(finalText, {
6054
5952
  source: 'final',
6055
5953
  });
6056
5954
  this.responseRendered = true;
6057
5955
  }
5956
+ result = {
5957
+ finalResponse: lastResponseText,
5958
+ toolsUsed: lastToolsUsed,
5959
+ elapsedMs: Date.now() - overallStartTime,
5960
+ };
5961
+ }
5962
+ finally {
6058
5963
  this.finishStreamingFormatter(undefined, { refreshPrompt: false, mode: 'complete' });
5964
+ const totalElapsed = Date.now() - overallStartTime;
5965
+ const minutes = Math.floor(totalElapsed / 60000);
5966
+ const seconds = Math.floor((totalElapsed % 60000) / 1000);
5967
+ const completionLabel = mode === 'continuous'
5968
+ ? `\n🏁 Continuous execution completed in ${minutes}m ${seconds}s total`
5969
+ : `\n🏁 Flow orchestration completed in ${minutes}m ${seconds}s total`;
5970
+ display.showSystemMessage(completionLabel);
5971
+ resetTaskCompletionDetector();
6059
5972
  display.stopThinking(false);
6060
5973
  this.uiUpdates.setMode('processing');
6061
5974
  this.stopStreamingHeartbeat('complete', { quiet: true });
@@ -6068,20 +5981,60 @@ ${missingActionLine ? `${missingActionLine}\n` : ''}Resume with the next concret
6068
5981
  this.updateStatusMessage(null);
6069
5982
  this.toolsUsedThisRun = [];
6070
5983
  queueMicrotask(() => this.uiUpdates.setMode('idle'));
6071
- // CRITICAL: Ensure readline prompt is active for user input
6072
- // Erosolar-CLI style: New prompt naturally appears at bottom
6073
5984
  this.ensureReadlineReady();
6074
5985
  this.scheduleQueueProcessing();
6075
5986
  this.maybeProcessPromptInbox();
6076
5987
  this.refreshQueueIndicators();
6077
5988
  }
5989
+ return result;
5990
+ }
5991
+ handleFlowRunOutcome(request, result) {
5992
+ this.currentToolCalls = result.toolsUsed.map((name) => ({
5993
+ name,
5994
+ arguments: {},
5995
+ success: true,
5996
+ duration: 0,
5997
+ }));
5998
+ const failure = detectFailure(result.finalResponse, {
5999
+ toolCalls: this.currentToolCalls,
6000
+ userMessage: request,
6001
+ });
6002
+ if (failure) {
6003
+ this.lastFailure = failure;
6004
+ const strategy = findRecoveryStrategy(failure);
6005
+ if (strategy) {
6006
+ display.showSystemMessage(`🔄 Found recovery strategy for this type of issue (success rate: ${Math.round(strategy.successRate * 100)}%)`);
6007
+ }
6008
+ return;
6009
+ }
6010
+ if (this.currentToolCalls.length > 0) {
6011
+ const toolPattern = {
6012
+ taskType: this.currentTaskType,
6013
+ toolSequence: this.currentToolCalls.map((t) => t.name),
6014
+ successRate: 1.0,
6015
+ avgDuration: result.elapsedMs,
6016
+ occurrences: 1,
6017
+ };
6018
+ addToolPattern(this.currentTaskType, toolPattern);
6019
+ }
6020
+ clearActionHistory();
6021
+ this.lastFailure = null;
6022
+ }
6023
+ async processRequest(request) {
6024
+ const result = await this.runFlowControlledTask(request, {
6025
+ mode: 'standard',
6026
+ });
6027
+ if (!result) {
6028
+ return;
6029
+ }
6030
+ this.handleFlowRunOutcome(request, result);
6078
6031
  }
6079
6032
  /**
6080
6033
  * Process a continuous/infinite loop request.
6081
6034
  * Runs the agent in a loop until:
6082
6035
  * 1. The agent indicates completion (verified by AI confirmation)
6083
6036
  * 2. User interrupts (Ctrl+C)
6084
- * 3. Maximum iterations reached (safety limit)
6037
+ * 3. The orchestrator halts due to stagnation/verification exhaustion
6085
6038
  *
6086
6039
  * Uses intelligent task completion detection with AI verification
6087
6040
  * to ensure tasks are truly complete before stopping.
@@ -6089,29 +6042,13 @@ ${missingActionLine ? `${missingActionLine}\n` : ''}Resume with the next concret
6089
6042
  * Context is automatically managed - overflow errors trigger auto-recovery.
6090
6043
  */
6091
6044
  async processContinuousRequest(initialRequest) {
6092
- const MAX_PASSES = 100; // Safety limit to prevent truly infinite loops
6093
- if (this.isProcessing) {
6094
- this.enqueueFollowUpAction({ type: 'continuous', text: initialRequest });
6095
- return;
6096
- }
6097
- if (!this.agent && !this.rebuildAgent()) {
6098
- display.showWarning('Configure an API key via /secrets before sending requests.');
6045
+ const result = await this.runFlowControlledTask(initialRequest, {
6046
+ mode: 'continuous',
6047
+ });
6048
+ if (!result) {
6099
6049
  return;
6100
6050
  }
6101
- display.showSystemMessage(`Continuous mode active. Ctrl+C to stop.`);
6102
- const preparedRequest = this.isSelfImprovementRequest(initialRequest)
6103
- ? `${initialRequest}
6104
-
6105
- IMPORTANT: You have full git access. After making improvements:
6106
- 1. Use bash to run: git status (see changes)
6107
- 2. Use bash to run: git add -A (stage changes)
6108
- 3. Use bash to run: git commit -m "descriptive message" (commit)
6109
- 4. Use bash to run: git push (when milestone reached)
6110
-
6111
- Commit frequently with descriptive messages. Push when ready.
6112
- When truly finished with ALL tasks, explicitly state "TASK_FULLY_COMPLETE".`
6113
- : initialRequest;
6114
- await this.processRequest(preparedRequest, { orchestrate: true, maxPasses: MAX_PASSES });
6051
+ this.handleFlowRunOutcome(initialRequest, result);
6115
6052
  }
6116
6053
  /**
6117
6054
  * Resolve executed tools for the current turn. Prefer the actual tool
@@ -6133,129 +6070,6 @@ When truly finished with ALL tasks, explicitly state "TASK_FULLY_COMPLETE".`
6133
6070
  }
6134
6071
  return this.extractToolsFromResponse(responseText);
6135
6072
  }
6136
- /**
6137
- * Detect plan-only responses that narrate intent without executing actions.
6138
- */
6139
- isPlanOnlyResponse(response) {
6140
- const normalized = response.trim().toLowerCase();
6141
- if (!normalized) {
6142
- return false;
6143
- }
6144
- // If the assistant is clearly declaring completion, don't treat it as plan-only
6145
- const completionGuards = [
6146
- /\bnothing\s+(left|else)\s+(to\s+do|pending)\b/i,
6147
- /\b(already|now)\s+(clean|complete|done)\b/i,
6148
- /\b(no\s+(junk|issues?|changes?)\s+found)\b/i,
6149
- ];
6150
- if (completionGuards.some((pattern) => pattern.test(response))) {
6151
- return false;
6152
- }
6153
- const planIndicators = [
6154
- /\bplan\b/i,
6155
- /\bapproach\b/i,
6156
- /\bsteps?:\b/i,
6157
- /\bstep\s+1\b/i,
6158
- /\bstart by\b/i,
6159
- /\bfirst[, ]/i,
6160
- /\bthen\b/i,
6161
- /\bnext\b/i,
6162
- /\bi['’]?\s*will\b/i,
6163
- /\bi['’]?\s*ll\b/i,
6164
- /\bi['’]?\s*can\b.{0,40}\bthen\b/i,
6165
- /\bi['’]?\s*(?:will|ll)\s+begin\b/i,
6166
- ];
6167
- return planIndicators.some((pattern) => pattern.test(response));
6168
- }
6169
- /**
6170
- * Check if a response contains indicators that work is actually incomplete,
6171
- * even if it also contains TASK_FULLY_COMPLETE marker.
6172
- * This catches contradictory responses where the AI says "done" but also "not integrated yet".
6173
- */
6174
- responseIndicatesIncompleteWork(response) {
6175
- // Patterns that indicate work isn't actually complete
6176
- // Organized by category for maintainability
6177
- const incompletePatterns = [
6178
- // === INTEGRATION/DEPLOYMENT STATE ===
6179
- // "hasn't been integrated/implemented/connected yet"
6180
- /hasn'?t\s+been\s+(integrated|implemented|connected|deployed|added|completed|tested|verified)\s*(yet|still)?/i,
6181
- // "not yet integrated/implemented" or "not integrated"
6182
- /not\s+(yet\s+)?(integrated|implemented|connected|deployed|functional|working|complete|tested|verified)/i,
6183
- // "ready for integration" = NOT integrated
6184
- /ready\s+(for|to\s+be)\s+(integration|integrated|connected|deployed|testing|review)/i,
6185
- // "needs to be integrated"
6186
- /needs?\s+to\s+be\s+(integrated|connected|deployed|added|hooked|wired|tested|reviewed|merged)/i,
6187
- // Passive voice: "was not performed/completed"
6188
- /was\s+not\s+(performed|completed|implemented|deployed|integrated|tested)/i,
6189
- // "the [X] service hasn't been"
6190
- /the\s+\w+\s+(service|module|component|feature)\s+hasn'?t\s+been/i,
6191
- // === PARTIAL/INCOMPLETE STATE ===
6192
- // "still stores/uses/has" (current bad state persists)
6193
- /still\s+(stores?|uses?|has|contains?|needs?|requires?|missing|lacks?|broken)/i,
6194
- // Partial completion: "partially", "mostly", "almost"
6195
- /\b(partially|mostly|almost|nearly|not\s+fully)\s+(complete|done|finished|implemented|working)/i,
6196
- // Explicit partial: "part of", "some of", "half of"
6197
- /\b(only\s+)?(part|some|half|portion)\s+of\s+(the\s+)?(task|work|feature|implementation)/i,
6198
- // === QUALIFIER WORDS (uncertain completion) ===
6199
- // "should be complete", "appears complete", "theoretically"
6200
- /\b(should|might|may|could|appears?\s+to)\s+be\s+(complete|done|working|functional)/i,
6201
- /\btheoretically\s+(complete|done|working|functional)/i,
6202
- // "assuming", "if everything works"
6203
- /\b(assuming|provided|if)\s+(everything|it|this|that)\s+(works?|is\s+correct)/i,
6204
- // === SELF-CONTRADICTION PHRASES ===
6205
- // "done but...", "complete except...", "finished however..."
6206
- /\b(done|complete|finished)\s+(but|except|however|although|though)/i,
6207
- // "however" followed by incomplete indicator
6208
- /however[,\s].{0,50}?(hasn'?t|not\s+yet|still\s+needs?|pending|remains?|missing|broken|failing)/i,
6209
- // "but" followed by negative state
6210
- /\bbut\s+.{0,30}?(not|hasn'?t|won'?t|can'?t|doesn'?t|isn'?t|wasn'?t)/i,
6211
- // === FUTURE TENSE / DEFERRED WORK ===
6212
- // "will need to", "will require"
6213
- /will\s+(need\s+to|require|have\s+to)\s+(integrate|connect|deploy|complete|implement|test|fix)/i,
6214
- // Deferred: "left as", "deferred", "postponed", "out of scope"
6215
- /\b(left\s+as|deferred|postponed|out\s+of\s+scope|for\s+later|in\s+a\s+future)/i,
6216
- // Time-dependent: "after restart", "takes effect after", "once you"
6217
- /\b(after\s+(restart|reboot|redeploy)|takes?\s+effect\s+after|once\s+you)/i,
6218
- // === REMAINING WORK INDICATORS ===
6219
- // "remaining tasks", "outstanding items"
6220
- /\b(remaining|outstanding|pending|leftover)\s+(tasks?|items?|work|issues?|steps?)/i,
6221
- // "X more to do", "still have to"
6222
- /\b(more\s+to\s+do|still\s+have\s+to|yet\s+to\s+be\s+done)/i,
6223
- // Explicit blockers
6224
- /\b(blocker|blocked\s+by|waiting\s+(for|on)|depends?\s+on)/i,
6225
- // === ERROR/FAILURE STATE ===
6226
- // "failing tests", "build errors"
6227
- /\b(failing|broken|erroring)\s+(tests?|builds?|checks?|validations?)/i,
6228
- // "tests? (are )?(still )?failing"
6229
- /\btests?\s+(are\s+)?(still\s+)?failing/i,
6230
- // "errors? to (address|fix)"
6231
- /\b(errors?|warnings?|issues?)\s+to\s+(address|fix|resolve)/i,
6232
- // "doesn't work", "isn't working", "not working"
6233
- /\b(doesn'?t|isn'?t|not)\s+(work|working|functional|functioning)/i,
6234
- // === MANUAL STEPS REQUIRED ===
6235
- // "you'll need to", "manually run", "requires user"
6236
- /\b(you('ll|\s+will)\s+need\s+to|manually\s+(run|configure|set|update)|requires?\s+user)/i,
6237
- // "run this command", "execute the following"
6238
- /\b(run\s+this|execute\s+the\s+following|apply\s+the\s+migration)/i,
6239
- // === TODO/FIXME IN PROSE ===
6240
- // TODO or FIXME mentioned as remaining work (not in code blocks)
6241
- /\b(todo|fixme|hack|xxx):\s/i,
6242
- // "need to add", "should implement"
6243
- /\b(need\s+to|should|must)\s+(add|implement|create|write|build|fix)\b/i,
6244
- // === SCOPE LIMITATIONS ===
6245
- // "didn't have time", "ran out of time"
6246
- /\b(didn'?t|did\s+not)\s+have\s+(time|chance|opportunity)/i,
6247
- // "beyond scope", "outside scope"
6248
- /\b(beyond|outside)\s+(the\s+)?scope/i,
6249
- // "for now" (temporary state)
6250
- /\b(for\s+now|at\s+this\s+point|currently)\s*.{0,20}?(not|without|lacks?|missing)/i,
6251
- ];
6252
- for (const pattern of incompletePatterns) {
6253
- if (pattern.test(response)) {
6254
- return true;
6255
- }
6256
- }
6257
- return false;
6258
- }
6259
6073
  /**
6260
6074
  * Extract tool names from a response by looking for tool call patterns
6261
6075
  */
@@ -6748,12 +6562,19 @@ Return ONLY JSON array:
6748
6562
  const activity = normalized ? `Working: ${normalized}` : 'Working';
6749
6563
  this.renderer?.setActivity(activity);
6750
6564
  },
6751
- onBeforeFirstToolCall: (toolNames) => {
6565
+ onBeforeFirstToolCall: (toolNames, hasNarration) => {
6752
6566
  const primaryTool = toolNames[0];
6753
6567
  if (primaryTool) {
6754
6568
  this.renderer?.setActivity(`Running ${primaryTool}`);
6755
6569
  }
6756
- return undefined;
6570
+ if (hasNarration) {
6571
+ return undefined;
6572
+ }
6573
+ const toolList = toolNames.length ? toolNames.join(', ') : 'tools';
6574
+ const ack = `💭 Plan before tools:\n- Goal: ${this.lastUserQuery || 'address request'}\n- Tools: ${toolList}\n- Executing now.`;
6575
+ display.showNarrative(ack);
6576
+ this.ui.controller.recordAssistantThought(ack);
6577
+ return ack;
6757
6578
  },
6758
6579
  onStreamChunk: (chunk, type) => {
6759
6580
  this.handleStreamChunk(chunk, type ?? 'content');