npm - erosolar-cli - Versions diffs - 2.1.193 → 2.1.194 - Mend

erosolar-cli 2.1.193 → 2.1.194

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (75)

package/README.md +54 -0
package/dist/capabilities/offsecOpsCapability.d.ts +2 -2
package/dist/capabilities/offsecOpsCapability.d.ts.map +1 -1
package/dist/capabilities/offsecOpsCapability.js +6 -7
package/dist/capabilities/offsecOpsCapability.js.map +1 -1
package/dist/contracts/models.schema.json +9 -0
package/dist/core/agent.d.ts +31 -12
package/dist/core/agent.d.ts.map +1 -1
package/dist/core/agent.js +71 -84
package/dist/core/agent.js.map +1 -1
package/dist/core/agentOrchestrator.d.ts +49 -0
package/dist/core/agentOrchestrator.d.ts.map +1 -0
package/dist/core/agentOrchestrator.js +313 -0
package/dist/core/agentOrchestrator.js.map +1 -0
package/dist/core/alphaZeroOrchestrator.d.ts +140 -0
package/dist/core/alphaZeroOrchestrator.d.ts.map +1 -0
package/dist/core/alphaZeroOrchestrator.js +418 -0
package/dist/core/alphaZeroOrchestrator.js.map +1 -0
package/dist/core/schemaValidator.d.ts +5 -0
package/dist/core/schemaValidator.d.ts.map +1 -1
package/dist/core/schemaValidator.js +65 -0
package/dist/core/schemaValidator.js.map +1 -1
package/dist/core/taskCompletionDetector.d.ts +106 -0
package/dist/core/taskCompletionDetector.d.ts.map +1 -0
package/dist/core/taskCompletionDetector.js +402 -0
package/dist/core/taskCompletionDetector.js.map +1 -0
package/dist/core/toolRuntime.d.ts +4 -0
package/dist/core/toolRuntime.d.ts.map +1 -1
package/dist/core/toolRuntime.js +40 -64
package/dist/core/toolRuntime.js.map +1 -1
package/dist/core/types.d.ts +14 -0
package/dist/core/types.d.ts.map +1 -1
package/dist/core/types.js.map +1 -1
package/dist/providers/anthropicProvider.d.ts +8 -1
package/dist/providers/anthropicProvider.d.ts.map +1 -1
package/dist/providers/anthropicProvider.js +51 -0
package/dist/providers/anthropicProvider.js.map +1 -1
package/dist/providers/googleProvider.d.ts +7 -1
package/dist/providers/googleProvider.d.ts.map +1 -1
package/dist/providers/googleProvider.js +41 -0
package/dist/providers/googleProvider.js.map +1 -1
package/dist/providers/openaiChatCompletionsProvider.d.ts +7 -1
package/dist/providers/openaiChatCompletionsProvider.d.ts.map +1 -1
package/dist/providers/openaiChatCompletionsProvider.js +44 -0
package/dist/providers/openaiChatCompletionsProvider.js.map +1 -1
package/dist/shell/interactiveShell.d.ts +24 -4
package/dist/shell/interactiveShell.d.ts.map +1 -1
package/dist/shell/interactiveShell.js +437 -191
package/dist/shell/interactiveShell.js.map +1 -1
package/dist/shell/shellApp.js +8 -1
package/dist/shell/shellApp.js.map +1 -1
package/dist/tools/bashTools.d.ts +0 -1
package/dist/tools/bashTools.d.ts.map +1 -1
package/dist/tools/bashTools.js +3 -47
package/dist/tools/bashTools.js.map +1 -1
package/dist/tools/buildTools.js +1 -1
package/dist/tools/buildTools.js.map +1 -1
package/dist/tools/grepTools.js +6 -4
package/dist/tools/grepTools.js.map +1 -1
package/dist/tools/repoChecksTools.d.ts.map +1 -1
package/dist/tools/repoChecksTools.js +5 -7
package/dist/tools/repoChecksTools.js.map +1 -1
package/dist/ui/ShellUIAdapter.d.ts +3 -2
package/dist/ui/ShellUIAdapter.d.ts.map +1 -1
package/dist/ui/ShellUIAdapter.js +9 -7
package/dist/ui/ShellUIAdapter.js.map +1 -1
package/package.json +1 -1
package/dist/runtime/flowOrchestrator.d.ts +0 -46
package/dist/runtime/flowOrchestrator.d.ts.map +0 -1
package/dist/runtime/flowOrchestrator.js +0 -80
package/dist/runtime/flowOrchestrator.js.map +0 -1
package/dist/shell/taskCompletionDetector.d.ts +0 -52
package/dist/shell/taskCompletionDetector.d.ts.map +0 -1
package/dist/shell/taskCompletionDetector.js +0 -131
package/dist/shell/taskCompletionDetector.js.map +0 -1

package/dist/shell/interactiveShell.js (changed)

@@ -4,6 +4,7 @@ import { exec } from 'node:child_process';
 import { promisify } from 'node:util';
 import { existsSync, readFileSync, statSync, writeFileSync } from 'node:fs';
 import { join, resolve } from 'node:path';
+import { AgentOrchestrator } from '../core/agentOrchestrator.js';
 import { display } from '../ui/display.js';
 import { theme } from '../ui/theme.js';
 import { getTerminalColumns } from '../ui/layout.js';
@@ -12,13 +13,11 @@ import { ensureSecretForProvider, getSecretDefinitionForProvider, getSecretValue
 import { saveActiveProfilePreference, saveModelPreference, loadToolSettings, saveToolSettings, clearToolSettings, clearActiveProfilePreference, loadSessionPreferences, saveSessionPreferences, loadFeatureFlags, saveFeatureFlags, toggleFeatureFlag, FEATURE_FLAG_INFO, } from '../core/preferences.js';
 import { getLearningSummary, getRecentLearning, commitLearning, exportAllLearning, getLearningDir, } from '../core/learningPersistence.js';
 import { buildEnabledToolSet, evaluateToolPermissions, getToolToggleOptions, } from '../capabilities/toolRegistry.js';
-import { FlowOrchestrator } from '../runtime/flowOrchestrator.js';
 import { detectApiKeyError } from '../core/errors/apiKeyErrors.js';
 import { detectPromptBlockError, } from '../core/errors/promptBlockErrors.js';
 import { detectNetworkError } from '../core/errors/networkErrors.js';
 import { buildWorkspaceContext } from '../workspace.js';
 import { buildInteractiveSystemPrompt } from './systemPrompt.js';
-import { getTaskCompletionDetector, resetTaskCompletionDetector, WRITE_TOOLS, } from './taskCompletionDetector.js';
 import { discoverAllModels, quickCheckProviders, getCachedDiscoveredModels, sortModelsByPriority } from '../core/modelDiscovery.js';
 import { getModels, getSlashCommands, getProviders } from '../core/agentSchemaLoader.js';
 import { loadMcpServers } from '../mcp/config.js';
@@ -37,8 +36,8 @@ import { analyzeImprovementOpportunities, runSelfImprovementCycle, getImprovemen
 import { listAvailablePlugins } from '../plugins/index.js';
 import { isValidSourceRepo, getRepoName, analyzeSource, runSelfEvolution, stopEvolution, getEvolutionStatus, emergencyEvolutionRollback, learnSourcePatterns, generateFix, } from '../core/selfEvolution.js';
 import { analyzeTokenUsage, discoverModularTargets, getModularStatusDisplay, generateContextOptimizations, getGuidelines, deleteGuideline, getPendingActions, executeModularAction, } from '../core/alphaZeroModular.js';
-import { startOffsecRun, resumeOffsecRun, recordOffsecOutcome, getOffsecNextActions, simulateOffsecRollout, formatOffsecStatus, listOffsecRuns, } from '../core/offsecAlphaZero.js';
 import { generateTestFlows, detectBugs, detectUIUpdates, saveTestFlows, saveBugReports, saveUIUpdates, getTestFlowStatus, } from '../core/intelligentTestFlows.js';
+import { startOffsecRun, resumeOffsecRun, listOffsecRuns, getOffsecNextActions, simulateOffsecRollout, recordOffsecOutcome, formatOffsecStatus, } from '../core/offsecAlphaZero.js';
 import { PromptController } from '../ui/PromptController.js';
 import { enterStreamingMode, exitStreamingMode, isStreamingMode } from '../ui/globalWriteLock.js';
 import { setGlobalAIEnhancer } from '../tools/localExplore.js';
@@ -86,10 +85,6 @@ const CONTEXT_CLEANUP_SYSTEM_PROMPT = `Summarize earlier IDE collaboration so th
 - Respond in plain Markdown only (no tool or shell calls).`;
 const MAX_ATTACHMENT_BYTES = 200 * 1024; // 200KB per attachment
 const MAX_ATTACHMENT_CHARS = 16000; // Guardrail to avoid flooding context
-const WRITE_TOOL_NAMES = new Set(Array.from(WRITE_TOOLS)
-    .map((tool) => tool.toLowerCase())
-    // Bash/execute commands can be read-only; guard only on clear mutating tools
-    .filter((tool) => !tool.includes('bash') && !tool.startsWith('execute')));
 export class InteractiveShell {
     agent = null;
     profile;
@@ -124,7 +119,6 @@ export class InteractiveShell {
     uiUpdates;
     _fileChangeTracker = new FileChangeTracker(); // Reserved for future file tracking features
     alphaZeroMetrics; // Alpha Zero 2 performance tracking
-    flowOrchestrator = new FlowOrchestrator();
     maxNetworkRetries = 2;
     statusSubscription = null;
     followUpQueue = [];
@@ -140,6 +134,7 @@ export class InteractiveShell {
     lastContextWarningLevel = null;
     sessionPreferences;
     autosaveEnabled;
+    orchestrationNoticeShown = false;
     verificationEnabled = false;
     criticalApprovalMode = 'auto';
     editGuardMode = 'display-edits';
@@ -1629,7 +1624,10 @@ export class InteractiveShell {
         return `${provider} · ${this.sessionState.model}`;
     }
     refreshContextGauge() {
-        const tokens = getContextWindowTokens(this.sessionState.model);
+        // First try to get context window from provider API (real value)
+        // Fall back to static model mapping only if provider API is not available
+        const providerContextWindow = this.agent?.getModelInfo()?.contextWindow;
+        const tokens = providerContextWindow ?? getContextWindowTokens(this.sessionState.model);
         const normalizedTokens = typeof tokens === 'number' && Number.isFinite(tokens) ? tokens : null;
         this.activeContextWindowTokens = normalizedTokens;
         if (normalizedTokens !== null) {
@@ -1639,6 +1637,28 @@ export class InteractiveShell {
             };
         }
     }
+    /**
+     * Await agent.getContextWindowFromProvider() and, when it yields a finite number, store it as activeContextWindowTokens and as the limit of latestTokenUsage (the used count is preserved).
+     * Returns early when no agent exists, so call it after the agent is created. Any error from the lookup is swallowed and the previously stored values stay in place.
+     */
+    async fetchAndUpdateContextWindow() {
+        if (!this.agent) {
+            return;
+        }
+        try {
+            const contextWindow = await this.agent.getContextWindowFromProvider();
+            if (contextWindow !== null && Number.isFinite(contextWindow)) {
+                this.activeContextWindowTokens = contextWindow;
+                this.latestTokenUsage = {
+                    used: this.latestTokenUsage.used, // keep the current usage count; only the limit changes
+                    limit: contextWindow,
+                };
+            }
+        }
+        catch {
+            // Best effort: on any failure keep whatever window and limit were already stored
+        }
+    }
     updateContextUsage(percentage, autoCompactThreshold = CONTEXT_AUTOCOMPACT_PERCENT) {
         this.uiAdapter.updateContextUsage(percentage);
         this.terminalInput.setContextUsage(percentage);
@@ -1997,9 +2017,8 @@ export class InteractiveShell {
             return;
         }
         const isReasoning = type === 'reasoning';
-        // Approximate token count (roughly 4 chars per token)
-        this.streamingTokenCount += Math.ceil(chunk.length / 4);
-        this.renderer?.updateStreamingTokens(this.streamingTokenCount);
+        // Token count is updated from real provider usage data in onAssistantMessage
+        // Do NOT estimate tokens from chunk length - wait for actual API response
         // Keep pinned status updated for all streaming chunks
         this.updateStreamingStatusFromChunk(chunk);
         // Handle <thinking> tags as separate events in the queue
@@ -2276,14 +2295,17 @@ export class InteractiveShell {
         // Check for continuous/infinite loop commands or auto-escalation to completion mode
         const explicitContinuous = this.isContinuousCommand(trimmed);
         const autoContinuous = this.shouldAutoRunToCompletion(trimmed);
-        if (explicitContinuous || autoContinuous) {
-            if (autoContinuous && !explicitContinuous) {
-                display.showSystemMessage('⚡ Actionable request detected; running continuously until complete (Ctrl+C to stop).');
-            }
+        if (explicitContinuous) {
             await this.processContinuousRequest(trimmed);
             this.syncRendererInput();
             return;
         }
+        if (autoContinuous) {
+            display.showSystemMessage('⚡ Actionable request detected; orchestrating until complete (Ctrl+C to stop).');
+            await this.processRequest(trimmed, { orchestrate: true });
+            this.syncRendererInput();
+            return;
+        }
         // Direct execution for all inputs, including multi-line pastes
         await this.processRequest(trimmed);
         this.syncRendererInput();
@@ -2320,19 +2342,113 @@ export class InteractiveShell {
         ];
         const strongMaintenanceIntent = maintenancePatterns.some((pattern) => pattern.test(normalized));
         // General action intent in a code context
-        const actionVerb = /\b(fix|resolve|address|refactor|implement|upgrade|migrate|optimi[sz]e|modernize|stabilize|harden|ship|complete|finish|clean|remove|delete|prune|tidy|audit|cleanup)\b/;
+        const actionVerb = /\b(fix|resolve|address|refactor|implement|upgrade|migrate|optimi[sz]e|modernize|stabilize|harden|ship|complete|finish|clean|remove|delete|prune|tidy|audit|cleanup|debug|investigate|triage|scan|check|diagnos(e|is))\b/;
         const codeContext = /\b(repo|codebase|project|app|service|package|module|component|workspace|cli|tests?|source|files?|artifacts?)\b/;
-        const strongActionIntent = actionVerb.test(normalized) && codeContext.test(normalized) && normalized.length > 20;
-        // Allow maintenance/refactor tasks to auto-run even if phrased as questions ("got any junk to remove?")
-        if (strongMaintenanceIntent || strongActionIntent) {
-            // But still avoid purely informational questions like "what is this repo?"
-            if (startsWithInfoWord && !strongMaintenanceIntent) {
-                return false;
-            }
-            return true;
+        const hasActionVerb = actionVerb.test(normalized);
+        const hasCodeContext = codeContext.test(normalized);
+        // Bug/issue hunt or health-check intents, even when phrased as questions
+        const bugSweepIntent = /\b(got\s+any|any|open|known)\s+(bugs?|issues?|defects?)\b/;
+        const triageIntent = /\b(find|check|hunt|triage|detect|scan|review|look\s+for)\s+(bugs?|issues?|errors?|failures?)\b/;
+        const failingTestsIntent = /\b(failing|broken|red)\s+tests?\b|\btests?\s+(are\s+)?failing\b/;
+        const qualitySweepIntent = /\b(audit|health\s+check|stability\s+check|bug\s+scan|issue\s+scan|regression\s+pass)\b/;
+        const bugOrHealthIntent = [bugSweepIntent, triageIntent, failingTestsIntent, qualitySweepIntent].some((pattern) => pattern.test(normalized));
+        // Score-based intent detection keeps questions like "what is this repo?" out
+        let actionScore = 0;
+        if (strongMaintenanceIntent)
+            actionScore += 2;
+        if (bugOrHealthIntent)
+            actionScore += 2;
+        if (hasActionVerb)
+            actionScore += 1;
+        if (hasCodeContext)
+            actionScore += 1;
+        const decisiveAction = actionScore >= 2 || (actionScore === 1 && !isQuestion && normalized.length > 10);
+        if (!decisiveAction) {
+            return false;
         }
-        return false;
+        // Respect informational openers unless intent is clearly actionable
+        if (startsWithInfoWord && actionScore < 3 && !bugOrHealthIntent && !strongMaintenanceIntent) {
+            return false;
+        }
+        return true;
+    }
+    shouldContinueOrchestrating(originalRequest, result) { // Decide whether another orchestration run is needed; returns { shouldContinue, reason } with reason null when stopping
+        if (!result) { // nothing to inspect, so stop
+            return { shouldContinue: false, reason: null };
+        }
+        const actionable = this.shouldAutoRunToCompletion(originalRequest); // re-run the auto-run heuristic on the original request text
+        if (!actionable) { // non-actionable requests continue only when the run did not exit as complete
+            const incomplete = result.exitReason !== 'complete';
+            return { shouldContinue: incomplete, reason: incomplete ? result.exitReason : null };
+        }
+        if (result.exitReason !== 'complete') { // actionable request that exited early: continue and report the exit reason
+            return { shouldContinue: true, reason: result.exitReason };
+        }
+        const executedTools = result.passes.some(pass => (pass.toolsUsed?.length ?? 0) > 0); // true when any pass lists at least one tool
+        const lastPass = result.passes[result.passes.length - 1];
+        const lastPlanOnly = Boolean(lastPass?.planOnly) && !lastPass?.tookAction && (!lastPass?.toolsUsed?.length); // last pass flagged planOnly, took no action and used no tools
+        const lastEmpty = !lastPass?.response?.trim(); // last pass has no response text after trimming (also true when there are no passes)
+        if (!executedTools) { // a complete exit without any tool use is not accepted for an actionable request
+            return { shouldContinue: true, reason: 'no-action' };
+        }
+        if (lastPlanOnly) {
+            return { shouldContinue: true, reason: 'plan-only' };
+        }
+        if (lastEmpty) {
+            return { shouldContinue: true, reason: 'empty-response' };
+        }
+        return { shouldContinue: false, reason: null }; // tools ran and the last pass produced a non-empty, non-plan response
+    }
+    describeContinuationReason(reason) { // Map a continuation reason code to a human-readable sentence; always returns a string
+        switch (reason) {
+            case 'no-action':
+                return 'No tools or concrete actions were executed; continuing until real work is done.';
+            case 'plan-only':
+                return 'Last pass was just planning; executing the next concrete step now.';
+            case 'empty-response':
+            case 'empty': // falls through: handled the same as empty-response
+                return 'Previous pass returned nothing; resuming with a concrete action.';
+            case 'stalled':
+                return 'Previous orchestration stalled; forcing continuation.';
+            case 'max-passes':
+                return 'Reached pass limit; extending the run to finish the task.';
+            default: // any other value, including null or undefined, gets the generic sentence
+                return 'Continuing orchestration until the task is actually finished.';
+        }
+    }
+    buildForcedContinuationPrompt(originalRequest, lastResult, continuationReason) { // Build the follow-up prompt string: the trimmed request plus stop reason, tool usage and a snapshot of the last response
+        const lastPass = lastResult.passes[lastResult.passes.length - 1];
+        const rawResponse = lastPass?.response?.trim() ?? ''; // empty string when there is no last pass or no response
+        const truncatedResponse = rawResponse ? rawResponse.slice(0, 1200) : ''; // cap the quoted response at 1200 characters
+        const responseNote = rawResponse && rawResponse.length > truncatedResponse.length
+            ? '\n\n[Last response truncated]'
+            : '';
+        const usedTools = lastResult.passes.flatMap(pass => pass.toolsUsed ?? []); // tool names from every pass, in order, duplicates kept
+        const hasToolUsage = usedTools.length > 0;
+        const toolsUsed = hasToolUsage
+            ? `Tools used so far: ${usedTools.join(', ')}.`
+            : 'No tools have been used yet - start with a quick workspace scan (list_files + glob/grep/search) and then take concrete actions now.';
+        const responseSection = truncatedResponse
+            ? `Last response snapshot:\n${truncatedResponse}${responseNote}\n`
+            : '';
+        const exitReason = continuationReason ?? lastResult.exitReason ?? 'incomplete'; // explicit reason wins, then the result exit reason, then a generic label
+        const reasonLine = this.describeContinuationReason(exitReason);
+        const missingActionLine = hasToolUsage
+            ? ''
+            : 'No tool calls or file edits have been made yet. Start with list_files + glob/grep/search to sweep the repo, then run a command (read/search/test/edit) and show the output before summarizing.';
+        return `${originalRequest.trim()}
+The previous orchestration stopped early (reason: ${exitReason}). Continue working until the task is truly finished. Use tools, run checks, and only declare completion when nothing remains.
+${reasonLine}
+${toolsUsed}
+${responseSection}
+${missingActionLine ? `${missingActionLine}\n` : ''}Resume with the next concrete action now.`;
     }
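+    /**
+     * NOTE(review): detached doc comment. It does not describe isExitCommand below; it appears to belong to the continuation logic (shouldContinueOrchestrating) - confirm and move.
+     * It reads: decide whether to automatically continue execution when the model stops after a plan/summary without taking actions. This keeps flows moving toward completion instead of stalling on planning.
+     */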
     isExitCommand(input) {
         const lower = input.trim().toLowerCase();
         return (lower === 'exit' ||
@@ -5780,22 +5896,20 @@ export class InteractiveShell {
         this.clearInlinePanel();
         this.syncRendererInput();
     }
-    async runFlowControlledTask(initialRequest, options) {
-        const { mode } = options;
-        const followUpType = mode === 'continuous' ? 'continuous' : 'request';
+    async processRequest(request, options) {
         if (this.isProcessing) {
-            this.enqueueFollowUpAction({ type: followUpType, text: initialRequest });
-            return null;
+            this.enqueueFollowUpAction({ type: 'request', text: request });
+            return;
         }
         if (!this.agent && !this.rebuildAgent()) {
             display.showWarning('Configure an API key via /secrets before sending requests.');
-            return null;
+            return;
         }
         this.inlinePanelScopeActive = false;
         this.clearInlinePanel();
         const agent = this.agent;
         if (!agent) {
-            return null;
+            return;
         }
         this.toolsUsedThisRun = [];
         this.currentToolCalls = [];
@@ -5806,138 +5920,166 @@ export class InteractiveShell {
         else {
             this.resetNetworkRetryState();
         }
+        // Reset per-request render tracking
         this.responseRendered = false;
-        if (this.shouldLogPrompt(initialRequest)) {
-            this.logUserPrompt(initialRequest);
+        if (this.shouldLogPrompt(request)) {
+            this.logUserPrompt(request);
         }
         this.isProcessing = true;
         this.uiUpdates.setMode('processing');
-        this.streamingTokenCount = 0;
+        this.streamingTokenCount = 0; // Reset token counter for new request
         this.terminalInput.setStreaming(true);
+        // Keep the persistent input/control bar active as we transition into streaming.
         this.syncRendererInput();
         this.renderer?.render();
-        const overallStartTime = Date.now();
+        const requestStartTime = Date.now(); // Alpha Zero 2 timing
+        // Clear previous parallel agents and start fresh for new request
         const parallelManager = getParallelAgentManager();
         parallelManager.clear();
         parallelManager.startBatch();
-        this.lastUserQuery = initialRequest;
-        this.currentTaskType = classifyTaskType(initialRequest);
+        // AlphaZero: Track task for learning
+        this.lastUserQuery = request;
+        this.currentTaskType = classifyTaskType(request);
         this.currentToolCalls = [];
         this.clearToolUsageMeta();
         this.renderer?.setActivity('Starting...');
-        this.uiAdapter.startProcessing(mode === 'continuous' ? 'Continuous execution mode' : 'Working on your request');
+        this.uiAdapter.startProcessing('Working on your request');
         this.setProcessingStatus();
         this.beginAiRuntime();
-        this.startStreamingHeartbeat(mode === 'continuous' ? 'Streaming' : 'Streaming response');
-        const completionDetector = getTaskCompletionDetector();
-        completionDetector.reset();
-        this.flowOrchestrator.start(initialRequest);
-        this.updateStatusMessage('Orchestrating request...');
-        if (mode === 'continuous') {
-            display.showSystemMessage('Continuous mode active. Ctrl+C to stop.');
+        let responseText = '';
+        let orchestratorResult = null;
+        const orchestrate = options?.orchestrate ?? true;
+        const orchestratorPassLimit = options?.maxPasses ?? 10;
+        if (orchestrate && !this.orchestrationNoticeShown) {
+            display.showSystemMessage('⚡ Orchestrating every prompt until completion. Press Ctrl+C to stop a run early.');
+            this.orchestrationNoticeShown = true;
         }
-        else {
-            display.showSystemMessage('Flow orchestrator engaged; running until the request is satisfied.');
-        }
-        let currentPrompt = initialRequest;
-        if (this.isSelfImprovementRequest(initialRequest)) {
-            currentPrompt = `${initialRequest}
-When finished with ALL tasks, say "TASK_FULLY_COMPLETE".`;
-        }
-        let iteration = 0;
-        let lastResponseText = '';
-        let lastToolsUsed = [];
-        let result = null;
         try {
-            while (true) {
-                iteration++;
-                this.toolsUsedThisRun = [];
-                this.updateStatusMessage('Orchestrating request...');
-                try {
-                    display.showThinking('Responding...');
-                    this.refreshStatusLine(true);
-                    const response = await agent.send(currentPrompt, true);
-                    lastResponseText = response ?? '';
-                    this.finishStreamingFormatter(undefined, { refreshPrompt: false, mode: 'complete' });
-                    await this.awaitPendingCleanup();
-                    this.captureHistorySnapshot();
-                    this.autosaveIfEnabled();
-                    const elapsedMs = Date.now() - overallStartTime;
-                    this.alphaZeroMetrics.recordMessage(elapsedMs);
-                    if (!response?.trim()) {
-                        display.showWarning('Model returned an empty response. Continuing orchestration...');
-                        currentPrompt = `${initialRequest}
-The previous reply was empty. Resume the task now.`;
-                        continue;
-                    }
-                    const toolsUsed = this.getExecutedTools(response);
-                    lastToolsUsed = toolsUsed;
-                    toolsUsed.forEach(tool => completionDetector.recordToolCall(tool, true, true));
-                    const completionAnalysis = completionDetector.analyzeCompletion(response, toolsUsed);
-                    const decision = this.flowOrchestrator.decide({
-                        iteration,
-                        response,
-                        toolsUsed,
-                        completionAnalysis,
-                        verificationConfirmed: this.flowOrchestrator.isVerificationPending()
-                            ? completionDetector.isVerificationConfirmed(response)
-                            : false,
-                    });
-                    if (decision.type === 'stop') {
-                        display.showSystemMessage(decision.message);
-                        break;
-                    }
-                    if (decision.type === 'stagnation-stop') {
-                        display.showWarning(decision.message);
+            // Start streaming - no header needed, the input area already provides context
+            this.startStreamingHeartbeat('Streaming response');
+            if (orchestrate) {
+                const orchestrator = new AgentOrchestrator(agent);
+                orchestratorResult = await orchestrator.runToCompletion(request, {
+                    streaming: true,
+                    maxPasses: orchestratorPassLimit,
+                    maxStagnantPasses: 3,
+                    verificationMode: 'auto',
+                    enforceActions: true,
+                });
+                const MAX_CONTINUATIONS = 2;
+                let continuationRuns = 0;
+                while (orchestratorResult && continuationRuns < MAX_CONTINUATIONS) {
+                    const continuationDecision = this.shouldContinueOrchestrating(request, orchestratorResult);
+                    if (!continuationDecision.shouldContinue) {
                         break;
                     }
-                    if (decision.type === 'continue') {
-                        if (decision.message) {
-                            display.showSystemMessage(decision.message);
-                        }
-                        currentPrompt = decision.prompt;
-                    }
-                    await new Promise(resolve => setTimeout(resolve, 500));
+                    const reasonMessage = this.describeContinuationReason(continuationDecision.reason);
+                    display.showSystemMessage(`🔁 ${reasonMessage}`);
+                    const continuationPrompt = this.buildForcedContinuationPrompt(request, orchestratorResult, continuationDecision.reason ?? undefined);
+                    const continuationResult = await orchestrator.runToCompletion(continuationPrompt, {
+                        streaming: true,
+                        maxPasses: Math.max(orchestratorPassLimit, 12),
+                        maxStagnantPasses: 3,
+                        verificationMode: 'auto',
+                        enforceActions: true,
+                    });
+                    orchestratorResult = {
+                        finalResponse: continuationResult.finalResponse,
+                        passes: [...orchestratorResult.passes, ...continuationResult.passes],
+                        exitReason: continuationResult.exitReason,
+                    };
+                    continuationRuns++;
                 }
-                catch (error) {
-                    display.stopThinking(false);
-                    if (this.isContextOverflowError(error)) {
-                        display.showSystemMessage(`⚡ Context overflow handled. Continuing with reduced context...`);
-                        continue;
-                    }
-                    const handled = this.handleProviderError(error, () => this.runFlowControlledTask(initialRequest, options));
-                    if (!handled) {
-                        display.showError(error instanceof Error ? error.message : String(error), error);
-                        break;
+                responseText = orchestratorResult.finalResponse;
+            }
+            else {
+                responseText = await agent.send(request, true);
+            }
+            this.finishStreamingFormatter(undefined, { refreshPrompt: false, mode: 'complete' });
+            await this.awaitPendingCleanup();
+            this.captureHistorySnapshot();
+            this.autosaveIfEnabled();
+            // Track metrics with Alpha Zero 2
+            const elapsedMs = Date.now() - requestStartTime;
+            this.alphaZeroMetrics.recordMessage(elapsedMs);
+            if (!responseText?.trim()) {
+                display.showWarning('The provider returned an empty response. Check your API key/provider selection or retry the prompt.');
+            }
+            // AlphaZero: Extract and track tool calls from response
+            const toolsUsed = orchestratorResult
+                ? orchestratorResult.passes.flatMap(pass => pass.toolsUsed)
+                : this.getExecutedTools(responseText);
+            this.currentToolCalls = toolsUsed.map(name => ({
+                name,
+                arguments: {},
+                success: true, // Assume success if we got here
+                duration: 0,
+            }));
+            if (orchestratorResult && orchestratorResult.exitReason !== 'complete') {
+                const exitDetail = (() => {
+                    switch (orchestratorResult?.exitReason) {
+                        case 'max-passes':
+                            return 'Reached orchestrator pass limit; showing last response.';
+                        case 'empty-response':
+                            return 'Received empty replies while orchestrating; showing last response.';
+                        case 'stalled':
+                            return 'Orchestrator detected stagnation; showing last response.';
+                        default:
+                            return null;
                     }
+                })();
+                if (exitDetail) {
+                    display.showSystemMessage(`⚠️ ${exitDetail}`);
+                }
+            }
+            // AlphaZero: Check for failure in response
+            const failure = detectFailure(responseText, {
+                toolCalls: this.currentToolCalls,
+                userMessage: request,
+            });
+            if (failure) {
+                this.lastFailure = failure;
+                // Check if we have a recovery strategy
+                const strategy = findRecoveryStrategy(failure);
+                if (strategy) {
+                    display.showSystemMessage(`🔄 Found recovery strategy for this type of issue (success rate: ${Math.round(strategy.successRate * 100)}%)`);
+                }
+            }
+            else {
+                // Success - record the tool pattern for this task type
+                if (this.currentToolCalls.length > 0) {
+                    const toolPattern = {
+                        taskType: this.currentTaskType,
+                        toolSequence: this.currentToolCalls.map(t => t.name),
+                        successRate: 1.0,
+                        avgDuration: elapsedMs,
+                        occurrences: 1,
+                    };
+                    addToolPattern(this.currentTaskType, toolPattern);
                 }
+                // Clear action history on success
+                clearActionHistory();
+                this.lastFailure = null;
+            }
+        }
+        catch (error) {
+            const handled = this.handleProviderError(error, () => this.processRequest(request, options));
+            if (!handled) {
+                // Pass full error object for enhanced formatting with stack trace
+                display.showError(error instanceof Error ? error.message : String(error), error);
             }
-            if (!this.responseRendered && lastResponseText.trim()) {
-                const finalText = lastResponseText.trim();
+        }
+        finally {
+            // Fallback: if no assistant message was rendered (e.g., streaming hiccup), show the full response
+            if (!this.responseRendered && responseText.trim()) {
+                const finalText = responseText.trim();
                 display.showAssistantMessage(finalText, { isFinal: true });
                 this.ui.controller.recordAssistantResponse(finalText, {
                     source: 'final',
                 });
                 this.responseRendered = true;
             }
-            result = {
-                finalResponse: lastResponseText,
-                toolsUsed: lastToolsUsed,
-                elapsedMs: Date.now() - overallStartTime,
-            };
-        }
-        finally {
             this.finishStreamingFormatter(undefined, { refreshPrompt: false, mode: 'complete' });
-            const totalElapsed = Date.now() - overallStartTime;
-            const minutes = Math.floor(totalElapsed / 60000);
-            const seconds = Math.floor((totalElapsed % 60000) / 1000);
-            const completionLabel = mode === 'continuous'
-                ? `\n🏁 Continuous execution completed in ${minutes}m ${seconds}s total`
-                : `\n🏁 Flow orchestration completed in ${minutes}m ${seconds}s total`;
-            display.showSystemMessage(completionLabel);
-            resetTaskCompletionDetector();
             display.stopThinking(false);
             this.uiUpdates.setMode('processing');
             this.stopStreamingHeartbeat('complete', { quiet: true });
@@ -5950,60 +6092,20 @@ The previous reply was empty. Resume the task now.`;
             this.updateStatusMessage(null);
             this.toolsUsedThisRun = [];
             queueMicrotask(() => this.uiUpdates.setMode('idle'));
+            // CRITICAL: Ensure readline prompt is active for user input
+            // Erosolar-CLI style: New prompt naturally appears at bottom
             this.ensureReadlineReady();
             this.scheduleQueueProcessing();
             this.maybeProcessPromptInbox();
             this.refreshQueueIndicators();
         }
-        return result;
-    }
-    handleFlowRunOutcome(request, result) {
-        this.currentToolCalls = result.toolsUsed.map((name) => ({
-            name,
-            arguments: {},
-            success: true,
-            duration: 0,
-        }));
-        const failure = detectFailure(result.finalResponse, {
-            toolCalls: this.currentToolCalls,
-            userMessage: request,
-        });
-        if (failure) {
-            this.lastFailure = failure;
-            const strategy = findRecoveryStrategy(failure);
-            if (strategy) {
-                display.showSystemMessage(`🔄 Found recovery strategy for this type of issue (success rate: ${Math.round(strategy.successRate * 100)}%)`);
-            }
-            return;
-        }
-        if (this.currentToolCalls.length > 0) {
-            const toolPattern = {
-                taskType: this.currentTaskType,
-                toolSequence: this.currentToolCalls.map((t) => t.name),
-                successRate: 1.0,
-                avgDuration: result.elapsedMs,
-                occurrences: 1,
-            };
-            addToolPattern(this.currentTaskType, toolPattern);
-        }
-        clearActionHistory();
-        this.lastFailure = null;
-    }
-    async processRequest(request) {
-        const result = await this.runFlowControlledTask(request, {
-            mode: 'standard',
-        });
-        if (!result) {
-            return;
-        }
-        this.handleFlowRunOutcome(request, result);
     }
     /**
      * Process a continuous/infinite loop request.
      * Runs the agent in a loop until:
      * 1. The agent indicates completion (verified by AI confirmation)
      * 2. User interrupts (Ctrl+C)
-     * 3. The orchestrator halts due to stagnation/verification exhaustion
+     * 3. Maximum iterations reached (safety limit)
      *
      * Uses intelligent task completion detection with AI verification
      * to ensure tasks are truly complete before stopping.
@@ -6011,13 +6113,29 @@ The previous reply was empty. Resume the task now.`;
      * Context is automatically managed - overflow errors trigger auto-recovery.
      */
     async processContinuousRequest(initialRequest) {
-        const result = await this.runFlowControlledTask(initialRequest, {
-            mode: 'continuous',
-        });
-        if (!result) {
+        const MAX_PASSES = 100; // Safety limit to prevent truly infinite loops; forwarded to processRequest as maxPasses below
+        if (this.isProcessing) { // isProcessing is set: queue this request as a 'continuous' follow-up instead of running it now
+            this.enqueueFollowUpAction({ type: 'continuous', text: initialRequest });
+            return;
+        }
+        if (!this.agent && !this.rebuildAgent()) { // no agent and rebuildAgent() returned a falsy value: warn and stop
+            display.showWarning('Configure an API key via /secrets before sending requests.');
             return;
         }
-        this.handleFlowRunOutcome(initialRequest, result);
+        display.showSystemMessage(`Continuous mode active. Ctrl+C to stop.`);
+        const preparedRequest = this.isSelfImprovementRequest(initialRequest) // matching requests get the git workflow instructions appended; all others pass through unchanged
+            ? `${initialRequest}
+IMPORTANT: You have full git access. After making improvements:
+1. Use bash to run: git status (see changes)
+2. Use bash to run: git add -A (stage changes)
+3. Use bash to run: git commit -m "descriptive message" (commit)
+4. Use bash to run: git push (when milestone reached)
+Commit frequently with descriptive messages. Push when ready.
+When truly finished with ALL tasks, explicitly state "TASK_FULLY_COMPLETE".`
+            : initialRequest;
+        await this.processRequest(preparedRequest, { orchestrate: true, maxPasses: MAX_PASSES }); // the run itself is delegated to processRequest; presumably orchestrate/maxPasses drive the multi-pass loop there (confirm in processRequest)
     }
     /**
      * Resolve executed tools for the current turn. Prefer the actual tool
@@ -6039,6 +6157,129 @@ The previous reply was empty. Resume the task now.`;
         }
         return this.extractToolsFromResponse(responseText);
     }
+    /**
+     * Detect plan-only responses that narrate intent without executing actions.
+     */
+    isPlanOnlyResponse(response) { // returns true when a plan indicator matches and no completion guard does; false for blank input
+        const normalized = response.trim().toLowerCase(); // used only for the blank check below; every pattern is tested against the original response text
+        if (!normalized) {
+            return false;
+        }
+        // If the assistant is clearly declaring completion, don't treat it as plan-only
+        const completionGuards = [
+            /\bnothing\s+(left|else)\s+(to\s+do|pending)\b/i,
+            /\b(already|now)\s+(clean|complete|done)\b/i,
+            /\b(no\s+(junk|issues?|changes?)\s+found)\b/i,
+        ];
+        if (completionGuards.some((pattern) => pattern.test(response))) { // guards take precedence over the plan indicators
+            return false;
+        }
+        const planIndicators = [ // NOTE(review): in the "ll" entry the apostrophe and whitespace are both optional, so the bare word "ill" also matches - confirm intended
+            /\bplan\b/i,
+            /\bapproach\b/i,
+            /\bsteps?:\b/i,
+            /\bstep\s+1\b/i,
+            /\bstart by\b/i,
+            /\bfirst[, ]/i,
+            /\bthen\b/i,
+            /\bnext\b/i,
+            /\bi['’]?\s*will\b/i,
+            /\bi['’]?\s*ll\b/i,
+            /\bi['’]?\s*can\b.{0,40}\bthen\b/i,
+            /\bi['’]?\s*(?:will|ll)\s+begin\b/i,
+        ];
+        return planIndicators.some((pattern) => pattern.test(response)); // a single matching indicator is enough
+    }
+    /**
+     * Check if a response contains indicators that work is actually incomplete,
+     * even if it also contains TASK_FULLY_COMPLETE marker.
+     * This catches contradictory responses where the AI says "done" but also "not integrated yet".
+     */
+    responseIndicatesIncompleteWork(response) { // returns true as soon as one pattern below matches the response text, otherwise false
+        // Patterns that indicate work isn't actually complete
+        // Organized by category for maintainability
+        const incompletePatterns = [ // every literal carries only the /i flag (no /g), so test() keeps no lastIndex state between calls
+            // === INTEGRATION/DEPLOYMENT STATE ===
+            // "hasn't been integrated/implemented/connected yet"
+            /hasn'?t\s+been\s+(integrated|implemented|connected|deployed|added|completed|tested|verified)\s*(yet|still)?/i,
+            // "not yet integrated/implemented" or "not integrated"
+            /not\s+(yet\s+)?(integrated|implemented|connected|deployed|functional|working|complete|tested|verified)/i,
+            // "ready for integration" = NOT integrated
+            /ready\s+(for|to\s+be)\s+(integration|integrated|connected|deployed|testing|review)/i,
+            // "needs to be integrated"
+            /needs?\s+to\s+be\s+(integrated|connected|deployed|added|hooked|wired|tested|reviewed|merged)/i,
+            // Passive voice: "was not performed/completed"
+            /was\s+not\s+(performed|completed|implemented|deployed|integrated|tested)/i,
+            // "the [X] service hasn't been"
+            /the\s+\w+\s+(service|module|component|feature)\s+hasn'?t\s+been/i,
+            // === PARTIAL/INCOMPLETE STATE ===
+            // "still stores/uses/has" (current bad state persists)
+            /still\s+(stores?|uses?|has|contains?|needs?|requires?|missing|lacks?|broken)/i,
+            // Partial completion: "partially", "mostly", "almost"
+            /\b(partially|mostly|almost|nearly|not\s+fully)\s+(complete|done|finished|implemented|working)/i,
+            // Explicit partial: "part of", "some of", "half of"
+            /\b(only\s+)?(part|some|half|portion)\s+of\s+(the\s+)?(task|work|feature|implementation)/i,
+            // === QUALIFIER WORDS (uncertain completion) ===
+            // "should be complete", "appears complete", "theoretically"
+            /\b(should|might|may|could|appears?\s+to)\s+be\s+(complete|done|working|functional)/i,
+            /\btheoretically\s+(complete|done|working|functional)/i,
+            // "assuming", "if everything works"
+            /\b(assuming|provided|if)\s+(everything|it|this|that)\s+(works?|is\s+correct)/i,
+            // === SELF-CONTRADICTION PHRASES ===
+            // "done but...", "complete except...", "finished however..."
+            /\b(done|complete|finished)\s+(but|except|however|although|though)/i,
+            // "however" followed by incomplete indicator
+            /however[,\s].{0,50}?(hasn'?t|not\s+yet|still\s+needs?|pending|remains?|missing|broken|failing)/i,
+            // "but" followed by negative state (NOTE(review): the alternatives are not word-bounded, so "but nothing ..." also matches through "not" - confirm intended)
+            /\bbut\s+.{0,30}?(not|hasn'?t|won'?t|can'?t|doesn'?t|isn'?t|wasn'?t)/i,
+            // === FUTURE TENSE / DEFERRED WORK ===
+            // "will need to", "will require"
+            /will\s+(need\s+to|require|have\s+to)\s+(integrate|connect|deploy|complete|implement|test|fix)/i,
+            // Deferred: "left as", "deferred", "postponed", "out of scope"
+            /\b(left\s+as|deferred|postponed|out\s+of\s+scope|for\s+later|in\s+a\s+future)/i,
+            // Time-dependent: "after restart", "takes effect after", "once you"
+            /\b(after\s+(restart|reboot|redeploy)|takes?\s+effect\s+after|once\s+you)/i,
+            // === REMAINING WORK INDICATORS ===
+            // "remaining tasks", "outstanding items"
+            /\b(remaining|outstanding|pending|leftover)\s+(tasks?|items?|work|issues?|steps?)/i,
+            // "X more to do", "still have to"
+            /\b(more\s+to\s+do|still\s+have\s+to|yet\s+to\s+be\s+done)/i,
+            // Explicit blockers
+            /\b(blocker|blocked\s+by|waiting\s+(for|on)|depends?\s+on)/i,
+            // === ERROR/FAILURE STATE ===
+            // "failing tests", "build errors"
+            /\b(failing|broken|erroring)\s+(tests?|builds?|checks?|validations?)/i,
+            // "tests? (are )?(still )?failing"
+            /\btests?\s+(are\s+)?(still\s+)?failing/i,
+            // "errors? to (address|fix)"
+            /\b(errors?|warnings?|issues?)\s+to\s+(address|fix|resolve)/i,
+            // "doesn't work", "isn't working", "not working"
+            /\b(doesn'?t|isn'?t|not)\s+(work|working|functional|functioning)/i,
+            // === MANUAL STEPS REQUIRED ===
+            // "you'll need to", "manually run", "requires user"
+            /\b(you('ll|\s+will)\s+need\s+to|manually\s+(run|configure|set|update)|requires?\s+user)/i,
+            // "run this command", "execute the following"
+            /\b(run\s+this|execute\s+the\s+following|apply\s+the\s+migration)/i,
+            // === TODO/FIXME IN PROSE ===
+            // TODO or FIXME mentioned as remaining work (not in code blocks)
+            /\b(todo|fixme|hack|xxx):\s/i,
+            // "need to add", "should implement"
+            /\b(need\s+to|should|must)\s+(add|implement|create|write|build|fix)\b/i,
+            // === SCOPE LIMITATIONS ===
+            // "didn't have time", "ran out of time"
+            /\b(didn'?t|did\s+not)\s+have\s+(time|chance|opportunity)/i,
+            // "beyond scope", "outside scope"
+            /\b(beyond|outside)\s+(the\s+)?scope/i,
+            // "for now" (temporary state)
+            /\b(for\s+now|at\s+this\s+point|currently)\s*.{0,20}?(not|without|lacks?|missing)/i,
+        ];
+        for (const pattern of incompletePatterns) {
+            if (pattern.test(response)) {
+                return true;
+            }
+        }
+        return false;
+    }
     /**
      * Extract tool names from a response by looking for tool call patterns
      */
@@ -6531,12 +6772,11 @@ Return ONLY JSON array:
                     const activity = normalized ? `Working: ${normalized}` : 'Working';
                     this.renderer?.setActivity(activity);
                 },
-                onBeforeFirstToolCall: (toolNames, _hasNarration) => {
+                onBeforeFirstToolCall: (toolNames) => { // shows the first tool name as the renderer activity label; always returns undefined
                     const primaryTool = toolNames[0];
                     if (primaryTool) {
                         this.renderer?.setActivity(`Running ${primaryTool}`);
                     }
-                    // Don't inject synthetic thinking blocks - let the model respond naturally
                     return undefined;
                 },
                 onStreamChunk: (chunk, type) => {
@@ -6685,14 +6925,10 @@ Return ONLY JSON array:
                             activity = `Reading ${path}`;
                         }
                         this.renderer?.setActivity(activity);
-                        // Estimate tokens for tool call (~50 tokens per call)
-                        this.streamingTokenCount += 50;
-                        this.renderer?.updateStreamingTokens(this.streamingTokenCount);
+                        // Token count updated from real provider usage - do not estimate
                     }
                     else {
-                        // Tool finished - estimate result tokens (~100 per result)
-                        this.streamingTokenCount += 100;
-                        this.renderer?.updateStreamingTokens(this.streamingTokenCount);
+                        // Tool finished - token count updated from real provider usage
                         // Reset to thinking state while model generates next response
                         this.renderer?.setActivity('Thinking');
                     }
@@ -6701,6 +6937,14 @@ Return ONLY JSON array:
                     this.lastAssistantResponse = response;
                     void this.runAutoQualityChecks('verification', response, context);
                 },
+                // Real token usage from provider during streaming
+                onUsage: (usage) => {
+                    const totalTokens = this.totalTokens(usage); // presumably null when the usage payload has no usable totals - confirm against totalTokens()
+                    if (totalTokens !== null) { // a null total is skipped, leaving the current count and display untouched
+                        this.streamingTokenCount = totalTokens; // overwrites the running count rather than adding to it
+                        this.renderer?.updateStreamingTokens(this.streamingTokenCount);
+                    }
+                },
                 // Retry notification for transient errors
                 onRetrying: (attempt, maxAttempts, error) => {
                     const shortError = error.message.slice(0, 100);
@@ -6710,6 +6954,8 @@ Return ONLY JSON array:
             });
             // Register global AI enhancer for explore tool - uses active model by default
             this.registerExploreAIEnhancer();
+            // Fetch real context window from provider API (async, updates in background)
+            void this.fetchAndUpdateContextWindow();
             const allowHistoryRestore = this.sessionRestoreConfig.mode !== 'none';
             const historyToLoad = allowHistoryRestore && this.pendingHistoryLoad && this.pendingHistoryLoad.length
                 ? this.pendingHistoryLoad