npm - @cereworker/core - Version diff - 26.330.1 → 26.330.3 - Mend

@cereworker/core 26.330.1 → 26.330.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/dist/events.d.ts +21 -1
package/dist/events.d.ts.map +1 -1
package/dist/events.js.map +1 -1
package/dist/index.d.ts +1 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js.map +1 -1
package/dist/orchestrator.d.ts +23 -6
package/dist/orchestrator.d.ts.map +1 -1
package/dist/orchestrator.js +572 -146
package/dist/orchestrator.js.map +1 -1
package/dist/system-prompt.d.ts.map +1 -1
package/dist/system-prompt.js +4 -3
package/dist/system-prompt.js.map +1 -1
package/dist/types.d.ts +58 -0
package/dist/types.d.ts.map +1 -1
package/package.json +2 -2

package/dist/orchestrator.js CHANGED

@@ -11,8 +11,26 @@ import { ToolRuntime, } from './tool-runtime.js';
 const log = createLogger('orchestrator');
 const TASK_COMPLETE_TOOL = 'task_complete';
 const TASK_BLOCKED_TOOL = 'task_blocked';
-const INTERNAL_TASK_SIGNAL_TOOL_NAMES = new Set([TASK_COMPLETE_TOOL, TASK_BLOCKED_TOOL]);
-const COMPLETION_RETRY_PROMPT = '[Cerebellum] Your last turn ended without a final answer. Continue from where you left off and end by calling task_complete or task_blocked before your final answer.';
+const TASK_CHECKPOINT_TOOL = 'task_checkpoint';
+const INTERNAL_TASK_TOOL_NAMES = new Set([TASK_COMPLETE_TOOL, TASK_BLOCKED_TOOL, TASK_CHECKPOINT_TOOL]);
+const SYSTEM_FALLBACK_COMPLETION_PROMPT = '[System fallback] The last turn ended without a final answer. Continue from the last verified state and end by calling task_complete or task_blocked before your final answer.';
+const SYSTEM_FALLBACK_STALL_PROMPT = '[System fallback] The stalled turn is being retried from the last verified state.';
+const DEBUG_TOOL_OUTPUT_MAX_CHARS = 8_000;
+const DEBUG_TOOL_STRUCTURED_MAX_CHARS = 16_000;
+const READ_ONLY_TOOL_NAMES = new Set([
+    'browserGetText',
+    'browserGetUrl',
+    'browserListTabs',
+    'browserWait',
+    'browserEval',
+    'readFile',
+    'listDirectory',
+    'searchFiles',
+    'glob',
+    'memory_read',
+    'webSearch',
+    'httpFetch',
+]);
 export class Orchestrator extends TypedEventEmitter {
     conversations;
     cerebrum = null;
@@ -49,6 +67,7 @@ export class Orchestrator extends TypedEventEmitter {
     lastStreamActivityAt = 0;
     streamWatchdog = null;
     streamNudgeCount = 0;
+    streamDeferredUntil = 0;
     streamStallThreshold = 30_000;
     maxNudgeRetries = 2;
     maxCompletionRetries = 2;
@@ -56,6 +75,8 @@ export class Orchestrator extends TypedEventEmitter {
     activeToolCall = null;
     currentStreamTurn = null;
     currentAttemptCompletionState = null;
+    currentPartialContent = '';
+    pendingRecoveryDecision = null;
     streamAbortGraceMs = 1_000;
     taskConversations = new Map();
     taskRunning = new Set();
@@ -155,12 +176,26 @@ export class Orchestrator extends TypedEventEmitter {
             },
             execute: async (args) => this.recordCompletionSignal('blocked', args),
         });
+        this.internalTools.set(TASK_CHECKPOINT_TOOL, {
+            description: 'Record a completed or in-progress milestone during a multi-step task. Use this after each major verified step so retries can resume from the right place.',
+            parameters: {
+                type: 'object',
+                properties: {
+                    step: { type: 'string', description: 'Short milestone name, such as "profile continuity checked"' },
+                    status: { type: 'string', enum: ['done', 'in_progress'], description: 'Whether the milestone is done or currently in progress' },
+                    evidence: { type: 'string', description: 'Concrete evidence showing what happened at this milestone' },
+                },
+                required: ['step', 'status', 'evidence'],
+                additionalProperties: false,
+            },
+            execute: async (args) => this.recordTaskCheckpoint(args),
+        });
     }
     getAllTools() {
         return new Map([...this.tools, ...this.internalTools]);
     }
     isInternalTaskSignalTool(name) {
-        return INTERNAL_TASK_SIGNAL_TOOL_NAMES.has(name.trim() || name);
+        return INTERNAL_TASK_TOOL_NAMES.has(name.trim() || name);
     }
     async recordCompletionSignal(signal, args) {
         const state = this.currentAttemptCompletionState;
@@ -185,9 +220,11 @@ export class Orchestrator extends TypedEventEmitter {
                     isError: true,
                 };
             }
-            if (state.successfulExternalToolCount === 0) {
+            const hasVerifiedProgress = state.successfulExternalToolCount > 0
+                || state.continuity.progressLedger.some((entry) => entry.source === 'tool' && !entry.isError);
+            if (!hasVerifiedProgress) {
                 return {
-                    output: 'task_complete requires at least one successful external tool result in this attempt.',
+                    output: 'task_complete requires at least one successful external tool result in this turn.',
                     isError: true,
                 };
             }
@@ -221,6 +258,56 @@ export class Orchestrator extends TypedEventEmitter {
             },
         };
     }
+    async recordTaskCheckpoint(args) {
+        const state = this.currentAttemptCompletionState;
+        if (!state) {
+            return {
+                output: 'No active turn is available for task checkpoint tracking.',
+                isError: true,
+            };
+        }
+        const step = String(args.step ?? '').trim();
+        const evidence = String(args.evidence ?? '').trim();
+        const statusValue = String(args.status ?? '').trim();
+        const status = statusValue === 'done' || statusValue === 'in_progress'
+            ? statusValue
+            : null;
+        if (!step) {
+            return {
+                output: 'A non-empty step field is required.',
+                isError: true,
+            };
+        }
+        if (!status) {
+            return {
+                output: 'status must be either "done" or "in_progress".',
+                isError: true,
+            };
+        }
+        if (!evidence) {
+            return {
+                output: 'A non-empty evidence field is required.',
+                isError: true,
+            };
+        }
+        const checkpoint = this.recordCheckpoint(state.continuity, step, status, evidence);
+        log.info('task_checkpoint_recorded', {
+            turnId: this.currentStreamTurn?.turnId,
+            attempt: this.currentStreamTurn?.attempt,
+            conversationId: this.currentStreamTurn?.conversationId,
+            step,
+            status,
+            evidence,
+        });
+        return {
+            output: `Checkpoint recorded: ${checkpoint.summary}`,
+            isError: false,
+            metadata: {
+                internal: true,
+                checkpoint,
+            },
+        };
+    }
     registerTool(name, tool) {
         if (this.internalTools.has(name)) {
             throw new Error(`Tool name ${name} is reserved for internal task signaling`);
@@ -599,6 +686,8 @@ export class Orchestrator extends TypedEventEmitter {
     resetStreamState() {
         this.streamPhase = 'idle';
         this.activeToolCall = null;
+        this.streamDeferredUntil = 0;
+        this.currentPartialContent = '';
     }
     getStreamDiagnostics(elapsedSeconds) {
         return {
@@ -689,106 +778,201 @@ export class Orchestrator extends TypedEventEmitter {
         }
         this.emit({ type: 'cerebrum:completion', ...payload });
     }
-    createAttemptCompletionState() {
+    createAttemptCompletionState(continuity) {
         return {
             signal: 'none',
             evidence: '',
             successfulExternalToolCount: 0,
             externalToolCallCount: 0,
             internalToolCallCount: 0,
-            recentExternalToolSummaries: [],
+            continuity,
+        };
+    }
+    createTurnContinuityState() {
+        return {
+            progressLedger: [],
+            taskCheckpoints: [],
+            browserState: {},
         };
     }
-    buildStallRetrySnapshot(params) {
+    buildRecoveryRequest(params) {
         const partialContent = this.truncateResumeText(params.partialContent, 600);
-        const recentExternalToolSummaries = params.completionState.recentExternalToolSummaries.slice(-4);
-        if (!partialContent && recentExternalToolSummaries.length === 0 && !params.activeToolName) {
-            return null;
-        }
+        const continuity = params.completionState.continuity;
         return {
+            conversationId: this.currentStreamTurn?.conversationId ?? '',
+            turnId: this.currentStreamTurn?.turnId ?? '',
             attempt: params.attempt,
-            phase: params.phase,
-            activeToolName: params.activeToolName,
-            activeToolCallId: params.activeToolCallId,
+            cause: params.cause,
+            phase: this.streamPhase,
+            activeToolName: this.activeToolCall?.name,
+            activeToolCallId: this.activeToolCall?.id,
+            stallRetryCount: this.streamNudgeCount,
+            completionRetryCount: params.completionRetryCount ?? 0,
+            finishReason: params.finishMeta?.finishReason ?? params.finishMeta?.stepFinishReasons.at(-1),
+            elapsedSeconds: params.elapsedSeconds,
             partialContent: partialContent || undefined,
-            recentExternalToolSummaries,
+            latestUserMessage: params.latestUserMessage ? this.truncateResumeText(params.latestUserMessage, 600) : undefined,
+            progressEntries: continuity.progressLedger.slice(-50).map((entry) => ({ ...entry })),
+            taskCheckpoints: continuity.taskCheckpoints.map((checkpoint) => ({ ...checkpoint })),
+            browserState: this.cloneBrowserState(continuity.browserState),
         };
     }
-    buildStallRetryContextMessage(snapshot) {
-        if (!snapshot)
-            return null;
-        const lines = [
-            '[Watchdog resume context]',
-            `The previous attempt (${snapshot.attempt}) was interrupted after stalling while ${this.describeStreamLocation(snapshot.phase, snapshot.activeToolName)}.`,
-            'IMPORTANT: The tool call history from the failed attempt has been removed from this conversation. The summary below is the authoritative record of what was already done.',
-            'Do NOT repeat these steps. Start from the NEXT action after the last confirmed result.',
-        ];
-        if (snapshot.recentExternalToolSummaries.length > 0) {
-            lines.push('', 'Confirmed external tool results from the interrupted attempt:');
-            for (const summary of snapshot.recentExternalToolSummaries) {
-                const prefix = summary.isError ? '[error]' : '[ok]';
-                lines.push(`- ${summary.toolName}: ${prefix} ${summary.outputPreview}`);
-            }
-        }
-        if (snapshot.activeToolName) {
-            lines.push('', `The attempt was last waiting on: ${snapshot.activeToolName}${snapshot.activeToolCallId ? ` (${snapshot.activeToolCallId})` : ''}.`);
+    emitRecoveryTrace(cause, source, assessment, level = 'info') {
+        if (!this.currentStreamTurn)
+            return;
+        const payload = {
+            type: 'cerebellum:recovery',
+            cause,
+            action: assessment.action,
+            turnId: this.currentStreamTurn.turnId,
+            attempt: this.currentStreamTurn.attempt,
+            conversationId: this.currentStreamTurn.conversationId,
+            message: assessment.operatorMessage,
+            operatorMessage: assessment.operatorMessage,
+            diagnosis: assessment.diagnosis,
+            nextStep: assessment.nextStep,
+            completedSteps: assessment.completedSteps,
+            waitSeconds: assessment.waitSeconds,
+            source,
+            ...this.getStreamDiagnostics(),
+        };
+        switch (level) {
+            case 'debug':
+                log.debug('cerebellum_recovery', payload);
+                break;
+            case 'warn':
+                log.warn('cerebellum_recovery', payload);
+                break;
+            case 'error':
+                log.error('cerebellum_recovery', payload);
+                break;
+            default:
+                log.info('cerebellum_recovery', payload);
+                break;
         }
-        if (snapshot.partialContent) {
-            lines.push('', 'Partial assistant text emitted before interruption:', snapshot.partialContent);
+        this.emit(payload);
+    }
+    async assessTurnRecovery(request) {
+        log.debug('turn_recovery_request', {
+            turnId: request.turnId,
+            attempt: request.attempt,
+            conversationId: request.conversationId,
+            cause: request.cause,
+            phase: request.phase,
+            activeToolName: request.activeToolName,
+            activeToolCallId: request.activeToolCallId,
+            stallRetryCount: request.stallRetryCount,
+            completionRetryCount: request.completionRetryCount,
+            finishReason: request.finishReason,
+            elapsedSeconds: request.elapsedSeconds,
+            hasPartialContent: Boolean(request.partialContent),
+            latestUserMessage: request.latestUserMessage ? this.truncateResumeText(request.latestUserMessage, 300) : '',
+            browserState: request.browserState,
+            progressEntries: request.progressEntries,
+            taskCheckpoints: request.taskCheckpoints,
+        });
+        if (this.cerebellum?.isConnected() && this.cerebellum.assessTurnRecovery) {
+            try {
+                const assessment = await this.cerebellum.assessTurnRecovery(request);
+                if (assessment) {
+                    if (request.cause === 'completion' && assessment.action === 'wait') {
+                        return {
+                            source: 'cerebellum',
+                            assessment: {
+                                ...assessment,
+                                action: 'retry',
+                                waitSeconds: undefined,
+                            },
+                        };
+                    }
+                    return { source: 'cerebellum', assessment };
+                }
+            }
+            catch (error) {
+                log.warn('Turn recovery assessment failed', {
+                    turnId: request.turnId,
+                    attempt: request.attempt,
+                    conversationId: request.conversationId,
+                    cause: request.cause,
+                    error: error instanceof Error ? error.message : String(error),
+                });
+            }
         }
         return {
-            id: `system:stall-retry:${snapshot.attempt}`,
-            role: 'system',
-            content: lines.join('\n'),
-            timestamp: 0,
-            metadata: {
-                transient: true,
-                source: 'watchdog-resume',
-            },
+            source: 'fallback',
+            assessment: this.buildFallbackRecoveryAssessment(request),
         };
     }
-    buildCompletionRetrySnapshot(params) {
-        const partialContent = this.truncateResumeText(params.partialContent, 600);
-        const recentExternalToolSummaries = params.completionState.recentExternalToolSummaries.slice(-4);
-        const finishReason = params.finishMeta?.finishReason ?? params.finishMeta?.stepFinishReasons.at(-1);
-        if (!partialContent && recentExternalToolSummaries.length === 0 && !finishReason) {
-            return null;
+    deriveCompletedSteps(request) {
+        const completed = new Set();
+        for (const checkpoint of request.taskCheckpoints) {
+            if (checkpoint.status === 'done') {
+                completed.add(checkpoint.summary);
+            }
         }
-        return {
-            attempt: params.attempt,
-            finishReason,
-            partialContent: partialContent || undefined,
-            recentExternalToolSummaries,
-        };
-    }
-    buildCompletionRetryContextMessage(snapshot) {
-        if (!snapshot)
-            return null;
+        for (const entry of request.progressEntries) {
+            if (entry.source === 'tool' && entry.stateChanging && !entry.isError) {
+                completed.add(entry.summary);
+            }
+        }
+        return Array.from(completed).slice(-10);
+    }
+    buildFallbackRecoveryAssessment(request, options) {
+        const completedSteps = this.deriveCompletedSteps(request);
+        const browserHints = [];
+        if (request.browserState.currentUrl)
+            browserHints.push(`Current URL: ${request.browserState.currentUrl}`);
+        if (request.browserState.activeTabId)
+            browserHints.push(`Active tab: ${request.browserState.activeTabId}`);
+        const diagnosis = options?.reason
+            ?? (request.cause === 'stall'
+                ? `Recovery guidance is unavailable while the stream is stalled in ${this.describeStreamLocation(request.phase, request.activeToolName)}.`
+                : `Recovery guidance is unavailable after the turn ended with ${request.finishReason ?? 'no final answer'}.`);
+        const nextStep = request.cause === 'stall'
+            ? 'Resume from the last verified browser state and continue with the next unfinished step.'
+            : 'Use the verified progress below to continue from the next unfinished step and avoid repeating confirmed work.';
         const lines = [
-            '[Completion resume context]',
-            `The previous attempt (${snapshot.attempt}) ended without a valid completion${snapshot.finishReason ? ` (finish reason: ${snapshot.finishReason})` : ''}.`,
-            'IMPORTANT: The tool call history from the failed attempt has been removed from this conversation. The summary below is the authoritative record of what was already done.',
-            'Do NOT repeat these steps. Start from the NEXT action after the last confirmed result.',
-            'Continue from that state, then either finish the task or report a concrete blocker. End by calling task_complete or task_blocked before your final answer.',
+            '[System fallback recovery]',
+            diagnosis,
+            'The failed attempt tool history has been removed; rely on this verified summary instead.',
         ];
-        if (snapshot.recentExternalToolSummaries.length > 0) {
-            lines.push('', 'Confirmed external tool results from the previous attempt:');
-            for (const summary of snapshot.recentExternalToolSummaries) {
-                const prefix = summary.isError ? '[error]' : '[ok]';
-                lines.push(`- ${summary.toolName}: ${prefix} ${summary.outputPreview}`);
-            }
+        if (completedSteps.length > 0) {
+            lines.push('', 'Completed steps:');
+            for (const step of completedSteps)
+                lines.push(`- ${step}`);
         }
-        if (snapshot.partialContent) {
-            lines.push('', 'Partial assistant text emitted before the attempt ended:', snapshot.partialContent);
+        if (browserHints.length > 0) {
+            lines.push('', 'Last known browser state:');
+            for (const hint of browserHints)
+                lines.push(`- ${hint}`);
         }
+        if (request.partialContent) {
+            lines.push('', 'Partial assistant text from the failed attempt:', request.partialContent);
+        }
+        lines.push('', `Next step: ${nextStep}`);
+        lines.push('Only repeat a completed action if the current page state clearly contradicts this summary.');
+        lines.push('End your final answer by calling task_complete or task_blocked.');
         return {
-            id: `system:completion-retry:${snapshot.attempt}`,
+            action: options?.action ?? 'retry',
+            operatorMessage: request.cause === 'stall'
+                ? SYSTEM_FALLBACK_STALL_PROMPT
+                : SYSTEM_FALLBACK_COMPLETION_PROMPT,
+            modelMessage: lines.join('\n'),
+            diagnosis,
+            nextStep,
+            completedSteps,
+        };
+    }
+    buildRetryContextMessage(cause, attempt, modelMessage, source) {
+        return {
+            id: `system:${cause}-retry:${attempt}`,
             role: 'system',
-            content: lines.join('\n'),
+            content: modelMessage,
             timestamp: 0,
             metadata: {
                 transient: true,
-                source: 'completion-resume',
+                source: cause === 'stall' ? 'watchdog-resume' : 'completion-resume',
+                recoverySource: source,
             },
         };
     }
@@ -803,16 +987,171 @@ export class Orchestrator extends TypedEventEmitter {
             return normalized;
         return `${normalized.slice(0, Math.max(0, maxChars - 1)).trimEnd()}…`;
     }
-    recordAttemptToolSummary(completionState, toolName, result) {
-        completionState.recentExternalToolSummaries.push({
-            toolName,
-            outputPreview: this.formatToolOutputPreview(result.output),
+    serializeDebugValue(value, maxChars) {
+        const raw = typeof value === 'string'
+            ? value
+            : JSON.stringify(value, null, 2) ?? String(value);
+        if (raw.length <= maxChars) {
+            return { value: raw, truncated: false };
+        }
+        return {
+            value: `${raw.slice(0, Math.max(0, maxChars - 1)).trimEnd()}…`,
+            truncated: true,
+        };
+    }
+    buildToolDebugPayload(toolCall, result, toolName) {
+        const argsPreview = this.serializeDebugValue(toolCall.args, DEBUG_TOOL_STRUCTURED_MAX_CHARS);
+        if (!result) {
+            return {
+                requestedToolName: toolCall.name,
+                toolName: toolName ?? (toolCall.name.trim() || toolCall.name),
+                toolCallId: toolCall.id,
+                toolArgs: argsPreview.value,
+                debugPayloadTruncated: argsPreview.truncated,
+            };
+        }
+        const outputPreview = this.serializeDebugValue(result.output, DEBUG_TOOL_OUTPUT_MAX_CHARS);
+        const detailsPreview = result.details
+            ? this.serializeDebugValue(result.details, DEBUG_TOOL_STRUCTURED_MAX_CHARS)
+            : null;
+        const resumeMetadata = result.metadata && typeof result.metadata === 'object'
+            ? (result.metadata.resume ?? null)
+            : null;
+        const resumePreview = resumeMetadata
+            ? this.serializeDebugValue(resumeMetadata, DEBUG_TOOL_STRUCTURED_MAX_CHARS)
+            : null;
+        return {
+            requestedToolName: toolCall.name,
+            toolName: toolName ?? (toolCall.name.trim() || toolCall.name),
+            toolCallId: toolCall.id,
+            toolArgs: argsPreview.value,
+            toolOutput: outputPreview.value,
+            toolDetails: detailsPreview?.value ?? null,
+            toolResume: resumePreview?.value ?? null,
             isError: result.isError,
+            warnings: result.warnings ?? [],
+            truncated: result.truncated ?? false,
+            debugPayloadTruncated: argsPreview.truncated || outputPreview.truncated || Boolean(detailsPreview?.truncated) || Boolean(resumePreview?.truncated),
+        };
+    }
+    recordCheckpoint(continuity, step, status, evidence) {
+        const checkpoint = {
+            step,
+            status,
+            evidence,
+            summary: `${step} (${status}): ${this.truncateResumeText(evidence, 220)}`,
+        };
+        const existingIndex = continuity.taskCheckpoints.findIndex((entry) => entry.step === step);
+        if (existingIndex >= 0) {
+            continuity.taskCheckpoints[existingIndex] = checkpoint;
+        }
+        else {
+            continuity.taskCheckpoints.push(checkpoint);
+        }
+        this.recordProgressEntry(continuity, {
+            source: 'checkpoint',
+            action: 'task_checkpoint',
+            summary: checkpoint.summary,
+            stateChanging: status === 'done',
+            isError: false,
+            checkpointStatus: status,
         });
-        if (completionState.recentExternalToolSummaries.length > 6) {
-            completionState.recentExternalToolSummaries.splice(0, completionState.recentExternalToolSummaries.length - 6);
+        return checkpoint;
+    }
+    recordAttemptToolProgress(completionState, toolName, result) {
+        const continuity = completionState.continuity;
+        const entry = this.createProgressEntry(toolName, result);
+        if (!entry)
+            return;
+        this.recordProgressEntry(continuity, entry);
+        this.updateBrowserState(continuity.browserState, result);
+    }
+    createProgressEntry(toolName, result) {
+        const resume = this.getBrowserResumeMetadata(result);
+        if (resume?.summary) {
+            return {
+                source: 'tool',
+                toolName,
+                action: resume.action ?? toolName,
+                summary: this.truncateResumeText(resume.summary, 220),
+                url: resume.url,
+                tabId: resume.tabId ?? resume.activeTabId,
+                stateChanging: resume.stateChanging ?? this.isLikelyStateChangingTool(toolName),
+                isError: result.isError,
+            };
+        }
+        const outputPreview = this.formatToolOutputPreview(result.output);
+        if (!outputPreview)
+            return null;
+        return {
+            source: 'tool',
+            toolName,
+            action: toolName,
+            summary: `${toolName}: ${outputPreview}`,
+            stateChanging: this.isLikelyStateChangingTool(toolName),
+            isError: result.isError,
+        };
+    }
+    recordProgressEntry(continuity, entry) {
+        const last = continuity.progressLedger.at(-1);
+        if (last
+            && entry.source === 'tool'
+            && last.source === 'tool'
+            && !entry.stateChanging
+            && !last.stateChanging
+            && last.action === entry.action
+            && last.summary === entry.summary
+            && last.url === entry.url
+            && last.tabId === entry.tabId) {
+            return;
+        }
+        continuity.progressLedger.push(entry);
+        while (continuity.progressLedger.length > 50) {
+            const removableIndex = continuity.progressLedger.findIndex((candidate) => candidate.source === 'tool' && !candidate.stateChanging);
+            continuity.progressLedger.splice(removableIndex >= 0 ? removableIndex : 0, 1);
         }
     }
+    getBrowserResumeMetadata(result) {
+        const metadata = result.metadata;
+        if (!metadata || typeof metadata !== 'object')
+            return null;
+        const resume = metadata.resume;
+        if (!resume || typeof resume !== 'object')
+            return null;
+        return resume;
+    }
+    updateBrowserState(browserState, result) {
+        const resume = this.getBrowserResumeMetadata(result);
+        if (!resume)
+            return;
+        if (resume.url) {
+            browserState.currentUrl = resume.url;
+        }
+        if (resume.activeTabId) {
+            browserState.activeTabId = resume.activeTabId;
+        }
+        else if (resume.tabId && resume.stateChanging) {
+            browserState.activeTabId = resume.tabId;
+        }
+        if (resume.tabs?.length) {
+            browserState.tabs = resume.tabs.map((tab) => ({ ...tab }));
+            const active = resume.tabs.find((tab) => tab.active);
+            if (active) {
+                browserState.activeTabId = active.id;
+                browserState.currentUrl = active.url;
+            }
+        }
+    }
+    cloneBrowserState(browserState) {
+        return {
+            currentUrl: browserState.currentUrl,
+            activeTabId: browserState.activeTabId,
+            tabs: browserState.tabs?.map((tab) => ({ ...tab })),
+        };
+    }
+    isLikelyStateChangingTool(toolName) {
+        return !READ_ONLY_TOOL_NAMES.has(toolName);
+    }
     evaluateCompletionGuard(displayContent, finishMeta, completionState) {
         const trimmedContent = displayContent.trim();
         const hadExternalToolActivity = completionState.externalToolCallCount > 0;
@@ -886,10 +1225,15 @@ export class Orchestrator extends TypedEventEmitter {
             streamPromise.then(settleResolve, settleReject);
         });
     }
-    startStreamWatchdog() {
+    startStreamWatchdog(latestUserMessage) {
         this.stopStreamWatchdog();
         this.markStreamWaitingModel();
+        this.streamDeferredUntil = 0;
         this.streamWatchdog = setInterval(() => {
+            if (!this.currentAttemptCompletionState || !this.currentStreamTurn)
+                return;
+            if (this.streamDeferredUntil > Date.now())
+                return;
             const elapsed = Date.now() - this.lastStreamActivityAt;
             const stallThresholdMs = this.getCurrentStallThresholdMs();
             if (elapsed < stallThresholdMs)
@@ -903,31 +1247,60 @@ export class Orchestrator extends TypedEventEmitter {
             this.emitWatchdog('stalled', `Stalled after ${elapsedSeconds}s while ${this.describeStreamLocation()}.`, { level: 'warn', elapsedSeconds });
             this.emit({ type: 'cerebrum:stall', ...diagnostics });
             if (!this.cerebellum?.isConnected()) {
-                // Cerebellum dropped mid-stream — abort the current turn
                 this.emitWatchdog('abort_issued', 'Cerebellum disconnected during an active stream; aborting the turn.', { level: 'warn', elapsedSeconds });
                 this.abortController?.abort();
                 return;
             }
             this._nudgeInFlight = true;
-            const doNudge = () => {
-                this.streamNudgeCount++;
-                this.emitWatchdog('nudge_requested', `Cerebellum requested nudge ${this.streamNudgeCount}/${this.maxNudgeRetries} after ${elapsedSeconds}s while ${this.describeStreamLocation()}.`, { level: 'info', elapsedSeconds });
-                this.emit({ type: 'cerebrum:stall:nudge', attempt: this.streamNudgeCount, ...diagnostics });
-                this.emitWatchdog('abort_issued', `Aborting stalled stream attempt ${this.currentStreamTurn?.attempt ?? 0}.`, { level: 'warn', elapsedSeconds });
-                this.abortController?.abort();
-            };
             void (async () => {
                 try {
-                    const result = await this.cerebellum.verifyToolResult('stream_watchdog', { action: 'check_stall', elapsed: String(elapsedSeconds) }, `Stream silent for ${elapsedSeconds}s — no chunks or tool calls received`, false);
-                    // Cerebellum decides: passed=false → nudge. passed=true → wait.
-                    // null (disconnected mid-call) → nudge as safety fallback.
-                    if (!result || !result.passed) {
-                        doNudge();
+                    const request = this.buildRecoveryRequest({
+                        cause: 'stall',
+                        attempt: this.currentStreamTurn.attempt,
+                        partialContent: this.currentPartialContent,
+                        completionState: this.currentAttemptCompletionState,
+                        latestUserMessage,
+                        elapsedSeconds,
+                    });
+                    const { source, assessment } = await this.assessTurnRecovery(request);
+                    this.emitRecoveryTrace('stall', source, assessment, assessment.action === 'stop' ? 'warn' : 'info');
+                    if (assessment.action === 'wait') {
+                        const waitSeconds = Math.max(15, assessment.waitSeconds ?? this.streamStallThreshold / 1000);
+                        this.streamDeferredUntil = Date.now() + (waitSeconds * 1000);
+                        return;
+                    }
+                    if (assessment.action === 'retry') {
+                        this.streamNudgeCount++;
+                        this.pendingRecoveryDecision = { cause: 'stall', source, assessment };
+                        this.emitWatchdog('nudge_requested', `Cerebellum requested nudge ${this.streamNudgeCount}/${this.maxNudgeRetries} after ${elapsedSeconds}s while ${this.describeStreamLocation()}.`, { level: 'info', elapsedSeconds });
+                        this.emit({ type: 'cerebrum:stall:nudge', attempt: this.streamNudgeCount, ...diagnostics });
+                        this.emitWatchdog('abort_issued', `Aborting stalled stream attempt ${this.currentStreamTurn?.attempt ?? 0}.`, { level: 'warn', elapsedSeconds });
+                        this.abortController?.abort();
+                        return;
                     }
+                    this.pendingRecoveryDecision = { cause: 'stall', source, assessment };
+                    this.emitWatchdog('abort_issued', 'Aborting stalled stream because recovery guidance requested stop.', { level: 'warn', elapsedSeconds });
+                    this.abortController?.abort();
                 }
                 catch {
-                    // gRPC error (including deadline exceeded) → nudge
-                    doNudge();
+                    const request = this.buildRecoveryRequest({
+                        cause: 'stall',
+                        attempt: this.currentStreamTurn.attempt,
+                        partialContent: this.currentPartialContent,
+                        completionState: this.currentAttemptCompletionState,
+                        latestUserMessage,
+                        elapsedSeconds,
+                    });
+                    const assessment = this.buildFallbackRecoveryAssessment(request, {
+                        reason: `Recovery assessment failed after ${elapsedSeconds}s while ${this.describeStreamLocation()}.`,
+                    });
+                    this.pendingRecoveryDecision = { cause: 'stall', source: 'fallback', assessment };
+                    this.emitRecoveryTrace('stall', 'fallback', assessment, 'warn');
+                    this.streamNudgeCount++;
+                    this.emitWatchdog('nudge_requested', `Fallback retry ${this.streamNudgeCount}/${this.maxNudgeRetries} after ${elapsedSeconds}s while ${this.describeStreamLocation()}.`, { level: 'info', elapsedSeconds });
+                    this.emit({ type: 'cerebrum:stall:nudge', attempt: this.streamNudgeCount, ...diagnostics });
+                    this.emitWatchdog('abort_issued', `Aborting stalled stream attempt ${this.currentStreamTurn?.attempt ?? 0}.`, { level: 'warn', elapsedSeconds });
+                    this.abortController?.abort();
                 }
                 finally {
                     this._nudgeInFlight = false;
@@ -956,6 +1329,9 @@ export class Orchestrator extends TypedEventEmitter {
             const userMessage = this.conversations.appendMessage(convId, 'user', content);
             this.emit({ type: 'message:user', message: userMessage });
         }
+        const latestUserMessage = content
+            || [...this.conversations.getMessages(convId)].reverse().find((message) => message.role === 'user')?.content
+            || '';
         this.streamNudgeCount = 0;
         let completionRetryCount = 0;
         let nextRetryContext = null;
@@ -965,13 +1341,14 @@ export class Orchestrator extends TypedEventEmitter {
         const maxTotalAttempts = 1 + this.maxNudgeRetries + this.maxCompletionRetries;
         let loopTerminated = false;
         let nextRetryCause = null;
+        const turnContinuity = this.createTurnContinuityState();
         try {
             for (let attempt = 0; attempt < maxTotalAttempts; attempt++) {
                 const abortController = new AbortController();
                 const attemptNumber = attempt + 1;
                 const retryCause = nextRetryCause;
                 nextRetryCause = null;
-                const completionState = this.createAttemptCompletionState();
+                const completionState = this.createAttemptCompletionState(turnContinuity);
                 let completionGuardFailure = null;
                 const stallRetryCountAtStart = this.streamNudgeCount;
                 const attemptMessageIds = [];
@@ -983,6 +1360,8 @@ export class Orchestrator extends TypedEventEmitter {
                     attempt: attemptNumber,
                     conversationId: convId,
                 };
+                this.currentPartialContent = '';
+                this.pendingRecoveryDecision = null;
                 log.info('stream_started', {
                     turnId,
                     attempt: attemptNumber,
@@ -992,7 +1371,7 @@ export class Orchestrator extends TypedEventEmitter {
                     retryCause,
                 });
                 this.emit({ type: 'message:cerebrum:start', conversationId: convId });
-                this.startStreamWatchdog();
+                this.startStreamWatchdog(latestUserMessage);
                 let messages = this.conversations.getMessages(convId);
                 // On retry: exclude failed attempts' messages from history.
                 // The resume context already summarizes what happened — sending the raw tool calls
@@ -1061,6 +1440,8 @@ export class Orchestrator extends TypedEventEmitter {
                 ];
                 const toolDefs = Object.fromEntries(allTools);
                 let fullContent = '';
+                let finalDisplayContent = '';
+                let attemptFinishMeta;
                 const throwIfToolAttemptAborted = () => {
                     if (!isCurrentAttempt()) {
                         throw createAbortError('Tool execution aborted');
@@ -1073,15 +1454,13 @@ export class Orchestrator extends TypedEventEmitter {
                             if (!isCurrentAttempt() || abortController.signal.aborted)
                                 return;
                             fullContent += chunk;
+                            this.currentPartialContent = fullContent;
                             this.markStreamWaitingModel();
                             this.emit({ type: 'message:cerebrum:chunk', chunk });
                         },
                         onToolCall: async (toolCall) => {
                             throwIfToolAttemptAborted();
-                            this.logStreamDebug('tool_callback_started', {
-                                toolName: toolCall.name.trim() || toolCall.name,
-                                toolCallId: toolCall.id,
-                            });
+                            this.logStreamDebug('tool_callback_started', this.buildToolDebugPayload(toolCall));
                             this.markStreamWaitingTool(toolCall);
                             const requestedToolName = toolCall.name;
                             const normalizedToolName = requestedToolName.trim() || requestedToolName;
@@ -1092,7 +1471,13 @@ export class Orchestrator extends TypedEventEmitter {
                             else {
                                 completionState.externalToolCallCount++;
                                 this.emit({ type: 'message:cerebrum:toolcall', toolCall: { ...toolCall, name: normalizedToolName } });
-                                this.emit({ type: 'tool:start', callId: toolCall.id, name: normalizedToolName });
+                                this.emit({
+                                    type: 'tool:start',
+                                    callId: toolCall.id,
+                                    name: normalizedToolName,
+                                    requestedName: requestedToolName !== normalizedToolName ? requestedToolName : undefined,
+                                    args: toolCall.args,
+                                });
                             }
                             const { toolName, result } = await this.toolRuntime.execute({
                                 toolCall,
@@ -1102,15 +1487,18 @@ export class Orchestrator extends TypedEventEmitter {
                                 scopeKey: convId,
                                 abortSignal: abortController.signal,
                             });
-                            this.logStreamDebug('tool_callback_finished', {
-                                toolName,
-                                toolCallId: toolCall.id,
-                                isError: result.isError,
-                            });
+                            this.logStreamDebug('tool_callback_finished', this.buildToolDebugPayload(toolCall, result, toolName));
                             throwIfAborted(abortController.signal, 'Tool execution aborted');
                             this.markStreamWaitingModel();
                             if (!isInternalTaskSignal) {
-                                this.emit({ type: 'tool:end', result });
+                                this.emit({
+                                    type: 'tool:end',
+                                    callId: toolCall.id,
+                                    name: toolName,
+                                    requestedName: requestedToolName !== toolName ? requestedToolName : undefined,
+                                    args: toolCall.args,
+                                    result,
+                                });
                             }
                             if (!isInternalTaskSignal && !result.isError) {
                                 completionState.successfulExternalToolCount++;
@@ -1152,7 +1540,7 @@ export class Orchestrator extends TypedEventEmitter {
                             }
                             throwIfToolAttemptAborted();
                             if (!isInternalTaskSignal) {
-                                this.recordAttemptToolSummary(completionState, toolName, result);
+                                this.recordAttemptToolProgress(completionState, toolName, result);
                                 const toolMsg = this.conversations.appendMessage(convId, 'tool', result.output, {
                                     toolResult: result,
                                     metadata: {
@@ -1169,6 +1557,8 @@ export class Orchestrator extends TypedEventEmitter {
                                 return;
                             this.stopStreamWatchdog();
                             let displayContent = content;
+                            finalDisplayContent = content;
+                            attemptFinishMeta = finishMeta;
                             const visibleToolCalls = toolCalls?.filter((toolCall) => !this.isInternalTaskSignalTool(toolCall.name));
                             log.info('stream_finish_observed', {
                                 turnId,
@@ -1189,6 +1579,7 @@ export class Orchestrator extends TypedEventEmitter {
                                 displayContent = content
                                     .replace(/<discovery_complete>[\s\S]*?<\/discovery_complete>/g, '')
                                     .trim();
+                                finalDisplayContent = displayContent;
                                 if (parsed && this.onDiscoveryComplete) {
                                     this.discoveryMode = false;
                                     this.onDiscoveryComplete(parsed);
@@ -1198,23 +1589,7 @@ export class Orchestrator extends TypedEventEmitter {
                             const guardFailure = this.evaluateCompletionGuard(displayContent, finishMeta, completionState);
                             if (guardFailure) {
                                 completionGuardFailure = guardFailure;
-                                nextRetryContext = this.buildCompletionRetryContextMessage(this.buildCompletionRetrySnapshot({
-                                    attempt: attemptNumber,
-                                    partialContent: fullContent || displayContent,
-                                    completionState,
-                                    finishMeta,
-                                }));
-                                if (nextRetryContext) {
-                                    log.info('completion_retry_context_prepared', {
-                                        turnId,
-                                        attempt: attemptNumber,
-                                        conversationId: convId,
-                                        finishReason: finishMeta?.finishReason,
-                                        rawFinishReason: finishMeta?.rawFinishReason,
-                                        hasPartialContent: (fullContent || displayContent).trim().length > 0,
-                                        recentToolSummaries: completionState.recentExternalToolSummaries.length,
-                                    });
-                                }
+                                finalDisplayContent = displayContent;
                                 this.emitCompletionTrace('guard_triggered', guardFailure.message, guardFailure.signal, 'warn');
                                 log.warn('completion_guard_triggered', {
                                     turnId,
@@ -1270,9 +1645,50 @@ export class Orchestrator extends TypedEventEmitter {
                     const completionFailure = completionGuardFailure;
                     if (completionFailure !== null) {
                         const completionSignal = completionFailure.signal;
+                        const recoveryRequest = this.buildRecoveryRequest({
+                            cause: 'completion',
+                            attempt: attemptNumber,
+                            partialContent: fullContent || finalDisplayContent,
+                            completionState,
+                            latestUserMessage,
+                            completionRetryCount,
+                            finishMeta: attemptFinishMeta,
+                        });
+                        const { source, assessment } = await this.assessTurnRecovery(recoveryRequest);
+                        this.emitRecoveryTrace('completion', source, assessment, assessment.action === 'stop' ? 'warn' : 'info');
+                        nextRetryContext = this.buildRetryContextMessage('completion', attemptNumber, assessment.modelMessage, source);
+                        log.info('completion_retry_context_prepared', {
+                            turnId,
+                            attempt: attemptNumber,
+                            conversationId: convId,
+                            source,
+                            action: assessment.action,
+                            finishReason: attemptFinishMeta?.finishReason,
+                            rawFinishReason: attemptFinishMeta?.rawFinishReason,
+                            hasPartialContent: (fullContent || finalDisplayContent).trim().length > 0,
+                            progressEntries: completionState.continuity.progressLedger.length,
+                            taskCheckpoints: completionState.continuity.taskCheckpoints.length,
+                            completedSteps: assessment.completedSteps,
+                            nextStep: assessment.nextStep,
+                        });
+                        if (assessment.action === 'stop') {
+                            failedAttemptMessageIds.push(...attemptMessageIds);
+                            const diagnosticMessage = this.conversations.appendMessage(convId, 'system', assessment.operatorMessage);
+                            this.emit({ type: 'message:system', message: diagnosticMessage });
+                            this.emitCompletionTrace('retry_failed', assessment.diagnosis, completionSignal, 'error');
+                            this.emit({
+                                type: 'error',
+                                error: new Error(assessment.diagnosis || 'Turn ended without a valid completion signal or final answer.'),
+                            });
+                            if (failedAttemptMessageIds.length > 0) {
+                                this.conversations.deleteMessages(convId, failedAttemptMessageIds);
+                            }
+                            loopTerminated = true;
+                            break;
+                        }
                         if (completionRetryCount < this.maxCompletionRetries) {
                             completionRetryCount++;
-                            const systemMessage = this.conversations.appendMessage(convId, 'system', COMPLETION_RETRY_PROMPT);
+                            const systemMessage = this.conversations.appendMessage(convId, 'system', assessment.operatorMessage);
                             attemptMessageIds.push(systemMessage.id);
                             failedAttemptMessageIds.push(...attemptMessageIds);
                             this.emit({ type: 'message:system', message: systemMessage });
@@ -1281,12 +1697,14 @@ export class Orchestrator extends TypedEventEmitter {
                             continue;
                         }
                         failedAttemptMessageIds.push(...attemptMessageIds);
-                        const diagnosticMessage = this.conversations.appendMessage(convId, 'system', '[Cerebellum] The turn ended repeatedly without a valid completion signal or final answer.');
+                        const diagnosticMessage = this.conversations.appendMessage(convId, 'system', source === 'cerebellum'
+                            ? '[Cerebellum] The turn ended repeatedly without a valid completion signal or final answer.'
+                            : '[System fallback] The turn ended repeatedly without a valid completion signal or final answer.');
                         this.emit({ type: 'message:system', message: diagnosticMessage });
-                        this.emitCompletionTrace('retry_failed', `Completion retries exhausted after ${completionRetryCount}/${this.maxCompletionRetries}: ${completionFailure.message}`, completionSignal, 'error');
+                        this.emitCompletionTrace('retry_failed', `Completion retries exhausted after ${completionRetryCount}/${this.maxCompletionRetries}: ${assessment.diagnosis || completionFailure.message}`, completionSignal, 'error');
                         this.emit({
                             type: 'error',
-                            error: new Error('Turn ended without a valid completion signal or final answer.'),
+                            error: new Error(assessment.diagnosis || 'Turn ended without a valid completion signal or final answer.'),
                         });
                         // Clean up all failed attempt messages on exhaustion
                         if (failedAttemptMessageIds.length > 0) {
@@ -1302,21 +1720,17 @@ export class Orchestrator extends TypedEventEmitter {
                     const failureState = this.getStreamState();
                     this.stopStreamWatchdog();
                     failedAttemptMessageIds.push(...attemptMessageIds);
-                    // Check if this was a nudge-abort (not emergency stop, not a real error)
-                    const isNudgeAbort = abortController.signal.aborted
+                    const recoveryDecision = this.pendingRecoveryDecision;
+                    this.pendingRecoveryDecision = null;
+                    const stallRecovery = recoveryDecision;
+                    const isRecoveryRetryAbort = abortController.signal.aborted
+                        && stallRecovery !== null
+                        && stallRecovery.assessment.action === 'retry'
                         && this.streamNudgeCount > stallRetryCountAtStart
                         && this.streamNudgeCount <= this.maxNudgeRetries;
-                    if (isNudgeAbort) {
-                        nextRetryContext = this.buildStallRetryContextMessage(this.buildStallRetrySnapshot({
-                            attempt: attemptNumber,
-                            phase: failureState.phase,
-                            activeToolName: failureState.activeToolName,
-                            activeToolCallId: failureState.activeToolCallId,
-                            partialContent: fullContent,
-                            completionState,
-                        }));
-                        // Inject nudge message and retry via the loop
-                        const systemMessage = this.conversations.appendMessage(convId, 'system', '[Cerebellum] You stopped mid-response. Continue from where you left off.');
+                    if (isRecoveryRetryAbort && stallRecovery) {
+                        nextRetryContext = this.buildRetryContextMessage('stall', attemptNumber, stallRecovery.assessment.modelMessage, stallRecovery.source);
+                        const systemMessage = this.conversations.appendMessage(convId, 'system', stallRecovery.assessment.operatorMessage);
                         attemptMessageIds.push(systemMessage.id);
                         failedAttemptMessageIds.push(...attemptMessageIds);
                         this.emit({ type: 'message:system', message: systemMessage });
@@ -1324,6 +1738,18 @@ export class Orchestrator extends TypedEventEmitter {
                         nextRetryCause = 'stall';
                         continue; // retry loop
                     }
+                    if (abortController.signal.aborted
+                        && stallRecovery !== null
+                        && stallRecovery.assessment.action === 'stop') {
+                        const systemMessage = this.conversations.appendMessage(convId, 'system', stallRecovery.assessment.operatorMessage);
+                        this.emit({ type: 'message:system', message: systemMessage });
+                        this.emit({ type: 'error', error: new Error(stallRecovery.assessment.diagnosis) });
+                        if (failedAttemptMessageIds.length > 0) {
+                            this.conversations.deleteMessages(convId, failedAttemptMessageIds);
+                        }
+                        loopTerminated = true;
+                        break;
+                    }
                     // Check if Cerebellum dropped mid-stream
                     if (this.cerebellum && !this.cerebellum.isConnected() && abortController.signal.aborted) {
                         const err = new Error('Cerebellum disconnected during active response. Restart it with: docker compose up -d cerebellum');