npm - @runtypelabs/sdk - Versions diffs - 1.7.1 → 1.7.2 - Mend

@runtypelabs/sdk 1.7.1 → 1.7.2

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/dist/endpoints.js CHANGED Viewed

@@ -1170,9 +1170,21 @@ class AgentsEndpoint {
             throw new Error(error.error || `HTTP ${response.status}`);
         }
         let completeEvent = null;
+        // Accumulate turn delta text so finalOutput is non-empty even when the
+        // agent_complete event carries no output (e.g. model ended after tool calls)
+        let accumulatedOutput = '';
         await processAgentStream(response.body, {
             ...callbacks,
+            onTurnDelta: (event) => {
+                if (event.contentType === 'text') {
+                    accumulatedOutput += event.delta;
+                }
+                callbacks.onTurnDelta?.(event);
+            },
             onAgentComplete: (event) => {
+                if (!event.finalOutput && accumulatedOutput) {
+                    event.finalOutput = accumulatedOutput;
+                }
                 completeEvent = event;
                 callbacks.onAgentComplete?.(event);
             },
@@ -1198,7 +1210,7 @@ class AgentsEndpoint {
      * })
      * ```
      */
-    async executeWithLocalTools(id, data, localTools, callbacks) {
+    async executeWithLocalTools(id, data, localTools, callbacks, options) {
         // Build runtime tool definitions from local tool schemas and inject into request
         const runtimeTools = Object.entries(localTools).map(([name, def]) => ({
             name,
@@ -1219,16 +1231,36 @@ class AgentsEndpoint {
             throw new Error(error.error || `HTTP ${response.status}`);
         }
         let currentBody = response.body;
+        // Accumulate text output across all streams (initial + resume cycles)
+        // so finalOutput is non-empty even when the last resume stream has no text
+        let accumulatedOutput = '';
+        let pauseCount = 0;
+        let discoveryPauseCount = 0;
+        let consecutiveDiscoveryPauseCount = 0;
+        const toolNameCounts = {};
+        let recentActionKeys = [];
         while (true) {
             let pausedEvent = null;
             let completeEvent = null;
             await processAgentStream(currentBody, {
                 ...callbacks,
+                onTurnDelta: (event) => {
+                    if (event.contentType === 'text') {
+                        accumulatedOutput += event.delta;
+                    }
+                    callbacks?.onTurnDelta?.(event);
+                },
                 onAgentPaused: (event) => {
                     pausedEvent = event;
                     callbacks?.onAgentPaused?.(event);
                 },
                 onAgentComplete: (event) => {
+                    // Supplement finalOutput with accumulated turn deltas when the
+                    // agent_complete event itself carries no output (common when the
+                    // model's last action was a tool call rather than text output)
+                    if (!event.finalOutput && accumulatedOutput) {
+                        event.finalOutput = accumulatedOutput;
+                    }
                     completeEvent = event;
                     callbacks?.onAgentComplete?.(event);
                 },
@@ -1273,6 +1305,41 @@ class AgentsEndpoint {
                     // Return the error as a tool result so the agent can recover
                     toolResult = `Error: ${err instanceof Error ? err.message : String(err)}`;
                 }
+                pauseCount += 1;
+                const toolNameCount = (toolNameCounts[toolName] || 0) + 1;
+                toolNameCounts[toolName] = toolNameCount;
+                const discoveryTool = this.isDiscoveryLocalTool(toolName);
+                if (discoveryTool) {
+                    discoveryPauseCount += 1;
+                    consecutiveDiscoveryPauseCount += 1;
+                }
+                else {
+                    consecutiveDiscoveryPauseCount = 0;
+                }
+                const actionKey = this.buildLocalToolActionKey(toolName, parsedParams);
+                recentActionKeys = [...recentActionKeys, actionKey].slice(-12);
+                const actionKeyCount = recentActionKeys.filter((candidateActionKey) => candidateActionKey === actionKey).length;
+                const forcedCompleteEvent = options?.onLocalToolResult?.({
+                    executionId,
+                    pauseCount,
+                    discoveryPauseCount,
+                    consecutiveDiscoveryPauseCount,
+                    toolName,
+                    toolNameCount,
+                    parameters: parsedParams,
+                    toolResult,
+                    accumulatedOutput,
+                    actionKey,
+                    actionKeyCount,
+                    recentActionKeys,
+                });
+                if (forcedCompleteEvent) {
+                    if (!forcedCompleteEvent.finalOutput && accumulatedOutput) {
+                        forcedCompleteEvent.finalOutput = accumulatedOutput;
+                    }
+                    callbacks?.onAgentComplete?.(forcedCompleteEvent);
+                    return forcedCompleteEvent;
+                }
                 // Resume via agent resume endpoint
                 const resumeResponse = await this.client.requestStream(`/agents/${id}/resume`, {
                     method: 'POST',
@@ -1295,6 +1362,1017 @@ class AgentsEndpoint {
         }
     }
     // ─── Long-Task Agent Execution ───────────────────────────────────────
+    /**
+     * Build a fresh tool trace with every flag cleared and every list empty.
+     *
+     * Each call returns a new object with new array instances, so traces are
+     * never shared between callers.
+     *
+     * @returns {object} The empty trace.
+     */
+    createEmptyToolTrace() {
+        return {
+            // Short log lines; appended (and capped) by pushToolTraceEntry
+            entries: [],
+            discoveryPerformed: false,
+            attemptedWrite: false,
+            wroteFiles: false,
+            executionFileWritten: false,
+            readFiles: false,
+            readPaths: [],
+            actionKeys: [],
+            // Filled by addCandidateToTrace, which also sets bestCandidatePath/Reason
+            candidatePaths: [],
+            planWritten: false,
+            bestCandidateReadFailed: false,
+            bestCandidateWritten: false,
+            bestCandidateVerified: false,
+            verificationAttempted: false,
+            verificationPassed: false,
+            // Set to true by the guard returned from createLocalToolLoopGuard
+            localToolLoopGuardTriggered: false,
+        };
+    }
+    isDiscoveryLocalTool(toolName) {
+        return ['tree_directory', 'search_repo', 'glob_files', 'list_directory', 'read_file'].includes(toolName);
+    }
+    buildLocalToolActionKey(toolName, parameters) {
+        const pathValue = typeof parameters.path === 'string' ? this.normalizeCandidatePath(parameters.path) : '';
+        const queryValue = typeof parameters.query === 'string'
+            ? parameters.query.trim()
+            : typeof parameters.q === 'string'
+                ? parameters.q.trim()
+                : '';
+        const patternValue = typeof parameters.pattern === 'string' ? parameters.pattern.trim() : '';
+        const commandValue = typeof parameters.command === 'string' ? parameters.command.trim() : '';
+        const descriptor = pathValue || queryValue || patternValue || commandValue;
+        if (descriptor) {
+            return `${toolName}:${descriptor.slice(0, 160)}`;
+        }
+        const fallback = this.summarizeUnknownForTrace(parameters, 160);
+        return fallback ? `${toolName}:${fallback}` : toolName;
+    }
+    buildProspectiveStateForSessionTrace(state, trace) {
+        const candidatePaths = Array.from(new Set([...(state.candidatePaths || []), ...trace.candidatePaths])).slice(-20);
+        const recentReadPaths = Array.from(new Set([...(state.recentReadPaths || []), ...trace.readPaths])).slice(-20);
+        return {
+            ...state,
+            ...(trace.bestCandidatePath
+                ? {
+                    bestCandidatePath: trace.bestCandidatePath,
+                    bestCandidateReason: trace.bestCandidateReason,
+                }
+                : {}),
+            candidatePaths,
+            recentReadPaths,
+            planWritten: state.planWritten || trace.planWritten,
+        };
+    }
+    buildForcedLocalToolTurnCompleteEvent(state, snapshot, reason) {
+        const finalOutput = [
+            snapshot.accumulatedOutput.trim(),
+            `Local tool loop guard ended this ${state.workflowPhase || 'research'} turn: ${reason}`,
+            snapshot.recentActionKeys.length > 0
+                ? `Recent local tool actions: ${snapshot.recentActionKeys.slice(-5).join(' | ')}`
+                : '',
+        ]
+            .filter(Boolean)
+            .join('\n\n');
+        return {
+            type: 'agent_complete',
+            executionId: snapshot.executionId,
+            seq: 0,
+            agentId: state.agentId,
+            success: true,
+            iterations: 1,
+            stopReason: 'end_turn',
+            completedAt: new Date().toISOString(),
+            totalCost: 0,
+            finalOutput,
+            duration: 0,
+        };
+    }
+    /**
+     * Create a callback that decides, after each local tool result, whether the
+     * current turn should be ended from the client side.
+     *
+     * The returned function takes a snapshot of the local-tool loop and returns
+     * either `undefined` (keep going) or a synthetic completion event built by
+     * buildForcedLocalToolTurnCompleteEvent. When it forces a completion it also
+     * records the reason on the trace.
+     *
+     * @param {object} state - State; `workflowPhase` selects the rule set.
+     * @param {object} trace - Session tool trace; read and mutated by the guard.
+     * @returns {(snapshot: object) => (object|undefined)} The guard callback.
+     */
+    createLocalToolLoopGuard(state, trace) {
+        return (snapshot) => {
+            const repeatedAction = snapshot.actionKeyCount >= 4;
+            const heavyDiscoveryLoop = snapshot.discoveryPauseCount >= 24;
+            const prospectiveState = this.buildProspectiveStateForSessionTrace(state, trace);
+            // Short-circuits: evidence is only evaluated while in the research phase
+            const sufficientResearch = state.workflowPhase === 'research' && this.hasSufficientResearchEvidence(prospectiveState);
+            let reason;
+            if (state.workflowPhase === 'research') {
+                // Rules are checked in priority order; only the first match sets the reason
+                if (sufficientResearch && snapshot.discoveryPauseCount >= 12) {
+                    reason =
+                        'research evidence is already sufficient, but this execution kept issuing discovery tools instead of ending the turn';
+                }
+                else if (repeatedAction) {
+                    reason = `the same discovery action repeated ${snapshot.actionKeyCount} times in one session`;
+                }
+                else if (snapshot.consecutiveDiscoveryPauseCount >= 18 || heavyDiscoveryLoop) {
+                    reason =
+                        'this session exceeded the discovery-tool budget without ending the turn';
+                }
+            }
+            else if (state.workflowPhase === 'planning' &&
+                !trace.planWritten &&
+                snapshot.consecutiveDiscoveryPauseCount >= 18) {
+                reason = 'planning is looping on discovery instead of writing the plan and ending the turn';
+            }
+            else if (state.workflowPhase === 'execution' &&
+                !trace.executionFileWritten &&
+                snapshot.consecutiveDiscoveryPauseCount >= 18) {
+                reason = 'execution is looping on discovery instead of editing repo files and ending the turn';
+            }
+            if (!reason) {
+                // No rule matched: let the local-tool loop continue
+                return undefined;
+            }
+            trace.localToolLoopGuardTriggered = true;
+            trace.forcedTurnEndReason = reason;
+            this.pushToolTraceEntry(trace, `local-tool loop guard forced end_turn -> ${reason}`);
+            return this.buildForcedLocalToolTurnCompleteEvent(state, snapshot, reason);
+        };
+    }
+    pushToolTraceEntry(trace, entry) {
+        const trimmed = entry.trim();
+        if (!trimmed)
+            return;
+        if (trace.entries[trace.entries.length - 1] === trimmed)
+            return;
+        trace.entries.push(trimmed);
+        if (trace.entries.length > 12) {
+            trace.entries = trace.entries.slice(-12);
+        }
+    }
+    isPreservationSensitiveTask(state) {
+        const bestCandidatePath = state.bestCandidatePath || '';
+        if (/\.(html|tsx|jsx|css|scss|sass)$/i.test(bestCandidatePath)) {
+            return true;
+        }
+        const prompt = (state.originalMessage || '').toLowerCase();
+        return [
+            'ux',
+            'ui',
+            'design',
+            'frontend',
+            'front-end',
+            'theme',
+            'editor',
+            'layout',
+            'style',
+            'accessibility',
+            'visual',
+        ].some((keyword) => prompt.includes(keyword));
+    }
+    getLikelySupportingCandidatePaths(bestCandidatePath, candidatePaths) {
+        if (!bestCandidatePath || !candidatePaths || candidatePaths.length === 0)
+            return [];
+        const normalizedBestCandidatePath = this.normalizeCandidatePath(bestCandidatePath);
+        const bestCandidateSegments = normalizedBestCandidatePath.split('/').filter(Boolean);
+        const relatedRoot = bestCandidateSegments.length >= 2
+            ? `${bestCandidateSegments[0]}/${bestCandidateSegments[1]}/`
+            : bestCandidateSegments.length === 1
+                ? `${bestCandidateSegments[0]}/`
+                : '';
+        const bestCandidateDir = normalizedBestCandidatePath.includes('/')
+            ? `${normalizedBestCandidatePath.slice(0, normalizedBestCandidatePath.lastIndexOf('/'))}/`
+            : '';
+        return candidatePaths
+            .map((candidatePath) => this.normalizeCandidatePath(candidatePath))
+            .filter((candidatePath) => candidatePath &&
+            candidatePath !== normalizedBestCandidatePath &&
+            !this.isMarathonArtifactPath(candidatePath) &&
+            ((bestCandidateDir && candidatePath.startsWith(bestCandidateDir)) ||
+                (relatedRoot && candidatePath.startsWith(relatedRoot))));
+    }
+    hasSufficientResearchEvidence(state) {
+        if (!state.bestCandidatePath)
+            return false;
+        const normalizedBestCandidatePath = this.normalizeCandidatePath(state.bestCandidatePath);
+        const normalizedRecentReadPaths = (state.recentReadPaths || []).map((readPath) => this.normalizeCandidatePath(readPath));
+        const readBestCandidate = normalizedRecentReadPaths.includes(normalizedBestCandidatePath);
+        if (!readBestCandidate) {
+            return false;
+        }
+        if (!this.isPreservationSensitiveTask(state)) {
+            return true;
+        }
+        const supportingCandidatePaths = this.getLikelySupportingCandidatePaths(state.bestCandidatePath, state.candidatePaths);
+        if (supportingCandidatePaths.length === 0) {
+            return true;
+        }
+        return normalizedRecentReadPaths.some((readPath) => readPath !== normalizedBestCandidatePath && supportingCandidatePaths.includes(readPath));
+    }
+    buildEffectiveSessionOutput(modelOutput, toolTraceSummary) {
+        return [toolTraceSummary.trim(), modelOutput.trim()].filter(Boolean).join('\n\n');
+    }
+    canAcceptTaskCompletion(output, state, sessionTrace) {
+        if (!this.detectTaskCompletion(output)) {
+            return false;
+        }
+        if (state.workflowPhase !== 'execution') {
+            return true;
+        }
+        if (!state.bestCandidatePath) {
+            return true;
+        }
+        const verificationSatisfied = !state.verificationRequired ||
+            Boolean(state.lastVerificationPassed || sessionTrace.verificationPassed);
+        return (Boolean(state.planWritten) &&
+            Boolean(state.bestCandidateVerified || sessionTrace.bestCandidateVerified) &&
+            verificationSatisfied);
+    }
+    summarizeUnknownForTrace(value, maxLength = 180) {
+        const text = typeof value === 'string'
+            ? value
+            : value === undefined
+                ? ''
+                : JSON.stringify(value);
+        return text.replace(/\s+/g, ' ').trim().slice(0, maxLength);
+    }
+    summarizeTextBlockForTrace(value, maxLines = 4) {
+        const text = typeof value === 'string'
+            ? value
+            : value === undefined
+                ? ''
+                : JSON.stringify(value);
+        if (!text)
+            return '';
+        return text
+            .split('\n')
+            .map((line) => line.trim())
+            .filter(Boolean)
+            .slice(0, maxLines)
+            .join(' | ')
+            .slice(0, 240);
+    }
+    parseVerificationResult(result) {
+        if (typeof result !== 'string')
+            return undefined;
+        try {
+            const parsed = JSON.parse(result);
+            if (typeof parsed.success !== 'boolean')
+                return undefined;
+            return {
+                success: parsed.success,
+                ...(typeof parsed.command === 'string' ? { command: parsed.command } : {}),
+                ...(typeof parsed.output === 'string' ? { output: parsed.output } : {}),
+                ...(typeof parsed.error === 'string' ? { error: parsed.error } : {}),
+            };
+        }
+        catch {
+            return undefined;
+        }
+    }
+    normalizeCandidatePath(candidatePath) {
+        return candidatePath.trim().replace(/\\/g, '/').replace(/^\.?\//, '').replace(/\/+/g, '/');
+    }
+    dedupeNormalizedCandidatePaths(paths) {
+        return Array.from(new Set((paths || [])
+            .map((candidatePath) => this.normalizeCandidatePath(candidatePath))
+            .filter((candidatePath) => {
+            if (!candidatePath)
+                return false;
+            return !this.isMarathonArtifactPath(candidatePath);
+        })));
+    }
+    isMarathonArtifactPath(candidatePath) {
+        const normalized = this.normalizeCandidatePath(candidatePath).toLowerCase();
+        return normalized === '.runtype' || normalized.startsWith('.runtype/');
+    }
+    isDiscoveryToolName(toolName) {
+        return (toolName === 'search_repo' ||
+            toolName === 'glob_files' ||
+            toolName === 'tree_directory' ||
+            toolName === 'list_directory');
+    }
+    sanitizeTaskSlug(taskName) {
+        return taskName
+            .toLowerCase()
+            .replace(/[^a-z0-9_-]+/g, '-')
+            .replace(/^-+|-+$/g, '')
+            .slice(0, 80);
+    }
+    getDefaultPlanPath(taskName) {
+        const slug = this.sanitizeTaskSlug(taskName || 'task');
+        return `.runtype/marathons/${slug}/plan.md`;
+    }
+    dirnameOfCandidatePath(candidatePath) {
+        const normalized = this.normalizeCandidatePath(candidatePath);
+        const index = normalized.lastIndexOf('/');
+        return index >= 0 ? normalized.slice(0, index) : '';
+    }
+    joinCandidatePath(baseDir, nextPath) {
+        const normalizedNext = nextPath.replace(/\\/g, '/').trim();
+        if (!normalizedNext)
+            return '';
+        if (normalizedNext.startsWith('/')) {
+            return this.normalizeCandidatePath(`${baseDir}/${normalizedNext.slice(1)}`);
+        }
+        if (normalizedNext.startsWith('./')) {
+            return this.normalizeCandidatePath(`${baseDir}/${normalizedNext.slice(2)}`);
+        }
+        return this.normalizeCandidatePath(baseDir ? `${baseDir}/${normalizedNext}` : normalizedNext);
+    }
+    scoreCandidatePath(candidatePath) {
+        const normalized = this.normalizeCandidatePath(candidatePath).toLowerCase();
+        let score = 0;
+        if (normalized.endsWith('/theme.html') || normalized.endsWith('theme.html'))
+            score += 80;
+        if (normalized.includes('agent'))
+            score += 30;
+        if (normalized.includes('editor'))
+            score += 30;
+        if (normalized.includes('theme'))
+            score += 25;
+        if (normalized.endsWith('.html'))
+            score += 20;
+        if (normalized.includes('/src/'))
+            score += 10;
+        if (normalized.includes('/app/'))
+            score += 10;
+        if (normalized.includes('index.html'))
+            score -= 10;
+        return score;
+    }
+    addCandidateToTrace(trace, candidatePath, reason) {
+        const normalized = this.normalizeCandidatePath(candidatePath);
+        if (!normalized || normalized.length < 3)
+            return;
+        if (this.isMarathonArtifactPath(normalized))
+            return;
+        if (!trace.candidatePaths.includes(normalized)) {
+            trace.candidatePaths.push(normalized);
+            if (trace.candidatePaths.length > 12) {
+                trace.candidatePaths = trace.candidatePaths.slice(-12);
+            }
+        }
+        const currentScore = trace.bestCandidatePath ? this.scoreCandidatePath(trace.bestCandidatePath) : -1;
+        const nextScore = this.scoreCandidatePath(normalized);
+        if (!trace.bestCandidatePath || nextScore >= currentScore) {
+            trace.bestCandidatePath = normalized;
+            trace.bestCandidateReason = reason.slice(0, 200);
+        }
+    }
+    extractCandidatePathsFromText(text, sourcePath) {
+        const candidates = [];
+        if (sourcePath && this.isMarathonArtifactPath(sourcePath)) {
+            return candidates;
+        }
+        const add = (candidatePath, reason) => {
+            const normalized = this.normalizeCandidatePath(candidatePath);
+            if (!normalized)
+                return;
+            if (this.isMarathonArtifactPath(normalized))
+                return;
+            if (!candidates.some((candidate) => candidate.path === normalized)) {
+                candidates.push({ path: normalized, reason });
+            }
+        };
+        const baseDir = sourcePath ? this.dirnameOfCandidatePath(sourcePath) : '';
+        for (const match of text.matchAll(/(?:href|src)=["']([^"']+\.(?:html|tsx|ts|jsx|js|md|json))["']/gi)) {
+            const target = match[1] || '';
+            const resolved = baseDir ? this.joinCandidatePath(baseDir, target) : target;
+            add(resolved, `linked from ${sourcePath || 'discovery result'} via ${target}`);
+        }
+        for (const match of text.matchAll(/\b([\w./-]+\.(?:html|tsx|ts|jsx|js|md|json))\b/g)) {
+            const target = match[1] || '';
+            const resolved = sourcePath && !target.includes('/') ? this.joinCandidatePath(baseDir, target) : this.normalizeCandidatePath(target);
+            add(resolved, `mentioned in ${sourcePath || 'discovery result'}`);
+        }
+        return candidates;
+    }
+    parseSearchRepoResultForCandidates(result) {
+        const candidates = [];
+        for (const line of result.split('\n')) {
+            const contentMatch = line.match(/^\[content\]\s+([^:]+):\d+:\s+(.*)$/);
+            if (contentMatch) {
+                const sourcePath = this.normalizeCandidatePath(contentMatch[1] || '');
+                const content = contentMatch[2] || '';
+                for (const candidate of this.extractCandidatePathsFromText(content, sourcePath)) {
+                    if (!candidates.some((existing) => existing.path === candidate.path)) {
+                        candidates.push(candidate);
+                    }
+                }
+                continue;
+            }
+            const pathMatch = line.match(/^\[path\]\s+(.+)$/);
+            if (pathMatch) {
+                const sourcePath = this.normalizeCandidatePath(pathMatch[1] || '');
+                if (/\.(html|tsx|ts|jsx|js|md|json)$/i.test(sourcePath)) {
+                    candidates.push({ path: sourcePath, reason: 'matched repository path search result' });
+                }
+            }
+        }
+        return candidates;
+    }
+    extractBestCandidateFromBootstrapContext(bootstrapContext) {
+        if (!bootstrapContext)
+            return undefined;
+        const candidates = this.parseSearchRepoResultForCandidates(bootstrapContext);
+        if (candidates.length === 0)
+            return undefined;
+        return candidates.sort((a, b) => this.scoreCandidatePath(b.path) - this.scoreCandidatePath(a.path))[0];
+    }
+    /**
+     * Clean up a previously saved state before resuming from it.
+     *
+     * Paths are normalized and de-duplicated, `.runtype` artifacts are removed
+     * from the path lists, boolean flags are coerced to real booleans, and the
+     * workflow phase is moved to `execution` when a plan was already written.
+     *
+     * @param {object|undefined} resumeState - Saved state, if any.
+     * @param {string} taskName - Task name used to derive the default plan path.
+     * @returns {object|undefined} The sanitized copy, or undefined when no
+     *   resume state was given.
+     */
+    sanitizeResumeState(resumeState, taskName) {
+        if (!resumeState)
+            return undefined;
+        // Keep a saved plan path when it is a non-blank string; otherwise use the default
+        const planPath = typeof resumeState.planPath === 'string' && resumeState.planPath.trim()
+            ? this.normalizeCandidatePath(resumeState.planPath)
+            : this.getDefaultPlanPath(taskName);
+        const candidatePaths = this.dedupeNormalizedCandidatePaths(resumeState.candidatePaths);
+        const recentReadPaths = this.dedupeNormalizedCandidatePaths(resumeState.recentReadPaths);
+        const normalizedBestCandidatePath = typeof resumeState.bestCandidatePath === 'string' && resumeState.bestCandidatePath.trim()
+            ? this.normalizeCandidatePath(resumeState.bestCandidatePath)
+            : undefined;
+        // If the saved best candidate is missing or is a .runtype artifact, fall back
+        // to the highest-scoring known path (undefined when there are none)
+        const bestCandidatePath = normalizedBestCandidatePath && !this.isMarathonArtifactPath(normalizedBestCandidatePath)
+            ? normalizedBestCandidatePath
+            : [...candidatePaths, ...recentReadPaths].sort((left, right) => this.scoreCandidatePath(right) - this.scoreCandidatePath(left))[0];
+        // A written plan means research/planning are done, whatever phase was saved
+        const workflowPhase = resumeState.planWritten &&
+            (!resumeState.workflowPhase ||
+                resumeState.workflowPhase === 'research' ||
+                resumeState.workflowPhase === 'planning')
+            ? 'execution'
+            : resumeState.workflowPhase;
+        return {
+            ...resumeState,
+            workflowPhase,
+            planPath,
+            planWritten: Boolean(resumeState.planWritten),
+            bestCandidatePath,
+            bestCandidateReason: bestCandidatePath ? resumeState.bestCandidateReason : undefined,
+            candidatePaths,
+            recentReadPaths,
+            // Unique action keys only, keeping the last 20
+            recentActionKeys: Array.from(new Set(resumeState.recentActionKeys || [])).slice(-20),
+            bestCandidateNeedsVerification: Boolean(resumeState.bestCandidateNeedsVerification),
+            bestCandidateVerified: Boolean(resumeState.bestCandidateVerified),
+            // Only carried over when explicitly set, so "unset" stays distinguishable
+            ...(resumeState.verificationRequired !== undefined
+                ? { verificationRequired: resumeState.verificationRequired }
+                : {}),
+            lastVerificationPassed: Boolean(resumeState.lastVerificationPassed),
+        };
+    }
+    /**
+     * Build the instruction text for the current workflow phase.
+     *
+     * Returns a newline-joined block for `planning` or `execution`; any other
+     * phase value (including a missing one) gets the research instructions.
+     *
+     * @param {object} state - State; `workflowPhase`, `planPath`, `taskName`
+     *   and `bestCandidatePath` are read.
+     * @returns {string} The phase instructions.
+     */
+    buildPhaseInstructions(state) {
+        const phase = state.workflowPhase || 'research';
+        const planPath = state.planPath || this.getDefaultPlanPath(state.taskName);
+        if (phase === 'planning') {
+            return [
+                '--- Workflow Phase: Planning ---',
+                'Research is complete. Your current job is to write the implementation plan before any product-file edits.',
+                `Write the plan markdown to exactly: ${planPath}`,
+                'Do NOT edit the target product file yet.',
+                'The plan should summarize UX findings, explain why the current best candidate is the right file, and list concrete execution steps.',
+                'The plan must include a "Preserve existing functionality" section that lists current behaviors, linked files, integrations, and constraints that must keep working.',
+                'The plan must include a "Verification steps" section listing the concrete checks you will run before TASK_COMPLETE.',
+                'If the plan already exists, update that same plan file instead of creating a different one.',
+            ].join('\n');
+        }
+        if (phase === 'execution') {
+            return [
+                '--- Workflow Phase: Execution ---',
+                `The plan should already exist at: ${planPath}`,
+                // The target-file line is only included when a best candidate is known
+                ...(state.bestCandidatePath ? [`Primary target file: ${state.bestCandidatePath}`] : []),
+                'Execute the plan by editing the target files.',
+                'Before ending each turn, update the markdown plan with progress against the steps you completed.',
+                'Modify the existing implementation incrementally. Do not replace the whole file unless the user explicitly asked for a rewrite.',
+                'Preserve existing functionality, handlers, imports, routes, configuration, and data flow unless the plan explicitly calls for changing them.',
+                'Before TASK_COMPLETE, run a verification command that matches the repo, such as lint, tests, build, or typecheck.',
+                'Avoid broad repo discovery unless the current candidate is clearly wrong.',
+            ].join('\n');
+        }
+        // Fallback: research instructions
+        return [
+            '--- Workflow Phase: Research ---',
+            'Your current job is to inspect the repo, identify the correct existing target file, and gather enough evidence for a plan.',
+            'Identify related supporting files and current behaviors that must be preserved before planning.',
+            'Do NOT edit the target product file yet.',
+            `When research is complete, the system will advance you to planning and require a plan at: ${planPath}`,
+        ].join('\n');
+    }
+    /**
+     * Advance `state.workflowPhase` in place based on the evidence gathered.
+     *
+     * Fills in a default phase and plan path when missing, then applies the
+     * research -> planning and planning -> execution transitions until neither
+     * applies. A summary of the transitions taken is stored on
+     * `state.phaseTransitionSummary` (left undefined when none happened).
+     *
+     * @param {object} state - State; mutated.
+     * @param {object} sessionTrace - Tool trace for the current session;
+     *   `planWritten` is read.
+     * @returns {void}
+     */
+    updateWorkflowPhase(state, sessionTrace) {
+        if (!state.workflowPhase)
+            state.workflowPhase = 'research';
+        if (!state.planPath)
+            state.planPath = this.getDefaultPlanPath(state.taskName);
+        // Always reset; it is set again below only if a transition happens in this call
+        state.phaseTransitionSummary = undefined;
+        const transitionSummaries = [];
+        let phaseUpdated = true;
+        // Loop so that research -> planning -> execution can both happen in one call
+        while (phaseUpdated) {
+            phaseUpdated = false;
+            if (state.workflowPhase === 'research' && this.hasSufficientResearchEvidence(state)) {
+                state.workflowPhase = 'planning';
+                transitionSummaries.push([
+                    'Automatic phase transition: research -> planning.',
+                    `Best candidate confirmed: ${state.bestCandidatePath}`,
+                    `Next step: write the plan markdown to ${state.planPath} before editing the product file.`,
+                ].join('\n'));
+                phaseUpdated = true;
+                continue;
+            }
+            if (state.workflowPhase === 'planning' && (sessionTrace.planWritten || state.planWritten)) {
+                state.planWritten = true;
+                state.workflowPhase = 'execution';
+                transitionSummaries.push([
+                    'Automatic phase transition: planning -> execution.',
+                    `Plan path: ${state.planPath}`,
+                    ...(state.bestCandidatePath ? [`Execute against: ${state.bestCandidatePath}`] : []),
+                    'Next step: edit the target file(s) and update the plan with progress each turn.',
+                ].join('\n'));
+                phaseUpdated = true;
+            }
+        }
+        // A completed status overrides whatever phase was reached above
+        if (state.status === 'complete') {
+            state.workflowPhase = 'complete';
+        }
+        if (transitionSummaries.length > 0) {
+            state.phaseTransitionSummary = transitionSummaries.join('\n\n');
+        }
+    }
+    wrapLocalToolsForTrace(localTools, trace, state) {
+        if (!localTools)
+            return undefined;
+        const wrapped = {};
+        for (const [toolName, toolDef] of Object.entries(localTools)) {
+            wrapped[toolName] = {
+                ...toolDef,
+                execute: async (args) => {
+                    const actionKey = `${toolName}:${String(args.path || args.query || args.pattern || '.').slice(0, 120)}`;
+                    trace.actionKeys.push(actionKey);
+                    if (trace.actionKeys.length > 10) {
+                        trace.actionKeys = trace.actionKeys.slice(-10);
+                    }
+                    const normalizedPathArg = typeof args.path === 'string' && args.path.trim()
+                        ? this.normalizeCandidatePath(String(args.path))
+                        : undefined;
+                    const normalizedPlanPath = state.planPath
+                        ? this.normalizeCandidatePath(state.planPath)
+                        : undefined;
+                    const normalizedBestCandidatePath = state.bestCandidatePath
+                        ? this.normalizeCandidatePath(state.bestCandidatePath)
+                        : undefined;
+                    const allowedWriteTargets = new Set([
+                        normalizedPlanPath,
+                        normalizedBestCandidatePath,
+                        ...(state.recentReadPaths || []).map((readPath) => this.normalizeCandidatePath(readPath)),
+                        ...trace.readPaths.map((readPath) => this.normalizeCandidatePath(readPath)),
+                    ].filter((value) => Boolean(value)));
+                    const pathArg = typeof args.path === 'string' && args.path.trim() ? ` path=${String(args.path)}` : '';
+                    const queryArg = typeof args.query === 'string' && args.query.trim() ? ` query="${String(args.query)}"` : '';
+                    const patternArg = typeof args.pattern === 'string' && args.pattern.trim()
+                        ? ` pattern="${String(args.pattern)}"`
+                        : '';
+                    const isWriteLikeTool = toolName === 'write_file' || toolName === 'restore_file_checkpoint';
+                    const isVerificationTool = toolName === 'run_check';
+                    if (state.workflowPhase === 'execution' &&
+                        normalizedBestCandidatePath &&
+                        this.isDiscoveryToolName(toolName) &&
+                        !trace.bestCandidateReadFailed) {
+                        const blockedMessage = [
+                            `Blocked by marathon execution guard: ${toolName} is disabled during execution.`,
+                            `Read or edit "${normalizedBestCandidatePath}" instead.`,
+                            'Broad discovery is only re-enabled if a read of the current target file fails.',
+                        ].join(' ');
+                        this.pushToolTraceEntry(trace, `${toolName}${pathArg}${queryArg}${patternArg} -> ${blockedMessage}`);
+                        return blockedMessage;
+                    }
+                    if (isWriteLikeTool) {
+                        trace.attemptedWrite = true;
+                        if (state.workflowPhase === 'planning' &&
+                            normalizedPathArg &&
+                            normalizedPlanPath &&
+                            normalizedPathArg !== normalizedPlanPath) {
+                            const blockedMessage = [
+                                `Blocked by marathon planning guard: ${toolName} must target the exact plan path during planning.`,
+                                `Write the plan to "${normalizedPlanPath}" before editing any product files.`,
+                            ].join(' ');
+                            this.pushToolTraceEntry(trace, `${toolName}${pathArg}${queryArg}${patternArg} -> ${blockedMessage}`);
+                            return blockedMessage;
+                        }
+                        if (state.workflowPhase === 'execution' &&
+                            normalizedPathArg &&
+                            normalizedPlanPath &&
+                            normalizedBestCandidatePath &&
+                            normalizedPathArg === normalizedPlanPath &&
+                            !trace.executionFileWritten) {
+                            const blockedMessage = [
+                                `Blocked by marathon execution guard: ${toolName} cannot update the plan file before any real repo-file edit in this execution turn.`,
+                                `Edit "${normalizedBestCandidatePath}" or another previously discovered repo file first.`,
+                                `After that, you may update "${normalizedPlanPath}" with progress.`,
+                            ].join(' ');
+                            this.pushToolTraceEntry(trace, `${toolName}${pathArg}${queryArg}${patternArg} -> ${blockedMessage}`);
+                            return blockedMessage;
+                        }
+                        if (state.workflowPhase === 'execution' &&
+                            normalizedPathArg &&
+                            normalizedPathArg !== normalizedPlanPath &&
+                            !allowedWriteTargets.has(normalizedPathArg)) {
+                            const blockedMessage = [
+                                `Blocked by marathon execution guard: ${toolName} is limited to the confirmed target, the plan file, or files already discovered/read for this task.`,
+                                `Do not create scratch files like "${normalizedPathArg}".`,
+                                normalizedBestCandidatePath
+                                    ? `Edit "${normalizedBestCandidatePath}" or another previously discovered repo file instead.`
+                                    : 'Read the current target file before writing.',
+                            ].join(' ');
+                            this.pushToolTraceEntry(trace, `${toolName}${pathArg}${queryArg}${patternArg} -> ${blockedMessage}`);
+                            return blockedMessage;
+                        }
+                    }
+                    if (this.isDiscoveryToolName(toolName)) {
+                        trace.discoveryPerformed = true;
+                    }
+                    if (toolName === 'read_file') {
+                        trace.readFiles = true;
+                        if (normalizedPathArg) {
+                            const normalizedReadPath = normalizedPathArg;
+                            trace.readPaths.push(normalizedReadPath);
+                            if (trace.readPaths.length > 12) {
+                                trace.readPaths = trace.readPaths.slice(-12);
+                            }
+                            this.addCandidateToTrace(trace, normalizedReadPath, 'explicitly read by agent');
+                        }
+                    }
+                    let result;
+                    try {
+                        result = await toolDef.execute(args);
+                    }
+                    catch (error) {
+                        if (toolName === 'read_file' &&
+                            normalizedPathArg &&
+                            normalizedBestCandidatePath &&
+                            normalizedPathArg === normalizedBestCandidatePath) {
+                            trace.bestCandidateReadFailed = true;
+                        }
+                        this.pushToolTraceEntry(trace, `${toolName}${pathArg}${queryArg}${patternArg} -> error: ${error instanceof Error ? error.message : String(error)}`);
+                        throw error;
+                    }
+                    if (isWriteLikeTool && normalizedPathArg) {
+                        trace.wroteFiles = true;
+                        if (normalizedPlanPath && normalizedPathArg === normalizedPlanPath) {
+                            trace.planWritten = true;
+                        }
+                        else if (state.workflowPhase === 'execution') {
+                            trace.executionFileWritten = true;
+                            trace.verificationPassed = false;
+                            if (normalizedBestCandidatePath &&
+                                normalizedPathArg === normalizedBestCandidatePath) {
+                                trace.bestCandidateWritten = true;
+                            }
+                        }
+                    }
+                    const verificationResult = isVerificationTool
+                        ? this.parseVerificationResult(result)
+                        : undefined;
+                    if (verificationResult) {
+                        trace.verificationAttempted = true;
+                        trace.verificationPassed = verificationResult.success;
+                    }
+                    const summarizedResult = verificationResult
+                        ? [
+                            verificationResult.command || 'verification',
+                            verificationResult.success ? 'passed' : 'failed',
+                            verificationResult.error || verificationResult.output,
+                        ]
+                            .filter(Boolean)
+                            .join(' | ')
+                            .slice(0, 240)
+                        : this.summarizeTextBlockForTrace(result);
+                    const resultSuffix = summarizedResult ? ` -> ${summarizedResult}` : '';
+                    this.pushToolTraceEntry(trace, `${toolName}${pathArg}${queryArg}${patternArg}${resultSuffix}`);
+                    const textResult = typeof result === 'string' ? result : '';
+                    if (toolName === 'read_file' &&
+                        normalizedPathArg &&
+                        normalizedBestCandidatePath &&
+                        normalizedPathArg === normalizedBestCandidatePath &&
+                        (trace.bestCandidateWritten || state.bestCandidateNeedsVerification)) {
+                        trace.bestCandidateVerified = true;
+                    }
+                    if (toolName === 'search_repo' && textResult) {
+                        for (const candidate of this.parseSearchRepoResultForCandidates(textResult)) {
+                            this.addCandidateToTrace(trace, candidate.path, candidate.reason);
+                        }
+                    }
+                    else if (toolName === 'glob_files' && textResult) {
+                        for (const line of textResult.split('\n')) {
+                            const candidatePath = line.trim();
+                            if (/\.(html|tsx|ts|jsx|js|md|json)$/i.test(candidatePath)) {
+                                this.addCandidateToTrace(trace, candidatePath, 'matched glob search');
+                            }
+                        }
+                    }
+                    else if (toolName === 'list_directory' && textResult && typeof args.path === 'string') {
+                        const baseDir = this.normalizeCandidatePath(String(args.path));
+                        for (const line of textResult.split('\n')) {
+                            const candidateName = line.trim();
+                            if (/\.(html|tsx|ts|jsx|js|md|json)$/i.test(candidateName)) {
+                                this.addCandidateToTrace(trace, this.joinCandidatePath(baseDir, candidateName), `listed in directory ${baseDir || '.'}`);
+                            }
+                        }
+                    }
+                    else if (toolName === 'read_file' && textResult && typeof args.path === 'string') {
+                        const sourcePath = this.normalizeCandidatePath(String(args.path));
+                        for (const candidate of this.extractCandidatePathsFromText(textResult, sourcePath)) {
+                            this.addCandidateToTrace(trace, candidate.path, candidate.reason);
+                        }
+                    }
+                    return result;
+                },
+            };
+        }
+        return wrapped;
+    }
+    createTraceCallbacks(callbacks, trace) {
+        if (!callbacks) {
+            return {
+                onToolStart: (event) => {
+                    trace.actionKeys.push(`server:${event.toolName}`);
+                    if (trace.actionKeys.length > 10)
+                        trace.actionKeys = trace.actionKeys.slice(-10);
+                    if (event.toolName === 'write_file')
+                        trace.attemptedWrite = true;
+                    this.pushToolTraceEntry(trace, `server-tool ${event.toolName} started`);
+                },
+                onToolComplete: (event) => {
+                    const resultSummary = this.summarizeTextBlockForTrace(event.result);
+                    this.pushToolTraceEntry(trace, `server-tool ${event.toolName} ${event.success ? 'completed' : 'failed'}${resultSummary ? ` -> ${resultSummary}` : ''}`);
+                },
+            };
+        }
+        return {
+            ...callbacks,
+            onToolStart: (event) => {
+                trace.actionKeys.push(`server:${event.toolName}`);
+                if (trace.actionKeys.length > 10)
+                    trace.actionKeys = trace.actionKeys.slice(-10);
+                if (event.toolName === 'write_file')
+                    trace.attemptedWrite = true;
+                this.pushToolTraceEntry(trace, `server-tool ${event.toolName} started`);
+                callbacks.onToolStart?.(event);
+            },
+            onToolComplete: (event) => {
+                const resultSummary = this.summarizeTextBlockForTrace(event.result);
+                this.pushToolTraceEntry(trace, `server-tool ${event.toolName} ${event.success ? 'completed' : 'failed'}${resultSummary ? ` -> ${resultSummary}` : ''}`);
+                callbacks.onToolComplete?.(event);
+            },
+        };
+    }
+    buildToolTraceSummary(trace) {
+        if (trace.entries.length === 0)
+            return '';
+        const lines = trace.entries.slice(-6).map((entry) => `- ${entry}`);
+        const flags = [];
+        if (trace.discoveryPerformed)
+            flags.push('repo discovery used');
+        if (trace.readFiles)
+            flags.push('candidate files read');
+        if (trace.wroteFiles)
+            flags.push('files written');
+        if (trace.localToolLoopGuardTriggered)
+            flags.push('local-tool loop guard forced end_turn');
+        if (trace.bestCandidateVerified)
+            flags.push('target re-read after write');
+        if (trace.verificationPassed)
+            flags.push('verification passed');
+        else if (trace.verificationAttempted)
+            flags.push('verification failed');
+        return [
+            'Session working memory:',
+            ...(flags.length > 0 ? [`- ${flags.join('; ')}`] : []),
+            ...(trace.bestCandidatePath
+                ? [`- best candidate: ${trace.bestCandidatePath}${trace.bestCandidateReason ? ` (${trace.bestCandidateReason})` : ''}`]
+                : []),
+            ...lines,
+        ]
+            .join('\n')
+            .slice(0, 1200);
+    }
+    extractBootstrapQueries(message) {
+        const queries = [];
+        const noisyTerms = new Set([
+            'a',
+            'against',
+            'all',
+            'analyze',
+            'and',
+            'as',
+            'at',
+            'based',
+            'before',
+            'best',
+            'by',
+            'codebase',
+            'do',
+            'exactly',
+            'files',
+            'first',
+            'following',
+            'goal',
+            'go',
+            'how',
+            'in',
+            'is',
+            'it',
+            'its',
+            'make',
+            'markdown',
+            'most',
+            'no',
+            'of',
+            'on',
+            'order',
+            'plan',
+            'progress',
+            'repo',
+            'research',
+            'right',
+            'save',
+            'session',
+            'solve',
+            'task',
+            'that',
+            'the',
+            'then',
+            'through',
+            'to',
+            'turn',
+            'update',
+            'user',
+            'ux',
+            'web',
+            'when',
+            'with',
+            'work',
+            'your',
+        ]);
+        const push = (candidate) => {
+            const normalized = candidate
+                .replace(/^[^a-z0-9/._-]+|[^a-z0-9/._ -]+$/gi, '')
+                .replace(/\s+/g, ' ')
+                .trim();
+            if (!normalized || normalized.length < 3 || normalized.length > 60)
+                return;
+            const words = normalized.toLowerCase().split(' ').filter(Boolean);
+            if (words.length > 4)
+                return;
+            if (words.every((word) => noisyTerms.has(word)))
+                return;
+            if (words.length > 1 && noisyTerms.has(words[words.length - 1] || ''))
+                return;
+            if (!queries.some((existing) => existing.toLowerCase() === normalized.toLowerCase())) {
+                queries.push(normalized);
+            }
+        };
+        const lowerMessage = message.toLowerCase();
+        const phraseHints = [
+            'agent editor',
+            'theme.html',
+            '/theme.html',
+            'style it visually',
+        ];
+        for (const hint of phraseHints) {
+            if (lowerMessage.includes(hint.toLowerCase()))
+                push(hint);
+        }
+        for (const match of message.matchAll(/"([^"]{3,60})"/g)) {
+            push(match[1] || '');
+        }
+        for (const match of message.matchAll(/(?:go through|review|inspect|edit|improve|update|fix|modify)\s+(?:the\s+)?([a-z0-9][a-z0-9/_-]*(?:\s+[a-z0-9][a-z0-9/_-]*){0,2})/gi)) {
+            push(match[1] || '');
+        }
+        for (const match of message.matchAll(/([a-z0-9][a-z0-9/_-]*(?:\s+[a-z0-9][a-z0-9/_-]*){0,2})\s+(?:page|editor|screen|view|route|component)\b/gi)) {
+            push(match[0] || '');
+            push(match[1] || '');
+        }
+        for (const match of message.matchAll(/\b[\w./-]+\.(?:html|tsx|ts|jsx|js|md|json)\b/g)) {
+            push(match[0] || '');
+        }
+        for (const match of message.matchAll(/\/[A-Za-z0-9._/-]+/g)) {
+            push(match[0] || '');
+        }
+        for (const match of message.matchAll(/\b([a-z0-9]+(?:\s+[a-z0-9]+){1,2})\b/gi)) {
+            const phrase = (match[1] || '').toLowerCase();
+            const words = phrase.split(' ');
+            if (words.some((word) => ['editor', 'page', 'screen', 'view', 'route', 'component'].includes(word))) {
+                push(match[1] || '');
+            }
+        }
+        return queries.slice(0, 4);
+    }
+    async generateBootstrapDiscoveryContext(message, localTools) {
+        if (!localTools)
+            return undefined;
+        const searchTool = localTools.search_repo;
+        const globTool = localTools.glob_files;
+        if (!searchTool && !globTool)
+            return undefined;
+        const queries = this.extractBootstrapQueries(message);
+        if (queries.length === 0)
+            return undefined;
+        const lines = [];
+        for (const query of queries) {
+            if (lines.length >= 6)
+                break;
+            if (searchTool) {
+                try {
+                    const result = await searchTool.execute({ query, path: '.', maxResults: 5 });
+                    const summary = this.summarizeTextBlockForTrace(result, 3);
+                    if (summary && !summary.startsWith('No matches found')) {
+                        lines.push(`search_repo "${query}": ${summary}`);
+                        continue;
+                    }
+                }
+                catch {
+                    // Best effort bootstrap only
+                }
+            }
+            if (globTool && /\./.test(query)) {
+                try {
+                    const result = await globTool.execute({ pattern: `**/${query}`, path: '.', maxResults: 5 });
+                    const summary = this.summarizeTextBlockForTrace(result, 3);
+                    if (summary && !summary.startsWith('No files matched')) {
+                        lines.push(`glob_files "**/${query}": ${summary}`);
+                    }
+                }
+                catch {
+                    // Best effort bootstrap only
+                }
+            }
+        }
+        if (lines.length === 0)
+            return undefined;
+        return ['Bootstrap repo hints:', ...lines].join('\n').slice(0, 1500);
+    }
+    buildStuckTurnRecoveryMessage(state) {
+        const recent = state.sessions.slice(-2);
+        const normalizedPlanPath = typeof state.planPath === 'string' && state.planPath.trim()
+            ? this.normalizeCandidatePath(state.planPath)
+            : undefined;
+        const recentPlanOnlyLoop = Boolean(normalizedPlanPath) &&
+            recent.length === 2 &&
+            recent.every((session) => {
+                const specificActionKeys = (session.actionKeys || [])
+                    .map((actionKey) => actionKey.replace(/\\/g, '/'))
+                    .filter((actionKey) => !actionKey.startsWith('server:'));
+                return (specificActionKeys.length > 0 &&
+                    specificActionKeys.every((actionKey) => actionKey.includes(normalizedPlanPath)));
+            });
+        if (recent.length < 2 ||
+            !(recent.every((session) => session.hadTextOutput === false && session.wroteFiles === false) ||
+                recentPlanOnlyLoop)) {
+            return undefined;
+        }
+        const repeatedSameActions = recent.length === 2 &&
+            recent.every((session) => (session.actionKeys?.length || 0) > 0) &&
+            JSON.stringify(recent[0]?.actionKeys || []) === JSON.stringify(recent[1]?.actionKeys || []);
+        if (state.workflowPhase === 'planning' && state.planPath) {
+            return [
+                'Recovery instruction:',
+                'Research is already complete. Stop rediscovering and write the plan now.',
+                `Your next action must be write_file to "${state.planPath}".`,
+                'The plan must summarize UX findings, include a "Preserve existing functionality" section, name the best candidate file, and list execution steps.',
+                'Do not edit the product file until the plan exists.',
+                ...(repeatedSameActions
+                    ? ['You are repeating the same discovery actions; break the loop by writing the plan file now.']
+                    : []),
+            ].join('\n');
+        }
+        if (state.workflowPhase === 'execution' && state.bestCandidatePath) {
+            const normalizedBestCandidatePath = this.normalizeCandidatePath(state.bestCandidatePath);
+            const recentlyReadBestCandidate = (state.recentReadPaths || [])
+                .map((readPath) => this.normalizeCandidatePath(readPath))
+                .includes(normalizedBestCandidatePath);
+            return [
+                'Recovery instruction:',
+                'Planning should already be complete. Stop rediscovering and execute the plan.',
+                recentlyReadBestCandidate
+                    ? `Your next action must be write_file on "${state.bestCandidatePath}".`
+                    : `Your next action must be read_file on "${state.bestCandidatePath}" so you can edit it next.`,
+                ...(state.planPath
+                    ? [`Do not write "${state.planPath}" again until after you complete a real repo-file edit in this session.`]
+                    : []),
+                'After editing, run a verification command with run_check before TASK_COMPLETE.',
+                'Do not call broad discovery tools again unless the target file is missing or invalid.',
+                ...(repeatedSameActions
+                    ? ['You are repeating the same discovery actions; break the loop by editing the target file now.']
+                    : []),
+            ].join('\n');
+        }
+        if (state.bestCandidatePath) {
+            const recentlyReadBestCandidate = (state.recentReadPaths || []).includes(state.bestCandidatePath);
+            return [
+                'Recovery instruction:',
+                'Your previous sessions produced no final text and did not complete a useful edit.',
+                `You already have a best candidate file: "${state.bestCandidatePath}".`,
+                ...(state.bestCandidateReason ? [`Reason: ${state.bestCandidateReason}`] : []),
+                recentlyReadBestCandidate
+                    ? `Do not keep searching. Your next action must be to edit "${state.bestCandidatePath}" with write_file, or explain why that file is not the correct target.`
+                    : `Do not keep searching. Your next action must be read_file on "${state.bestCandidatePath}".`,
+                'Do not call list_directory, tree_directory, glob_files, or search_repo again unless that candidate path is missing or clearly wrong.',
+                ...(repeatedSameActions
+                    ? ['You are repeating the same discovery actions; break the loop by acting on the best candidate now.']
+                    : []),
+            ].join('\n');
+        }
+        const queries = this.extractBootstrapQueries(state.originalMessage || '');
+        const queryHint = queries.length > 0
+            ? `Start with these exact repo searches: ${queries.map((query) => `"${query}"`).join(', ')}.`
+            : 'Start with a concrete repo search using the key nouns from the original task.';
+        return [
+            'Recovery instruction:',
+            'Your previous sessions produced no final text and did not edit files.',
+            queryHint,
+            'Then read the most relevant existing file you find before any write_file call.',
+            'If a route, link, or page already exists, edit that existing file instead of creating a new one.',
+            ...(repeatedSameActions
+                ? ['You are repeating the same discovery actions; pick one candidate and act on it.']
+                : []),
+        ].join('\n');
+    }
     /**
      * Run a long-task agent across multiple sessions with automatic state management.
      *
@@ -1328,12 +2406,21 @@ class AgentsEndpoint {
             : options.trackProgress
                 ? `${agent.name} task`
                 : '';
+        const resolvedTaskName = taskName || `${agent.name} task`;
+        const seededResumeState = this.sanitizeResumeState(options.resumeState, resolvedTaskName);
         // Initialize state
         const state = {
             agentId: id,
             agentName: agent.name,
-            taskName: taskName || `${agent.name} task`,
+            taskName: resolvedTaskName,
             status: 'running',
+            workflowPhase: seededResumeState?.workflowPhase || 'research',
+            planPath: seededResumeState?.planPath || this.getDefaultPlanPath(resolvedTaskName),
+            planWritten: seededResumeState?.planWritten || false,
+            bestCandidateNeedsVerification: seededResumeState?.bestCandidateNeedsVerification || false,
+            bestCandidateVerified: seededResumeState?.bestCandidateVerified || false,
+            verificationRequired: seededResumeState?.verificationRequired ?? Boolean(options.localTools?.run_check),
+            lastVerificationPassed: seededResumeState?.lastVerificationPassed || false,
             sessionCount: 0,
             totalCost: 0,
             lastOutput: '',
@@ -1341,13 +2428,39 @@ class AgentsEndpoint {
             sessions: [],
             startedAt: new Date().toISOString(),
             updatedAt: new Date().toISOString(),
+            ...(seededResumeState?.originalMessage ? { originalMessage: seededResumeState.originalMessage } : {}),
+            ...(seededResumeState?.bootstrapContext ? { bootstrapContext: seededResumeState.bootstrapContext } : {}),
+            ...(seededResumeState?.bestCandidatePath
+                ? {
+                    bestCandidatePath: seededResumeState.bestCandidatePath,
+                    bestCandidateReason: seededResumeState.bestCandidateReason,
+                }
+                : {}),
+            ...(seededResumeState?.candidatePaths ? { candidatePaths: seededResumeState.candidatePaths } : {}),
+            ...(seededResumeState?.recentReadPaths ? { recentReadPaths: seededResumeState.recentReadPaths } : {}),
+            ...(seededResumeState?.recentActionKeys
+                ? { recentActionKeys: seededResumeState.recentActionKeys }
+                : {}),
         };
+        this.updateWorkflowPhase(state, this.createEmptyToolTrace());
         // Track the record ID if we're syncing
         let recordId;
         // Extract local tool names for prompt injection
         const localToolNames = options.localTools ? Object.keys(options.localTools) : undefined;
+        if (!options.previousMessages) {
+            state.bootstrapContext = await this.generateBootstrapDiscoveryContext(options.message, options.localTools);
+            const bootstrapCandidate = this.extractBestCandidateFromBootstrapContext(state.bootstrapContext);
+            if (bootstrapCandidate) {
+                state.bestCandidatePath = bootstrapCandidate.path;
+                state.bestCandidateReason = bootstrapCandidate.reason;
+                state.candidatePaths = [bootstrapCandidate.path];
+            }
+        }
         // Session loop
         for (let session = 0; session < maxSessions; session++) {
+            const sessionTrace = this.createEmptyToolTrace();
+            const sessionLocalTools = this.wrapLocalToolsForTrace(options.localTools, sessionTrace, state);
+            const sessionCallbacks = this.createTraceCallbacks(options.streamCallbacks, sessionTrace);
             // Build continuation context for resumed runs (first session only)
             const continuationContext = session === 0 && options.previousMessages
                 ? {
@@ -1371,7 +2484,9 @@ class AgentsEndpoint {
             };
             if (useStream && options.localTools) {
                 // Local tools require the pause/resume streaming loop
-                const completeEvent = await this.executeWithLocalTools(id, sessionData, options.localTools, options.streamCallbacks);
+                const completeEvent = await this.executeWithLocalTools(id, sessionData, sessionLocalTools || options.localTools, sessionCallbacks, {
+                    onLocalToolResult: this.createLocalToolLoopGuard(state, sessionTrace),
+                });
                 if (!completeEvent) {
                     throw new Error('Agent stream ended without a complete event');
                 }
@@ -1385,7 +2500,7 @@ class AgentsEndpoint {
                 };
             }
             else if (useStream && options.streamCallbacks) {
-                const completeEvent = await this.executeWithCallbacks(id, sessionData, options.streamCallbacks);
+                const completeEvent = await this.executeWithCallbacks(id, sessionData, sessionCallbacks || options.streamCallbacks);
                 if (!completeEvent) {
                     throw new Error('Agent stream ended without a complete event');
                 }
@@ -1401,11 +2516,17 @@ class AgentsEndpoint {
             else {
                 sessionResult = await this.execute(id, sessionData);
             }
+            const toolTraceSummary = this.buildToolTraceSummary(sessionTrace);
+            const effectiveSessionOutput = this.buildEffectiveSessionOutput(sessionResult.result, toolTraceSummary);
             // Update state
             const sessionCost = sessionResult.totalCost;
             state.sessionCount = session + 1;
             state.totalCost += sessionCost;
-            state.lastOutput = sessionResult.result;
+            state.lastOutput = effectiveSessionOutput;
+            state.lastError =
+                sessionResult.stopReason === 'error'
+                    ? sessionResult.error || 'Agent session ended with an error.'
+                    : undefined;
             state.lastStopReason = sessionResult.stopReason;
             state.updatedAt = new Date().toISOString();
             state.sessions.push({
@@ -1413,22 +2534,89 @@ class AgentsEndpoint {
                 cost: sessionCost,
                 iterations: sessionResult.iterations,
                 stopReason: sessionResult.stopReason,
-                outputPreview: sessionResult.result.slice(0, 300),
+                outputPreview: effectiveSessionOutput.slice(0, 300),
+                toolTraceSummary: toolTraceSummary || undefined,
+                discoveryPerformed: sessionTrace.discoveryPerformed,
+                attemptedWrite: sessionTrace.attemptedWrite,
+                wroteFiles: sessionTrace.wroteFiles,
+                hadTextOutput: Boolean(sessionResult.result.trim()),
+                verificationAttempted: sessionTrace.verificationAttempted,
+                verificationPassed: sessionTrace.verificationPassed,
+                bestCandidatePath: sessionTrace.bestCandidatePath || undefined,
+                actionKeys: sessionTrace.actionKeys.slice(-5),
                 completedAt: new Date().toISOString(),
             });
+            if (sessionTrace.bestCandidatePath) {
+                state.bestCandidatePath = sessionTrace.bestCandidatePath;
+                state.bestCandidateReason = sessionTrace.bestCandidateReason;
+            }
+            if (sessionTrace.candidatePaths.length > 0) {
+                state.candidatePaths = Array.from(new Set([...(state.candidatePaths || []), ...sessionTrace.candidatePaths])).slice(-20);
+            }
+            if (sessionTrace.readPaths.length > 0) {
+                state.recentReadPaths = Array.from(new Set([...(state.recentReadPaths || []), ...sessionTrace.readPaths])).slice(-20);
+            }
+            if (sessionTrace.actionKeys.length > 0) {
+                state.recentActionKeys = [...(state.recentActionKeys || []), ...sessionTrace.actionKeys].slice(-20);
+            }
+            if (sessionTrace.planWritten) {
+                state.planWritten = true;
+            }
+            if (sessionTrace.executionFileWritten) {
+                state.lastVerificationPassed = false;
+            }
+            if (sessionTrace.bestCandidateWritten) {
+                state.bestCandidateNeedsVerification = true;
+                state.bestCandidateVerified = false;
+            }
+            if (sessionTrace.bestCandidateVerified) {
+                state.bestCandidateNeedsVerification = false;
+                state.bestCandidateVerified = true;
+            }
+            if (sessionTrace.verificationAttempted) {
+                state.lastVerificationPassed = sessionTrace.verificationPassed;
+            }
             // Track cost by model
             const modelKey = options.model || 'default';
             if (!state.costByModel)
                 state.costByModel = {};
             state.costByModel[modelKey] = (state.costByModel[modelKey] || 0) + sessionCost;
-            // Accumulate messages for future continuation
+            this.updateWorkflowPhase(state, sessionTrace);
+            const phaseTransitionSummary = state.phaseTransitionSummary;
+            if (phaseTransitionSummary) {
+                state.lastOutput = [phaseTransitionSummary, '', state.lastOutput].join('\n').trim();
+                const latestSession = state.sessions[state.sessions.length - 1];
+                if (latestSession) {
+                    latestSession.outputPreview = [phaseTransitionSummary, '', latestSession.outputPreview]
+                        .join('\n')
+                        .slice(0, 300);
+                    latestSession.toolTraceSummary = [phaseTransitionSummary, '', latestSession.toolTraceSummary || '']
+                        .join('\n')
+                        .trim()
+                        .slice(0, 1200);
+                }
+            }
+            // Accumulate messages for future continuation.
+            // When buildSessionMessages returns the full history + a new continuation
+            // message, only the NEW messages at the end are appended — otherwise the
+            // history would be re-pushed on every session and grow exponentially.
             if (!state.messages)
                 state.messages = [];
-            state.messages.push(...messages);
-            // Also store the assistant's response as a message
-            if (sessionResult.result) {
-                state.messages.push({ role: 'assistant', content: sessionResult.result });
+            if (state.messages.length > 0 && messages.length > state.messages.length) {
+                // Continuation session: history was replayed, only append the new tail
+                const newMessages = messages.slice(state.messages.length);
+                state.messages.push(...newMessages);
+            }
+            else {
+                // First session (or no prior history): all messages are new
+                state.messages.push(...messages);
             }
+            // Always store an assistant message so continuation sessions have full
+            // conversation history. When the agent only made tool calls and produced
+            // no text, fall back to a synthetic summary so the history stays coherent.
+            const assistantContent = effectiveSessionOutput ||
+                `[Session ${session + 1} completed (${sessionResult.stopReason}). No text output captured.]`;
+            state.messages.push({ role: 'assistant', content: assistantContent });
             // Keep session log trimmed to last 50 entries
             if (state.sessions.length > 50) {
                 state.sessions = state.sessions.slice(-50);
@@ -1438,12 +2626,12 @@ class AgentsEndpoint {
                 state.status = 'complete';
             }
             else if (sessionResult.stopReason === 'error') {
-                state.status = 'complete';
+                state.status = 'error';
             }
             else if (sessionResult.stopReason === 'max_cost') {
                 state.status = 'budget_exceeded';
             }
-            else if (this.detectTaskCompletion(sessionResult.result)) {
+            else if (this.canAcceptTaskCompletion(sessionResult.result, state, sessionTrace)) {
                 // Client-side stop-phrase detection for non-loop agents returning 'end_turn'
                 state.status = 'complete';
             }
@@ -1497,8 +2685,18 @@ class AgentsEndpoint {
         return [
             `Task: ${state.taskName}`,
             `Status: ${state.status}`,
+            `Workflow phase: ${state.workflowPhase || 'research'}`,
             `Sessions completed: ${state.sessionCount}`,
             `Total cost: $${state.totalCost.toFixed(4)}`,
+            ...(state.planPath ? [`Plan path: ${state.planPath}`] : []),
+            ...(state.planWritten ? ['Plan written: yes'] : []),
+            ...(state.bestCandidatePath
+                ? [
+                    `Best candidate: ${state.bestCandidatePath}`,
+                    ...(state.bestCandidateReason ? [`Candidate reason: ${state.bestCandidateReason}`] : []),
+                ]
+                : []),
+            ...(state.bootstrapContext ? ['', state.bootstrapContext] : []),
             '',
             'Session history:',
             sessionSummaries,
@@ -1513,17 +2711,63 @@ class AgentsEndpoint {
      */
     buildSessionMessages(originalMessage, state, sessionIndex, maxSessions, localToolNames, continuationContext) {
         // Build local tools guidance block when tools are available
+        const phase = state.workflowPhase || 'research';
         const toolsBlock = localToolNames?.length
             ? [
                 '',
                 '--- Local Tools ---',
                 `You have access to local filesystem tools (${localToolNames.join(', ')}) that execute directly on the user's machine.`,
-                'Use these tools to create working, runnable files — not just code in your response.',
-                'Prefer creating self-contained HTML files that the user can open in a web browser.',
-                'For example, write a single .html file with inline CSS and JavaScript that demonstrates the result.',
+                'Use these tools to inspect the existing repository and make real file edits — not just code in your response.',
+                ...(phase === 'research'
+                    ? [
+                        'For repository modification tasks, before any write_file call you must perform at least one discovery action (search_repo, glob_files, or tree_directory).',
+                        'If discovery finds a plausible existing file, you must read at least one candidate file before writing.',
+                        'Before creating a new file, search the repo for existing relevant files, routes, links, components, or pages.',
+                        'Prefer editing an existing file when one already implements or links to the feature you were asked to change.',
+                        'Use search_repo, glob_files, and tree_directory to discover the right files before you call write_file.',
+                        'Only create a new file when no suitable existing file exists, and make that decision intentionally.',
+                    ]
+                    : phase === 'planning'
+                        ? [
+                            `Research is already complete. Focus on writing or updating the plan at: ${state.planPath || this.getDefaultPlanPath(state.taskName)}.`,
+                            'Do not restart broad repo discovery unless the saved best candidate is clearly invalid.',
+                            'You may read the current target file or supporting source files if you need evidence for the plan.',
+                            'Ground the plan in the existing implementation. Identify which current behaviors and linked files must be preserved.',
+                            'List the exact verification commands you expect to run after editing, such as lint, typecheck, tests, or build.',
+                        ]
+                        : [
+                            ...(state.bestCandidatePath
+                                ? [
+                                    `Execution-phase guard: broad discovery tools (search_repo, glob_files, tree_directory, list_directory) are locked while executing against "${state.bestCandidatePath}".`,
+                                ]
+                                : [
+                                    'Execution-phase guard: broad discovery tools are locked unless a read of the current target fails.',
+                                ]),
+                            'Reading the markdown plan for status does not change the product target. Do not treat the plan file as the file to implement.',
+                            'Do not write the plan file first in execution. Make a real repo-file edit before you update the plan with progress.',
+                            'Do not create scratch or test files to probe the repo or tool behavior.',
+                            'write_file automatically checkpoints original repo files before overwriting them. If an edit regresses behavior, use restore_file_checkpoint on that file.',
+                            'Read the target file and edit it with write_file. Update the plan file with progress after completing real edits.',
+                            'Before large edits, read any already discovered supporting source/style files that power the target so you preserve existing behavior.',
+                            'Prefer minimal diffs over rewrites. If you cannot verify related behavior, stop and record what is still unverified instead of rewriting blindly.',
+                            'Use run_check for real verification before TASK_COMPLETE. Good examples: "pnpm lint", "pnpm exec tsc --noEmit", "pnpm test", or a focused vitest/pytest command.',
+                            'Broad discovery is only allowed if a read of the current target file fails.',
+                        ]),
                 'Always use write_file to save your output so the user can run it immediately.',
             ].join('\n')
             : '';
+        const bootstrapBlock = state.bootstrapContext
+            ? ['', '--- Initial Repository Discovery ---', state.bootstrapContext].join('\n')
+            : '';
+        const phaseBlock = ['', this.buildPhaseInstructions(state)].join('\n');
+        const candidateBlock = state.bestCandidatePath
+            ? [
+                '',
+                '--- Best Candidate ---',
+                `Current best candidate file: ${state.bestCandidatePath}`,
+                ...(state.bestCandidateReason ? [`Why: ${state.bestCandidateReason}`] : []),
+            ].join('\n')
+            : '';
         const multiSessionInstruction = `This is a multi-session task (session ${sessionIndex + 1}/${maxSessions}). When you have fully completed the task, end your response with TASK_COMPLETE on its own line.`;
         // Continuation resume: first session of a resumed run with prior context
         if (continuationContext && sessionIndex === 0) {
@@ -1539,7 +2783,7 @@ class AgentsEndpoint {
                     },
                     {
                         role: 'user',
-                        content: [userMessage, toolsBlock, '', multiSessionInstruction].join('\n'),
+                        content: [userMessage, phaseBlock, toolsBlock, bootstrapBlock, candidateBlock, '', multiSessionInstruction].join('\n'),
                     },
                 ];
                 return messages;
@@ -1553,14 +2797,14 @@ class AgentsEndpoint {
                 },
                 {
                     role: 'user',
-                    content: [userMessage, toolsBlock, '', multiSessionInstruction].join('\n'),
+                    content: [userMessage, phaseBlock, toolsBlock, bootstrapBlock, candidateBlock, '', multiSessionInstruction].join('\n'),
                 },
             ];
             return messages;
         }
         // First session (non-continuation): user message + completion signal instruction
         if (sessionIndex === 0) {
-            const content = [originalMessage, toolsBlock, '', multiSessionInstruction].join('\n');
+            const content = [originalMessage, phaseBlock, toolsBlock, bootstrapBlock, candidateBlock, '', multiSessionInstruction].join('\n');
             return [{ role: 'user', content }];
         }
         // Continuation sessions within a run: inject progress context
@@ -1568,14 +2812,57 @@ class AgentsEndpoint {
         const progressSummary = recentSessions
             .map((s) => `  Session ${s.index}: ${s.stopReason} ($${s.cost.toFixed(4)}) — ${s.outputPreview.slice(0, 100)}`)
             .join('\n');
+        // When we have accumulated message history, replay the full conversation
+        // so the model has complete context and doesn't start fresh each session.
+        if (state.messages && state.messages.length > 0) {
+            const recoveryMessage = this.buildStuckTurnRecoveryMessage(state);
+            const continuationContent = [
+                'Continue the task.',
+                phaseBlock,
+                toolsBlock,
+                bootstrapBlock,
+                candidateBlock,
+                '',
+                `--- Progress (session ${sessionIndex + 1}/${maxSessions}, $${state.totalCost.toFixed(4)} spent) ---`,
+                `Previous sessions:`,
+                progressSummary,
+                '',
+                ...(recoveryMessage ? [recoveryMessage, ''] : []),
+                'Do not redo previous work. If the task is already complete, respond with TASK_COMPLETE.',
+            ].join('\n');
+            // Cap history to prevent context overflow on long-running marathons.
+            // Keep the most recent 40 messages; prepend a system summary for trimmed ones.
+            const MAX_HISTORY_MESSAGES = 40;
+            let historyMessages = state.messages;
+            if (historyMessages.length > MAX_HISTORY_MESSAGES) {
+                const trimmedCount = historyMessages.length - MAX_HISTORY_MESSAGES;
+                historyMessages = [
+                    {
+                        role: 'system',
+                        content: `[${trimmedCount} earlier messages trimmed to stay within context limits. Original task: ${(state.originalMessage || originalMessage).slice(0, 500)}]`,
+                    },
+                    ...historyMessages.slice(-MAX_HISTORY_MESSAGES),
+                ];
+            }
+            return [
+                ...historyMessages,
+                { role: 'user', content: continuationContent },
+            ];
+        }
+        // Fallback when no message history is available: single-message summary
+        const recoveryMessage = this.buildStuckTurnRecoveryMessage(state);
         const content = [
             originalMessage,
+            phaseBlock,
             toolsBlock,
+            bootstrapBlock,
+            candidateBlock,
             '',
             `--- Progress (session ${sessionIndex + 1}/${maxSessions}, $${state.totalCost.toFixed(4)} spent) ---`,
             `Previous sessions:`,
             progressSummary,
             '',
+            ...(recoveryMessage ? [recoveryMessage, ''] : []),
             `Last output (do NOT repeat this — build on it):`,
             state.lastOutput.slice(0, 1000),
             '',