npm - @blockrun/franklin - Versions diffs - 3.3.3 → 3.5.0 - Mend

@blockrun/franklin 3.3.3 → 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (109) hide show

package/README.md +55 -4
package/dist/agent/commands.d.ts +1 -1
package/dist/agent/commands.js +128 -17
package/dist/agent/compact.d.ts +2 -2
package/dist/agent/compact.js +148 -22
package/dist/agent/context.d.ts +8 -3
package/dist/agent/context.js +301 -108
package/dist/agent/error-classifier.d.ts +11 -2
package/dist/agent/error-classifier.js +64 -10
package/dist/agent/llm.d.ts +8 -1
package/dist/agent/llm.js +114 -19
package/dist/agent/loop.d.ts +1 -2
package/dist/agent/loop.js +509 -61
package/dist/agent/optimize.d.ts +2 -2
package/dist/agent/optimize.js +9 -7
package/dist/agent/permissions.d.ts +1 -1
package/dist/agent/permissions.js +1 -1
package/dist/agent/planner.d.ts +42 -0
package/dist/agent/planner.js +110 -0
package/dist/agent/reduce.d.ts +7 -1
package/dist/agent/reduce.js +85 -3
package/dist/agent/streaming-executor.d.ts +6 -1
package/dist/agent/streaming-executor.js +83 -5
package/dist/agent/tokens.d.ts +11 -2
package/dist/agent/tokens.js +38 -5
package/dist/agent/tool-guard.d.ts +27 -0
package/dist/agent/tool-guard.js +324 -0
package/dist/agent/types.d.ts +7 -1
package/dist/agent/types.js +1 -1
package/dist/brain/extract.d.ts +11 -0
package/dist/brain/extract.js +154 -0
package/dist/brain/index.d.ts +3 -0
package/dist/brain/index.js +2 -0
package/dist/brain/store.d.ts +42 -0
package/dist/brain/store.js +225 -0
package/dist/brain/types.d.ts +45 -0
package/dist/brain/types.js +5 -0
package/dist/commands/daemon.js +2 -1
package/dist/commands/start.js +16 -3
package/dist/config.js +1 -1
package/dist/index.js +27 -2
package/dist/learnings/extractor.d.ts +13 -0
package/dist/learnings/extractor.js +69 -8
package/dist/learnings/index.d.ts +1 -1
package/dist/learnings/index.js +1 -1
package/dist/learnings/store.js +42 -13
package/dist/learnings/types.d.ts +1 -1
package/dist/mcp/client.d.ts +1 -1
package/dist/mcp/client.js +5 -5
package/dist/mcp/config.d.ts +1 -1
package/dist/mcp/config.js +1 -1
package/dist/panel/html.d.ts +2 -0
package/dist/panel/html.js +409 -146
package/dist/panel/server.js +19 -0
package/dist/pricing.js +3 -2
package/dist/proxy/fallback.d.ts +3 -1
package/dist/proxy/fallback.js +4 -4
package/dist/proxy/server.js +29 -11
package/dist/proxy/sse-translator.js +1 -1
package/dist/router/categories.d.ts +21 -0
package/dist/router/categories.js +96 -0
package/dist/router/index.d.ts +9 -2
package/dist/router/index.js +106 -27
package/dist/router/local-elo.d.ts +32 -0
package/dist/router/local-elo.js +107 -0
package/dist/router/selector.d.ts +46 -0
package/dist/router/selector.js +106 -0
package/dist/session/storage.d.ts +5 -1
package/dist/session/storage.js +24 -2
package/dist/social/a11y.d.ts +1 -1
package/dist/social/a11y.js +5 -1
package/dist/social/browser.d.ts +5 -0
package/dist/social/browser.js +22 -0
package/dist/social/preflight.d.ts +4 -0
package/dist/social/preflight.js +42 -3
package/dist/stats/failures.d.ts +20 -0
package/dist/stats/failures.js +63 -0
package/dist/stats/format.d.ts +6 -0
package/dist/stats/format.js +23 -0
package/dist/stats/insights.js +1 -21
package/dist/stats/session-tracker.d.ts +21 -0
package/dist/stats/session-tracker.js +28 -0
package/dist/stats/tracker.d.ts +1 -1
package/dist/stats/tracker.js +1 -1
package/dist/tools/bash.d.ts +14 -1
package/dist/tools/bash.js +132 -7
package/dist/tools/edit.js +77 -14
package/dist/tools/glob.js +13 -3
package/dist/tools/grep.js +30 -12
package/dist/tools/imagegen.js +3 -3
package/dist/tools/index.d.ts +1 -1
package/dist/tools/index.js +5 -1
package/dist/tools/read.d.ts +16 -2
package/dist/tools/read.js +36 -8
package/dist/tools/searchx.d.ts +6 -2
package/dist/tools/searchx.js +221 -44
package/dist/tools/subagent.js +37 -3
package/dist/tools/task.js +43 -7
package/dist/tools/validate.d.ts +11 -0
package/dist/tools/validate.js +42 -0
package/dist/tools/webfetch.js +18 -7
package/dist/tools/websearch.js +41 -7
package/dist/tools/write.js +26 -6
package/dist/ui/app.js +31 -6
package/dist/ui/model-picker.d.ts +1 -1
package/dist/ui/model-picker.js +1 -1
package/dist/ui/terminal.d.ts +1 -1
package/dist/ui/terminal.js +1 -1
package/package.json +2 -2

package/dist/agent/loop.js CHANGED Viewed

@@ -1,20 +1,201 @@
 /**
- * runcode Agent Loop
+ * Franklin Agent Loop
  * The core reasoning-action cycle: prompt → model → extract capabilities → execute → repeat.
- * Original implementation with different architecture from any reference codebase.
  */
 import { ModelClient } from './llm.js';
-import { autoCompactIfNeeded, microCompact } from './compact.js';
-import { estimateHistoryTokens, updateActualTokens, resetTokenAnchor, getAnchoredTokenCount, getContextWindow } from './tokens.js';
+import { autoCompactIfNeeded, forceCompact, microCompact } from './compact.js';
+import { estimateHistoryTokens, updateActualTokens, resetTokenAnchor, getAnchoredTokenCount, getContextWindow, setEstimationModel } from './tokens.js';
 import { handleSlashCommand } from './commands.js';
 import { reduceTokens } from './reduce.js';
 import { PermissionManager } from './permissions.js';
 import { StreamingExecutor } from './streaming-executor.js';
 import { optimizeHistory, CAPPED_MAX_TOKENS, ESCALATED_MAX_TOKENS, getMaxOutputTokens } from './optimize.js';
 import { classifyAgentError } from './error-classifier.js';
+import { SessionToolGuard } from './tool-guard.js';
 import { recordUsage } from '../stats/tracker.js';
-import { estimateCost } from '../pricing.js';
+import { recordSessionUsage } from '../stats/session-tracker.js';
+import { estimateCost, OPUS_PRICING } from '../pricing.js';
+import { maybeMidSessionExtract } from '../learnings/extractor.js';
+import { routeRequest, parseRoutingProfile } from '../router/index.js';
+import { recordOutcome } from '../router/local-elo.js';
+import { shouldPlan, getPlanningPrompt, getExecutorModel, isExecutorStuck, toolCallSignature } from './planner.js';
 import { createSessionId, appendToSession, updateSessionMeta, pruneOldSessions, } from '../session/storage.js';
+/**
+ * Replace the entire contents of `target` with the elements of `replacement`, in place.
+ *
+ * Done with a single `splice` call instead of `history.length = 0; history.push(...)`,
+ * so there is no intermediate step in which the array has been emptied but not yet
+ * refilled. The array object itself is kept, so every holder of a reference to
+ * `target` sees the new contents.
+ *
+ * NOTE(review): `replacement` is spread into the call's argument list; a very large
+ * replacement could presumably hit the engine's argument-count limit — confirm this
+ * is not a concern for the history sizes seen in practice.
+ *
+ * @param {Array} target - Array whose contents are overwritten in place.
+ * @param {Iterable} replacement - Elements that become the new contents of `target`.
+ * @returns {void}
+ */
+function replaceHistory(target, replacement) {
+    target.splice(0, target.length, ...replacement);
+}
+/**
+ * Sanitize history: fix orphaned tool results AND inject missing results.
+ * Inspired by Claude Code's yieldMissingToolResultBlocks + Hermes _sanitize_api_messages().
+ *
+ * Two problems this solves:
+ * 1. Orphaned tool_results — results without matching tool_use calls (remove them;
+ *    a user message left with no parts after removal is dropped entirely)
+ * 2. Missing tool_results — tool_use calls without matching results (inject stubs
+ *    flagged with is_error: true)
+ *    This happens when the model response includes tool calls that weren't executed
+ *    (e.g., abort mid-stream, error before tool execution). The API requires every
+ *    tool_use to have a corresponding tool_result or it rejects the request.
+ *
+ * Matching is by ID over the whole history, not by position: a result counts as
+ * matched if any assistant message contains a tool_use with that ID, and vice versa.
+ * Only messages whose `content` is an array are inspected; string content passes through.
+ *
+ * Return value: the very same `history` array when nothing needs fixing; otherwise a
+ * new array. The new array can have the SAME length as the input while containing
+ * different message objects (filtered or merged content), so compare by identity or
+ * content rather than by length to detect a change.
+ *
+ * Side effect: when stubs have to be merged into the user message that directly
+ * follows an assistant message, `history[i + 1]` of the INPUT array is overwritten
+ * with a fresh `{ role: 'user', content }` object (other fields of that message are
+ * not carried over). The input array is therefore not guaranteed to be left untouched.
+ *
+ * @param {Array<{role: string, content: (string|Array<Object>)}>} history - Conversation messages.
+ * @returns {Array<Object>} `history` itself if already consistent, else a repaired copy.
+ */
+function sanitizeHistory(history) {
+    // Collect all tool_use IDs from assistant messages
+    const callIds = new Set();
+    // Collect all tool_result IDs from user messages
+    const resultIds = new Set();
+    for (const msg of history) {
+        if (msg.role === 'assistant' && Array.isArray(msg.content)) {
+            for (const part of msg.content) {
+                if (part.type === 'tool_use' && part.id) {
+                    callIds.add(part.id);
+                }
+            }
+        }
+        if (msg.role === 'user' && Array.isArray(msg.content)) {
+            for (const part of msg.content) {
+                if (part.type === 'tool_result' && part.tool_use_id) {
+                    resultIds.add(part.tool_use_id);
+                }
+            }
+        }
+    }
+    // 1. Identify orphaned tool results (results without matching calls); removed in the loop below
+    const orphanedResults = new Set([...resultIds].filter(id => !callIds.has(id)));
+    // 2. Find missing tool results (calls without matching results)
+    const missingResults = new Set([...callIds].filter(id => !resultIds.has(id)));
+    if (orphanedResults.size === 0 && missingResults.size === 0)
+        return history;
+    const result = [];
+    for (let i = 0; i < history.length; i++) {
+        const msg = history[i];
+        if (msg.role === 'user' && Array.isArray(msg.content)) {
+            // Remove orphaned tool results
+            if (orphanedResults.size > 0) {
+                const filtered = msg.content.filter(p => !(p.type === 'tool_result' && orphanedResults.has(p.tool_use_id)));
+                if (filtered.length === 0)
+                    continue; // Skip messages that contained nothing but orphaned results
+                result.push({ ...msg, content: filtered });
+            }
+            else {
+                result.push(msg);
+            }
+            continue;
+        }
+        result.push(msg);
+        // After each assistant message with tool_use, check if the next message
+        // contains all the required tool_results. If not, inject stubs.
+        if (msg.role === 'assistant' && Array.isArray(msg.content) && missingResults.size > 0) {
+            const toolUseIds = [];
+            for (const part of msg.content) {
+                if (part.type === 'tool_use' && missingResults.has(part.id)) {
+                    toolUseIds.push(part.id);
+                }
+            }
+            if (toolUseIds.length > 0) {
+                // Check if the next message already has some of these results
+                const nextMsg = history[i + 1];
+                const nextResultIds = new Set();
+                if (nextMsg?.role === 'user' && Array.isArray(nextMsg.content)) {
+                    for (const part of nextMsg.content) {
+                        if (part.type === 'tool_result') {
+                            nextResultIds.add(part.tool_use_id);
+                        }
+                    }
+                }
+                // Inject stub results for any tool_use IDs that are truly missing
+                const stubParts = [];
+                for (const id of toolUseIds) {
+                    if (!nextResultIds.has(id)) {
+                        stubParts.push({
+                            type: 'tool_result',
+                            tool_use_id: id,
+                            content: '[Tool execution was interrupted — result not available]',
+                            is_error: true,
+                        });
+                        missingResults.delete(id); // Don't inject twice
+                    }
+                }
+                if (stubParts.length > 0) {
+                    // If next message is a user message, prepend stubs to it
+                    if (nextMsg?.role === 'user' && Array.isArray(nextMsg.content)) {
+                        // The merged message is not pushed here; the next loop iteration reads it from history[i + 1] and pushes it
+                        const existingContent = orphanedResults.size > 0
+                            ? nextMsg.content.filter(p => !(p.type === 'tool_result' && orphanedResults.has(p.tool_use_id)))
+                            : [...nextMsg.content];
+                        // Replace the next message with merged content (this writes to the input array)
+                        history[i + 1] = { role: 'user', content: [...stubParts, ...existingContent] };
+                    }
+                    else {
+                        // No user message follows — insert a new one with the stubs
+                        result.push({ role: 'user', content: stubParts });
+                    }
+                }
+            }
+        }
+    }
+    return result;
+}
+/**
+ * Detect media-related errors (image too large, too many images, PDF too large).
+ * These can be recovered by stripping media blocks and retrying.
+ *
+ * Matching is purely textual and case-sensitive: the message must contain one of
+ *   - "image exceeds" together with "maximum"
+ *   - "image dimensions exceed"
+ *   - "maximum of <digits> PDF pages"
+ *   - "image" together with "too large"
+ *   - "PDF" together with "too large"
+ *
+ * @param {string} msg - Error message text (assumed to be a string; `.includes` is called on it).
+ * @returns {boolean} True when the message matches any of the patterns above.
+ */
+function isMediaSizeError(msg) {
+    return ((msg.includes('image exceeds') && msg.includes('maximum')) ||
+        (msg.includes('image dimensions exceed')) ||
+        /maximum of \d+ PDF pages/.test(msg) ||
+        (msg.includes('image') && msg.includes('too large')) ||
+        (msg.includes('PDF') && msg.includes('too large')));
+}
+/**
+ * Strip image and document blocks from history, replacing with text placeholders.
+ * Used for media error recovery — retry without the oversized media.
+ *
+ * Parts of type 'image' or 'document' are replaced at the top level of a message's
+ * content array and one level down, inside the content array of a 'tool_result' part.
+ * Messages whose content is not an array are returned as-is.
+ *
+ * Nothing is mutated: a changed message (or tool_result part) is replaced by a shallow
+ * copy carrying the cleaned content. The `modified` flag is tracked per message, not
+ * per part, so once any part of a message has been replaced, later tool_result parts
+ * with array content in that same message are also copied even if they held no media
+ * (the copy is equivalent in content).
+ *
+ * @param {Array<{role: string, content: (string|Array<Object>)}>} history - Conversation messages.
+ * @returns {{history: Array<Object>, stripped: boolean}} `stripped` tells whether any media
+ *   block was replaced; `history` is the original array when nothing was stripped,
+ *   otherwise a new array.
+ */
+function stripMediaFromHistory(history) {
+    let stripped = false;
+    const result = history.map(msg => {
+        if (typeof msg.content === 'string' || !Array.isArray(msg.content))
+            return msg;
+        let modified = false;
+        const cleaned = msg.content.map((part) => {
+            if (part.type === 'image') {
+                modified = true;
+                stripped = true;
+                return { type: 'text', text: '[image removed — too large for context]' };
+            }
+            if (part.type === 'document') {
+                modified = true;
+                stripped = true;
+                return { type: 'text', text: '[document removed — too large for context]' };
+            }
+            // Also strip media nested inside tool_result content arrays (one level deep only)
+            if (part.type === 'tool_result' && Array.isArray(part.content)) {
+                const cleanedContent = part.content.map((c) => {
+                    if (c.type === 'image' || c.type === 'document') {
+                        modified = true;
+                        stripped = true;
+                        return { type: 'text', text: `[${c.type} removed — too large for context]` };
+                    }
+                    return c;
+                });
+                return modified ? { ...part, content: cleanedContent } : part;
+            }
+            return part;
+        });
+        return modified ? { ...msg, content: cleaned } : msg;
+    });
+    return { history: stripped ? result : history, stripped };
+}
+/**
+ * Calculate backoff delay with jitter to avoid thundering herd.
+ * Base: exponential (2^attempt * 1000ms) capped at `maxDelayMs`; jitter: ±25% of the base.
+ *
+ * The cap is applied before the jitter, so the returned delay can exceed `maxDelayMs`
+ * by up to 25%. The result is rounded to a whole number and never drops below 500ms.
+ * Uses Math.random(), so repeated calls with the same arguments return different values.
+ *
+ * @param {number} attempt - Retry attempt number used as the exponent.
+ * @param {number} [maxDelayMs=32000] - Upper bound for the pre-jitter base delay, in milliseconds.
+ * @returns {number} Delay in milliseconds (integer, at least 500).
+ */
+function getBackoffDelay(attempt, maxDelayMs = 32_000) {
+    const base = Math.min(Math.pow(2, attempt) * 1000, maxDelayMs);
+    const jitter = base * 0.25 * (Math.random() * 2 - 1); // ±25%
+    return Math.max(500, Math.round(base + jitter));
+}
 // ─── Interactive Session ───────────────────────────────────────────────────
 /**
  * Run a multi-turn interactive session.
@@ -37,18 +218,35 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
     const permissions = new PermissionManager(config.permissionMode ?? 'default', config.permissionPromptFn);
     const history = [];
     let lastUserInput = ''; // For /retry
-    const failedModels = new Set(); // Models that failed payment/rate-limit (session-level)
+    const originalModel = config.model; // Preserve original model/routing profile for recovery
+    let turnFailedModels = new Set(); // Models that failed this turn (cleared each new turn)
+    // Track models that failed with 402 (payment required) across turns.
+    // These persist until the session ends — unlike transient errors, payment failures
+    // will keep failing until the user adds funds. Map stores failure timestamp for future TTL.
+    const paymentFailedModels = new Map(); // model → timestamp
+    // Plan-then-execute: session-level disable flag lives on config (set by /noplan command)
     // Session persistence
     const sessionId = createSessionId();
     let turnCount = 0;
     let tokenBudgetWarned = false; // Emit token budget warning at most once per session
     let lastSessionActivity = Date.now();
+    let lastRoutedModel = ''; // last model chosen by router (for local elo)
+    let lastRoutedCategory = ''; // last category detected (for local elo)
+    let sessionInputTokens = 0;
+    let sessionOutputTokens = 0;
+    let sessionCostUsd = 0;
+    let sessionSavedVsOpus = 0;
+    const toolGuard = new SessionToolGuard();
     const persistSessionMeta = () => {
         updateSessionMeta(sessionId, {
             model: config.model,
             workDir,
             turnCount,
             messageCount: history.length,
+            inputTokens: sessionInputTokens,
+            outputTokens: sessionOutputTokens,
+            costUsd: sessionCostUsd,
+            savedVsOpusUsd: sessionSavedVsOpus,
         });
     };
     const persistSessionMessage = (message) => {
@@ -67,6 +265,10 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
         if (input.startsWith('/')) {
             // /retry re-sends the last user message
             if (input === '/retry') {
+                // Record retry as negative signal for local elo
+                if (lastRoutedCategory && lastRoutedModel) {
+                    recordOutcome(lastRoutedCategory, lastRoutedModel, 'retried');
+                }
                 if (!lastUserInput) {
                     onEvent({ kind: 'text_delta', text: 'No previous message to retry.\n' });
                     onEvent({ kind: 'turn_done', reason: 'completed' });
@@ -87,15 +289,38 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
         lastUserInput = input;
         history.push({ role: 'user', content: input });
         turnCount++;
+        toolGuard.startTurn();
         persistSessionMessage({ role: 'user', content: input });
+        // ── Model recovery: try original model at the start of each new turn ──
+        // If we fell back to a free model last turn due to a transient error, try original again.
+        // But DON'T reset if the original model had a payment failure — it will just fail again.
+        if (config.model !== originalModel && !paymentFailedModels.has(originalModel)) {
+            config.model = originalModel;
+            config.onModelChange?.(originalModel);
+        }
+        turnFailedModels = new Set(); // Fresh slate for transient failures this turn
         const abort = new AbortController();
         onAbortReady?.(() => abort.abort());
         let loopCount = 0;
         let recoveryAttempts = 0;
+        const MAX_RECOVERY_ATTEMPTS = 5; // Up from 3 — Claude Code uses 10, we split the difference
         let compactFailures = 0;
         let maxTokensOverride;
         const turnIdleReference = lastSessionActivity;
         lastSessionActivity = Date.now();
+        // ── Plan-then-execute state (per turn) ──
+        let planActive = false;
+        let planPlannerModel = '';
+        let planExecutorModel = '';
+        let planEscalationCount = 0;
+        let planConsecutiveErrors = 0;
+        let lastToolSig = ''; // For same-tool repeat detection
+        // ── Tool call guardrails (inspired by hermes-agent) ──
+        let turnToolCalls = 0; // Total tool calls this user turn
+        const turnToolCounts = new Map(); // Per-tool-name counts this turn
+        const readFileCache = new Set(); // Files already read (dedup)
+        const MAX_TOOL_CALLS_PER_TURN = 25; // Hard cap per user turn
+        const SAME_TOOL_WARN_THRESHOLD = 5; // Warn after N calls to same tool
         // Agent loop for this user message
         while (loopCount < maxTurns) {
             loopCount++;
@@ -110,21 +335,18 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                 lastActivityTimestamp: loopCount === 1 ? turnIdleReference : lastSessionActivity,
             });
             if (optimized !== history) {
-                history.length = 0;
-                history.push(...optimized);
+                replaceHistory(history, optimized);
             }
             // 2. Token reduction: age old results, normalize whitespace, trim verbose messages
             const reduced = reduceTokens(history, config.debug);
             if (reduced !== history) {
-                history.length = 0;
-                history.push(...reduced);
+                replaceHistory(history, reduced);
             }
             // 3. Microcompact: clear old tool results to prevent context snowball
             if (history.length > 6) {
                 const microCompacted = microCompact(history, 3);
                 if (microCompacted !== history) {
-                    history.length = 0;
-                    history.push(...microCompacted);
+                    replaceHistory(history, microCompacted);
                     resetTokenAnchor(); // History shrunk — resync token tracking
                 }
             }
@@ -134,19 +356,18 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                 try {
                     const { history: compacted, compacted: didCompact } = await autoCompactIfNeeded(history, config.model, client, config.debug);
                     if (didCompact) {
-                        history.length = 0;
-                        history.push(...compacted);
+                        replaceHistory(history, compacted);
                         resetTokenAnchor();
                         compactFailures = 0;
                         if (config.debug) {
-                            console.error(`[runcode] History compacted: ~${estimateHistoryTokens(history)} tokens`);
+                            console.error(`[franklin] History compacted: ~${estimateHistoryTokens(history)} tokens`);
                         }
                     }
                 }
                 catch (compactErr) {
                     compactFailures++;
                     if (config.debug) {
-                        console.error(`[runcode] Compaction failed (${compactFailures}/3): ${compactErr.message}`);
+                        console.error(`[franklin] Compaction failed (${compactFailures}/3): ${compactErr.message}`);
                     }
                 }
             }
@@ -161,6 +382,20 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                     '4. Think step by step — show your reasoning explicitly when it adds value\n' +
                     'Prioritize correctness and thoroughness over speed.');
             }
+            // ── Context awareness injection ──
+            // Tell the model how full its context window is so it can self-regulate.
+            // At high usage, nudge it to be concise and avoid unnecessary tool calls.
+            const { contextUsagePct: preCallPct } = getAnchoredTokenCount(history);
+            if (preCallPct > 50) {
+                let contextNote = `# Context Window Status\nYou have used approximately ${Math.round(preCallPct)}% of your context window.`;
+                if (preCallPct > 80) {
+                    contextNote += ' Context is critically full. Be extremely concise. Avoid re-reading files already in context. Prioritize completing the current task over exploring new questions.';
+                }
+                else if (preCallPct > 65) {
+                    contextNote += ' Be concise in responses. Avoid unnecessary tool calls. Do not re-read files you already have in context.';
+                }
+                systemParts.push(contextNote);
+            }
             const systemPrompt = systemParts.join('\n\n');
             const modelMaxOut = getMaxOutputTokens(config.model);
             let maxTokens = Math.min(maxTokensOverride ?? CAPPED_MAX_TOKENS, modelMaxOut);
@@ -172,16 +407,73 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                 handlers: capabilityMap,
                 scope: { workingDir: workDir, abortSignal: abort.signal, onAskUser: config.onAskUser },
                 permissions,
+                guard: toolGuard,
                 onStart: (id, name, preview) => onEvent({ kind: 'capability_start', id, name, preview }),
                 onProgress: (id, text) => onEvent({ kind: 'capability_progress', id, text }),
+                sessionId,
             });
+            // ── Router: resolve routing profiles to concrete models ──
+            const routingProfile = parseRoutingProfile(config.model);
+            let resolvedModel = config.model;
+            let routingTier;
+            let routingConfidence;
+            let routingSavings;
+            if (routingProfile) {
+                // Extract latest user text for classification
+                const lastUser = [...history].reverse().find((m) => m.role === 'user');
+                const userText = typeof lastUser?.content === 'string'
+                    ? lastUser.content
+                    : Array.isArray(lastUser?.content)
+                        ? lastUser.content
+                            .filter(p => p.type === 'text')
+                            .map(p => p.text ?? '')
+                            .join(' ')
+                        : '';
+                const routing = routeRequest(userText, routingProfile);
+                resolvedModel = routing.model;
+                routingTier = routing.tier;
+                routingConfidence = routing.confidence;
+                routingSavings = routing.savings;
+                lastRoutedModel = routing.model;
+                lastRoutedCategory = routing.signals[0] || '';
+            }
+            // Update token estimation model for more accurate byte-per-token ratio
+            setEstimationModel(resolvedModel);
+            // ── Plan-then-execute: detect and activate ──
+            if (loopCount === 1 && !planActive && routingProfile &&
+                shouldPlan(routingTier, routingProfile, lastUserInput, !!config.ultrathink, !!config.planDisabled)) {
+                planActive = true;
+                planPlannerModel = resolvedModel;
+                planExecutorModel = getExecutorModel(routingProfile);
+                onEvent({ kind: 'text_delta', text: '\n*Planning...*\n' });
+            }
+            // Plan-then-execute: override model on execution iterations
+            if (planActive && loopCount > 1) {
+                resolvedModel = planExecutorModel;
+            }
+            // Build per-call tool defs, max_tokens, and system prompt
+            // (planning calls get no tools + short output + planning prompt)
+            let callToolDefs = toolDefs;
+            let callMaxTokens = maxTokens;
+            let callSystemPrompt = systemPrompt;
+            if (planActive && loopCount === 1) {
+                callToolDefs = []; // No tools during planning
+                callMaxTokens = 2048; // Short plan output
+                callSystemPrompt = systemPrompt + '\n\n' + getPlanningPrompt();
+            }
+            // Safety net: handled in llm.ts resolveVirtualModel()
+            // Sanitize: drop orphaned tool results and inject stub results for tool calls that never received one
+            const sanitized = sanitizeHistory(history);
+            if (sanitized.length !== history.length) {
+                replaceHistory(history, sanitized);
+            }
             try {
                 const result = await client.complete({
-                    model: config.model,
+                    model: resolvedModel,
                     messages: history,
-                    system: systemPrompt,
-                    tools: toolDefs,
-                    max_tokens: maxTokens,
+                    system: callSystemPrompt,
+                    tools: callToolDefs,
+                    max_tokens: callMaxTokens,
                     stream: true,
                 }, abort.signal,
                 // Start concurrent tools as soon as their input is fully received
@@ -198,6 +490,18 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                 responseParts = result.content;
                 usage = result.usage;
                 stopReason = result.stopReason;
+                // ── Empty response recovery (inspired by Hermes _empty_content_retries) ──
+                const hasText = responseParts.some(p => p.type === 'text' && p.text?.trim());
+                const hasTools = responseParts.some(p => p.type === 'tool_use');
+                const hasThinking = responseParts.some(p => p.type === 'thinking');
+                if (!hasText && !hasTools && !hasThinking && recoveryAttempts < MAX_RECOVERY_ATTEMPTS) {
+                    recoveryAttempts++;
+                    if (config.debug) {
+                        console.error(`[franklin] Empty response — retrying (${recoveryAttempts}/${MAX_RECOVERY_ATTEMPTS})`);
+                    }
+                    onEvent({ kind: 'text_delta', text: `\n*Empty response — retrying (${recoveryAttempts}/${MAX_RECOVERY_ATTEMPTS})...*\n` });
+                    continue;
+                }
             }
             catch (err) {
                 // ── User abort (Esc key) ──
@@ -215,42 +519,63 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                 }
                 const errMsg = err.message || '';
                 const classified = classifyAgentError(errMsg);
-                // ── Prompt too long recovery ──
-                if (classified.category === 'context_limit' && recoveryAttempts < 3) {
+                // ── Media size error recovery (strip images/PDFs + retry) ──
+                if (isMediaSizeError(errMsg) && recoveryAttempts < MAX_RECOVERY_ATTEMPTS) {
                     recoveryAttempts++;
                     if (config.debug) {
-                        console.error(`[runcode] Prompt too long — forcing compact (attempt ${recoveryAttempts})`);
+                        console.error(`[franklin] Media too large — stripping and retrying (attempt ${recoveryAttempts})`);
                     }
-                    const { history: compactedAgain } = await autoCompactIfNeeded(history, config.model, client, config.debug);
-                    history.length = 0;
-                    history.push(...compactedAgain);
+                    const { history: stripped, stripped: didStrip } = stripMediaFromHistory(history);
+                    if (didStrip) {
+                        replaceHistory(history, stripped);
+                        onEvent({ kind: 'text_delta', text: '\n*Media too large — retrying without images/documents...*\n' });
+                        continue;
+                    }
+                    // No media to strip — fall through to other error handling
+                }
+                // ── Prompt too long recovery (reactive compaction) ──
+                // Use forceCompact instead of autoCompactIfNeeded — the API already told us
+                // the prompt is too long, so we must compact regardless of our threshold estimate.
+                // This is the key insight from Claude Code: reactive compaction must FORCE compress.
+                if (classified.category === 'context_limit' && recoveryAttempts < MAX_RECOVERY_ATTEMPTS) {
+                    recoveryAttempts++;
+                    if (config.debug) {
+                        console.error(`[franklin] Prompt too long — force compacting (attempt ${recoveryAttempts})`);
+                    }
+                    onEvent({ kind: 'text_delta', text: '\n*Context limit hit — compacting conversation...*\n' });
+                    const { history: compactedAgain } = await forceCompact(history, config.model, client, config.debug);
+                    replaceHistory(history, compactedAgain);
+                    resetTokenAnchor(); // History mutated — resync tracking
                     continue; // Retry
                 }
                 // ── Transient error recovery (network, rate limit, server errors) ──
-                if (classified.isTransient && recoveryAttempts < 3) {
+                // Respect per-error maxRetries (e.g., 529/overloaded gets only 3 retries)
+                const effectiveMaxRetries = classified.maxRetries ?? MAX_RECOVERY_ATTEMPTS;
+                if (classified.isTransient && recoveryAttempts < effectiveMaxRetries) {
                     recoveryAttempts++;
-                    const backoffMs = Math.pow(2, recoveryAttempts) * 1000;
+                    const backoffMs = getBackoffDelay(recoveryAttempts);
                     if (config.debug) {
-                        console.error(`[runcode] ${classified.label} error — retrying in ${backoffMs / 1000}s (attempt ${recoveryAttempts}): ${errMsg.slice(0, 100)}`);
+                        console.error(`[franklin] ${classified.label} error — retrying in ${(backoffMs / 1000).toFixed(1)}s (attempt ${recoveryAttempts}/${effectiveMaxRetries}): ${errMsg.slice(0, 100)}`);
                     }
                     onEvent({
                         kind: 'text_delta',
-                        text: `\n*Retrying (${recoveryAttempts}/3) after ${classified.label} error...*\n`,
+                        text: `\n*Retrying (${recoveryAttempts}/${effectiveMaxRetries}) after ${classified.label} error...*\n`,
                     });
                     await new Promise(r => setTimeout(r, backoffMs));
                     continue;
                 }
-                // Add recovery suggestions based on error type
-                let suggestion = '';
-                if (classified.category === 'rate_limit') {
-                    suggestion = '\nTip: Try /model to switch to a different model, or wait a moment and /retry.';
-                }
-                else if (classified.category === 'payment') {
-                    // Auto-fallback to free models on payment/rate limit failure
-                    // Track failed models at session level to prevent ping-pong loops
-                    failedModels.add(config.model);
+                // ── Payment failure: auto-fallback to free models ──
+                // Track payment-failed models for the entire session — unlike transient errors,
+                // 402s will keep failing until the user adds funds.
+                if (classified.category === 'payment') {
+                    turnFailedModels.add(config.model);
+                    paymentFailedModels.set(config.model, Date.now());
+                    // Record to local Elo so the router learns to avoid this model
+                    if (lastRoutedCategory) {
+                        recordOutcome(lastRoutedCategory, config.model, 'payment');
+                    }
                     const FREE_MODELS = ['nvidia/qwen3-coder-480b', 'nvidia/nemotron-ultra-253b', 'nvidia/devstral-2-123b'];
-                    const nextFree = FREE_MODELS.find(m => !failedModels.has(m));
+                    const nextFree = FREE_MODELS.find(m => !turnFailedModels.has(m));
                     if (nextFree) {
                         const oldModel = config.model;
                         config.model = nextFree;
@@ -258,14 +583,9 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                         onEvent({ kind: 'text_delta', text: `\n*${oldModel} failed — switching to ${nextFree}*\n` });
                         continue; // Retry with next model
                     }
-                    suggestion = '\nTip: Run `runcode balance` to check funds. Try /model free for free models.';
-                }
-                else if (classified.category === 'timeout' || classified.category === 'network') {
-                    suggestion = '\nTip: Check your network connection. Use /retry to try again.';
-                }
-                else if (classified.category === 'context_limit') {
-                    suggestion = '\nTip: Run /compact to compress conversation history.';
                 }
+                // ── Unrecoverable: show error with suggestion from classifier ──
+                const suggestion = classified.suggestion ? `\nTip: ${classified.suggestion}` : '';
                 onEvent({
                     kind: 'turn_done',
                     reason: 'error',
@@ -281,31 +601,51 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                 : estimateHistoryTokens(history);
             // Anchor token tracking to actual API counts
             updateActualTokens(inputTokens, usage.outputTokens, history.length);
+            const { contextUsagePct } = getAnchoredTokenCount(history);
             onEvent({
                 kind: 'usage',
                 inputTokens,
                 outputTokens: usage.outputTokens,
-                model: config.model,
+                model: resolvedModel,
                 calls: 1,
+                tier: routingTier,
+                confidence: routingConfidence,
+                savings: routingSavings,
+                contextPct: Math.round(contextUsagePct),
             });
-            // Record usage for stats tracking (runcode stats command)
-            const costEstimate = estimateCost(config.model, inputTokens, usage.outputTokens, 1);
-            recordUsage(config.model, inputTokens, usage.outputTokens, costEstimate, 0);
+            // Record usage for stats tracking (franklin stats command)
+            const costEstimate = estimateCost(resolvedModel, inputTokens, usage.outputTokens, 1);
+            recordUsage(resolvedModel, inputTokens, usage.outputTokens, costEstimate, 0);
+            recordSessionUsage(resolvedModel, inputTokens, usage.outputTokens, costEstimate, routingTier);
+            // Accumulate session-level totals for session meta
+            sessionInputTokens += inputTokens;
+            sessionOutputTokens += usage.outputTokens;
+            sessionCostUsd += costEstimate;
+            const opusCost = (inputTokens / 1_000_000) * OPUS_PRICING.input
+                + (usage.outputTokens / 1_000_000) * OPUS_PRICING.output;
+            sessionSavedVsOpus += Math.max(0, opusCost - costEstimate);
             // ── Max output tokens recovery ──
-            if (stopReason === 'max_tokens' && recoveryAttempts < 3) {
+            if (stopReason === 'max_tokens' && recoveryAttempts < MAX_RECOVERY_ATTEMPTS) {
                 recoveryAttempts++;
                 if (maxTokensOverride === undefined) {
                     // First hit: escalate to 64K
                     maxTokensOverride = ESCALATED_MAX_TOKENS;
                     if (config.debug) {
-                        console.error(`[runcode] Max tokens hit — escalating to ${maxTokensOverride}`);
+                        console.error(`[franklin] Max tokens hit — escalating to ${maxTokensOverride}`);
                     }
                 }
                 // Append what we got + a continuation prompt (text already streamed)
                 const partialAssistant = { role: 'assistant', content: responseParts };
                 const continuationPrompt = {
                     role: 'user',
-                    content: 'Continue where you left off. Do not repeat what you already said.',
+                    content: [
+                        'Output token limit hit. Continue with these rules:',
+                        '1. Resume directly — no apology, no recap of what you already said. Pick up mid-sentence if that is where the cut happened.',
+                        '2. Do NOT repeat any text or code that was already output above.',
+                        '3. Break remaining work into smaller pieces — use multiple tool calls if needed instead of one large output.',
+                        '4. Skip extended reasoning for the continuation — focus on executing.',
+                        '5. If you were in the middle of outputting code, finish the code block first.',
+                    ].join('\n'),
                 };
                 history.push(partialAssistant);
                 persistSessionMessage(partialAssistant);
@@ -326,6 +666,18 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
             const assistantMessage = { role: 'assistant', content: responseParts };
             history.push(assistantMessage);
             persistSessionMessage(assistantMessage);
+            // ── Plan-then-execute: transition from planning to execution ──
+            if (planActive && loopCount === 1 && invocations.length === 0) {
+                // Planning call completed — inject execution kickoff
+                const execKickoff = {
+                    role: 'user',
+                    content: 'Execute the plan above step by step. Use tools to complete each step. After each step, briefly state what you did and move to the next.',
+                };
+                history.push(execKickoff);
+                persistSessionMessage(execKickoff);
+                onEvent({ kind: 'text_delta', text: `\n*Executing with ${planExecutorModel}...*\n` });
+                continue; // Next iteration uses the cheap executor model
+            }
             // No more capabilities → done with this user message
             if (invocations.length === 0) {
                 lastSessionActivity = Date.now();
@@ -343,6 +695,10 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                         });
                     }
                 }
+                // Record success for local Elo learning (include tool call count for efficiency)
+                if (lastRoutedCategory && lastRoutedModel) {
+                    recordOutcome(lastRoutedCategory, lastRoutedModel, 'continued', turnToolCalls);
+                }
                 onEvent({ kind: 'turn_done', reason: 'completed' });
                 break;
             }
@@ -351,22 +707,114 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
             for (const [inv, result] of results) {
                 onEvent({ kind: 'capability_done', id: inv.id, result });
             }
+            // ── Tool call guardrails ──
+            turnToolCalls += results.length;
+            for (const [inv] of results) {
+                const name = inv.name;
+                turnToolCounts.set(name, (turnToolCounts.get(name) || 0) + 1);
+                // Read file dedup: track paths already read
+                if (name === 'Read' && inv.input.file_path) {
+                    readFileCache.add(inv.input.file_path);
+                }
+            }
             // Refresh activity timestamp after tool execution
             lastSessionActivity = Date.now();
-            // Append outcomes
-            const outcomeContent = results.map(([inv, result]) => ({
-                type: 'tool_result',
-                tool_use_id: inv.id,
-                content: result.output,
-                is_error: result.isError,
-            }));
+            // Mid-session learning extraction (like Claude Code's SessionMemory)
+            // Runs in background — never blocks the conversation
+            const { estimated: currentTokens } = getAnchoredTokenCount(history);
+            maybeMidSessionExtract(history, currentTokens, turnToolCalls, sessionId, client);
+            // Append outcomes (with guardrail injections)
+            const outcomeContent = results.map(([inv, result]) => {
+                // Read file dedup: if the same file is read more than once in this batch of results,
+                // keep the last Read's content and replace the earlier ones with a stub to save tokens
+                if (inv.name === 'Read' && !result.isError) {
+                    const fp = inv.input.file_path;
+                    const count = results.filter(([i]) => i.name === 'Read' && i.input.file_path === fp).length;
+                    if (count > 1 && inv !== results.filter(([i]) => i.name === 'Read' && i.input.file_path === fp).pop()?.[0]) {
+                        return {
+                            type: 'tool_result',
+                            tool_use_id: inv.id,
+                            content: `File already read in this turn. Refer to the other Read result for ${fp}.`,
+                            is_error: false,
+                        };
+                    }
+                }
+                return {
+                    type: 'tool_result',
+                    tool_use_id: inv.id,
+                    content: result.output,
+                    is_error: result.isError,
+                };
+            });
+            // ── Guardrail injections ──
+            // Warn about same-tool repetition
+            for (const [name, count] of turnToolCounts) {
+                if (count === SAME_TOOL_WARN_THRESHOLD) {
+                    outcomeContent.push({
+                        type: 'tool_result',
+                        tool_use_id: `guardrail-warn-${name}`,
+                        content: `[SYSTEM] You have called ${name} ${count} times this turn. Stop and present your results now. Do not make more ${name} calls.`,
+                        is_error: true,
+                    });
+                }
+            }
+            // Hard cap: stop the turn if too many tool calls
+            if (turnToolCalls >= MAX_TOOL_CALLS_PER_TURN) {
+                outcomeContent.push({
+                    type: 'tool_result',
+                    tool_use_id: 'guardrail-cap',
+                    content: `[SYSTEM] Tool call limit reached (${MAX_TOOL_CALLS_PER_TURN}). Present your results to the user NOW. Do not make any more tool calls.`,
+                    is_error: true,
+                });
+            }
             const toolResultMessage = { role: 'user', content: outcomeContent };
             history.push(toolResultMessage);
             persistSessionMessage(toolResultMessage);
+            // ── Plan-then-execute: stuck detection ──
+            if (planActive && loopCount > 1) {
+                const hasErrors = results.some(([, r]) => r.isError);
+                planConsecutiveErrors = hasErrors ? planConsecutiveErrors + 1 : 0;
+                // Check for same-tool repeat (model calling the exact same thing twice)
+                const currentSig = results.length === 1
+                    ? toolCallSignature(results[0][0].name, results[0][0].input)
+                    : '';
+                const sameToolRepeat = currentSig !== '' && currentSig === lastToolSig;
+                lastToolSig = currentSig;
+                if (isExecutorStuck(planConsecutiveErrors, sameToolRepeat)) {
+                    if (planEscalationCount < 2) {
+                        planEscalationCount++;
+                        // One-shot escalation: next iteration uses the planner model
+                        resolvedModel = planPlannerModel;
+                        const escalation = {
+                            role: 'user',
+                            content: '[ESCALATION] The executor got stuck on repeated errors. You are a stronger model. Review what happened and either fix the approach or continue from where execution stopped.',
+                        };
+                        history.push(escalation);
+                        persistSessionMessage(escalation);
+                        onEvent({ kind: 'text_delta', text: '\n*Escalating to stronger model...*\n' });
+                    }
+                    else {
+                        // Abandon plan — strong model finishes the task directly
+                        planActive = false;
+                        onEvent({ kind: 'text_delta', text: '\n*Plan abandoned — switching to full model...*\n' });
+                    }
+                }
+            }
+            // Tool call cap reached: log it (debug mode only); the loop is deliberately not ended here
+            if (turnToolCalls >= MAX_TOOL_CALLS_PER_TURN) {
+                if (config.debug) {
+                    console.error(`[franklin] Tool call cap hit: ${turnToolCalls} calls this turn`);
+                }
+                // Don't break — let the model respond one more time to summarize,
+                // but inject the stop signal above so it knows to finish up.
+            }
         }
         if (loopCount >= maxTurns) {
             lastSessionActivity = Date.now();
             persistSessionMeta();
+            if (lastRoutedCategory && lastRoutedModel) {
+                recordOutcome(lastRoutedCategory, lastRoutedModel, 'max_turns', turnToolCalls);
+            }
             onEvent({ kind: 'turn_done', reason: 'max_turns' });
         }
     }