@blockrun/franklin 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. package/LICENSE +190 -0
  2. package/README.md +256 -0
  3. package/dist/agent/commands.d.ts +27 -0
  4. package/dist/agent/commands.js +659 -0
  5. package/dist/agent/compact.d.ts +31 -0
  6. package/dist/agent/compact.js +366 -0
  7. package/dist/agent/context.d.ts +11 -0
  8. package/dist/agent/context.js +184 -0
  9. package/dist/agent/error-classifier.d.ts +10 -0
  10. package/dist/agent/error-classifier.js +61 -0
  11. package/dist/agent/llm.d.ts +63 -0
  12. package/dist/agent/llm.js +448 -0
  13. package/dist/agent/loop.d.ts +12 -0
  14. package/dist/agent/loop.js +346 -0
  15. package/dist/agent/optimize.d.ts +53 -0
  16. package/dist/agent/optimize.js +262 -0
  17. package/dist/agent/permissions.d.ts +39 -0
  18. package/dist/agent/permissions.js +226 -0
  19. package/dist/agent/reduce.d.ts +49 -0
  20. package/dist/agent/reduce.js +317 -0
  21. package/dist/agent/streaming-executor.d.ts +36 -0
  22. package/dist/agent/streaming-executor.js +149 -0
  23. package/dist/agent/tokens.d.ts +53 -0
  24. package/dist/agent/tokens.js +185 -0
  25. package/dist/agent/types.d.ts +125 -0
  26. package/dist/agent/types.js +5 -0
  27. package/dist/banner.d.ts +1 -0
  28. package/dist/banner.js +27 -0
  29. package/dist/commands/balance.d.ts +1 -0
  30. package/dist/commands/balance.js +40 -0
  31. package/dist/commands/config.d.ts +14 -0
  32. package/dist/commands/config.js +107 -0
  33. package/dist/commands/daemon.d.ts +3 -0
  34. package/dist/commands/daemon.js +117 -0
  35. package/dist/commands/history.d.ts +5 -0
  36. package/dist/commands/history.js +31 -0
  37. package/dist/commands/init.d.ts +3 -0
  38. package/dist/commands/init.js +92 -0
  39. package/dist/commands/logs.d.ts +5 -0
  40. package/dist/commands/logs.js +89 -0
  41. package/dist/commands/models.d.ts +1 -0
  42. package/dist/commands/models.js +56 -0
  43. package/dist/commands/plugin.d.ts +14 -0
  44. package/dist/commands/plugin.js +176 -0
  45. package/dist/commands/proxy.d.ts +13 -0
  46. package/dist/commands/proxy.js +106 -0
  47. package/dist/commands/setup.d.ts +1 -0
  48. package/dist/commands/setup.js +49 -0
  49. package/dist/commands/start.d.ts +8 -0
  50. package/dist/commands/start.js +292 -0
  51. package/dist/commands/stats.d.ts +10 -0
  52. package/dist/commands/stats.js +94 -0
  53. package/dist/commands/uninit.d.ts +1 -0
  54. package/dist/commands/uninit.js +63 -0
  55. package/dist/config.d.ts +9 -0
  56. package/dist/config.js +41 -0
  57. package/dist/index.d.ts +2 -0
  58. package/dist/index.js +179 -0
  59. package/dist/mcp/client.d.ts +44 -0
  60. package/dist/mcp/client.js +147 -0
  61. package/dist/mcp/config.d.ts +20 -0
  62. package/dist/mcp/config.js +138 -0
  63. package/dist/plugin-sdk/channel.d.ts +100 -0
  64. package/dist/plugin-sdk/channel.js +10 -0
  65. package/dist/plugin-sdk/index.d.ts +14 -0
  66. package/dist/plugin-sdk/index.js +9 -0
  67. package/dist/plugin-sdk/plugin.d.ts +87 -0
  68. package/dist/plugin-sdk/plugin.js +7 -0
  69. package/dist/plugin-sdk/search.d.ts +13 -0
  70. package/dist/plugin-sdk/search.js +4 -0
  71. package/dist/plugin-sdk/tracker.d.ts +27 -0
  72. package/dist/plugin-sdk/tracker.js +5 -0
  73. package/dist/plugin-sdk/workflow.d.ts +126 -0
  74. package/dist/plugin-sdk/workflow.js +11 -0
  75. package/dist/plugins/registry.d.ts +33 -0
  76. package/dist/plugins/registry.js +155 -0
  77. package/dist/plugins/runner.d.ts +21 -0
  78. package/dist/plugins/runner.js +453 -0
  79. package/dist/plugins-bundled/social/index.d.ts +10 -0
  80. package/dist/plugins-bundled/social/index.js +363 -0
  81. package/dist/plugins-bundled/social/plugin.json +14 -0
  82. package/dist/plugins-bundled/social/prompts.d.ts +19 -0
  83. package/dist/plugins-bundled/social/prompts.js +67 -0
  84. package/dist/plugins-bundled/social/types.d.ts +58 -0
  85. package/dist/plugins-bundled/social/types.js +16 -0
  86. package/dist/pricing.d.ts +21 -0
  87. package/dist/pricing.js +91 -0
  88. package/dist/proxy/fallback.d.ts +38 -0
  89. package/dist/proxy/fallback.js +144 -0
  90. package/dist/proxy/server.d.ts +18 -0
  91. package/dist/proxy/server.js +576 -0
  92. package/dist/proxy/sse-translator.d.ts +29 -0
  93. package/dist/proxy/sse-translator.js +270 -0
  94. package/dist/router/index.d.ts +22 -0
  95. package/dist/router/index.js +269 -0
  96. package/dist/session/search.d.ts +33 -0
  97. package/dist/session/search.js +229 -0
  98. package/dist/session/storage.d.ts +48 -0
  99. package/dist/session/storage.js +173 -0
  100. package/dist/stats/insights.d.ts +55 -0
  101. package/dist/stats/insights.js +195 -0
  102. package/dist/stats/tracker.d.ts +54 -0
  103. package/dist/stats/tracker.js +165 -0
  104. package/dist/tools/askuser.d.ts +6 -0
  105. package/dist/tools/askuser.js +76 -0
  106. package/dist/tools/bash.d.ts +5 -0
  107. package/dist/tools/bash.js +336 -0
  108. package/dist/tools/edit.d.ts +5 -0
  109. package/dist/tools/edit.js +148 -0
  110. package/dist/tools/glob.d.ts +5 -0
  111. package/dist/tools/glob.js +158 -0
  112. package/dist/tools/grep.d.ts +5 -0
  113. package/dist/tools/grep.js +194 -0
  114. package/dist/tools/imagegen.d.ts +6 -0
  115. package/dist/tools/imagegen.js +172 -0
  116. package/dist/tools/index.d.ts +17 -0
  117. package/dist/tools/index.js +30 -0
  118. package/dist/tools/read.d.ts +11 -0
  119. package/dist/tools/read.js +90 -0
  120. package/dist/tools/subagent.d.ts +5 -0
  121. package/dist/tools/subagent.js +116 -0
  122. package/dist/tools/task.d.ts +5 -0
  123. package/dist/tools/task.js +91 -0
  124. package/dist/tools/webfetch.d.ts +5 -0
  125. package/dist/tools/webfetch.js +166 -0
  126. package/dist/tools/websearch.d.ts +5 -0
  127. package/dist/tools/websearch.js +103 -0
  128. package/dist/tools/write.d.ts +5 -0
  129. package/dist/tools/write.js +114 -0
  130. package/dist/ui/app.d.ts +26 -0
  131. package/dist/ui/app.js +545 -0
  132. package/dist/ui/model-picker.d.ts +14 -0
  133. package/dist/ui/model-picker.js +161 -0
  134. package/dist/ui/terminal.d.ts +35 -0
  135. package/dist/ui/terminal.js +337 -0
  136. package/dist/wallet/manager.d.ts +10 -0
  137. package/dist/wallet/manager.js +23 -0
  138. package/package.json +79 -0
@@ -0,0 +1,346 @@
1
+ /**
2
+ * runcode Agent Loop
3
+ * The core reasoning-action cycle: prompt → model → extract capabilities → execute → repeat.
4
+ * Original implementation with different architecture from any reference codebase.
5
+ */
6
+ import { ModelClient } from './llm.js';
7
+ import { autoCompactIfNeeded, microCompact } from './compact.js';
8
+ import { estimateHistoryTokens, updateActualTokens, resetTokenAnchor, getAnchoredTokenCount, getContextWindow } from './tokens.js';
9
+ import { handleSlashCommand } from './commands.js';
10
+ import { reduceTokens } from './reduce.js';
11
+ import { PermissionManager } from './permissions.js';
12
+ import { StreamingExecutor } from './streaming-executor.js';
13
+ import { optimizeHistory, CAPPED_MAX_TOKENS, ESCALATED_MAX_TOKENS, getMaxOutputTokens } from './optimize.js';
14
+ import { classifyAgentError } from './error-classifier.js';
15
+ import { recordUsage } from '../stats/tracker.js';
16
+ import { estimateCost } from '../pricing.js';
17
+ import { createSessionId, appendToSession, updateSessionMeta, pruneOldSessions, } from '../session/storage.js';
18
+ // ─── Interactive Session ───────────────────────────────────────────────────
19
+ /**
20
+ * Run a multi-turn interactive session.
21
+ * Each user message triggers a full agent loop.
22
+ * Returns the accumulated conversation history.
23
+ */
24
+ export async function interactiveSession(config, getUserInput, onEvent, onAbortReady) {
25
+ const client = new ModelClient({
26
+ apiUrl: config.apiUrl,
27
+ chain: config.chain,
28
+ debug: config.debug,
29
+ });
30
+ const capabilityMap = new Map();
31
+ for (const cap of config.capabilities) {
32
+ capabilityMap.set(cap.spec.name, cap);
33
+ }
34
+ const toolDefs = config.capabilities.map((c) => c.spec);
35
+ const maxTurns = config.maxTurns ?? 100;
36
+ const workDir = config.workingDir ?? process.cwd();
37
+ const permissions = new PermissionManager(config.permissionMode ?? 'default', config.permissionPromptFn);
38
+ const history = [];
39
+ let lastUserInput = ''; // For /retry
40
+ const failedModels = new Set(); // Models that failed payment/rate-limit (session-level)
41
+ // Session persistence
42
+ const sessionId = createSessionId();
43
+ let turnCount = 0;
44
+ let tokenBudgetWarned = false; // Emit token budget warning at most once per session
45
+ pruneOldSessions(sessionId); // Cleanup old sessions on start, protect current
46
+ while (true) {
47
+ let input = await getUserInput();
48
+ if (input === null)
49
+ break; // User wants to exit
50
+ if (input === '')
51
+ continue; // Empty input → re-prompt
52
+ // ── Slash command dispatch ──
53
+ if (input.startsWith('/')) {
54
+ // /retry re-sends the last user message
55
+ if (input === '/retry') {
56
+ if (!lastUserInput) {
57
+ onEvent({ kind: 'text_delta', text: 'No previous message to retry.\n' });
58
+ onEvent({ kind: 'turn_done', reason: 'completed' });
59
+ continue;
60
+ }
61
+ input = lastUserInput;
62
+ }
63
+ else {
64
+ const cmdResult = await handleSlashCommand(input, {
65
+ history, config, client, sessionId, onEvent,
66
+ });
67
+ if (cmdResult.handled)
68
+ continue;
69
+ if (cmdResult.rewritten)
70
+ input = cmdResult.rewritten;
71
+ }
72
+ }
73
+ lastUserInput = input;
74
+ history.push({ role: 'user', content: input });
75
+ appendToSession(sessionId, { role: 'user', content: input });
76
+ turnCount++;
77
+ const abort = new AbortController();
78
+ onAbortReady?.(() => abort.abort());
79
+ let loopCount = 0;
80
+ let recoveryAttempts = 0;
81
+ let compactFailures = 0;
82
+ let maxTokensOverride;
83
+ let lastActivity = Date.now();
84
+ // Agent loop for this user message
85
+ while (loopCount < maxTurns) {
86
+ loopCount++;
87
+ // ── Token optimization pipeline ──
88
+ // 1. Strip thinking, budget tool results, time-based cleanup (always — cheap)
89
+ const optimized = optimizeHistory(history, {
90
+ debug: config.debug,
91
+ lastActivityTimestamp: lastActivity,
92
+ });
93
+ if (optimized !== history) {
94
+ history.length = 0;
95
+ history.push(...optimized);
96
+ }
97
+ // 2. Token reduction: age old results, normalize whitespace, trim verbose messages
98
+ const reduced = reduceTokens(history, config.debug);
99
+ if (reduced !== history) {
100
+ history.length = 0;
101
+ history.push(...reduced);
102
+ }
103
+ // 3. Microcompact: clear old tool results to prevent context snowball
104
+ if (history.length > 6) {
105
+ const microCompacted = microCompact(history, 3);
106
+ if (microCompacted !== history) {
107
+ history.length = 0;
108
+ history.push(...microCompacted);
109
+ resetTokenAnchor(); // History shrunk — resync token tracking
110
+ }
111
+ }
112
+ // 3. Auto-compact: summarize history if approaching context limit
113
+ // Circuit breaker: stop retrying after 3 consecutive failures
114
+ if (compactFailures < 3) {
115
+ try {
116
+ const { history: compacted, compacted: didCompact } = await autoCompactIfNeeded(history, config.model, client, config.debug);
117
+ if (didCompact) {
118
+ history.length = 0;
119
+ history.push(...compacted);
120
+ resetTokenAnchor();
121
+ compactFailures = 0;
122
+ if (config.debug) {
123
+ console.error(`[runcode] History compacted: ~${estimateHistoryTokens(history)} tokens`);
124
+ }
125
+ }
126
+ }
127
+ catch (compactErr) {
128
+ compactFailures++;
129
+ if (config.debug) {
130
+ console.error(`[runcode] Compaction failed (${compactFailures}/3): ${compactErr.message}`);
131
+ }
132
+ }
133
+ }
134
+ // Inject ultrathink instruction when mode is active
135
+ const systemParts = [...config.systemInstructions];
136
+ if (config.ultrathink) {
137
+ systemParts.push('# Ultrathink Mode\n' +
138
+ 'You are in deep reasoning mode. Before responding to any request:\n' +
139
+ '1. Thoroughly analyze the problem from multiple angles\n' +
140
+ '2. Consider edge cases, failure modes, and second-order effects\n' +
141
+ '3. Challenge your initial assumptions before committing to an approach\n' +
142
+ '4. Think step by step — show your reasoning explicitly when it adds value\n' +
143
+ 'Prioritize correctness and thoroughness over speed.');
144
+ }
145
+ const systemPrompt = systemParts.join('\n\n');
146
+ const modelMaxOut = getMaxOutputTokens(config.model);
147
+ let maxTokens = Math.min(maxTokensOverride ?? CAPPED_MAX_TOKENS, modelMaxOut);
148
+ let responseParts = [];
149
+ let usage;
150
+ let stopReason;
151
+ // Create streaming executor for concurrent tool execution
152
+ const streamExec = new StreamingExecutor({
153
+ handlers: capabilityMap,
154
+ scope: { workingDir: workDir, abortSignal: abort.signal, onAskUser: config.onAskUser },
155
+ permissions,
156
+ onStart: (id, name, preview) => onEvent({ kind: 'capability_start', id, name, preview }),
157
+ onProgress: (id, text) => onEvent({ kind: 'capability_progress', id, text }),
158
+ });
159
+ try {
160
+ const result = await client.complete({
161
+ model: config.model,
162
+ messages: history,
163
+ system: systemPrompt,
164
+ tools: toolDefs,
165
+ max_tokens: maxTokens,
166
+ stream: true,
167
+ }, abort.signal,
168
+ // Start concurrent tools as soon as their input is fully received
169
+ (tool) => streamExec.onToolReceived(tool),
170
+ // Stream text/thinking deltas to UI in real-time
171
+ (delta) => {
172
+ if (delta.type === 'text') {
173
+ onEvent({ kind: 'text_delta', text: delta.text });
174
+ }
175
+ else if (delta.type === 'thinking') {
176
+ onEvent({ kind: 'thinking_delta', text: delta.text });
177
+ }
178
+ });
179
+ responseParts = result.content;
180
+ usage = result.usage;
181
+ stopReason = result.stopReason;
182
+ }
183
+ catch (err) {
184
+ // ── User abort (Esc key) ──
185
+ if (err.name === 'AbortError' || abort.signal.aborted) {
186
+ // Save any partial response that was streamed before abort
187
+ if (responseParts && responseParts.length > 0) {
188
+ history.push({ role: 'assistant', content: responseParts });
189
+ appendToSession(sessionId, { role: 'assistant', content: responseParts });
190
+ }
191
+ onEvent({ kind: 'turn_done', reason: 'aborted' });
192
+ break;
193
+ }
194
+ const errMsg = err.message || '';
195
+ const classified = classifyAgentError(errMsg);
196
+ // ── Prompt too long recovery ──
197
+ if (classified.category === 'context_limit' && recoveryAttempts < 3) {
198
+ recoveryAttempts++;
199
+ if (config.debug) {
200
+ console.error(`[runcode] Prompt too long — forcing compact (attempt ${recoveryAttempts})`);
201
+ }
202
+ const { history: compactedAgain } = await autoCompactIfNeeded(history, config.model, client, config.debug);
203
+ history.length = 0;
204
+ history.push(...compactedAgain);
205
+ continue; // Retry
206
+ }
207
+ // ── Transient error recovery (network, rate limit, server errors) ──
208
+ if (classified.isTransient && recoveryAttempts < 3) {
209
+ recoveryAttempts++;
210
+ const backoffMs = Math.pow(2, recoveryAttempts) * 1000;
211
+ if (config.debug) {
212
+ console.error(`[runcode] ${classified.label} error — retrying in ${backoffMs / 1000}s (attempt ${recoveryAttempts}): ${errMsg.slice(0, 100)}`);
213
+ }
214
+ onEvent({
215
+ kind: 'text_delta',
216
+ text: `\n*Retrying (${recoveryAttempts}/3) after ${classified.label} error...*\n`,
217
+ });
218
+ await new Promise(r => setTimeout(r, backoffMs));
219
+ continue;
220
+ }
221
+ // Add recovery suggestions based on error type
222
+ let suggestion = '';
223
+ if (classified.category === 'rate_limit') {
224
+ suggestion = '\nTip: Try /model to switch to a different model, or wait a moment and /retry.';
225
+ }
226
+ else if (classified.category === 'payment') {
227
+ // Auto-fallback to free models on payment/rate limit failure
228
+ // Track failed models at session level to prevent ping-pong loops
229
+ failedModels.add(config.model);
230
+ const FREE_MODELS = ['nvidia/qwen3-coder-480b', 'nvidia/nemotron-ultra-253b', 'nvidia/devstral-2-123b'];
231
+ const nextFree = FREE_MODELS.find(m => !failedModels.has(m));
232
+ if (nextFree) {
233
+ const oldModel = config.model;
234
+ config.model = nextFree;
235
+ config.onModelChange?.(nextFree);
236
+ onEvent({ kind: 'text_delta', text: `\n*${oldModel} failed — switching to ${nextFree}*\n` });
237
+ continue; // Retry with next model
238
+ }
239
+ suggestion = '\nTip: Run `runcode balance` to check funds. Try /model free for free models.';
240
+ }
241
+ else if (classified.category === 'timeout' || classified.category === 'network') {
242
+ suggestion = '\nTip: Check your network connection. Use /retry to try again.';
243
+ }
244
+ else if (classified.category === 'context_limit') {
245
+ suggestion = '\nTip: Run /compact to compress conversation history.';
246
+ }
247
+ onEvent({
248
+ kind: 'turn_done',
249
+ reason: 'error',
250
+ error: `[${classified.label}] ${errMsg}${suggestion}`,
251
+ });
252
+ break;
253
+ }
254
+ // When API doesn't return input tokens (some models return 0), estimate from history
255
+ const inputTokens = usage.inputTokens > 0
256
+ ? usage.inputTokens
257
+ : estimateHistoryTokens(history);
258
+ // Anchor token tracking to actual API counts
259
+ updateActualTokens(inputTokens, usage.outputTokens, history.length);
260
+ onEvent({
261
+ kind: 'usage',
262
+ inputTokens,
263
+ outputTokens: usage.outputTokens,
264
+ model: config.model,
265
+ calls: 1,
266
+ });
267
+ // Record usage for stats tracking (runcode stats command)
268
+ const costEstimate = estimateCost(config.model, inputTokens, usage.outputTokens, 1);
269
+ recordUsage(config.model, inputTokens, usage.outputTokens, costEstimate, 0);
270
+ // ── Max output tokens recovery ──
271
+ if (stopReason === 'max_tokens' && recoveryAttempts < 3) {
272
+ recoveryAttempts++;
273
+ if (maxTokensOverride === undefined) {
274
+ // First hit: escalate to 64K
275
+ maxTokensOverride = ESCALATED_MAX_TOKENS;
276
+ if (config.debug) {
277
+ console.error(`[runcode] Max tokens hit — escalating to ${maxTokensOverride}`);
278
+ }
279
+ }
280
+ // Append what we got + a continuation prompt (text already streamed)
281
+ history.push({ role: 'assistant', content: responseParts });
282
+ history.push({
283
+ role: 'user',
284
+ content: 'Continue where you left off. Do not repeat what you already said.',
285
+ });
286
+ continue; // Retry with higher limit
287
+ }
288
+ // Reset recovery counter on successful completion
289
+ recoveryAttempts = 0;
290
+ // Extract tool invocations (text/thinking already streamed in real-time)
291
+ const invocations = [];
292
+ for (const part of responseParts) {
293
+ if (part.type === 'tool_use') {
294
+ invocations.push(part);
295
+ }
296
+ }
297
+ history.push({ role: 'assistant', content: responseParts });
298
+ // No more capabilities → done with this user message
299
+ if (invocations.length === 0) {
300
+ // Save session on completed turn
301
+ appendToSession(sessionId, { role: 'assistant', content: responseParts });
302
+ updateSessionMeta(sessionId, {
303
+ model: config.model,
304
+ workDir: config.workingDir || process.cwd(),
305
+ turnCount,
306
+ messageCount: history.length,
307
+ });
308
+ // Token budget warning — emit once per session when crossing 70%
309
+ if (!tokenBudgetWarned) {
310
+ const { estimated } = getAnchoredTokenCount(history);
311
+ const contextWindow = getContextWindow(config.model);
312
+ const pct = (estimated / contextWindow) * 100;
313
+ if (pct >= 70) {
314
+ tokenBudgetWarned = true;
315
+ onEvent({
316
+ kind: 'text_delta',
317
+ text: `\n\n> **Token budget: ${pct.toFixed(0)}% used** (~${estimated.toLocaleString()} / ${(contextWindow / 1000).toFixed(0)}k tokens). Run \`/compact\` to free up space.\n`,
318
+ });
319
+ }
320
+ }
321
+ onEvent({ kind: 'turn_done', reason: 'completed' });
322
+ break;
323
+ }
324
+ // Collect results — concurrent tools may already be running from streaming
325
+ const results = await streamExec.collectResults(invocations);
326
+ for (const [inv, result] of results) {
327
+ onEvent({ kind: 'capability_done', id: inv.id, result });
328
+ }
329
+ // Refresh activity timestamp after tool execution
330
+ lastActivity = Date.now();
331
+ // Append outcomes
332
+ const outcomeContent = results.map(([inv, result]) => ({
333
+ type: 'tool_result',
334
+ tool_use_id: inv.id,
335
+ content: result.output,
336
+ is_error: result.isError,
337
+ }));
338
+ history.push({ role: 'user', content: outcomeContent });
339
+ }
340
+ if (loopCount >= maxTurns) {
341
+ onEvent({ kind: 'turn_done', reason: 'max_turns' });
342
+ }
343
+ }
344
+ return history;
345
+ }
346
+ // Cost estimation now uses shared pricing from src/pricing.ts
@@ -0,0 +1,53 @@
1
/**
 * Token optimization strategies for runcode.
 *
 * Five layers of optimization to minimize token usage:
 * 1. Tool result size budgeting — cap large outputs, keep preview
 * 2. Thinking block stripping — remove old thinking from history
 * 3. Time-based cleanup — clear stale tool results after idle gap
 * 4. Adaptive max_tokens — start low (16K), escalate on hit
 * 5. Pre-compact stripping — remove images/docs before summarization
 */
import type { Dialogue } from './types.js';
/** Default max_tokens (low to save output slot reservation) */
export declare const CAPPED_MAX_TOKENS = 16384;
/** Escalated max_tokens after hitting the cap */
export declare const ESCALATED_MAX_TOKENS = 65536;
/** Get max output tokens for a model (falls back to 16384 for unknown models) */
export declare function getMaxOutputTokens(model: string): number;
/**
 * Cap tool result sizes to prevent context bloat.
 * Large results (>32K chars) are truncated with a preview.
 * Per-message aggregate is also capped at 100K chars.
 */
export declare function budgetToolResults(history: Dialogue[]): Dialogue[];
/**
 * Remove thinking blocks from assistant messages older than the most
 * recent two; returns the same reference when nothing was stripped.
 */
export declare function stripOldThinking(history: Dialogue[]): Dialogue[];
/**
 * After an idle gap (currently 5 min — see IDLE_GAP_THRESHOLD_MINUTES in
 * optimize.js), clear old tool results.
 * When the user comes back after being away, old results are stale anyway.
 */
export declare function timeBasedCleanup(history: Dialogue[], lastActivityTimestamp?: number): {
    history: Dialogue[];
    cleaned: boolean;
};
/**
 * Strip heavy content before sending to compaction model.
 * Removes image/document references since the summarizer can't see them anyway.
 */
export declare function stripHeavyContent(history: Dialogue[]): Dialogue[];
export interface OptimizeOptions {
    debug?: boolean;
    lastActivityTimestamp?: number;
}
/**
 * Run the full optimization pipeline on conversation history.
 * Called before each model request to minimize token usage.
 *
 * Pipeline order (cheapest first):
 * 1. Strip old thinking blocks (free, local)
 * 2. Budget tool results (free, local)
 * 3. Time-based cleanup (free, local, only after idle)
 *
 * Returns the optimized history (may be same reference if no changes).
 */
export declare function optimizeHistory(history: Dialogue[], opts?: OptimizeOptions): Dialogue[];
@@ -0,0 +1,262 @@
1
+ /**
2
+ * Token optimization strategies for runcode.
3
+ *
4
+ * Five layers of optimization to minimize token usage:
5
+ * 1. Tool result size budgeting — cap large outputs, keep preview
6
+ * 2. Thinking block stripping — remove old thinking from history
7
+ * 3. Time-based cleanup — clear stale tool results after idle gap
8
+ * 4. Adaptive max_tokens — start low (16K), escalate on hit
9
+ * 5. Pre-compact stripping — remove images/docs before summarization
10
+ */
11
// ─── Constants ─────────────────────────────────────────────────────────────
/** Max chars per individual tool result before truncation (history-level safety net) */
const MAX_TOOL_RESULT_CHARS = 32_000;
/** Max aggregate tool result chars per user message */
const MAX_TOOL_RESULTS_PER_MESSAGE_CHARS = 100_000;
/** Preview size when truncating */
const PREVIEW_CHARS = 2_000;
/** Default max_tokens (low to save output slot reservation) */
export const CAPPED_MAX_TOKENS = 16_384;
/** Escalated max_tokens after hitting the cap */
export const ESCALATED_MAX_TOKENS = 65_536;
/** Per-model max output tokens — prevents requesting more than the model supports */
const MODEL_MAX_OUTPUT = {
    'anthropic/claude-opus-4.6': 32_000,
    'anthropic/claude-sonnet-4.6': 64_000,
    'anthropic/claude-haiku-4.5-20251001': 16_384,
    'openai/gpt-5.4': 32_768,
    'openai/gpt-5-mini': 16_384,
    'google/gemini-2.5-pro': 65_536,
    'google/gemini-2.5-flash': 65_536,
    'deepseek/deepseek-chat': 8_192,
};
/**
 * Get max output tokens for a model.
 * @param {string} model - Model identifier (e.g. 'openai/gpt-5-mini').
 * @returns {number} The model's output cap, or 16384 for unknown models.
 */
export function getMaxOutputTokens(model) {
    return MODEL_MAX_OUTPUT[model] ?? 16_384;
}
/** Idle gap (minutes) after which old tool results are cleared */
const IDLE_GAP_THRESHOLD_MINUTES = 5;
/** Number of recent tool results to keep during time-based cleanup */
const KEEP_RECENT_TOOL_RESULTS = 3;
// ─── 1. Tool Result Size Budgeting ─────────────────────────────────────────
/**
 * Cap tool result sizes to prevent context bloat.
 * Large results (>32K chars) are truncated down to a 2K-char preview.
 * Per-message aggregate is also capped at 100K chars; once a message's
 * budget is exhausted, its remaining results are omitted entirely.
 *
 * Returns the SAME array reference when nothing was modified, so callers
 * (e.g. optimizeHistory) can detect "no change" by identity comparison.
 *
 * @param {Array} history - Conversation messages (Dialogue[]).
 * @returns {Array} History with oversized tool results truncated, or the
 *   original reference if no result exceeded a budget.
 */
export function budgetToolResults(history) {
    const result = [];
    let anyModified = false; // Track whether ANY message changed (see return below)
    for (const msg of history) {
        // Only user messages with structured content can carry tool results.
        if (msg.role !== 'user' || typeof msg.content === 'string' || !Array.isArray(msg.content)) {
            result.push(msg);
            continue;
        }
        let messageTotal = 0;
        let modified = false;
        const budgeted = [];
        for (const part of msg.content) {
            if (part.type !== 'tool_result') {
                budgeted.push(part);
                continue;
            }
            const content = typeof part.content === 'string' ? part.content : JSON.stringify(part.content);
            const size = content.length;
            // Per-tool cap
            if (size > MAX_TOOL_RESULT_CHARS) {
                modified = true;
                // Truncate at line boundary for cleaner output
                let preview = content.slice(0, PREVIEW_CHARS);
                const lastNewline = preview.lastIndexOf('\n');
                if (lastNewline > PREVIEW_CHARS * 0.5) {
                    preview = preview.slice(0, lastNewline);
                }
                budgeted.push({
                    type: 'tool_result',
                    tool_use_id: part.tool_use_id,
                    // Bug fix: report the true omitted count — preview may be shorter
                    // than PREVIEW_CHARS after the line-boundary trim above.
                    content: `[Output truncated: ${size.toLocaleString()} chars → ${PREVIEW_CHARS} preview]\n\n${preview}\n\n... (${size - preview.length} chars omitted)`,
                    is_error: part.is_error,
                });
                messageTotal += PREVIEW_CHARS + 200; // Approximate wrapper overhead
                continue;
            }
            // Per-message aggregate cap — once exceeded, truncate remaining results
            if (messageTotal + size > MAX_TOOL_RESULTS_PER_MESSAGE_CHARS) {
                modified = true;
                budgeted.push({
                    type: 'tool_result',
                    tool_use_id: part.tool_use_id,
                    content: `[Output omitted: message budget exceeded (${MAX_TOOL_RESULTS_PER_MESSAGE_CHARS / 1000}K chars/msg)]`,
                    is_error: part.is_error,
                });
                // Pin the total at the cap so all later results in this message are omitted too.
                messageTotal = MAX_TOOL_RESULTS_PER_MESSAGE_CHARS;
                continue;
            }
            budgeted.push(part);
            messageTotal += size;
        }
        if (modified) {
            anyModified = true;
        }
        result.push(modified ? { role: 'user', content: budgeted } : msg);
    }
    // Bug fix: previously this ALWAYS returned a freshly built array, so
    // optimizeHistory's identity check (`budgeted !== result`) reported a
    // change — and emitted its debug log — on every single call. Return the
    // original reference when nothing was touched, per the documented contract.
    return anyModified ? result : history;
}
101
+ // ─── 2. Thinking Block Stripping ───────────────────────────────────────────
102
/**
 * Drop thinking blocks from all but the most recent assistant messages.
 * The last KEEP_THINKING_TURNS assistant turns keep their thinking intact;
 * anything older has its thinking parts filtered out (they are bulky and
 * no longer needed once the decision was made). A message whose content
 * becomes empty is replaced with a '[thinking omitted]' placeholder.
 * Returns the original array reference when nothing was stripped.
 */
const KEEP_THINKING_TURNS = 2;
export function stripOldThinking(history) {
    // Walk backwards to mark the newest assistant turns as protected.
    const protectedIdx = new Set();
    for (let i = history.length - 1; i >= 0 && protectedIdx.size < KEEP_THINKING_TURNS; i--) {
        if (history[i].role === 'assistant') {
            protectedIdx.add(i);
        }
    }
    if (protectedIdx.size === 0) {
        return history; // No assistant messages at all — nothing to do
    }
    let touched = false;
    const out = history.map((msg, idx) => {
        // Only unprotected assistant messages with structured content are candidates.
        if (msg.role !== 'assistant' || protectedIdx.has(idx) || !Array.isArray(msg.content)) {
            return msg;
        }
        const kept = msg.content.filter((part) => part.type !== 'thinking');
        if (kept.length === msg.content.length) {
            return msg; // No thinking parts present — keep original object
        }
        touched = true;
        return {
            role: 'assistant',
            content: kept.length > 0 ? kept : [{ type: 'text', text: '[thinking omitted]' }],
        };
    });
    return touched ? out : history;
}
141
+ // ─── 3. Time-Based Cleanup ─────────────────────────────────────────────────
142
/**
 * Clear old tool results after an idle gap (threshold is
 * IDLE_GAP_THRESHOLD_MINUTES). When the user comes back after being away,
 * old tool results are stale anyway — keep only the most recent
 * KEEP_RECENT_TOOL_RESULTS and blank out the rest.
 *
 * @param {Array} history - Conversation messages.
 * @param {number} [lastActivityTimestamp] - Epoch ms of last activity; when
 *   absent, cleanup is skipped entirely.
 * @returns {{history: Array, cleaned: boolean}} Possibly-modified history and
 *   a flag indicating whether anything was cleared.
 */
export function timeBasedCleanup(history, lastActivityTimestamp) {
    const untouched = { history, cleaned: false };
    if (!lastActivityTimestamp) {
        return untouched;
    }
    const elapsedMs = Date.now() - lastActivityTimestamp;
    if (elapsedMs < 0) {
        return untouched; // Clock skew protection
    }
    if (elapsedMs / 60_000 < IDLE_GAP_THRESHOLD_MINUTES) {
        return untouched; // Not idle long enough
    }
    // Locate every user message whose first part is a tool_result.
    const resultIdx = [];
    history.forEach((msg, idx) => {
        if (msg.role !== 'user' || !Array.isArray(msg.content) || msg.content.length === 0) {
            return;
        }
        const head = msg.content[0];
        if (typeof head === 'string' || !('type' in head) || head.type !== 'tool_result') {
            return;
        }
        resultIdx.push(idx);
    });
    if (resultIdx.length <= KEEP_RECENT_TOOL_RESULTS) {
        return untouched;
    }
    // Blank out everything except the newest N tool-result messages.
    const staleIdx = resultIdx.slice(0, resultIdx.length - KEEP_RECENT_TOOL_RESULTS);
    const next = [...history];
    for (const idx of staleIdx) {
        const msg = next[idx];
        if (!Array.isArray(msg.content)) {
            continue;
        }
        next[idx] = {
            role: 'user',
            content: msg.content.map((part) => part.type === 'tool_result'
                ? {
                    type: 'tool_result',
                    tool_use_id: part.tool_use_id,
                    content: '[Stale tool result cleared after idle gap]',
                    is_error: part.is_error,
                }
                : part),
        };
    }
    return { history: next, cleaned: true };
}
195
+ // ─── 4. Pre-Compact Stripping ──────────────────────────────────────────────
196
/**
 * Strip heavy content before sending to the compaction model.
 * Image and document blocks are swapped for tiny text placeholders, since
 * the summarizer can't see them anyway. Messages without any heavy blocks
 * are returned as the same object reference.
 */
export function stripHeavyContent(history) {
    return history.map((msg) => {
        const parts = msg.content;
        // String content and anything non-array passes straight through.
        if (!Array.isArray(parts)) {
            return msg;
        }
        let replaced = false;
        const slim = parts.map((part) => {
            if ('type' in part && part.type === 'image') {
                replaced = true;
                return { type: 'text', text: '[image]' };
            }
            if ('type' in part && part.type === 'document') {
                replaced = true;
                return { type: 'text', text: '[document]' };
            }
            return part;
        });
        return replaced ? { ...msg, content: slim } : msg;
    });
}
223
/**
 * Run the full optimization pipeline on conversation history.
 * Called before each model request to minimize token usage.
 *
 * Pipeline order (cheapest first):
 * 1. Strip old thinking blocks (free, local)
 * 2. Budget tool results (free, local)
 * 3. Time-based cleanup (free, local, only after idle)
 *
 * Each stage signals "no change" by returning the same reference (or a
 * `cleaned: false` flag), so unchanged stages cost nothing downstream.
 *
 * @param {Array} history - Conversation messages (Dialogue[]).
 * @param {{debug?: boolean, lastActivityTimestamp?: number}} [opts]
 * @returns {Array} The optimized history (same reference if no changes).
 */
export function optimizeHistory(history, opts) {
    // Fix: removed the unused `changed` accumulator — it was set but never read.
    let result = history;
    // 1. Strip old thinking
    const stripped = stripOldThinking(result);
    if (stripped !== result) {
        result = stripped;
        if (opts?.debug)
            console.error('[runcode] Stripped old thinking blocks');
    }
    // 2. Budget tool results
    const budgeted = budgetToolResults(result);
    if (budgeted !== result) {
        result = budgeted;
        if (opts?.debug)
            console.error('[runcode] Budgeted oversized tool results');
    }
    // 3. Time-based cleanup
    const { history: cleaned, cleaned: didClean } = timeBasedCleanup(result, opts?.lastActivityTimestamp);
    if (didClean) {
        result = cleaned;
        if (opts?.debug)
            console.error('[runcode] Cleared stale tool results after idle gap');
    }
    return result;
}