@blockrun/runcode 2.2.6 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -71,6 +71,42 @@ const DIRECT_COMMANDS = {
71
71
  ctx.onEvent({ kind: 'text_delta', text: 'Last commit undone. Changes preserved in staging.\n' });
72
72
  emitDone(ctx);
73
73
  },
74
+ '/tokens': (ctx) => {
75
+ const { estimated, apiAnchored } = getAnchoredTokenCount(ctx.history);
76
+ const contextWindow = getContextWindow(ctx.config.model);
77
+ const pct = (estimated / contextWindow) * 100;
78
+ // Count tool results and thinking blocks
79
+ let toolResults = 0;
80
+ let thinkingBlocks = 0;
81
+ let totalToolChars = 0;
82
+ for (const msg of ctx.history) {
83
+ if (typeof msg.content === 'string')
84
+ continue;
85
+ if (!Array.isArray(msg.content))
86
+ continue;
87
+ for (const part of msg.content) {
88
+ if ('type' in part) {
89
+ if (part.type === 'tool_result') {
90
+ toolResults++;
91
+ const c = typeof part.content === 'string' ? part.content : JSON.stringify(part.content);
92
+ totalToolChars += c.length;
93
+ }
94
+ if (part.type === 'thinking')
95
+ thinkingBlocks++;
96
+ }
97
+ }
98
+ }
99
+ ctx.onEvent({ kind: 'text_delta', text: `**Token Usage**\n` +
100
+ ` Estimated: ~${estimated.toLocaleString()} tokens ${apiAnchored ? '(API-anchored)' : '(estimated)'}\n` +
101
+ ` Context: ${(contextWindow / 1000).toFixed(0)}k window (${pct.toFixed(1)}% used)\n` +
102
+ ` Messages: ${ctx.history.length}\n` +
103
+ ` Tool results: ${toolResults} (${(totalToolChars / 1024).toFixed(0)}KB)\n` +
104
+ ` Thinking: ${thinkingBlocks} blocks\n` +
105
+ (pct > 80 ? ' ⚠ Near limit — run /compact\n' : '') +
106
+ (pct > 60 ? '' : ' ✓ Healthy\n')
107
+ });
108
+ emitDone(ctx);
109
+ },
74
110
  '/help': (ctx) => {
75
111
  ctx.onEvent({ kind: 'text_delta', text: `**RunCode Commands**\n\n` +
76
112
  ` **Coding:** /commit /review /test /fix /debug /explain /search /find /refactor /scaffold\n` +
@@ -204,15 +204,19 @@ function formatForSummarization(messages) {
204
204
  * Pick a cheaper/faster model for compaction to save cost.
205
205
  */
206
206
  function pickCompactionModel(primaryModel) {
207
- // Use a fast model for summarization — no need for the expensive primary
208
- if (primaryModel.includes('opus') || primaryModel.includes('gpt-5.4-pro')) {
207
+ // Use cheapest capable model for summarization to save cost
208
+ // Tier down: opus/pro → sonnet, sonnet → haiku, everything else → flash (cheapest capable)
209
+ if (primaryModel.includes('opus') || primaryModel.includes('pro')) {
209
210
  return 'anthropic/claude-sonnet-4.6';
210
211
  }
211
- if (primaryModel.includes('sonnet')) {
212
+ if (primaryModel.includes('sonnet') || primaryModel.includes('gpt-5.4') || primaryModel.includes('gemini-2.5-pro')) {
212
213
  return 'anthropic/claude-haiku-4.5-20251001';
213
214
  }
214
- // For cheaper models, just use the same one
215
- return primaryModel;
215
+ if (primaryModel.includes('haiku') || primaryModel.includes('mini') || primaryModel.includes('nano')) {
216
+ return 'google/gemini-2.5-flash'; // Cheapest capable model
217
+ }
218
+ // Free/unknown models — use flash
219
+ return 'google/gemini-2.5-flash';
216
220
  }
217
221
  /**
218
222
  * Emergency fallback: drop oldest messages until under threshold.
@@ -9,7 +9,7 @@ import { estimateHistoryTokens, updateActualTokens, resetTokenAnchor } from './t
9
9
  import { handleSlashCommand } from './commands.js';
10
10
  import { PermissionManager } from './permissions.js';
11
11
  import { StreamingExecutor } from './streaming-executor.js';
12
- import { optimizeHistory, CAPPED_MAX_TOKENS, ESCALATED_MAX_TOKENS } from './optimize.js';
12
+ import { optimizeHistory, CAPPED_MAX_TOKENS, ESCALATED_MAX_TOKENS, getMaxOutputTokens } from './optimize.js';
13
13
  import { recordUsage } from '../stats/tracker.js';
14
14
  import { estimateCost } from '../pricing.js';
15
15
  import { createSessionId, appendToSession, updateSessionMeta, pruneOldSessions, } from '../session/storage.js';
@@ -237,13 +237,14 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
237
237
  onAbortReady?.(() => abort.abort());
238
238
  let loopCount = 0;
239
239
  let recoveryAttempts = 0;
240
+ let compactFailures = 0;
240
241
  let maxTokensOverride;
241
- const lastActivity = Date.now();
242
+ let lastActivity = Date.now();
242
243
  // Agent loop for this user message
243
244
  while (loopCount < maxTurns) {
244
245
  loopCount++;
245
246
  // ── Token optimization pipeline ──
246
- // 1. Strip thinking, budget tool results, time-based cleanup
247
+ // 1. Strip thinking, budget tool results, time-based cleanup (always — cheap)
247
248
  const optimized = optimizeHistory(history, {
248
249
  debug: config.debug,
249
250
  lastActivityTimestamp: lastActivity,
@@ -252,24 +253,39 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
252
253
  history.length = 0;
253
254
  history.push(...optimized);
254
255
  }
255
- // 2. Microcompact: clear old tool results to save tokens
256
- const microCompacted = microCompact(history, 8);
257
- if (microCompacted !== history) {
258
- history.length = 0;
259
- history.push(...microCompacted);
256
+ // 2. Microcompact: only when history has >15 messages (skip for short conversations)
257
+ if (history.length > 15) {
258
+ const microCompacted = microCompact(history, 8);
259
+ if (microCompacted !== history) {
260
+ history.length = 0;
261
+ history.push(...microCompacted);
262
+ }
260
263
  }
261
- // Auto-compact: summarize history if approaching context limit
262
- const { history: compacted, compacted: didCompact } = await autoCompactIfNeeded(history, config.model, client, config.debug);
263
- if (didCompact) {
264
- history.length = 0;
265
- history.push(...compacted);
266
- resetTokenAnchor(); // Reset anchor after compaction — estimates will be used
267
- if (config.debug) {
268
- console.error(`[runcode] History compacted: ~${estimateHistoryTokens(history)} tokens`);
264
+ // 3. Auto-compact: summarize history if approaching context limit
265
+ // Circuit breaker: stop retrying after 3 consecutive failures
266
+ if (compactFailures < 3) {
267
+ try {
268
+ const { history: compacted, compacted: didCompact } = await autoCompactIfNeeded(history, config.model, client, config.debug);
269
+ if (didCompact) {
270
+ history.length = 0;
271
+ history.push(...compacted);
272
+ resetTokenAnchor();
273
+ compactFailures = 0;
274
+ if (config.debug) {
275
+ console.error(`[runcode] History compacted: ~${estimateHistoryTokens(history)} tokens`);
276
+ }
277
+ }
278
+ }
279
+ catch (compactErr) {
280
+ compactFailures++;
281
+ if (config.debug) {
282
+ console.error(`[runcode] Compaction failed (${compactFailures}/3): ${compactErr.message}`);
283
+ }
269
284
  }
270
285
  }
271
286
  const systemPrompt = config.systemInstructions.join('\n\n');
272
- let maxTokens = maxTokensOverride ?? CAPPED_MAX_TOKENS;
287
+ const modelMaxOut = getMaxOutputTokens(config.model);
288
+ let maxTokens = Math.min(maxTokensOverride ?? CAPPED_MAX_TOKENS, modelMaxOut);
273
289
  let responseParts = [];
274
290
  let usage;
275
291
  let stopReason;
@@ -418,6 +434,8 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
418
434
  for (const [inv, result] of results) {
419
435
  onEvent({ kind: 'capability_done', id: inv.id, result });
420
436
  }
437
+ // Refresh activity timestamp after tool execution
438
+ lastActivity = Date.now();
421
439
  // Append outcomes
422
440
  const outcomeContent = results.map(([inv, result]) => ({
423
441
  type: 'tool_result',
@@ -9,21 +9,18 @@
9
9
  * 5. Pre-compact stripping — remove images/docs before summarization
10
10
  */
11
11
  import type { Dialogue } from './types.js';
12
- /** Default max_tokens (low to save slot reservation) */
13
- export declare const CAPPED_MAX_TOKENS = 8192;
12
+ /** Default max_tokens (low to save output slot reservation) */
13
+ export declare const CAPPED_MAX_TOKENS = 16384;
14
14
  /** Escalated max_tokens after hitting the cap */
15
15
  export declare const ESCALATED_MAX_TOKENS = 65536;
16
+ /** Get max output tokens for a model */
17
+ export declare function getMaxOutputTokens(model: string): number;
16
18
  /**
17
19
  * Cap tool result sizes to prevent context bloat.
18
20
  * Large results (>50K chars) are truncated with a preview.
19
21
  * Per-message aggregate is also capped at 200K chars.
20
22
  */
21
23
  export declare function budgetToolResults(history: Dialogue[]): Dialogue[];
22
- /**
23
- * Remove thinking blocks from older assistant messages.
24
- * Keeps thinking only in the most recent assistant message.
25
- * Thinking blocks are large and not needed for context after the decision is made.
26
- */
27
24
  export declare function stripOldThinking(history: Dialogue[]): Dialogue[];
28
25
  /**
29
26
  * After an idle gap (>60 min), clear old tool results.
@@ -15,10 +15,25 @@ const MAX_TOOL_RESULT_CHARS = 50_000;
15
15
  const MAX_TOOL_RESULTS_PER_MESSAGE_CHARS = 200_000;
16
16
  /** Preview size when truncating */
17
17
  const PREVIEW_CHARS = 2_000;
18
- /** Default max_tokens (low to save slot reservation) */
19
- export const CAPPED_MAX_TOKENS = 8_192;
18
+ /** Default max_tokens (low to save output slot reservation) */
19
+ export const CAPPED_MAX_TOKENS = 16_384;
20
20
  /** Escalated max_tokens after hitting the cap */
21
21
  export const ESCALATED_MAX_TOKENS = 65_536;
22
+ /** Per-model max output tokens — prevents requesting more than the model supports */
23
+ const MODEL_MAX_OUTPUT = {
24
+ 'anthropic/claude-opus-4.6': 32_000,
25
+ 'anthropic/claude-sonnet-4.6': 64_000,
26
+ 'anthropic/claude-haiku-4.5-20251001': 16_384,
27
+ 'openai/gpt-5.4': 32_768,
28
+ 'openai/gpt-5-mini': 16_384,
29
+ 'google/gemini-2.5-pro': 65_536,
30
+ 'google/gemini-2.5-flash': 65_536,
31
+ 'deepseek/deepseek-chat': 8_192,
32
+ };
33
+ /** Get max output tokens for a model */
34
+ export function getMaxOutputTokens(model) {
35
+ return MODEL_MAX_OUTPUT[model] ?? 16_384;
36
+ }
22
37
  /** Idle gap (minutes) after which old tool results are cleared */
23
38
  const IDLE_GAP_THRESHOLD_MINUTES = 60;
24
39
  /** Number of recent tool results to keep during time-based cleanup */
@@ -86,26 +101,29 @@ export function budgetToolResults(history) {
86
101
  // ─── 2. Thinking Block Stripping ───────────────────────────────────────────
87
102
  /**
88
103
  * Remove thinking blocks from older assistant messages.
89
- * Keeps thinking only in the most recent assistant message.
90
- * Thinking blocks are large and not needed for context after the decision is made.
104
+ * Keeps thinking only in the most recent N assistant messages (default: last 2 turns).
105
+ * Older thinking blocks are large and not needed after the decision is made.
91
106
  */
107
+ const KEEP_THINKING_TURNS = 2;
92
108
  export function stripOldThinking(history) {
93
- // Find the last assistant message index
94
- let lastAssistantIdx = -1;
109
+ // Find the last N assistant message indices to preserve their thinking
110
+ const assistantIndices = [];
95
111
  for (let i = history.length - 1; i >= 0; i--) {
96
112
  if (history[i].role === 'assistant') {
97
- lastAssistantIdx = i;
98
- break;
113
+ assistantIndices.push(i);
114
+ if (assistantIndices.length >= KEEP_THINKING_TURNS)
115
+ break;
99
116
  }
100
117
  }
101
- if (lastAssistantIdx <= 0)
118
+ if (assistantIndices.length === 0)
102
119
  return history;
120
+ const keepSet = new Set(assistantIndices);
103
121
  const result = [];
104
122
  let modified = false;
105
123
  for (let i = 0; i < history.length; i++) {
106
124
  const msg = history[i];
107
- // Only strip from older assistant messages (not the latest)
108
- if (msg.role === 'assistant' && i < lastAssistantIdx && Array.isArray(msg.content)) {
125
+ // Strip thinking from assistant messages NOT in the keep set
126
+ if (msg.role === 'assistant' && !keepSet.has(i) && Array.isArray(msg.content)) {
109
127
  const filtered = msg.content.filter((part) => part.type !== 'thinking');
110
128
  if (filtered.length < msg.content.length) {
111
129
  modified = true;
@@ -64,7 +64,8 @@ export function resetTokenAnchor() {
64
64
  * JSON-heavy content uses 2 bytes/token; general text uses 4.
65
65
  */
66
66
  export function estimateTokens(text, bytesPerToken = DEFAULT_BYTES_PER_TOKEN) {
67
- return Math.ceil(Buffer.byteLength(text, 'utf-8') / bytesPerToken);
67
+ // Pad by 4/3 (~33%) for conservative estimation — better to over-count than under-count
68
+ return Math.ceil(Buffer.byteLength(text, 'utf-8') / bytesPerToken * 1.33);
68
69
  }
69
70
  /**
70
71
  * Estimate tokens for a content part.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/runcode",
3
- "version": "2.2.6",
3
+ "version": "2.3.0",
4
4
  "description": "RunCode — AI coding agent powered by 41+ models. Pay per use with USDC.",
5
5
  "type": "module",
6
6
  "bin": {