@blockrun/runcode 2.2.6 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -71,6 +71,42 @@ const DIRECT_COMMANDS = {
71
71
  ctx.onEvent({ kind: 'text_delta', text: 'Last commit undone. Changes preserved in staging.\n' });
72
72
  emitDone(ctx);
73
73
  },
74
+ '/tokens': (ctx) => {
75
+ const { estimated, apiAnchored } = getAnchoredTokenCount(ctx.history);
76
+ const contextWindow = getContextWindow(ctx.config.model);
77
+ const pct = (estimated / contextWindow) * 100;
78
+ // Count tool results and thinking blocks
79
+ let toolResults = 0;
80
+ let thinkingBlocks = 0;
81
+ let totalToolChars = 0;
82
+ for (const msg of ctx.history) {
83
+ if (typeof msg.content === 'string')
84
+ continue;
85
+ if (!Array.isArray(msg.content))
86
+ continue;
87
+ for (const part of msg.content) {
88
+ if ('type' in part) {
89
+ if (part.type === 'tool_result') {
90
+ toolResults++;
91
+ const c = typeof part.content === 'string' ? part.content : JSON.stringify(part.content);
92
+ totalToolChars += c.length;
93
+ }
94
+ if (part.type === 'thinking')
95
+ thinkingBlocks++;
96
+ }
97
+ }
98
+ }
99
+ ctx.onEvent({ kind: 'text_delta', text: `**Token Usage**\n` +
100
+ ` Estimated: ~${estimated.toLocaleString()} tokens ${apiAnchored ? '(API-anchored)' : '(estimated)'}\n` +
101
+ ` Context: ${(contextWindow / 1000).toFixed(0)}k window (${pct.toFixed(1)}% used)\n` +
102
+ ` Messages: ${ctx.history.length}\n` +
103
+ ` Tool results: ${toolResults} (${(totalToolChars / 1024).toFixed(0)}KB)\n` +
104
+ ` Thinking: ${thinkingBlocks} blocks\n` +
105
+ (pct > 80 ? ' ⚠ Near limit — run /compact\n' : '') +
106
+ (pct > 60 ? '' : ' ✓ Healthy\n')
107
+ });
108
+ emitDone(ctx);
109
+ },
74
110
  '/help': (ctx) => {
75
111
  ctx.onEvent({ kind: 'text_delta', text: `**RunCode Commands**\n\n` +
76
112
  ` **Coding:** /commit /review /test /fix /debug /explain /search /find /refactor /scaffold\n` +
@@ -204,15 +204,19 @@ function formatForSummarization(messages) {
204
204
  * Pick a cheaper/faster model for compaction to save cost.
205
205
  */
206
206
  function pickCompactionModel(primaryModel) {
207
- // Use a fast model for summarization — no need for the expensive primary
208
- if (primaryModel.includes('opus') || primaryModel.includes('gpt-5.4-pro')) {
207
+ // Use cheapest capable model for summarization to save cost
208
+ // Tier down: opus/pro → sonnet, sonnet → haiku, everything else → flash (cheapest capable)
209
+ if (primaryModel.includes('opus') || primaryModel.includes('pro')) {
209
210
  return 'anthropic/claude-sonnet-4.6';
210
211
  }
211
- if (primaryModel.includes('sonnet')) {
212
+ if (primaryModel.includes('sonnet') || primaryModel.includes('gpt-5.4') || primaryModel.includes('gemini-2.5-pro')) {
212
213
  return 'anthropic/claude-haiku-4.5-20251001';
213
214
  }
214
- // For cheaper models, just use the same one
215
- return primaryModel;
215
+ if (primaryModel.includes('haiku') || primaryModel.includes('mini') || primaryModel.includes('nano')) {
216
+ return 'google/gemini-2.5-flash'; // Cheapest capable model
217
+ }
218
+ // Free/unknown models — use flash
219
+ return 'google/gemini-2.5-flash';
216
220
  }
217
221
  /**
218
222
  * Emergency fallback: drop oldest messages until under threshold.
@@ -9,7 +9,7 @@ import { estimateHistoryTokens, updateActualTokens, resetTokenAnchor } from './t
9
9
  import { handleSlashCommand } from './commands.js';
10
10
  import { PermissionManager } from './permissions.js';
11
11
  import { StreamingExecutor } from './streaming-executor.js';
12
- import { optimizeHistory, CAPPED_MAX_TOKENS, ESCALATED_MAX_TOKENS } from './optimize.js';
12
+ import { optimizeHistory, CAPPED_MAX_TOKENS, ESCALATED_MAX_TOKENS, getMaxOutputTokens } from './optimize.js';
13
13
  import { recordUsage } from '../stats/tracker.js';
14
14
  import { estimateCost } from '../pricing.js';
15
15
  import { createSessionId, appendToSession, updateSessionMeta, pruneOldSessions, } from '../session/storage.js';
@@ -237,13 +237,14 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
237
237
  onAbortReady?.(() => abort.abort());
238
238
  let loopCount = 0;
239
239
  let recoveryAttempts = 0;
240
+ let compactFailures = 0;
240
241
  let maxTokensOverride;
241
- const lastActivity = Date.now();
242
+ let lastActivity = Date.now();
242
243
  // Agent loop for this user message
243
244
  while (loopCount < maxTurns) {
244
245
  loopCount++;
245
246
  // ── Token optimization pipeline ──
246
- // 1. Strip thinking, budget tool results, time-based cleanup
247
+ // 1. Strip thinking, budget tool results, time-based cleanup (always — cheap)
247
248
  const optimized = optimizeHistory(history, {
248
249
  debug: config.debug,
249
250
  lastActivityTimestamp: lastActivity,
@@ -252,24 +253,39 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
252
253
  history.length = 0;
253
254
  history.push(...optimized);
254
255
  }
255
- // 2. Microcompact: clear old tool results to save tokens
256
- const microCompacted = microCompact(history, 8);
257
- if (microCompacted !== history) {
258
- history.length = 0;
259
- history.push(...microCompacted);
256
+ // 2. Microcompact: only when history has >15 messages (skip for short conversations)
257
+ if (history.length > 15) {
258
+ const microCompacted = microCompact(history, 8);
259
+ if (microCompacted !== history) {
260
+ history.length = 0;
261
+ history.push(...microCompacted);
262
+ }
260
263
  }
261
- // Auto-compact: summarize history if approaching context limit
262
- const { history: compacted, compacted: didCompact } = await autoCompactIfNeeded(history, config.model, client, config.debug);
263
- if (didCompact) {
264
- history.length = 0;
265
- history.push(...compacted);
266
- resetTokenAnchor(); // Reset anchor after compaction — estimates will be used
267
- if (config.debug) {
268
- console.error(`[runcode] History compacted: ~${estimateHistoryTokens(history)} tokens`);
264
+ // 3. Auto-compact: summarize history if approaching context limit
265
+ // Circuit breaker: stop retrying after 3 consecutive failures
266
+ if (compactFailures < 3) {
267
+ try {
268
+ const { history: compacted, compacted: didCompact } = await autoCompactIfNeeded(history, config.model, client, config.debug);
269
+ if (didCompact) {
270
+ history.length = 0;
271
+ history.push(...compacted);
272
+ resetTokenAnchor();
273
+ compactFailures = 0;
274
+ if (config.debug) {
275
+ console.error(`[runcode] History compacted: ~${estimateHistoryTokens(history)} tokens`);
276
+ }
277
+ }
278
+ }
279
+ catch (compactErr) {
280
+ compactFailures++;
281
+ if (config.debug) {
282
+ console.error(`[runcode] Compaction failed (${compactFailures}/3): ${compactErr.message}`);
283
+ }
269
284
  }
270
285
  }
271
286
  const systemPrompt = config.systemInstructions.join('\n\n');
272
- let maxTokens = maxTokensOverride ?? CAPPED_MAX_TOKENS;
287
+ const modelMaxOut = getMaxOutputTokens(config.model);
288
+ let maxTokens = Math.min(maxTokensOverride ?? CAPPED_MAX_TOKENS, modelMaxOut);
273
289
  let responseParts = [];
274
290
  let usage;
275
291
  let stopReason;
@@ -418,6 +434,8 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
418
434
  for (const [inv, result] of results) {
419
435
  onEvent({ kind: 'capability_done', id: inv.id, result });
420
436
  }
437
+ // Refresh activity timestamp after tool execution
438
+ lastActivity = Date.now();
421
439
  // Append outcomes
422
440
  const outcomeContent = results.map(([inv, result]) => ({
423
441
  type: 'tool_result',
@@ -9,21 +9,18 @@
9
9
  * 5. Pre-compact stripping — remove images/docs before summarization
10
10
  */
11
11
  import type { Dialogue } from './types.js';
12
- /** Default max_tokens (low to save slot reservation) */
13
- export declare const CAPPED_MAX_TOKENS = 8192;
12
+ /** Default max_tokens (low to save output slot reservation) */
13
+ export declare const CAPPED_MAX_TOKENS = 16384;
14
14
  /** Escalated max_tokens after hitting the cap */
15
15
  export declare const ESCALATED_MAX_TOKENS = 65536;
16
+ /** Get max output tokens for a model */
17
+ export declare function getMaxOutputTokens(model: string): number;
16
18
  /**
17
19
  * Cap tool result sizes to prevent context bloat.
18
20
  * Large results (>50K chars) are truncated with a preview.
19
21
  * Per-message aggregate is also capped at 200K chars.
20
22
  */
21
23
  export declare function budgetToolResults(history: Dialogue[]): Dialogue[];
22
- /**
23
- * Remove thinking blocks from older assistant messages.
24
- * Keeps thinking only in the most recent assistant message.
25
- * Thinking blocks are large and not needed for context after the decision is made.
26
- */
27
24
  export declare function stripOldThinking(history: Dialogue[]): Dialogue[];
28
25
  /**
29
26
  * After an idle gap (>60 min), clear old tool results.
@@ -15,10 +15,25 @@ const MAX_TOOL_RESULT_CHARS = 50_000;
15
15
  const MAX_TOOL_RESULTS_PER_MESSAGE_CHARS = 200_000;
16
16
  /** Preview size when truncating */
17
17
  const PREVIEW_CHARS = 2_000;
18
- /** Default max_tokens (low to save slot reservation) */
19
- export const CAPPED_MAX_TOKENS = 8_192;
18
+ /** Default max_tokens (low to save output slot reservation) */
19
+ export const CAPPED_MAX_TOKENS = 16_384;
20
20
  /** Escalated max_tokens after hitting the cap */
21
21
  export const ESCALATED_MAX_TOKENS = 65_536;
22
+ /** Per-model max output tokens — prevents requesting more than the model supports */
23
+ const MODEL_MAX_OUTPUT = {
24
+ 'anthropic/claude-opus-4.6': 32_000,
25
+ 'anthropic/claude-sonnet-4.6': 64_000,
26
+ 'anthropic/claude-haiku-4.5-20251001': 16_384,
27
+ 'openai/gpt-5.4': 32_768,
28
+ 'openai/gpt-5-mini': 16_384,
29
+ 'google/gemini-2.5-pro': 65_536,
30
+ 'google/gemini-2.5-flash': 65_536,
31
+ 'deepseek/deepseek-chat': 8_192,
32
+ };
33
+ /** Get max output tokens for a model */
34
+ export function getMaxOutputTokens(model) {
35
+ return MODEL_MAX_OUTPUT[model] ?? 16_384;
36
+ }
22
37
  /** Idle gap (minutes) after which old tool results are cleared */
23
38
  const IDLE_GAP_THRESHOLD_MINUTES = 60;
24
39
  /** Number of recent tool results to keep during time-based cleanup */
@@ -86,26 +101,29 @@ export function budgetToolResults(history) {
86
101
  // ─── 2. Thinking Block Stripping ───────────────────────────────────────────
87
102
  /**
88
103
  * Remove thinking blocks from older assistant messages.
89
- * Keeps thinking only in the most recent assistant message.
90
- * Thinking blocks are large and not needed for context after the decision is made.
104
+ * Keeps thinking only in the most recent N assistant messages (default: last 2 turns).
105
+ * Older thinking blocks are large and not needed after the decision is made.
91
106
  */
107
+ const KEEP_THINKING_TURNS = 2;
92
108
  export function stripOldThinking(history) {
93
- // Find the last assistant message index
94
- let lastAssistantIdx = -1;
109
+ // Find the last N assistant message indices to preserve their thinking
110
+ const assistantIndices = [];
95
111
  for (let i = history.length - 1; i >= 0; i--) {
96
112
  if (history[i].role === 'assistant') {
97
- lastAssistantIdx = i;
98
- break;
113
+ assistantIndices.push(i);
114
+ if (assistantIndices.length >= KEEP_THINKING_TURNS)
115
+ break;
99
116
  }
100
117
  }
101
- if (lastAssistantIdx <= 0)
118
+ if (assistantIndices.length === 0)
102
119
  return history;
120
+ const keepSet = new Set(assistantIndices);
103
121
  const result = [];
104
122
  let modified = false;
105
123
  for (let i = 0; i < history.length; i++) {
106
124
  const msg = history[i];
107
- // Only strip from older assistant messages (not the latest)
108
- if (msg.role === 'assistant' && i < lastAssistantIdx && Array.isArray(msg.content)) {
125
+ // Strip thinking from assistant messages NOT in the keep set
126
+ if (msg.role === 'assistant' && !keepSet.has(i) && Array.isArray(msg.content)) {
109
127
  const filtered = msg.content.filter((part) => part.type !== 'thinking');
110
128
  if (filtered.length < msg.content.length) {
111
129
  modified = true;
@@ -64,7 +64,8 @@ export function resetTokenAnchor() {
64
64
  * JSON-heavy content uses 2 bytes/token; general text uses 4.
65
65
  */
66
66
  export function estimateTokens(text, bytesPerToken = DEFAULT_BYTES_PER_TOKEN) {
67
- return Math.ceil(Buffer.byteLength(text, 'utf-8') / bytesPerToken);
67
+ // Pad by 4/3 (~33%) for conservative estimation — better to over-count than under-count
68
+ return Math.ceil(Buffer.byteLength(text, 'utf-8') / bytesPerToken * 1.33);
68
69
  }
69
70
  /**
70
71
  * Estimate tokens for a content part.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/runcode",
3
- "version": "2.2.6",
3
+ "version": "2.3.0",
4
4
  "description": "RunCode — AI coding agent powered by 41+ models. Pay per use with USDC.",
5
5
  "type": "module",
6
6
  "bin": {