@blockrun/runcode 2.2.6 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/commands.js +36 -0
- package/dist/agent/compact.js +9 -5
- package/dist/agent/loop.js +35 -17
- package/dist/agent/optimize.d.ts +4 -7
- package/dist/agent/optimize.js +29 -11
- package/dist/agent/tokens.js +2 -1
- package/package.json +1 -1
package/dist/agent/commands.js
CHANGED
|
@@ -71,6 +71,42 @@ const DIRECT_COMMANDS = {
|
|
|
71
71
|
ctx.onEvent({ kind: 'text_delta', text: 'Last commit undone. Changes preserved in staging.\n' });
|
|
72
72
|
emitDone(ctx);
|
|
73
73
|
},
|
|
74
|
+
'/tokens': (ctx) => {
|
|
75
|
+
const { estimated, apiAnchored } = getAnchoredTokenCount(ctx.history);
|
|
76
|
+
const contextWindow = getContextWindow(ctx.config.model);
|
|
77
|
+
const pct = (estimated / contextWindow) * 100;
|
|
78
|
+
// Count tool results and thinking blocks
|
|
79
|
+
let toolResults = 0;
|
|
80
|
+
let thinkingBlocks = 0;
|
|
81
|
+
let totalToolChars = 0;
|
|
82
|
+
for (const msg of ctx.history) {
|
|
83
|
+
if (typeof msg.content === 'string')
|
|
84
|
+
continue;
|
|
85
|
+
if (!Array.isArray(msg.content))
|
|
86
|
+
continue;
|
|
87
|
+
for (const part of msg.content) {
|
|
88
|
+
if ('type' in part) {
|
|
89
|
+
if (part.type === 'tool_result') {
|
|
90
|
+
toolResults++;
|
|
91
|
+
const c = typeof part.content === 'string' ? part.content : JSON.stringify(part.content);
|
|
92
|
+
totalToolChars += c.length;
|
|
93
|
+
}
|
|
94
|
+
if (part.type === 'thinking')
|
|
95
|
+
thinkingBlocks++;
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
ctx.onEvent({ kind: 'text_delta', text: `**Token Usage**\n` +
|
|
100
|
+
` Estimated: ~${estimated.toLocaleString()} tokens ${apiAnchored ? '(API-anchored)' : '(estimated)'}\n` +
|
|
101
|
+
` Context: ${(contextWindow / 1000).toFixed(0)}k window (${pct.toFixed(1)}% used)\n` +
|
|
102
|
+
` Messages: ${ctx.history.length}\n` +
|
|
103
|
+
` Tool results: ${toolResults} (${(totalToolChars / 1024).toFixed(0)}KB)\n` +
|
|
104
|
+
` Thinking: ${thinkingBlocks} blocks\n` +
|
|
105
|
+
(pct > 80 ? ' ⚠ Near limit — run /compact\n' : '') +
|
|
106
|
+
(pct > 60 ? '' : ' ✓ Healthy\n')
|
|
107
|
+
});
|
|
108
|
+
emitDone(ctx);
|
|
109
|
+
},
|
|
74
110
|
'/help': (ctx) => {
|
|
75
111
|
ctx.onEvent({ kind: 'text_delta', text: `**RunCode Commands**\n\n` +
|
|
76
112
|
` **Coding:** /commit /review /test /fix /debug /explain /search /find /refactor /scaffold\n` +
|
package/dist/agent/compact.js
CHANGED
|
@@ -204,15 +204,19 @@ function formatForSummarization(messages) {
|
|
|
204
204
|
* Pick a cheaper/faster model for compaction to save cost.
|
|
205
205
|
*/
|
|
206
206
|
function pickCompactionModel(primaryModel) {
    // Use the cheapest capable model for summarization to save cost.
    // Tier down: opus/pro → sonnet, sonnet/gpt-5.4 → haiku, everything else → flash.
    const CHEAPEST_MODEL = 'google/gemini-2.5-flash';
    // A missing or non-string model id cannot be classified; fall back to the
    // cheapest model instead of throwing on `.includes`.
    if (typeof primaryModel !== 'string') {
        return CHEAPEST_MODEL;
    }
    const mentions = (fragment) => primaryModel.includes(fragment);
    // The 'pro' substring check also covers ids such as 'gemini-2.5-pro', so a
    // separate check for that id in a later tier could never be reached.
    if (mentions('opus') || mentions('pro')) {
        return 'anthropic/claude-sonnet-4.6';
    }
    if (mentions('sonnet') || mentions('gpt-5.4')) {
        return 'anthropic/claude-haiku-4.5-20251001';
    }
    // haiku/mini/nano as well as free or unknown models all map to flash.
    return CHEAPEST_MODEL;
}
|
|
217
221
|
/**
|
|
218
222
|
* Emergency fallback: drop oldest messages until under threshold.
|
package/dist/agent/loop.js
CHANGED
|
@@ -9,7 +9,7 @@ import { estimateHistoryTokens, updateActualTokens, resetTokenAnchor } from './t
|
|
|
9
9
|
import { handleSlashCommand } from './commands.js';
|
|
10
10
|
import { PermissionManager } from './permissions.js';
|
|
11
11
|
import { StreamingExecutor } from './streaming-executor.js';
|
|
12
|
-
import { optimizeHistory, CAPPED_MAX_TOKENS, ESCALATED_MAX_TOKENS } from './optimize.js';
|
|
12
|
+
import { optimizeHistory, CAPPED_MAX_TOKENS, ESCALATED_MAX_TOKENS, getMaxOutputTokens } from './optimize.js';
|
|
13
13
|
import { recordUsage } from '../stats/tracker.js';
|
|
14
14
|
import { estimateCost } from '../pricing.js';
|
|
15
15
|
import { createSessionId, appendToSession, updateSessionMeta, pruneOldSessions, } from '../session/storage.js';
|
|
@@ -237,13 +237,14 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
237
237
|
onAbortReady?.(() => abort.abort());
|
|
238
238
|
let loopCount = 0;
|
|
239
239
|
let recoveryAttempts = 0;
|
|
240
|
+
let compactFailures = 0;
|
|
240
241
|
let maxTokensOverride;
|
|
241
|
-
|
|
242
|
+
let lastActivity = Date.now();
|
|
242
243
|
// Agent loop for this user message
|
|
243
244
|
while (loopCount < maxTurns) {
|
|
244
245
|
loopCount++;
|
|
245
246
|
// ── Token optimization pipeline ──
|
|
246
|
-
// 1. Strip thinking, budget tool results, time-based cleanup
|
|
247
|
+
// 1. Strip thinking, budget tool results, time-based cleanup (always — cheap)
|
|
247
248
|
const optimized = optimizeHistory(history, {
|
|
248
249
|
debug: config.debug,
|
|
249
250
|
lastActivityTimestamp: lastActivity,
|
|
@@ -252,24 +253,39 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
252
253
|
history.length = 0;
|
|
253
254
|
history.push(...optimized);
|
|
254
255
|
}
|
|
255
|
-
// 2. Microcompact:
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
history
|
|
259
|
-
|
|
256
|
+
// 2. Microcompact: only when history has >15 messages (skip for short conversations)
|
|
257
|
+
if (history.length > 15) {
|
|
258
|
+
const microCompacted = microCompact(history, 8);
|
|
259
|
+
if (microCompacted !== history) {
|
|
260
|
+
history.length = 0;
|
|
261
|
+
history.push(...microCompacted);
|
|
262
|
+
}
|
|
260
263
|
}
|
|
261
|
-
// Auto-compact: summarize history if approaching context limit
|
|
262
|
-
|
|
263
|
-
if (
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
264
|
+
// 3. Auto-compact: summarize history if approaching context limit
|
|
265
|
+
// Circuit breaker: stop retrying after 3 consecutive failures
|
|
266
|
+
if (compactFailures < 3) {
|
|
267
|
+
try {
|
|
268
|
+
const { history: compacted, compacted: didCompact } = await autoCompactIfNeeded(history, config.model, client, config.debug);
|
|
269
|
+
if (didCompact) {
|
|
270
|
+
history.length = 0;
|
|
271
|
+
history.push(...compacted);
|
|
272
|
+
resetTokenAnchor();
|
|
273
|
+
compactFailures = 0;
|
|
274
|
+
if (config.debug) {
|
|
275
|
+
console.error(`[runcode] History compacted: ~${estimateHistoryTokens(history)} tokens`);
|
|
276
|
+
}
|
|
277
|
+
}
|
|
278
|
+
}
|
|
279
|
+
catch (compactErr) {
|
|
280
|
+
compactFailures++;
|
|
281
|
+
if (config.debug) {
|
|
282
|
+
console.error(`[runcode] Compaction failed (${compactFailures}/3): ${compactErr.message}`);
|
|
283
|
+
}
|
|
269
284
|
}
|
|
270
285
|
}
|
|
271
286
|
const systemPrompt = config.systemInstructions.join('\n\n');
|
|
272
|
-
|
|
287
|
+
const modelMaxOut = getMaxOutputTokens(config.model);
|
|
288
|
+
let maxTokens = Math.min(maxTokensOverride ?? CAPPED_MAX_TOKENS, modelMaxOut);
|
|
273
289
|
let responseParts = [];
|
|
274
290
|
let usage;
|
|
275
291
|
let stopReason;
|
|
@@ -418,6 +434,8 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
418
434
|
for (const [inv, result] of results) {
|
|
419
435
|
onEvent({ kind: 'capability_done', id: inv.id, result });
|
|
420
436
|
}
|
|
437
|
+
// Refresh activity timestamp after tool execution
|
|
438
|
+
lastActivity = Date.now();
|
|
421
439
|
// Append outcomes
|
|
422
440
|
const outcomeContent = results.map(([inv, result]) => ({
|
|
423
441
|
type: 'tool_result',
|
package/dist/agent/optimize.d.ts
CHANGED
|
@@ -9,21 +9,18 @@
|
|
|
9
9
|
* 5. Pre-compact stripping — remove images/docs before summarization
|
|
10
10
|
*/
|
|
11
11
|
import type { Dialogue } from './types.js';
|
|
12
|
-
/** Default max_tokens (low to save slot reservation) */
|
|
13
|
-
export declare const CAPPED_MAX_TOKENS =
|
|
12
|
+
/** Default max_tokens (low to save output slot reservation) */
|
|
13
|
+
export declare const CAPPED_MAX_TOKENS = 16384;
|
|
14
14
|
/** Escalated max_tokens after hitting the cap */
|
|
15
15
|
export declare const ESCALATED_MAX_TOKENS = 65536;
|
|
16
|
+
/** Get max output tokens for a model */
|
|
17
|
+
export declare function getMaxOutputTokens(model: string): number;
|
|
16
18
|
/**
|
|
17
19
|
* Cap tool result sizes to prevent context bloat.
|
|
18
20
|
* Large results (>50K chars) are truncated with a preview.
|
|
19
21
|
* Per-message aggregate is also capped at 200K chars.
|
|
20
22
|
*/
|
|
21
23
|
export declare function budgetToolResults(history: Dialogue[]): Dialogue[];
|
|
22
|
-
/**
|
|
23
|
-
* Remove thinking blocks from older assistant messages.
|
|
24
|
-
* Keeps thinking only in the most recent assistant message.
|
|
25
|
-
* Thinking blocks are large and not needed for context after the decision is made.
|
|
26
|
-
*/
|
|
27
24
|
export declare function stripOldThinking(history: Dialogue[]): Dialogue[];
|
|
28
25
|
/**
|
|
29
26
|
* After an idle gap (>60 min), clear old tool results.
|
package/dist/agent/optimize.js
CHANGED
|
@@ -15,10 +15,25 @@ const MAX_TOOL_RESULT_CHARS = 50_000;
|
|
|
15
15
|
const MAX_TOOL_RESULTS_PER_MESSAGE_CHARS = 200_000;
|
|
16
16
|
/** Preview size when truncating */
|
|
17
17
|
const PREVIEW_CHARS = 2_000;
|
|
18
|
-
/** Default max_tokens (low to save slot reservation) */
|
|
19
|
-
export const CAPPED_MAX_TOKENS =
|
|
18
|
+
/** Default max_tokens (low to save output slot reservation) */
|
|
19
|
+
export const CAPPED_MAX_TOKENS = 16_384;
|
|
20
20
|
/** Escalated max_tokens after hitting the cap */
|
|
21
21
|
export const ESCALATED_MAX_TOKENS = 65_536;
|
|
22
|
+
/** Per-model max output tokens — prevents requesting more than the model supports */
const MODEL_MAX_OUTPUT = {
    'anthropic/claude-opus-4.6': 32_000,
    'anthropic/claude-sonnet-4.6': 64_000,
    'anthropic/claude-haiku-4.5-20251001': 16_384,
    'openai/gpt-5.4': 32_768,
    'openai/gpt-5-mini': 16_384,
    'google/gemini-2.5-pro': 65_536,
    'google/gemini-2.5-flash': 65_536,
    'deepseek/deepseek-chat': 8_192,
};
/** Output-token limit assumed for models that are not listed in MODEL_MAX_OUTPUT */
const DEFAULT_MODEL_MAX_OUTPUT = 16_384;
/**
 * Get max output tokens for a model.
 *
 * @param {string} model - Model identifier, e.g. 'anthropic/claude-sonnet-4.6'.
 * @returns {number} The listed limit for the model, or 16,384 when the model is not listed.
 */
export function getMaxOutputTokens(model) {
    // Only own keys count: a bare `MODEL_MAX_OUTPUT[model]` lookup would return
    // inherited members for ids like 'constructor' or '__proto__', which are not
    // nullish and would therefore bypass a `??` default.
    if (Object.prototype.hasOwnProperty.call(MODEL_MAX_OUTPUT, model)) {
        return MODEL_MAX_OUTPUT[model];
    }
    return DEFAULT_MODEL_MAX_OUTPUT;
}
|
|
22
37
|
/** Idle gap (minutes) after which old tool results are cleared */
|
|
23
38
|
const IDLE_GAP_THRESHOLD_MINUTES = 60;
|
|
24
39
|
/** Number of recent tool results to keep during time-based cleanup */
|
|
@@ -86,26 +101,29 @@ export function budgetToolResults(history) {
|
|
|
86
101
|
// ─── 2. Thinking Block Stripping ───────────────────────────────────────────
|
|
87
102
|
/**
|
|
88
103
|
* Remove thinking blocks from older assistant messages.
|
|
89
|
-
* Keeps thinking only in the most recent assistant
|
|
90
|
-
*
|
|
104
|
+
* Keeps thinking only in the most recent N assistant messages (default: last 2 turns).
|
|
105
|
+
* Older thinking blocks are large and not needed after the decision is made.
|
|
91
106
|
*/
|
|
107
|
+
const KEEP_THINKING_TURNS = 2;
|
|
92
108
|
export function stripOldThinking(history) {
|
|
93
|
-
// Find the last assistant message
|
|
94
|
-
|
|
109
|
+
// Find the last N assistant message indices to preserve their thinking
|
|
110
|
+
const assistantIndices = [];
|
|
95
111
|
for (let i = history.length - 1; i >= 0; i--) {
|
|
96
112
|
if (history[i].role === 'assistant') {
|
|
97
|
-
|
|
98
|
-
|
|
113
|
+
assistantIndices.push(i);
|
|
114
|
+
if (assistantIndices.length >= KEEP_THINKING_TURNS)
|
|
115
|
+
break;
|
|
99
116
|
}
|
|
100
117
|
}
|
|
101
|
-
if (
|
|
118
|
+
if (assistantIndices.length === 0)
|
|
102
119
|
return history;
|
|
120
|
+
const keepSet = new Set(assistantIndices);
|
|
103
121
|
const result = [];
|
|
104
122
|
let modified = false;
|
|
105
123
|
for (let i = 0; i < history.length; i++) {
|
|
106
124
|
const msg = history[i];
|
|
107
|
-
//
|
|
108
|
-
if (msg.role === 'assistant' && i
|
|
125
|
+
// Strip thinking from assistant messages NOT in the keep set
|
|
126
|
+
if (msg.role === 'assistant' && !keepSet.has(i) && Array.isArray(msg.content)) {
|
|
109
127
|
const filtered = msg.content.filter((part) => part.type !== 'thinking');
|
|
110
128
|
if (filtered.length < msg.content.length) {
|
|
111
129
|
modified = true;
|
package/dist/agent/tokens.js
CHANGED
|
@@ -64,7 +64,8 @@ export function resetTokenAnchor() {
|
|
|
64
64
|
* JSON-heavy content uses 2 bytes/token; general text uses 4.
|
|
65
65
|
*/
|
|
66
66
|
export function estimateTokens(text, bytesPerToken = DEFAULT_BYTES_PER_TOKEN) {
|
|
67
|
-
|
|
67
|
+
// Pad by a factor of 1.33 (~33%, roughly 4/3) for conservative estimation — better to over-count than under-count
|
|
68
|
+
return Math.ceil(Buffer.byteLength(text, 'utf-8') / bytesPerToken * 1.33);
|
|
68
69
|
}
|
|
69
70
|
/**
|
|
70
71
|
* Estimate tokens for a content part.
|