@blockrun/runcode 2.2.7 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/commands.js +36 -0
- package/dist/agent/compact.js +9 -5
- package/dist/agent/loop.js +38 -15
- package/dist/agent/optimize.d.ts +4 -7
- package/dist/agent/optimize.js +29 -11
- package/dist/agent/reduce.d.ts +38 -0
- package/dist/agent/reduce.js +231 -0
- package/dist/agent/tokens.js +2 -1
- package/package.json +1 -1
package/dist/agent/commands.js
CHANGED
|
@@ -71,6 +71,42 @@ const DIRECT_COMMANDS = {
|
|
|
71
71
|
ctx.onEvent({ kind: 'text_delta', text: 'Last commit undone. Changes preserved in staging.\n' });
|
|
72
72
|
emitDone(ctx);
|
|
73
73
|
},
|
|
74
|
+
'/tokens': (ctx) => {
|
|
75
|
+
const { estimated, apiAnchored } = getAnchoredTokenCount(ctx.history);
|
|
76
|
+
const contextWindow = getContextWindow(ctx.config.model);
|
|
77
|
+
const pct = (estimated / contextWindow) * 100;
|
|
78
|
+
// Count tool results and thinking blocks
|
|
79
|
+
let toolResults = 0;
|
|
80
|
+
let thinkingBlocks = 0;
|
|
81
|
+
let totalToolChars = 0;
|
|
82
|
+
for (const msg of ctx.history) {
|
|
83
|
+
if (typeof msg.content === 'string')
|
|
84
|
+
continue;
|
|
85
|
+
if (!Array.isArray(msg.content))
|
|
86
|
+
continue;
|
|
87
|
+
for (const part of msg.content) {
|
|
88
|
+
if ('type' in part) {
|
|
89
|
+
if (part.type === 'tool_result') {
|
|
90
|
+
toolResults++;
|
|
91
|
+
const c = typeof part.content === 'string' ? part.content : JSON.stringify(part.content);
|
|
92
|
+
totalToolChars += c.length;
|
|
93
|
+
}
|
|
94
|
+
if (part.type === 'thinking')
|
|
95
|
+
thinkingBlocks++;
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
ctx.onEvent({ kind: 'text_delta', text: `**Token Usage**\n` +
|
|
100
|
+
` Estimated: ~${estimated.toLocaleString()} tokens ${apiAnchored ? '(API-anchored)' : '(estimated)'}\n` +
|
|
101
|
+
` Context: ${(contextWindow / 1000).toFixed(0)}k window (${pct.toFixed(1)}% used)\n` +
|
|
102
|
+
` Messages: ${ctx.history.length}\n` +
|
|
103
|
+
` Tool results: ${toolResults} (${(totalToolChars / 1024).toFixed(0)}KB)\n` +
|
|
104
|
+
` Thinking: ${thinkingBlocks} blocks\n` +
|
|
105
|
+
(pct > 80 ? ' ⚠ Near limit — run /compact\n' : '') +
|
|
106
|
+
(pct > 60 ? '' : ' ✓ Healthy\n')
|
|
107
|
+
});
|
|
108
|
+
emitDone(ctx);
|
|
109
|
+
},
|
|
74
110
|
'/help': (ctx) => {
|
|
75
111
|
ctx.onEvent({ kind: 'text_delta', text: `**RunCode Commands**\n\n` +
|
|
76
112
|
` **Coding:** /commit /review /test /fix /debug /explain /search /find /refactor /scaffold\n` +
|
package/dist/agent/compact.js
CHANGED
|
@@ -204,15 +204,19 @@ function formatForSummarization(messages) {
|
|
|
204
204
|
* Pick a cheaper/faster model for compaction to save cost.
|
|
205
205
|
*/
|
|
206
206
|
function pickCompactionModel(primaryModel) {
    // Use the cheapest capable model for summarization to save cost.
    // Tier down: opus/pro → sonnet, sonnet/gpt-5.4 → haiku, everything else → flash.
    //
    // A missing or non-string model id is treated as "unknown" rather than
    // throwing on `.includes`.
    const model = typeof primaryModel === 'string' ? primaryModel : '';
    // NOTE(review): the 'pro' substring also matches 'gemini-2.5-pro', so that
    // model is routed to sonnet here. An explicit 'gemini-2.5-pro' check in the
    // next tier was therefore unreachable and has been dropped; confirm which
    // tier was actually intended for it.
    if (model.includes('opus') || model.includes('pro')) {
        return 'anthropic/claude-sonnet-4.6';
    }
    if (model.includes('sonnet') || model.includes('gpt-5.4')) {
        return 'anthropic/claude-haiku-4.5-20251001';
    }
    // haiku / mini / nano as well as free and unknown models all use flash.
    return 'google/gemini-2.5-flash';
}
|
|
217
221
|
/**
|
|
218
222
|
* Emergency fallback: drop oldest messages until under threshold.
|
package/dist/agent/loop.js
CHANGED
|
@@ -7,9 +7,10 @@ import { ModelClient } from './llm.js';
|
|
|
7
7
|
import { autoCompactIfNeeded, microCompact } from './compact.js';
|
|
8
8
|
import { estimateHistoryTokens, updateActualTokens, resetTokenAnchor } from './tokens.js';
|
|
9
9
|
import { handleSlashCommand } from './commands.js';
|
|
10
|
+
import { reduceTokens } from './reduce.js';
|
|
10
11
|
import { PermissionManager } from './permissions.js';
|
|
11
12
|
import { StreamingExecutor } from './streaming-executor.js';
|
|
12
|
-
import { optimizeHistory, CAPPED_MAX_TOKENS, ESCALATED_MAX_TOKENS } from './optimize.js';
|
|
13
|
+
import { optimizeHistory, CAPPED_MAX_TOKENS, ESCALATED_MAX_TOKENS, getMaxOutputTokens } from './optimize.js';
|
|
13
14
|
import { recordUsage } from '../stats/tracker.js';
|
|
14
15
|
import { estimateCost } from '../pricing.js';
|
|
15
16
|
import { createSessionId, appendToSession, updateSessionMeta, pruneOldSessions, } from '../session/storage.js';
|
|
@@ -237,13 +238,14 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
237
238
|
onAbortReady?.(() => abort.abort());
|
|
238
239
|
let loopCount = 0;
|
|
239
240
|
let recoveryAttempts = 0;
|
|
241
|
+
let compactFailures = 0;
|
|
240
242
|
let maxTokensOverride;
|
|
241
243
|
let lastActivity = Date.now();
|
|
242
244
|
// Agent loop for this user message
|
|
243
245
|
while (loopCount < maxTurns) {
|
|
244
246
|
loopCount++;
|
|
245
247
|
// ── Token optimization pipeline ──
|
|
246
|
-
// 1. Strip thinking, budget tool results, time-based cleanup
|
|
248
|
+
// 1. Strip thinking, budget tool results, time-based cleanup (always — cheap)
|
|
247
249
|
const optimized = optimizeHistory(history, {
|
|
248
250
|
debug: config.debug,
|
|
249
251
|
lastActivityTimestamp: lastActivity,
|
|
@@ -252,24 +254,45 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
252
254
|
history.length = 0;
|
|
253
255
|
history.push(...optimized);
|
|
254
256
|
}
|
|
255
|
-
// 2.
|
|
256
|
-
const
|
|
257
|
-
if (
|
|
257
|
+
// 2. Token reduction: age old results, normalize whitespace, trim verbose messages
|
|
258
|
+
const reduced = reduceTokens(history, config.debug);
|
|
259
|
+
if (reduced !== history) {
|
|
258
260
|
history.length = 0;
|
|
259
|
-
history.push(...
|
|
261
|
+
history.push(...reduced);
|
|
260
262
|
}
|
|
261
|
-
//
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
history
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
263
|
+
// 3. Microcompact: only when history has >15 messages (skip for short conversations)
|
|
264
|
+
if (history.length > 15) {
|
|
265
|
+
const microCompacted = microCompact(history, 8);
|
|
266
|
+
if (microCompacted !== history) {
|
|
267
|
+
history.length = 0;
|
|
268
|
+
history.push(...microCompacted);
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
// 4. Auto-compact: summarize history if approaching context limit
|
|
272
|
+
// Circuit breaker: stop retrying after 3 consecutive failures
|
|
273
|
+
if (compactFailures < 3) {
|
|
274
|
+
try {
|
|
275
|
+
const { history: compacted, compacted: didCompact } = await autoCompactIfNeeded(history, config.model, client, config.debug);
|
|
276
|
+
if (didCompact) {
|
|
277
|
+
history.length = 0;
|
|
278
|
+
history.push(...compacted);
|
|
279
|
+
resetTokenAnchor();
|
|
280
|
+
compactFailures = 0;
|
|
281
|
+
if (config.debug) {
|
|
282
|
+
console.error(`[runcode] History compacted: ~${estimateHistoryTokens(history)} tokens`);
|
|
283
|
+
}
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
catch (compactErr) {
|
|
287
|
+
compactFailures++;
|
|
288
|
+
if (config.debug) {
|
|
289
|
+
console.error(`[runcode] Compaction failed (${compactFailures}/3): ${compactErr.message}`);
|
|
290
|
+
}
|
|
269
291
|
}
|
|
270
292
|
}
|
|
271
293
|
const systemPrompt = config.systemInstructions.join('\n\n');
|
|
272
|
-
|
|
294
|
+
const modelMaxOut = getMaxOutputTokens(config.model);
|
|
295
|
+
let maxTokens = Math.min(maxTokensOverride ?? CAPPED_MAX_TOKENS, modelMaxOut);
|
|
273
296
|
let responseParts = [];
|
|
274
297
|
let usage;
|
|
275
298
|
let stopReason;
|
package/dist/agent/optimize.d.ts
CHANGED
|
@@ -9,21 +9,18 @@
|
|
|
9
9
|
* 5. Pre-compact stripping — remove images/docs before summarization
|
|
10
10
|
*/
|
|
11
11
|
import type { Dialogue } from './types.js';
|
|
12
|
-
/** Default max_tokens (low to save slot reservation) */
|
|
13
|
-
export declare const CAPPED_MAX_TOKENS =
|
|
12
|
+
/** Default max_tokens (low to save output slot reservation) */
|
|
13
|
+
export declare const CAPPED_MAX_TOKENS = 16384;
|
|
14
14
|
/** Escalated max_tokens after hitting the cap */
|
|
15
15
|
export declare const ESCALATED_MAX_TOKENS = 65536;
|
|
16
|
+
/** Get max output tokens for a model */
|
|
17
|
+
export declare function getMaxOutputTokens(model: string): number;
|
|
16
18
|
/**
|
|
17
19
|
* Cap tool result sizes to prevent context bloat.
|
|
18
20
|
* Large results (>50K chars) are truncated with a preview.
|
|
19
21
|
* Per-message aggregate is also capped at 200K chars.
|
|
20
22
|
*/
|
|
21
23
|
export declare function budgetToolResults(history: Dialogue[]): Dialogue[];
|
|
22
|
-
/**
|
|
23
|
-
* Remove thinking blocks from older assistant messages.
|
|
24
|
-
* Keeps thinking only in the most recent assistant message.
|
|
25
|
-
* Thinking blocks are large and not needed for context after the decision is made.
|
|
26
|
-
*/
|
|
27
24
|
export declare function stripOldThinking(history: Dialogue[]): Dialogue[];
|
|
28
25
|
/**
|
|
29
26
|
* After an idle gap (>60 min), clear old tool results.
|
package/dist/agent/optimize.js
CHANGED
|
@@ -15,10 +15,25 @@ const MAX_TOOL_RESULT_CHARS = 50_000;
|
|
|
15
15
|
const MAX_TOOL_RESULTS_PER_MESSAGE_CHARS = 200_000;
|
|
16
16
|
/** Preview size when truncating */
|
|
17
17
|
const PREVIEW_CHARS = 2_000;
|
|
18
|
-
/** Default max_tokens (low to save slot reservation) */
|
|
19
|
-
export const CAPPED_MAX_TOKENS =
|
|
18
|
+
/** Default max_tokens (low to save output slot reservation) */
|
|
19
|
+
export const CAPPED_MAX_TOKENS = 16_384;
|
|
20
20
|
/** Escalated max_tokens after hitting the cap */
|
|
21
21
|
export const ESCALATED_MAX_TOKENS = 65_536;
|
|
22
|
+
/** Per-model max output tokens — prevents requesting more than the model supports */
const MODEL_MAX_OUTPUT = {
    'anthropic/claude-opus-4.6': 32_000,
    'anthropic/claude-sonnet-4.6': 64_000,
    'anthropic/claude-haiku-4.5-20251001': 16_384,
    'openai/gpt-5.4': 32_768,
    'openai/gpt-5-mini': 16_384,
    'google/gemini-2.5-pro': 65_536,
    'google/gemini-2.5-flash': 65_536,
    'deepseek/deepseek-chat': 8_192,
};
/**
 * Get max output tokens for a model.
 *
 * @param {string} model - Exact model id as used for the keys of MODEL_MAX_OUTPUT.
 * @returns {number} The table entry for the model, or 16,384 when the id is not listed.
 */
export function getMaxOutputTokens(model) {
    // Own-property check: a bare `MODEL_MAX_OUTPUT[model]` lookup would resolve
    // ids such as 'constructor' or 'toString' to inherited Object.prototype
    // members (functions) instead of falling back to the numeric default.
    return Object.prototype.hasOwnProperty.call(MODEL_MAX_OUTPUT, model)
        ? MODEL_MAX_OUTPUT[model]
        : 16_384;
}
|
|
22
37
|
/** Idle gap (minutes) after which old tool results are cleared */
|
|
23
38
|
const IDLE_GAP_THRESHOLD_MINUTES = 60;
|
|
24
39
|
/** Number of recent tool results to keep during time-based cleanup */
|
|
@@ -86,26 +101,29 @@ export function budgetToolResults(history) {
|
|
|
86
101
|
// ─── 2. Thinking Block Stripping ───────────────────────────────────────────
|
|
87
102
|
/**
|
|
88
103
|
* Remove thinking blocks from older assistant messages.
|
|
89
|
-
* Keeps thinking only in the most recent assistant
|
|
90
|
-
*
|
|
104
|
+
* Keeps thinking only in the most recent N assistant messages (default: last 2 turns).
|
|
105
|
+
* Older thinking blocks are large and not needed after the decision is made.
|
|
91
106
|
*/
|
|
107
|
+
const KEEP_THINKING_TURNS = 2;
|
|
92
108
|
export function stripOldThinking(history) {
|
|
93
|
-
// Find the last assistant message
|
|
94
|
-
|
|
109
|
+
// Find the last N assistant message indices to preserve their thinking
|
|
110
|
+
const assistantIndices = [];
|
|
95
111
|
for (let i = history.length - 1; i >= 0; i--) {
|
|
96
112
|
if (history[i].role === 'assistant') {
|
|
97
|
-
|
|
98
|
-
|
|
113
|
+
assistantIndices.push(i);
|
|
114
|
+
if (assistantIndices.length >= KEEP_THINKING_TURNS)
|
|
115
|
+
break;
|
|
99
116
|
}
|
|
100
117
|
}
|
|
101
|
-
if (
|
|
118
|
+
if (assistantIndices.length === 0)
|
|
102
119
|
return history;
|
|
120
|
+
const keepSet = new Set(assistantIndices);
|
|
103
121
|
const result = [];
|
|
104
122
|
let modified = false;
|
|
105
123
|
for (let i = 0; i < history.length; i++) {
|
|
106
124
|
const msg = history[i];
|
|
107
|
-
//
|
|
108
|
-
if (msg.role === 'assistant' && i
|
|
125
|
+
// Strip thinking from assistant messages NOT in the keep set
|
|
126
|
+
if (msg.role === 'assistant' && !keepSet.has(i) && Array.isArray(msg.content)) {
|
|
109
127
|
const filtered = msg.content.filter((part) => part.type !== 'thinking');
|
|
110
128
|
if (filtered.length < msg.content.length) {
|
|
111
129
|
modified = true;
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token Reduction for runcode.
|
|
3
|
+
* Original implementation — reduces context size through intelligent pruning.
|
|
4
|
+
*
|
|
5
|
+
* Strategy: instead of compression/encoding, we PRUNE redundant content.
|
|
6
|
+
* The model doesn't need verbose tool outputs from 20 turns ago.
|
|
7
|
+
*
|
|
8
|
+
* Three reduction passes:
|
|
9
|
+
* 1. Tool result aging — progressively shorten old tool results
|
|
10
|
+
* 2. Whitespace normalization — remove excessive blank lines and indentation
|
|
11
|
+
* 3. Verbose assistant message trimming — shorten long assistant messages from old turns
|
|
12
|
+
*/
|
|
13
|
+
import type { Dialogue } from './types.js';
|
|
14
|
+
/**
|
|
15
|
+
* Progressively shorten tool results based on age.
|
|
16
|
+
* Recent results: keep full. Older results: keep summary. Very old: keep one line.
|
|
17
|
+
*
|
|
18
|
+
* This is the biggest token saver — a 10KB bash output from 20 turns ago
|
|
19
|
+
* can be reduced to "✓ Bash: ran npm test (exit 0)" saving ~2500 tokens.
|
|
20
|
+
*/
|
|
21
|
+
export declare function ageToolResults(history: Dialogue[]): Dialogue[];
|
|
22
|
+
/**
|
|
23
|
+
* Normalize whitespace in text messages.
|
|
24
|
+
* - Collapse 3+ blank lines to 2
|
|
25
|
+
* - Remove trailing spaces
|
|
26
|
+
* - Reduce indentation beyond 8 spaces to 8
|
|
27
|
+
*/
|
|
28
|
+
export declare function normalizeWhitespace(history: Dialogue[]): Dialogue[];
|
|
29
|
+
/**
|
|
30
|
+
* Trim very long assistant text messages from old turns.
|
|
31
|
+
* Recent messages: keep full. Old long messages: keep roughly the first 1500 chars (500 per text part).
|
|
32
|
+
*/
|
|
33
|
+
export declare function trimOldAssistantMessages(history: Dialogue[]): Dialogue[];
|
|
34
|
+
/**
|
|
35
|
+
* Run all token reduction passes on conversation history.
|
|
36
|
+
* Returns same reference if nothing changed (cheap identity check).
|
|
37
|
+
*/
|
|
38
|
+
export declare function reduceTokens(history: Dialogue[], debug?: boolean): Dialogue[];
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token Reduction for runcode.
|
|
3
|
+
* Original implementation — reduces context size through intelligent pruning.
|
|
4
|
+
*
|
|
5
|
+
* Strategy: instead of compression/encoding, we PRUNE redundant content.
|
|
6
|
+
* The model doesn't need verbose tool outputs from 20 turns ago.
|
|
7
|
+
*
|
|
8
|
+
* Three reduction passes:
|
|
9
|
+
* 1. Tool result aging — progressively shorten old tool results
|
|
10
|
+
* 2. Whitespace normalization — remove excessive blank lines and indentation
|
|
11
|
+
* 3. Verbose assistant message trimming — shorten long assistant messages from old turns
|
|
12
|
+
*/
|
|
13
|
+
// ─── 1. Tool Result Aging ─────────────────────────────────────────────────
|
|
14
|
+
/**
 * Progressively shorten tool results based on age.
 *
 * "Age" is counted in user messages that carry at least one tool_result,
 * newest = 1. Tiers:
 *   - age 1-3:  kept in full
 *   - age 4-8:  results longer than 500 chars keep their first ~500 chars
 *   - age 9-15: results longer than 200 chars keep their first line (max 150 chars)
 *   - age 16+:  results longer than 80 chars become a one-line summary
 *
 * Non-string tool result content is measured and truncated via its JSON form,
 * so a shortened result always has string content.
 *
 * @param {Array} history - Conversation messages ({ role, content }).
 * @returns {Array} The same `history` reference when nothing was shortened,
 *   otherwise a shallow copy with the affected messages replaced.
 */
export function ageToolResults(history) {
    // Suffix appended by the age 4-8 tier; used to recognise results that were
    // already shortened on an earlier pass.
    const TIER1_SUFFIX = /\n\.\.\. \(\d+ chars omitted, \d+ turns ago\)$/;
    // Find all tool_result positions
    const toolPositions = [];
    for (let i = 0; i < history.length; i++) {
        const msg = history[i];
        if (msg.role === 'user' &&
            Array.isArray(msg.content) &&
            msg.content.some(p => p.type === 'tool_result')) {
            toolPositions.push(i);
        }
    }
    if (toolPositions.length <= 3)
        return history; // Nothing to age
    const result = [...history];
    const totalResults = toolPositions.length;
    let anyModified = false;
    for (let idx = 0; idx < toolPositions.length; idx++) {
        const age = totalResults - idx; // Higher = older
        // Positions are in chronological order, so every remaining one is recent too.
        if (age <= 3)
            break;
        const pos = toolPositions[idx];
        const msg = result[pos];
        let modified = false;
        const aged = msg.content.map(part => {
            if (part.type !== 'tool_result')
                return part;
            // JSON.stringify(undefined) yields undefined; treat that as empty content.
            const content = typeof part.content === 'string'
                ? part.content
                : (JSON.stringify(part.content) ?? '');
            const charLen = content.length;
            // Age 4-8: keep first 500 chars
            if (age <= 8) {
                // Do not re-truncate a result this tier already shortened: the
                // suffix pushes it past 500 chars, and a second pass would
                // overwrite the recorded omitted-char count.
                const suffix = content.match(TIER1_SUFFIX);
                const payloadLen = suffix ? charLen - suffix[0].length : charLen;
                // Short results stay untouched in this tier; they must not
                // fall through to the harsher first-line-only tier below.
                if (payloadLen <= 500)
                    return part;
                modified = true;
                const truncated = content.slice(0, 500);
                const lastNl = truncated.lastIndexOf('\n');
                // Prefer cutting at a line break, unless that would drop more than half.
                const clean = lastNl > 250 ? truncated.slice(0, lastNl) : truncated;
                return {
                    ...part,
                    content: `${clean}\n... (${charLen - clean.length} chars omitted, ${age} turns ago)`,
                };
            }
            // Age 9-15: keep the first line (up to 150 chars) of results over 200 chars
            if (age <= 15) {
                if (charLen <= 200)
                    return part;
                modified = true;
                const firstLine = content.split('\n')[0].slice(0, 150);
                return {
                    ...part,
                    content: `${firstLine}\n... (${charLen} chars, ${age} turns ago)`,
                };
            }
            // Age 16+: one line summary
            if (charLen <= 80)
                return part;
            modified = true;
            const summary = content.split('\n')[0].slice(0, 60);
            return {
                ...part,
                content: part.is_error
                    ? `[Error: ${summary}...]`
                    : `[Result: ${summary}...]`,
            };
        });
        if (modified) {
            // Spread keeps any extra fields the message carries besides role/content.
            result[pos] = { ...msg, content: aged };
            anyModified = true;
        }
    }
    // Hand back the original reference when untouched so callers' identity
    // checks (`aged !== history`) stay meaningful.
    return anyModified ? result : history;
}
|
|
93
|
+
// ─── 2. Whitespace Normalization ──────────────────────────────────────────
|
|
94
|
+
/**
 * Normalize whitespace in text messages.
 * - Remove trailing spaces and tabs on every line
 * - Collapse 3+ blank lines to 2 (at most 3 consecutive newlines)
 * - Reduce leading indentation beyond 8 spaces to 8
 *
 * Only messages whose content is a plain string are touched; array content is
 * passed through as-is.
 *
 * NOTE(review): capping indentation rewrites message text and can change the
 * meaning of indentation-sensitive snippets (Python, YAML) — confirm this is
 * acceptable for the messages that reach this pass.
 *
 * @param {Array} history - Conversation messages ({ role, content }).
 * @returns {Array} The same `history` reference if no message changed,
 *   otherwise a new array with the cleaned messages.
 */
export function normalizeWhitespace(history) {
    // Replacement for over-indented lines: exactly 8 spaces, as documented.
    const MAX_INDENT = ' '.repeat(8);
    let modified = false;
    const result = history.map(msg => {
        if (typeof msg.content !== 'string')
            return msg;
        const original = msg.content;
        const cleaned = original
            .replace(/[ \t]+$/gm, '') // Trailing spaces
            .replace(/\n{4,}/g, '\n\n\n') // Max 3 consecutive newlines
            .replace(/^ {9,}/gm, MAX_INDENT); // Cap indentation at 8 spaces
        if (cleaned === original)
            return msg;
        modified = true;
        return { ...msg, content: cleaned };
    });
    return modified ? result : history;
}
|
|
118
|
+
// ─── 3. Verbose Assistant Message Trimming ────────────────────────────────
|
|
119
|
+
/**
 * Trim very long assistant text messages from old turns.
 *
 * The last 4 assistant messages are kept in full. Older ones are shortened:
 *   - string content over 1500 chars keeps roughly its first 1500 chars
 *     (cut back to a line break when one exists in the second half);
 *   - array content whose text parts total more than 1500 chars has every
 *     text part over 500 chars cut to its first 500 chars.
 * Non-text parts are never modified.
 *
 * The pass is idempotent: content that already ends with the truncation
 * marker and fits the limit without it is left alone.
 *
 * @param {Array} history - Conversation messages ({ role, content }).
 * @returns {Array} The same `history` reference if nothing was trimmed,
 *   otherwise a new array with the trimmed messages.
 */
export function trimOldAssistantMessages(history) {
    const MAX_OLD_ASSISTANT_CHARS = 1500;
    const MAX_OLD_PART_CHARS = 500;
    const KEEP_RECENT = 4; // Keep last 4 assistant messages full
    const MESSAGE_MARKER = '\n... (response truncated)';
    const PART_MARKER = '\n... (trimmed)';
    // Length of `text` ignoring a marker appended by an earlier pass. Without
    // this, the marker itself pushes trimmed content back over the limit and
    // every later pass would re-trim it.
    const payloadLength = (text, marker) => text.endsWith(marker) ? text.length - marker.length : text.length;
    const isTextPart = (p) => p.type === 'text' && typeof p.text === 'string';
    let assistantCount = 0;
    for (const msg of history) {
        if (msg.role === 'assistant')
            assistantCount++;
    }
    if (assistantCount <= KEEP_RECENT)
        return history;
    let seenAssistant = 0;
    let modified = false;
    const result = history.map(msg => {
        if (msg.role !== 'assistant')
            return msg;
        seenAssistant++;
        // Keep recent messages full
        if (assistantCount - seenAssistant < KEEP_RECENT)
            return msg;
        if (typeof msg.content === 'string') {
            if (payloadLength(msg.content, MESSAGE_MARKER) <= MAX_OLD_ASSISTANT_CHARS)
                return msg;
            modified = true;
            const truncated = msg.content.slice(0, MAX_OLD_ASSISTANT_CHARS);
            const lastNl = truncated.lastIndexOf('\n');
            const clean = lastNl > MAX_OLD_ASSISTANT_CHARS / 2 ? truncated.slice(0, lastNl) : truncated;
            return { ...msg, content: clean + MESSAGE_MARKER };
        }
        // Also handle content array with text parts
        if (Array.isArray(msg.content)) {
            const parts = msg.content;
            let totalChars = 0;
            for (const p of parts) {
                if (isTextPart(p))
                    totalChars += p.text.length;
            }
            if (totalChars <= MAX_OLD_ASSISTANT_CHARS)
                return msg;
            let changed = false;
            const trimmedParts = parts.map(p => {
                if (!isTextPart(p) || payloadLength(p.text, PART_MARKER) <= MAX_OLD_PART_CHARS)
                    return p;
                changed = true;
                return { ...p, text: p.text.slice(0, MAX_OLD_PART_CHARS) + PART_MARKER };
            });
            // Only report a change when a part was actually shortened, so the
            // "same reference if unchanged" contract holds.
            if (!changed)
                return msg;
            modified = true;
            return { ...msg, content: trimmedParts };
        }
        return msg;
    });
    return modified ? result : history;
}
|
|
171
|
+
// ─── Pipeline ─────────────────────────────────────────────────────────────
|
|
172
|
+
/**
 * Run all token reduction passes on conversation history.
 *
 * Passes run in order: tool result aging, whitespace normalization, then
 * trimming of old verbose assistant messages. Each pass receives the output
 * of the previous one. Histories with fewer than 8 messages are returned
 * untouched.
 *
 * @param {Array} history - Conversation messages ({ role, content }).
 * @param {boolean} [debug] - When truthy, log the estimated saving to stderr
 *   if more than 500 chars were removed.
 * @returns {Array} The reduced history; the same reference as `history` when
 *   every pass returned its input unchanged.
 */
export function reduceTokens(history, debug) {
    if (history.length < 8)
        return history; // Skip for short conversations
    const passes = [
        ageToolResults, // Pass 1: Age old tool results
        normalizeWhitespace, // Pass 2: Normalize whitespace
        trimOldAssistantMessages, // Pass 3: Trim old verbose assistant messages
    ];
    let current = history;
    for (const pass of passes) {
        current = pass(current);
    }
    // Size accounting exists only for the debug log, so skip the full-history
    // scans otherwise. Measuring once before and once after all passes gives
    // the same total as summing the per-pass differences.
    if (debug && current !== history) {
        const totalSaved = estimateChars(history) - estimateChars(current);
        if (totalSaved > 500) {
            const tokensSaved = Math.round(totalSaved / 4);
            console.error(`[runcode] Token reduction: ~${tokensSaved} tokens saved`);
        }
    }
    return current;
}
|
|
209
|
+
/**
 * Approximate the character size of a conversation history.
 *
 * Counts string message content, text parts, tool_result content and
 * tool_use input (the latter two via their JSON form when not a string).
 * Other part types contribute nothing.
 *
 * @param {Array} history - Conversation messages ({ role, content }).
 * @returns {number} Total number of characters counted.
 */
function estimateChars(history) {
    // JSON.stringify returns undefined for undefined input; count that as 0
    // instead of throwing on `.length`.
    const jsonLength = (value) => (JSON.stringify(value) ?? '').length;
    let total = 0;
    for (const msg of history) {
        if (typeof msg.content === 'string') {
            total += msg.content.length;
            continue;
        }
        if (!Array.isArray(msg.content))
            continue;
        for (const p of msg.content) {
            // Skip primitives and null: they carry no typed payload.
            if (p === null || typeof p !== 'object')
                continue;
            switch (p.type) {
                case 'text':
                    total += typeof p.text === 'string' ? p.text.length : 0;
                    break;
                case 'tool_result':
                    total += typeof p.content === 'string' ? p.content.length : jsonLength(p.content);
                    break;
                case 'tool_use':
                    total += jsonLength(p.input);
                    break;
                default:
                    break;
            }
        }
    }
    return total;
}
|
package/dist/agent/tokens.js
CHANGED
|
@@ -64,7 +64,8 @@ export function resetTokenAnchor() {
|
|
|
64
64
|
* JSON-heavy content uses 2 bytes/token; general text uses 4.
|
|
65
65
|
*/
|
|
66
66
|
export function estimateTokens(text, bytesPerToken = DEFAULT_BYTES_PER_TOKEN) {
    // Missing text contributes no tokens; Buffer.byteLength would throw a
    // TypeError on null/undefined.
    if (text == null)
        return 0;
    // Pad by ~33% for conservative estimation — better to over-count than under-count
    return Math.ceil(Buffer.byteLength(text, 'utf-8') / bytesPerToken * 1.33);
}
|
|
69
70
|
/**
|
|
70
71
|
* Estimate tokens for a content part.
|