@blockrun/runcode 2.2.7 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/commands.js +36 -0
- package/dist/agent/compact.js +9 -5
- package/dist/agent/loop.js +41 -16
- package/dist/agent/optimize.d.ts +4 -7
- package/dist/agent/optimize.js +29 -11
- package/dist/agent/tokens.js +2 -1
- package/dist/compression/adapter.d.ts +13 -0
- package/dist/compression/adapter.js +104 -0
- package/dist/compression/codebook.d.ts +23 -0
- package/dist/compression/codebook.js +118 -0
- package/dist/compression/index.d.ts +32 -0
- package/dist/compression/index.js +258 -0
- package/dist/compression/layers/deduplication.d.ts +27 -0
- package/dist/compression/layers/deduplication.js +97 -0
- package/dist/compression/layers/dictionary.d.ts +20 -0
- package/dist/compression/layers/dictionary.js +67 -0
- package/dist/compression/layers/dynamic-codebook.d.ts +25 -0
- package/dist/compression/layers/dynamic-codebook.js +145 -0
- package/dist/compression/layers/json-compact.d.ts +22 -0
- package/dist/compression/layers/json-compact.js +74 -0
- package/dist/compression/layers/observation.d.ts +20 -0
- package/dist/compression/layers/observation.js +126 -0
- package/dist/compression/layers/paths.d.ts +23 -0
- package/dist/compression/layers/paths.js +107 -0
- package/dist/compression/layers/whitespace.d.ts +26 -0
- package/dist/compression/layers/whitespace.js +57 -0
- package/dist/compression/types.d.ts +83 -0
- package/dist/compression/types.js +26 -0
- package/package.json +1 -1
package/dist/agent/commands.js
CHANGED
|
@@ -71,6 +71,42 @@ const DIRECT_COMMANDS = {
|
|
|
71
71
|
ctx.onEvent({ kind: 'text_delta', text: 'Last commit undone. Changes preserved in staging.\n' });
|
|
72
72
|
emitDone(ctx);
|
|
73
73
|
},
|
|
74
|
+
'/tokens': (ctx) => {
|
|
75
|
+
const { estimated, apiAnchored } = getAnchoredTokenCount(ctx.history);
|
|
76
|
+
const contextWindow = getContextWindow(ctx.config.model);
|
|
77
|
+
const pct = (estimated / contextWindow) * 100;
|
|
78
|
+
// Count tool results and thinking blocks
|
|
79
|
+
let toolResults = 0;
|
|
80
|
+
let thinkingBlocks = 0;
|
|
81
|
+
let totalToolChars = 0;
|
|
82
|
+
for (const msg of ctx.history) {
|
|
83
|
+
if (typeof msg.content === 'string')
|
|
84
|
+
continue;
|
|
85
|
+
if (!Array.isArray(msg.content))
|
|
86
|
+
continue;
|
|
87
|
+
for (const part of msg.content) {
|
|
88
|
+
if ('type' in part) {
|
|
89
|
+
if (part.type === 'tool_result') {
|
|
90
|
+
toolResults++;
|
|
91
|
+
const c = typeof part.content === 'string' ? part.content : JSON.stringify(part.content);
|
|
92
|
+
totalToolChars += c.length;
|
|
93
|
+
}
|
|
94
|
+
if (part.type === 'thinking')
|
|
95
|
+
thinkingBlocks++;
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
ctx.onEvent({ kind: 'text_delta', text: `**Token Usage**\n` +
|
|
100
|
+
` Estimated: ~${estimated.toLocaleString()} tokens ${apiAnchored ? '(API-anchored)' : '(estimated)'}\n` +
|
|
101
|
+
` Context: ${(contextWindow / 1000).toFixed(0)}k window (${pct.toFixed(1)}% used)\n` +
|
|
102
|
+
` Messages: ${ctx.history.length}\n` +
|
|
103
|
+
` Tool results: ${toolResults} (${(totalToolChars / 1024).toFixed(0)}KB)\n` +
|
|
104
|
+
` Thinking: ${thinkingBlocks} blocks\n` +
|
|
105
|
+
(pct > 80 ? ' ⚠ Near limit — run /compact\n' : '') +
|
|
106
|
+
(pct > 60 ? '' : ' ✓ Healthy\n')
|
|
107
|
+
});
|
|
108
|
+
emitDone(ctx);
|
|
109
|
+
},
|
|
74
110
|
'/help': (ctx) => {
|
|
75
111
|
ctx.onEvent({ kind: 'text_delta', text: `**RunCode Commands**\n\n` +
|
|
76
112
|
` **Coding:** /commit /review /test /fix /debug /explain /search /find /refactor /scaffold\n` +
|
package/dist/agent/compact.js
CHANGED
|
@@ -204,15 +204,19 @@ function formatForSummarization(messages) {
|
|
|
204
204
|
* Pick a cheaper/faster model for compaction to save cost.
|
|
205
205
|
*/
|
|
206
206
|
function pickCompactionModel(primaryModel) {
    // Summarization does not need the primary model; tier down to save cost.
    // Matching is by substring and the checks run in order, so the first hit wins.
    //
    // Tier 1: any id containing 'opus' or 'pro' → sonnet.
    // NOTE(review): 'pro' is a broad substring — it also captures ids such as
    // 'google/gemini-2.5-pro' and 'openai/gpt-5.4-pro', so those resolve to
    // sonnet here and never reach the tier below. Confirm that is intended.
    if (primaryModel.includes('opus') || primaryModel.includes('pro')) {
        return 'anthropic/claude-sonnet-4.6';
    }
    // Tier 2: sonnet-class models → haiku.
    if (primaryModel.includes('sonnet') || primaryModel.includes('gpt-5.4')) {
        return 'anthropic/claude-haiku-4.5-20251001';
    }
    // Everything else (haiku/mini/nano, free or unknown models) → flash,
    // the cheapest capable model.
    return 'google/gemini-2.5-flash';
}
|
|
217
221
|
/**
|
|
218
222
|
* Emergency fallback: drop oldest messages until under threshold.
|
package/dist/agent/loop.js
CHANGED
|
@@ -7,9 +7,10 @@ import { ModelClient } from './llm.js';
|
|
|
7
7
|
import { autoCompactIfNeeded, microCompact } from './compact.js';
|
|
8
8
|
import { estimateHistoryTokens, updateActualTokens, resetTokenAnchor } from './tokens.js';
|
|
9
9
|
import { handleSlashCommand } from './commands.js';
|
|
10
|
+
import { compressHistory } from '../compression/adapter.js';
|
|
10
11
|
import { PermissionManager } from './permissions.js';
|
|
11
12
|
import { StreamingExecutor } from './streaming-executor.js';
|
|
12
|
-
import { optimizeHistory, CAPPED_MAX_TOKENS, ESCALATED_MAX_TOKENS } from './optimize.js';
|
|
13
|
+
import { optimizeHistory, CAPPED_MAX_TOKENS, ESCALATED_MAX_TOKENS, getMaxOutputTokens } from './optimize.js';
|
|
13
14
|
import { recordUsage } from '../stats/tracker.js';
|
|
14
15
|
import { estimateCost } from '../pricing.js';
|
|
15
16
|
import { createSessionId, appendToSession, updateSessionMeta, pruneOldSessions, } from '../session/storage.js';
|
|
@@ -237,13 +238,14 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
237
238
|
onAbortReady?.(() => abort.abort());
|
|
238
239
|
let loopCount = 0;
|
|
239
240
|
let recoveryAttempts = 0;
|
|
241
|
+
let compactFailures = 0;
|
|
240
242
|
let maxTokensOverride;
|
|
241
243
|
let lastActivity = Date.now();
|
|
242
244
|
// Agent loop for this user message
|
|
243
245
|
while (loopCount < maxTurns) {
|
|
244
246
|
loopCount++;
|
|
245
247
|
// ── Token optimization pipeline ──
|
|
246
|
-
// 1. Strip thinking, budget tool results, time-based cleanup
|
|
248
|
+
// 1. Strip thinking, budget tool results, time-based cleanup (always — cheap)
|
|
247
249
|
const optimized = optimizeHistory(history, {
|
|
248
250
|
debug: config.debug,
|
|
249
251
|
lastActivityTimestamp: lastActivity,
|
|
@@ -252,24 +254,47 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
252
254
|
history.length = 0;
|
|
253
255
|
history.push(...optimized);
|
|
254
256
|
}
|
|
255
|
-
// 2. Microcompact:
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
history
|
|
259
|
-
|
|
257
|
+
// 2. Microcompact: only when history has >15 messages (skip for short conversations)
|
|
258
|
+
if (history.length > 15) {
|
|
259
|
+
const microCompacted = microCompact(history, 8);
|
|
260
|
+
if (microCompacted !== history) {
|
|
261
|
+
history.length = 0;
|
|
262
|
+
history.push(...microCompacted);
|
|
263
|
+
}
|
|
260
264
|
}
|
|
261
|
-
//
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
265
|
+
// 3. Context compression: 7-layer compression for 15-40% token savings
|
|
266
|
+
if (history.length > 10) {
|
|
267
|
+
const compressed = await compressHistory(history, config.debug);
|
|
268
|
+
if (compressed) {
|
|
269
|
+
history.length = 0;
|
|
270
|
+
history.push(...compressed.history);
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
// 4. Auto-compact: summarize history if approaching context limit
|
|
274
|
+
// Circuit breaker: stop retrying after 3 consecutive failures
|
|
275
|
+
if (compactFailures < 3) {
|
|
276
|
+
try {
|
|
277
|
+
const { history: compacted, compacted: didCompact } = await autoCompactIfNeeded(history, config.model, client, config.debug);
|
|
278
|
+
if (didCompact) {
|
|
279
|
+
history.length = 0;
|
|
280
|
+
history.push(...compacted);
|
|
281
|
+
resetTokenAnchor();
|
|
282
|
+
compactFailures = 0;
|
|
283
|
+
if (config.debug) {
|
|
284
|
+
console.error(`[runcode] History compacted: ~${estimateHistoryTokens(history)} tokens`);
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
catch (compactErr) {
|
|
289
|
+
compactFailures++;
|
|
290
|
+
if (config.debug) {
|
|
291
|
+
console.error(`[runcode] Compaction failed (${compactFailures}/3): ${compactErr.message}`);
|
|
292
|
+
}
|
|
269
293
|
}
|
|
270
294
|
}
|
|
271
295
|
const systemPrompt = config.systemInstructions.join('\n\n');
|
|
272
|
-
|
|
296
|
+
const modelMaxOut = getMaxOutputTokens(config.model);
|
|
297
|
+
let maxTokens = Math.min(maxTokensOverride ?? CAPPED_MAX_TOKENS, modelMaxOut);
|
|
273
298
|
let responseParts = [];
|
|
274
299
|
let usage;
|
|
275
300
|
let stopReason;
|
package/dist/agent/optimize.d.ts
CHANGED
|
@@ -9,21 +9,18 @@
|
|
|
9
9
|
* 5. Pre-compact stripping — remove images/docs before summarization
|
|
10
10
|
*/
|
|
11
11
|
import type { Dialogue } from './types.js';
|
|
12
|
-
/** Default max_tokens (low to save slot reservation) */
|
|
13
|
-
export declare const CAPPED_MAX_TOKENS =
|
|
12
|
+
/** Default max_tokens (low to save output slot reservation) */
|
|
13
|
+
export declare const CAPPED_MAX_TOKENS = 16384;
|
|
14
14
|
/** Escalated max_tokens after hitting the cap */
|
|
15
15
|
export declare const ESCALATED_MAX_TOKENS = 65536;
|
|
16
|
+
/** Get max output tokens for a model */
|
|
17
|
+
export declare function getMaxOutputTokens(model: string): number;
|
|
16
18
|
/**
|
|
17
19
|
* Cap tool result sizes to prevent context bloat.
|
|
18
20
|
* Large results (>50K chars) are truncated with a preview.
|
|
19
21
|
* Per-message aggregate is also capped at 200K chars.
|
|
20
22
|
*/
|
|
21
23
|
export declare function budgetToolResults(history: Dialogue[]): Dialogue[];
|
|
22
|
-
/**
|
|
23
|
-
* Remove thinking blocks from older assistant messages.
|
|
24
|
-
* Keeps thinking only in the most recent assistant message.
|
|
25
|
-
* Thinking blocks are large and not needed for context after the decision is made.
|
|
26
|
-
*/
|
|
27
24
|
export declare function stripOldThinking(history: Dialogue[]): Dialogue[];
|
|
28
25
|
/**
|
|
29
26
|
* After an idle gap (>60 min), clear old tool results.
|
package/dist/agent/optimize.js
CHANGED
|
@@ -15,10 +15,25 @@ const MAX_TOOL_RESULT_CHARS = 50_000;
|
|
|
15
15
|
const MAX_TOOL_RESULTS_PER_MESSAGE_CHARS = 200_000;
|
|
16
16
|
/** Preview size when truncating */
|
|
17
17
|
const PREVIEW_CHARS = 2_000;
|
|
18
|
-
/** Default max_tokens (low to save slot reservation) */
|
|
19
|
-
export const CAPPED_MAX_TOKENS =
|
|
18
|
+
/** Default max_tokens (low to save output slot reservation) */
|
|
19
|
+
export const CAPPED_MAX_TOKENS = 16_384;
|
|
20
20
|
/** Escalated max_tokens after hitting the cap */
|
|
21
21
|
export const ESCALATED_MAX_TOKENS = 65_536;
|
|
22
|
+
/** Per-model max output tokens — prevents requesting more than the model supports */
const MODEL_MAX_OUTPUT = {
    'anthropic/claude-opus-4.6': 32_000,
    'anthropic/claude-sonnet-4.6': 64_000,
    'anthropic/claude-haiku-4.5-20251001': 16_384,
    'openai/gpt-5.4': 32_768,
    'openai/gpt-5-mini': 16_384,
    'google/gemini-2.5-pro': 65_536,
    'google/gemini-2.5-flash': 65_536,
    'deepseek/deepseek-chat': 8_192,
};
/**
 * Get max output tokens for a model.
 *
 * @param model Exact model id (e.g. 'openai/gpt-5.4'); lookup is by exact key.
 * @returns The limit listed in MODEL_MAX_OUTPUT, or 16 384 for ids not in the table.
 */
export function getMaxOutputTokens(model) {
    // Own-property check: a bare `MODEL_MAX_OUTPUT[model]` would also resolve
    // inherited keys such as 'toString' or 'constructor' to non-numeric values,
    // which `??` does not replace with the default.
    if (Object.prototype.hasOwnProperty.call(MODEL_MAX_OUTPUT, model)) {
        return MODEL_MAX_OUTPUT[model];
    }
    return 16_384;
}
|
|
22
37
|
/** Idle gap (minutes) after which old tool results are cleared */
|
|
23
38
|
const IDLE_GAP_THRESHOLD_MINUTES = 60;
|
|
24
39
|
/** Number of recent tool results to keep during time-based cleanup */
|
|
@@ -86,26 +101,29 @@ export function budgetToolResults(history) {
|
|
|
86
101
|
// ─── 2. Thinking Block Stripping ───────────────────────────────────────────
|
|
87
102
|
/**
|
|
88
103
|
* Remove thinking blocks from older assistant messages.
|
|
89
|
-
* Keeps thinking only in the most recent assistant
|
|
90
|
-
*
|
|
104
|
+
* Keeps thinking only in the most recent N assistant messages (default: last 2 turns).
|
|
105
|
+
* Older thinking blocks are large and not needed after the decision is made.
|
|
91
106
|
*/
|
|
107
|
+
const KEEP_THINKING_TURNS = 2;
|
|
92
108
|
export function stripOldThinking(history) {
|
|
93
|
-
// Find the last assistant message
|
|
94
|
-
|
|
109
|
+
// Find the last N assistant message indices to preserve their thinking
|
|
110
|
+
const assistantIndices = [];
|
|
95
111
|
for (let i = history.length - 1; i >= 0; i--) {
|
|
96
112
|
if (history[i].role === 'assistant') {
|
|
97
|
-
|
|
98
|
-
|
|
113
|
+
assistantIndices.push(i);
|
|
114
|
+
if (assistantIndices.length >= KEEP_THINKING_TURNS)
|
|
115
|
+
break;
|
|
99
116
|
}
|
|
100
117
|
}
|
|
101
|
-
if (
|
|
118
|
+
if (assistantIndices.length === 0)
|
|
102
119
|
return history;
|
|
120
|
+
const keepSet = new Set(assistantIndices);
|
|
103
121
|
const result = [];
|
|
104
122
|
let modified = false;
|
|
105
123
|
for (let i = 0; i < history.length; i++) {
|
|
106
124
|
const msg = history[i];
|
|
107
|
-
//
|
|
108
|
-
if (msg.role === 'assistant' && i
|
|
125
|
+
// Strip thinking from assistant messages NOT in the keep set
|
|
126
|
+
if (msg.role === 'assistant' && !keepSet.has(i) && Array.isArray(msg.content)) {
|
|
109
127
|
const filtered = msg.content.filter((part) => part.type !== 'thinking');
|
|
110
128
|
if (filtered.length < msg.content.length) {
|
|
111
129
|
modified = true;
|
package/dist/agent/tokens.js
CHANGED
|
@@ -64,7 +64,8 @@ export function resetTokenAnchor() {
|
|
|
64
64
|
* JSON-heavy content uses 2 bytes/token; general text uses 4.
|
|
65
65
|
*/
|
|
66
66
|
export function estimateTokens(text, bytesPerToken = DEFAULT_BYTES_PER_TOKEN) {
    // Size the text by its UTF-8 byte length rather than its character count.
    const utf8Bytes = Buffer.byteLength(text, 'utf-8');
    const rawEstimate = utf8Bytes / bytesPerToken;
    // Inflate by ~33% (factor 1.33) so the estimate errs on the high side —
    // over-counting is safer than under-counting.
    return Math.ceil(rawEstimate * 1.33);
}
|
|
69
70
|
/**
|
|
70
71
|
* Estimate tokens for a content part.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Adapter between brcc's Dialogue type and the compression lib's NormalizedMessage type.
|
|
3
|
+
*/
|
|
4
|
+
import type { Dialogue } from '../agent/types.js';
|
|
5
|
+
/**
|
|
6
|
+
* Compress conversation history to reduce token usage.
|
|
7
|
+
* Returns the compressed Dialogue[] with stats, or null when compression is skipped or fails.
|
|
8
|
+
*/
|
|
9
|
+
export declare function compressHistory(history: Dialogue[], debug?: boolean): Promise<{
|
|
10
|
+
history: Dialogue[];
|
|
11
|
+
saved: number;
|
|
12
|
+
ratio: number;
|
|
13
|
+
} | null>;
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Adapter between brcc's Dialogue type and the compression lib's NormalizedMessage type.
|
|
3
|
+
*/
|
|
4
|
+
import { compressContext, shouldCompress } from './index.js';
|
|
5
|
+
/**
|
|
6
|
+
* Convert brcc Dialogue[] to NormalizedMessage[] for compression.
|
|
7
|
+
*/
|
|
8
|
+
function dialogueToNormalized(history) {
|
|
9
|
+
return history.map(msg => {
|
|
10
|
+
if (typeof msg.content === 'string') {
|
|
11
|
+
return { role: msg.role, content: msg.content };
|
|
12
|
+
}
|
|
13
|
+
// Convert content parts to string representation
|
|
14
|
+
const parts = [];
|
|
15
|
+
const toolCalls = [];
|
|
16
|
+
for (const part of msg.content) {
|
|
17
|
+
if ('type' in part) {
|
|
18
|
+
if (part.type === 'text') {
|
|
19
|
+
parts.push(part.text);
|
|
20
|
+
}
|
|
21
|
+
else if (part.type === 'tool_use') {
|
|
22
|
+
const inv = part;
|
|
23
|
+
toolCalls.push({
|
|
24
|
+
id: inv.id,
|
|
25
|
+
type: 'function',
|
|
26
|
+
function: { name: inv.name, arguments: JSON.stringify(inv.input) },
|
|
27
|
+
});
|
|
28
|
+
}
|
|
29
|
+
else if (part.type === 'tool_result') {
|
|
30
|
+
const res = part;
|
|
31
|
+
const content = typeof res.content === 'string' ? res.content : JSON.stringify(res.content);
|
|
32
|
+
parts.push(`[Tool result: ${content}]`);
|
|
33
|
+
}
|
|
34
|
+
else if (part.type === 'thinking') {
|
|
35
|
+
// Skip thinking in compression (already handled by stripOldThinking)
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
const normalized = {
|
|
40
|
+
role: msg.role,
|
|
41
|
+
content: parts.join('\n') || null,
|
|
42
|
+
};
|
|
43
|
+
if (toolCalls.length > 0) {
|
|
44
|
+
normalized.tool_calls = toolCalls;
|
|
45
|
+
}
|
|
46
|
+
return normalized;
|
|
47
|
+
});
|
|
48
|
+
}
|
|
49
|
+
/**
 * Compress conversation history to reduce token usage.
 *
 * Best-effort: any failure is reported on stderr (when `debug` is set) and
 * yields null so the caller keeps its history untouched.
 *
 * @param history Dialogue messages; not mutated.
 * @param debug When truthy, logs compression stats and failures to stderr.
 * @returns `{ history, saved, ratio }` — the merged history, characters saved
 *   and the compressor's ratio — or null when compression is not worthwhile,
 *   cannot be mapped back safely, or fails.
 */
export async function compressHistory(history, debug) {
    try {
        // Convert to NormalizedMessage format (inside the try so a malformed
        // message skips compression instead of throwing into the caller).
        const normalized = dialogueToNormalized(history);
        // Check if compression is worthwhile
        if (!shouldCompress(normalized)) {
            return null;
        }
        const result = await compressContext(normalized);
        if (debug) {
            const savedPct = Math.round((1 - result.compressionRatio) * 100);
            console.error(`[runcode] Compressed context: ${result.originalChars} → ${result.compressedChars} chars (${savedPct}% saved)`);
            if (result.stats) {
                const layers = Object.entries(result.stats)
                    .filter(([, v]) => typeof v === 'number' && v > 0)
                    .map(([k, v]) => `${k}: ${v}`)
                    .join(', ');
                if (layers)
                    console.error(`[runcode] Compression layers: ${layers}`);
            }
        }
        // Results are merged back by index, which is only meaningful when the
        // compressor returned exactly one message per input message. If the count
        // changed (e.g. a layer dropped duplicates), index i no longer refers to
        // the same message, so skip compression rather than mix up contents.
        // NOTE(review): a smarter alignment would need the compressor to report
        // which messages it removed.
        if (result.messages.length !== history.length) {
            if (debug) {
                console.error(`[runcode] Compression skipped: message count changed (${history.length} → ${result.messages.length})`);
            }
            return null;
        }
        // Only plain-string messages take the compressed text; structured content
        // (tool_use/tool_result parts) is kept as the original object.
        const compressed = history.map((original, i) => {
            const comp = result.messages[i];
            if (typeof original.content === 'string' && typeof comp.content === 'string') {
                return { role: original.role, content: comp.content };
            }
            return original;
        });
        return {
            history: compressed,
            saved: result.originalChars - result.compressedChars,
            ratio: result.compressionRatio,
        };
    }
    catch (err) {
        if (debug) {
            // Thrown values are not guaranteed to be Error instances.
            const reason = err instanceof Error ? err.message : String(err);
            console.error(`[runcode] Compression failed: ${reason}`);
        }
        return null;
    }
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Dictionary Codebook
|
|
3
|
+
*
|
|
4
|
+
* Static dictionary of frequently repeated phrases observed in LLM prompts.
|
|
5
|
+
* Built from analysis of BlockRun production logs.
|
|
6
|
+
*
|
|
7
|
+
* Format: Short code ($XX) -> Long phrase
|
|
8
|
+
* The LLM receives a codebook header and decodes in-context.
|
|
9
|
+
*/
|
|
10
|
+
export declare const STATIC_CODEBOOK: Record<string, string>;
|
|
11
|
+
/**
|
|
12
|
+
* Get the inverse codebook (phrase -> code), i.e. STATIC_CODEBOOK with keys and values swapped.
|
|
13
|
+
*/
|
|
14
|
+
export declare function getInverseCodebook(): Record<string, string>;
|
|
15
|
+
/**
|
|
16
|
+
* Generate the codebook header for inclusion in system message.
|
|
17
|
+
* LLMs can decode in-context using this header.
|
|
18
|
+
*/
|
|
19
|
+
export declare function generateCodebookHeader(usedCodes: Set<string>, pathMap?: Record<string, string>): string;
|
|
20
|
+
/**
|
|
21
|
+
* Decompress a string using the codebook (for logging).
|
|
22
|
+
*/
|
|
23
|
+
export declare function decompressContent(content: string, codebook?: Record<string, string>): string;
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Dictionary Codebook
|
|
3
|
+
*
|
|
4
|
+
* Static dictionary of frequently repeated phrases observed in LLM prompts.
|
|
5
|
+
* Built from analysis of BlockRun production logs.
|
|
6
|
+
*
|
|
7
|
+
* Format: Short code ($XX) -> Long phrase
|
|
8
|
+
* The LLM receives a codebook header and decodes in-context.
|
|
9
|
+
*/
|
|
10
|
+
// Static codebook - common patterns from system prompts
|
|
11
|
+
// Ordered by expected frequency and impact
|
|
12
|
+
export const STATIC_CODEBOOK = {
|
|
13
|
+
// High-impact: OpenClaw/Agent system prompt patterns (very common)
|
|
14
|
+
"$OC01": "unbrowse_", // Common prefix in tool names
|
|
15
|
+
"$OC02": "<location>",
|
|
16
|
+
"$OC03": "</location>",
|
|
17
|
+
"$OC04": "<name>",
|
|
18
|
+
"$OC05": "</name>",
|
|
19
|
+
"$OC06": "<description>",
|
|
20
|
+
"$OC07": "</description>",
|
|
21
|
+
"$OC08": "(may need login)",
|
|
22
|
+
"$OC09": "API skill for OpenClaw",
|
|
23
|
+
"$OC10": "endpoints",
|
|
24
|
+
// Skill/tool markers
|
|
25
|
+
"$SK01": "<available_skills>",
|
|
26
|
+
"$SK02": "</available_skills>",
|
|
27
|
+
"$SK03": "<skill>",
|
|
28
|
+
"$SK04": "</skill>",
|
|
29
|
+
// Schema patterns (very common in tool definitions)
|
|
30
|
+
"$T01": 'type: "function"',
|
|
31
|
+
"$T02": '"type": "function"',
|
|
32
|
+
"$T03": '"type": "string"',
|
|
33
|
+
"$T04": '"type": "object"',
|
|
34
|
+
"$T05": '"type": "array"',
|
|
35
|
+
"$T06": '"type": "boolean"',
|
|
36
|
+
"$T07": '"type": "number"',
|
|
37
|
+
// Common descriptions
|
|
38
|
+
"$D01": "description:",
|
|
39
|
+
"$D02": '"description":',
|
|
40
|
+
// Common instructions
|
|
41
|
+
"$I01": "You are a personal assistant",
|
|
42
|
+
"$I02": "Tool names are case-sensitive",
|
|
43
|
+
"$I03": "Call tools exactly as listed",
|
|
44
|
+
"$I04": "Use when",
|
|
45
|
+
"$I05": "without asking",
|
|
46
|
+
// Safety phrases
|
|
47
|
+
"$S01": "Do not manipulate or persuade",
|
|
48
|
+
"$S02": "Prioritize safety and human oversight",
|
|
49
|
+
"$S03": "unless explicitly requested",
|
|
50
|
+
// JSON patterns
|
|
51
|
+
"$J01": '"required": ["',
|
|
52
|
+
"$J02": '"properties": {',
|
|
53
|
+
"$J03": '"additionalProperties": false',
|
|
54
|
+
// Heartbeat patterns
|
|
55
|
+
"$H01": "HEARTBEAT_OK",
|
|
56
|
+
"$H02": "Read HEARTBEAT.md if it exists",
|
|
57
|
+
// Role markers
|
|
58
|
+
"$R01": '"role": "system"',
|
|
59
|
+
"$R02": '"role": "user"',
|
|
60
|
+
"$R03": '"role": "assistant"',
|
|
61
|
+
"$R04": '"role": "tool"',
|
|
62
|
+
// Common endings/phrases
|
|
63
|
+
"$E01": "would you like to",
|
|
64
|
+
"$E02": "Let me know if you",
|
|
65
|
+
"$E03": "internal APIs",
|
|
66
|
+
"$E04": "session cookies",
|
|
67
|
+
// BlockRun model aliases (common in prompts)
|
|
68
|
+
"$M01": "blockrun/",
|
|
69
|
+
"$M02": "openai/",
|
|
70
|
+
"$M03": "anthropic/",
|
|
71
|
+
"$M04": "google/",
|
|
72
|
+
"$M05": "xai/",
|
|
73
|
+
};
|
|
74
|
+
/**
|
|
75
|
+
* Get the inverse codebook for decompression.
|
|
76
|
+
*/
|
|
77
|
+
export function getInverseCodebook() {
|
|
78
|
+
const inverse = {};
|
|
79
|
+
for (const [code, phrase] of Object.entries(STATIC_CODEBOOK)) {
|
|
80
|
+
inverse[phrase] = code;
|
|
81
|
+
}
|
|
82
|
+
return inverse;
|
|
83
|
+
}
|
|
84
|
+
/**
|
|
85
|
+
* Generate the codebook header for inclusion in system message.
|
|
86
|
+
* LLMs can decode in-context using this header.
|
|
87
|
+
*/
|
|
88
|
+
export function generateCodebookHeader(usedCodes, pathMap = {}) {
|
|
89
|
+
if (usedCodes.size === 0 && Object.keys(pathMap).length === 0) {
|
|
90
|
+
return "";
|
|
91
|
+
}
|
|
92
|
+
const parts = [];
|
|
93
|
+
// Add used dictionary codes
|
|
94
|
+
if (usedCodes.size > 0) {
|
|
95
|
+
const codeEntries = Array.from(usedCodes)
|
|
96
|
+
.map((code) => `${code}=${STATIC_CODEBOOK[code]}`)
|
|
97
|
+
.join(", ");
|
|
98
|
+
parts.push(`[Dict: ${codeEntries}]`);
|
|
99
|
+
}
|
|
100
|
+
// Add path map
|
|
101
|
+
if (Object.keys(pathMap).length > 0) {
|
|
102
|
+
const pathEntries = Object.entries(pathMap)
|
|
103
|
+
.map(([code, path]) => `${code}=${path}`)
|
|
104
|
+
.join(", ");
|
|
105
|
+
parts.push(`[Paths: ${pathEntries}]`);
|
|
106
|
+
}
|
|
107
|
+
return parts.join("\n");
|
|
108
|
+
}
|
|
109
|
+
/**
|
|
110
|
+
* Decompress a string using the codebook (for logging).
|
|
111
|
+
*/
|
|
112
|
+
export function decompressContent(content, codebook = STATIC_CODEBOOK) {
|
|
113
|
+
let result = content;
|
|
114
|
+
for (const [code, phrase] of Object.entries(codebook)) {
|
|
115
|
+
result = result.split(code).join(phrase);
|
|
116
|
+
}
|
|
117
|
+
return result;
|
|
118
|
+
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM-Safe Context Compression
|
|
3
|
+
*
|
|
4
|
+
* Reduces token usage by 15-40% while preserving semantic meaning.
|
|
5
|
+
* Implements 7 compression layers inspired by claw-compactor.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* const result = await compressContext(messages);
|
|
9
|
+
* // result.messages -> compressed version to send to provider
|
|
10
|
+
* // result.originalMessages -> original for logging
|
|
11
|
+
*/
|
|
12
|
+
import { NormalizedMessage, CompressionConfig, CompressionResult } from "./types.js";
|
|
13
|
+
export * from "./types.js";
|
|
14
|
+
export { STATIC_CODEBOOK } from "./codebook.js";
|
|
15
|
+
/**
|
|
16
|
+
* Main compression function.
|
|
17
|
+
*
|
|
18
|
+
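* Applies the compression layers in sequence (this list names five; the header above and the package's layers/ directory count seven, adding observation and dynamic-codebook):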
|
|
19
|
+
* 1. Deduplication - Remove exact duplicate messages
|
|
20
|
+
* 2. Whitespace - Normalize excessive whitespace
|
|
21
|
+
* 3. Dictionary - Replace common phrases with codes
|
|
22
|
+
* 4. Paths - Shorten repeated file paths
|
|
23
|
+
* 5. JSON - Compact JSON in tool calls
|
|
24
|
+
*
|
|
25
|
+
* Then prepends a codebook header for the LLM to decode in-context.
|
|
26
|
+
*/
|
|
27
|
+
export declare function compressContext(messages: NormalizedMessage[], config?: Partial<CompressionConfig>): Promise<CompressionResult>;
|
|
28
|
+
/**
|
|
29
|
+
* Quick check if compression would benefit these messages.
|
|
30
|
+
* Returns true if messages are large enough to warrant compression.
|
|
31
|
+
*/
|
|
32
|
+
export declare function shouldCompress(messages: NormalizedMessage[]): boolean;
|