@blockrun/runcode 2.3.0 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/loop.js +10 -1
- package/dist/compression/adapter.d.ts +13 -0
- package/dist/compression/adapter.js +104 -0
- package/dist/compression/codebook.d.ts +23 -0
- package/dist/compression/codebook.js +118 -0
- package/dist/compression/index.d.ts +32 -0
- package/dist/compression/index.js +258 -0
- package/dist/compression/layers/deduplication.d.ts +27 -0
- package/dist/compression/layers/deduplication.js +97 -0
- package/dist/compression/layers/dictionary.d.ts +20 -0
- package/dist/compression/layers/dictionary.js +67 -0
- package/dist/compression/layers/dynamic-codebook.d.ts +25 -0
- package/dist/compression/layers/dynamic-codebook.js +145 -0
- package/dist/compression/layers/json-compact.d.ts +22 -0
- package/dist/compression/layers/json-compact.js +74 -0
- package/dist/compression/layers/observation.d.ts +20 -0
- package/dist/compression/layers/observation.js +126 -0
- package/dist/compression/layers/paths.d.ts +23 -0
- package/dist/compression/layers/paths.js +107 -0
- package/dist/compression/layers/whitespace.d.ts +26 -0
- package/dist/compression/layers/whitespace.js +57 -0
- package/dist/compression/types.d.ts +83 -0
- package/dist/compression/types.js +26 -0
- package/package.json +1 -1
package/dist/agent/loop.js
CHANGED
|
@@ -7,6 +7,7 @@ import { ModelClient } from './llm.js';
|
|
|
7
7
|
import { autoCompactIfNeeded, microCompact } from './compact.js';
|
|
8
8
|
import { estimateHistoryTokens, updateActualTokens, resetTokenAnchor } from './tokens.js';
|
|
9
9
|
import { handleSlashCommand } from './commands.js';
|
|
10
|
+
import { compressHistory } from '../compression/adapter.js';
|
|
10
11
|
import { PermissionManager } from './permissions.js';
|
|
11
12
|
import { StreamingExecutor } from './streaming-executor.js';
|
|
12
13
|
import { optimizeHistory, CAPPED_MAX_TOKENS, ESCALATED_MAX_TOKENS, getMaxOutputTokens } from './optimize.js';
|
|
@@ -261,7 +262,15 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
261
262
|
history.push(...microCompacted);
|
|
262
263
|
}
|
|
263
264
|
}
|
|
264
|
-
// 3.
|
|
265
|
+
// 3. Context compression: 7-layer compression for 15-40% token savings
|
|
266
|
+
if (history.length > 10) {
|
|
267
|
+
const compressed = await compressHistory(history, config.debug);
|
|
268
|
+
if (compressed) {
|
|
269
|
+
history.length = 0;
|
|
270
|
+
history.push(...compressed.history);
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
// 4. Auto-compact: summarize history if approaching context limit
|
|
265
274
|
// Circuit breaker: stop retrying after 3 consecutive failures
|
|
266
275
|
if (compactFailures < 3) {
|
|
267
276
|
try {
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Adapter between brcc's Dialogue type and the compression lib's NormalizedMessage type.
|
|
3
|
+
*/
|
|
4
|
+
import type { Dialogue } from '../agent/types.js';
|
|
5
|
+
/**
|
|
6
|
+
* Compress conversation history to reduce token usage.
|
|
7
|
+
* Returns the compressed Dialogue[] with stats, or null when compression is skipped or fails.
|
|
8
|
+
*/
|
|
9
|
+
export declare function compressHistory(history: Dialogue[], debug?: boolean): Promise<{
|
|
10
|
+
history: Dialogue[];
|
|
11
|
+
saved: number;
|
|
12
|
+
ratio: number;
|
|
13
|
+
} | null>;
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Adapter between brcc's Dialogue type and the compression lib's NormalizedMessage type.
|
|
3
|
+
*/
|
|
4
|
+
import { compressContext, shouldCompress } from './index.js';
|
|
5
|
+
/**
 * Convert brcc Dialogue[] to NormalizedMessage[] for compression.
 *
 * String content is passed through unchanged. Structured content is flattened:
 *  - `text` parts are joined with newlines into `content`,
 *  - `tool_use` parts become `tool_calls` entries of type "function",
 *  - `tool_result` parts are rendered as `[Tool result: ...]` text,
 *  - `thinking` parts and any unrecognised part are dropped.
 *
 * Content that is missing or not an array, and parts that are not objects,
 * are tolerated instead of throwing.
 *
 * @param history - Conversation history in Dialogue format.
 * @returns One normalized message per input message, in the same order.
 *          `content` is `null` when no text could be extracted.
 */
function dialogueToNormalized(history) {
    return history.map((msg) => {
        if (typeof msg.content === 'string') {
            return { role: msg.role, content: msg.content };
        }
        const textParts = [];
        const toolCalls = [];
        // Missing / non-array content is treated as "no parts" rather than
        // letting `for...of` throw on null or undefined.
        const contentParts = Array.isArray(msg.content) ? msg.content : [];
        for (const part of contentParts) {
            // The `in` operator throws a TypeError on primitives, so skip them.
            if (part === null || typeof part !== 'object' || !('type' in part)) {
                continue;
            }
            switch (part.type) {
                case 'text':
                    textParts.push(part.text);
                    break;
                case 'tool_use':
                    toolCalls.push({
                        id: part.id,
                        type: 'function',
                        // JSON.stringify(undefined) yields undefined, not a string;
                        // fall back to an empty argument object.
                        function: { name: part.name, arguments: JSON.stringify(part.input ?? {}) },
                    });
                    break;
                case 'tool_result': {
                    const rendered = typeof part.content === 'string'
                        ? part.content
                        : JSON.stringify(part.content);
                    textParts.push(`[Tool result: ${rendered}]`);
                    break;
                }
                default:
                    // 'thinking' (and unknown part types) are intentionally skipped.
                    break;
            }
        }
        const content = textParts.join('\n') || null;
        return toolCalls.length > 0
            ? { role: msg.role, content, tool_calls: toolCalls }
            : { role: msg.role, content };
    });
}
|
|
49
|
+
/**
 * Compress conversation history to reduce token usage.
 *
 * The history is normalized, run through `compressContext`, and the compressed
 * text is written back onto messages whose original content was a plain
 * string. Messages with structured content (tool_use / tool_result parts) are
 * returned untouched.
 *
 * Because results are mapped back by position, the deduplication layer (which
 * removes messages) is disabled here, and any result whose message count
 * differs from the input is rejected.
 *
 * NOTE(review): `saved` and `ratio` describe the normalized compression run,
 * not only the string messages that are actually replaced. Also, if the
 * message that received the codebook header had structured content, the
 * header is not carried over — confirm this cannot happen for real histories.
 *
 * @param history - Conversation history in Dialogue format.
 * @param debug - When truthy, compression stats and failures are logged to stderr.
 * @returns `{ history, saved, ratio }`, or `null` when compression is not
 *          worthwhile, cannot be mapped back safely, or fails.
 */
export async function compressHistory(history, debug) {
    try {
        // Normalization happens inside the try so that malformed history
        // degrades to "no compression" instead of throwing into the caller.
        const normalized = dialogueToNormalized(history);
        if (!shouldCompress(normalized)) {
            return null;
        }
        const result = await compressContext(normalized, {
            layers: { deduplication: false },
        });
        // A different message count (e.g. a header injected as an extra system
        // message) makes index-based mapping unsafe.
        if (result.messages.length !== history.length) {
            if (debug) {
                console.error('[runcode] Compression skipped: message count changed');
            }
            return null;
        }
        if (debug) {
            const savedPct = Math.round((1 - result.compressionRatio) * 100);
            console.error(`[runcode] Compressed context: ${result.originalChars} → ${result.compressedChars} chars (${savedPct}% saved)`);
            if (result.stats) {
                const layers = Object.entries(result.stats)
                    .filter(([, v]) => typeof v === 'number' && v > 0)
                    .map(([k, v]) => `${k}: ${v}`)
                    .join(', ');
                if (layers) {
                    console.error(`[runcode] Compression layers: ${layers}`);
                }
            }
        }
        // Only plain-string messages take the compressed text; everything else
        // keeps its original structure.
        const compressed = history.map((original, i) => {
            const comp = result.messages[i];
            const replaceable = comp.role === original.role &&
                typeof original.content === 'string' &&
                typeof comp.content === 'string';
            return replaceable ? { role: original.role, content: comp.content } : original;
        });
        return {
            history: compressed,
            saved: result.originalChars - result.compressedChars,
            ratio: result.compressionRatio,
        };
    }
    catch (err) {
        if (debug) {
            const reason = err instanceof Error ? err.message : String(err);
            console.error(`[runcode] Compression failed: ${reason}`);
        }
        return null;
    }
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Dictionary Codebook
|
|
3
|
+
*
|
|
4
|
+
* Static dictionary of frequently repeated phrases observed in LLM prompts.
|
|
5
|
+
* Built from analysis of BlockRun production logs.
|
|
6
|
+
*
|
|
7
|
+
* Format: Short code ($XX) -> Long phrase
|
|
8
|
+
* The LLM receives a codebook header and decodes in-context.
|
|
9
|
+
*/
|
|
10
|
+
export declare const STATIC_CODEBOOK: Record<string, string>;
|
|
11
|
+
/**
|
|
12
|
+
* Get the inverse codebook for decompression.
|
|
13
|
+
*/
|
|
14
|
+
export declare function getInverseCodebook(): Record<string, string>;
|
|
15
|
+
/**
|
|
16
|
+
* Generate the codebook header text (prepended to the first user message, or sent as a system message when there is none).
|
|
17
|
+
* LLMs can decode in-context using this header.
|
|
18
|
+
*/
|
|
19
|
+
export declare function generateCodebookHeader(usedCodes: Set<string>, pathMap?: Record<string, string>): string;
|
|
20
|
+
/**
|
|
21
|
+
* Decompress a string using the codebook (for logging).
|
|
22
|
+
*/
|
|
23
|
+
export declare function decompressContent(content: string, codebook?: Record<string, string>): string;
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Dictionary Codebook
|
|
3
|
+
*
|
|
4
|
+
* Static dictionary of frequently repeated phrases observed in LLM prompts.
|
|
5
|
+
* Built from analysis of BlockRun production logs.
|
|
6
|
+
*
|
|
7
|
+
* Format: Short code ($XX) -> Long phrase
|
|
8
|
+
* The LLM receives a codebook header and decodes in-context.
|
|
9
|
+
*/
|
|
10
|
+
// Static code -> phrase table for patterns that recur in system prompts.
// Entries are grouped by category, roughly by expected frequency and impact.
export const STATIC_CODEBOOK = {
    // OpenClaw / agent system-prompt patterns
    '$OC01': 'unbrowse_', // frequent tool-name prefix
    '$OC02': '<location>',
    '$OC03': '</location>',
    '$OC04': '<name>',
    '$OC05': '</name>',
    '$OC06': '<description>',
    '$OC07': '</description>',
    '$OC08': '(may need login)',
    '$OC09': 'API skill for OpenClaw',
    '$OC10': 'endpoints',
    // Skill / tool markup
    '$SK01': '<available_skills>',
    '$SK02': '</available_skills>',
    '$SK03': '<skill>',
    '$SK04': '</skill>',
    // Schema fragments found in tool definitions
    '$T01': 'type: "function"',
    '$T02': '"type": "function"',
    '$T03': '"type": "string"',
    '$T04': '"type": "object"',
    '$T05': '"type": "array"',
    '$T06': '"type": "boolean"',
    '$T07': '"type": "number"',
    // Description keys
    '$D01': 'description:',
    '$D02': '"description":',
    // Instruction phrases
    '$I01': 'You are a personal assistant',
    '$I02': 'Tool names are case-sensitive',
    '$I03': 'Call tools exactly as listed',
    '$I04': 'Use when',
    '$I05': 'without asking',
    // Safety phrases
    '$S01': 'Do not manipulate or persuade',
    '$S02': 'Prioritize safety and human oversight',
    '$S03': 'unless explicitly requested',
    // JSON schema fragments
    '$J01': '"required": ["',
    '$J02': '"properties": {',
    '$J03': '"additionalProperties": false',
    // Heartbeat patterns
    '$H01': 'HEARTBEAT_OK',
    '$H02': 'Read HEARTBEAT.md if it exists',
    // Role markers
    '$R01': '"role": "system"',
    '$R02': '"role": "user"',
    '$R03': '"role": "assistant"',
    '$R04': '"role": "tool"',
    // Common endings / phrases
    '$E01': 'would you like to',
    '$E02': 'Let me know if you',
    '$E03': 'internal APIs',
    '$E04': 'session cookies',
    // Model-provider prefixes seen in prompts
    '$M01': 'blockrun/',
    '$M02': 'openai/',
    '$M03': 'anthropic/',
    '$M04': 'google/',
    '$M05': 'xai/',
};
|
|
74
|
+
/**
 * Build the phrase -> code lookup that mirrors STATIC_CODEBOOK.
 *
 * @returns A fresh object keyed by phrase, whose values are the short codes.
 */
export function getInverseCodebook() {
    const phraseToCode = {};
    Object.keys(STATIC_CODEBOOK).forEach((code) => {
        phraseToCode[STATIC_CODEBOOK[code]] = code;
    });
    return phraseToCode;
}
|
|
84
|
+
/**
 * Render the codebook header text that lets an LLM decode codes in-context.
 *
 * The header has up to two lines: `[Dict: code=phrase, ...]` for the static
 * dictionary codes in use and `[Paths: code=path, ...]` for shortened paths.
 *
 * @param usedCodes - Set of static dictionary codes that appear in the messages.
 * @param pathMap - Mapping of path codes to the paths they stand for.
 * @returns The header text, or an empty string when there is nothing to list.
 */
export function generateCodebookHeader(usedCodes, pathMap = {}) {
    const sections = [];
    if (usedCodes.size > 0) {
        const dictEntries = [];
        for (const code of usedCodes) {
            dictEntries.push(`${code}=${STATIC_CODEBOOK[code]}`);
        }
        sections.push(`[Dict: ${dictEntries.join(", ")}]`);
    }
    const pathPairs = Object.entries(pathMap);
    if (pathPairs.length > 0) {
        const pathEntries = [];
        for (const [code, path] of pathPairs) {
            pathEntries.push(`${code}=${path}`);
        }
        sections.push(`[Paths: ${pathEntries.join(", ")}]`);
    }
    // No sections means both inputs were empty; join() then yields "".
    return sections.join("\n");
}
|
|
109
|
+
/**
 * Expand every codebook code found in a string back to its phrase (for logging).
 *
 * Codes are expanded one after another in the codebook's key order.
 *
 * @param content - Text that may contain codes.
 * @param codebook - Code -> phrase table; defaults to STATIC_CODEBOOK.
 * @returns The text with all occurrences of each code replaced.
 */
export function decompressContent(content, codebook = STATIC_CODEBOOK) {
    return Object.keys(codebook).reduce(
        (expanded, code) => expanded.split(code).join(codebook[code]),
        content,
    );
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM-Safe Context Compression
|
|
3
|
+
*
|
|
4
|
+
* Reduces token usage by 15-40% while preserving semantic meaning.
|
|
5
|
+
* Implements 7 compression layers inspired by claw-compactor.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* const result = await compressContext(messages);
|
|
9
|
+
* // result.messages -> compressed version to send to provider
|
|
10
|
+
* // result.originalMessages -> original for logging
|
|
11
|
+
*/
|
|
12
|
+
import { NormalizedMessage, CompressionConfig, CompressionResult } from "./types.js";
|
|
13
|
+
export * from "./types.js";
|
|
14
|
+
export { STATIC_CODEBOOK } from "./codebook.js";
|
|
15
|
+
/**
|
|
16
|
+
* Main compression function.
|
|
17
|
+
*
|
|
18
|
+
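* Applies up to 7 layers in sequence; the first five are listed below, followed by observation compression (6) and the dynamic codebook (7):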
|
|
19
|
+
* 1. Deduplication - Remove exact duplicate messages
|
|
20
|
+
* 2. Whitespace - Normalize excessive whitespace
|
|
21
|
+
* 3. Dictionary - Replace common phrases with codes
|
|
22
|
+
* 4. Paths - Shorten repeated file paths
|
|
23
|
+
* 5. JSON - Compact JSON in tool calls
|
|
24
|
+
*
|
|
25
|
+
* Then prepends a codebook header for the LLM to decode in-context.
|
|
26
|
+
*/
|
|
27
|
+
export declare function compressContext(messages: NormalizedMessage[], config?: Partial<CompressionConfig>): Promise<CompressionResult>;
|
|
28
|
+
/**
|
|
29
|
+
* Quick check if compression would benefit these messages.
|
|
30
|
+
* Returns true if messages are large enough to warrant compression.
|
|
31
|
+
*/
|
|
32
|
+
export declare function shouldCompress(messages: NormalizedMessage[]): boolean;
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM-Safe Context Compression
|
|
3
|
+
*
|
|
4
|
+
* Reduces token usage by 15-40% while preserving semantic meaning.
|
|
5
|
+
* Implements 7 compression layers inspired by claw-compactor.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* const result = await compressContext(messages);
|
|
9
|
+
* // result.messages -> compressed version to send to provider
|
|
10
|
+
* // result.originalMessages -> original for logging
|
|
11
|
+
*/
|
|
12
|
+
import { DEFAULT_COMPRESSION_CONFIG, } from "./types.js";
|
|
13
|
+
import { deduplicateMessages } from "./layers/deduplication.js";
|
|
14
|
+
import { normalizeMessagesWhitespace } from "./layers/whitespace.js";
|
|
15
|
+
import { encodeMessages } from "./layers/dictionary.js";
|
|
16
|
+
import { shortenPaths } from "./layers/paths.js";
|
|
17
|
+
import { compactMessagesJson } from "./layers/json-compact.js";
|
|
18
|
+
import { compressObservations } from "./layers/observation.js";
|
|
19
|
+
import { applyDynamicCodebook, generateDynamicCodebookHeader } from "./layers/dynamic-codebook.js";
|
|
20
|
+
import { generateCodebookHeader, STATIC_CODEBOOK } from "./codebook.js";
|
|
21
|
+
export * from "./types.js";
|
|
22
|
+
export { STATIC_CODEBOOK } from "./codebook.js";
|
|
23
|
+
/**
 * Sum the character footprint of a list of messages.
 *
 * Per message: string content counts its length; array content counts the
 * length of each non-empty text part plus a flat 2500 per image_url part.
 * Serialized tool_calls, when present, are added on top.
 */
function calculateTotalChars(messages) {
    const IMAGE_CHARS = 2500; // ~1000 tokens worth
    let grandTotal = 0;
    messages.forEach((message) => {
        const body = message.content;
        let size;
        if (!Array.isArray(body)) {
            size = body?.length || 0;
        }
        else {
            size = 0;
            body.forEach((piece) => {
                if (piece.type === "text" && piece.text) {
                    size += piece.text.length;
                }
                else if (piece.type === "image_url") {
                    size += IMAGE_CHARS;
                }
            });
        }
        if (message.tool_calls) {
            size += JSON.stringify(message.tool_calls).length;
        }
        grandTotal = grandTotal + size;
    });
    return grandTotal;
}
|
|
46
|
+
/**
 * Report whether at least one message carries an image_url content part.
 * Only array-valued content is inspected.
 */
function hasVisionContent(messages) {
    const isImagePart = (piece) => piece.type === "image_url";
    return messages.some((message) => {
        if (!Array.isArray(message.content)) {
            return false;
        }
        return message.content.some(isImagePart);
    });
}
|
|
52
|
+
/**
 * Produce an independent deep copy of the messages via a JSON round-trip,
 * so later layers cannot modify the caller's objects.
 */
function cloneMessages(messages) {
    const serialized = JSON.stringify(messages);
    return JSON.parse(serialized);
}
|
|
58
|
+
/**
 * Prepend the codebook header to the first USER message (not system).
 *
 * Why not the system message?
 * - Google Gemini uses systemInstruction which doesn't support codebook format
 * - The codebook header in a user message is still visible to all LLMs
 * - This ensures compatibility across all providers
 *
 * When the user message holds an array of content parts, the header is added
 * as a leading text part; interpolating the array into a string would turn
 * the parts into "[object Object]".
 *
 * @param messages - Messages to decorate; never mutated.
 * @param usedCodes - Static dictionary codes that were substituted.
 * @param pathMap - Path code -> path mapping that was applied.
 * @returns The input array itself when no header is needed, otherwise a new
 *          array. If there is no user message, the header is added as a new
 *          leading system message.
 */
function prependCodebookHeader(messages, usedCodes, pathMap) {
    const header = generateCodebookHeader(usedCodes, pathMap);
    if (!header) {
        return messages;
    }
    const userIndex = messages.findIndex((m) => m.role === "user");
    if (userIndex === -1) {
        // No user message: fall back to a standalone system message.
        return [{ role: "system", content: header }, ...messages];
    }
    return messages.map((msg, i) => {
        if (i !== userIndex) {
            return msg;
        }
        if (Array.isArray(msg.content)) {
            // Keep structured parts intact and put the header in front of them.
            return { ...msg, content: [{ type: "text", text: header }, ...msg.content] };
        }
        return { ...msg, content: `${header}\n\n${msg.content || ""}` };
    });
}
|
|
90
|
+
/**
 * Create a stats record with every counter set to zero.
 */
function createEmptyCompressionStats() {
    return {
        duplicatesRemoved: 0,
        whitespaceSavedChars: 0,
        dictionarySubstitutions: 0,
        pathsShortened: 0,
        jsonCompactedChars: 0,
        observationsCompressed: 0,
        observationCharsSaved: 0,
        dynamicSubstitutions: 0,
        dynamicCharsSaved: 0,
    };
}
/**
 * Attach the dynamic codebook header so the dynamic codes stay decodable.
 * Prefers the first system message, then the first user message, and finally
 * a new leading system message when neither exists.
 */
function prependDynamicCodebookHeader(messages, dynHeader) {
    const withHeader = (msg, separator) => Array.isArray(msg.content)
        ? { ...msg, content: [{ type: "text", text: dynHeader }, ...msg.content] }
        : { ...msg, content: `${dynHeader}${separator}${msg.content || ""}` };
    const systemIndex = messages.findIndex((m) => m.role === "system");
    if (systemIndex >= 0) {
        return messages.map((m, i) => (i === systemIndex ? withHeader(m, "\n") : m));
    }
    const userIndex = messages.findIndex((m) => m.role === "user");
    if (userIndex >= 0) {
        return messages.map((m, i) => (i === userIndex ? withHeader(m, "\n\n") : m));
    }
    return [{ role: "system", content: dynHeader }, ...messages];
}
/**
 * Main compression function.
 *
 * Applies up to 7 layers in sequence (each toggled via `config.layers`):
 * 1. Deduplication - Remove exact duplicate messages
 * 2. Whitespace - Normalize excessive whitespace
 * 3. Dictionary - Replace common phrases with codes
 * 4. Paths - Shorten repeated file paths
 * 5. JSON - Compact JSON in tool calls
 * 6. Observation - Compress tool results
 * 7. Dynamic codebook - Codes derived from the actual content
 *
 * Then prepends codebook headers for the LLM to decode in-context.
 *
 * @param messages - Messages to compress; the input is cloned, not mutated.
 * @param config - Partial overrides merged over DEFAULT_COMPRESSION_CONFIG
 *                 (`layers` and `dictionary` are merged key by key).
 * @returns The compressed messages together with the originals, character
 *          counts, `compressionRatio` (compressed / original; 1 when the
 *          original is empty), per-layer stats and the code tables used.
 */
export async function compressContext(messages, config = {}) {
    const fullConfig = {
        ...DEFAULT_COMPRESSION_CONFIG,
        ...config,
        layers: {
            ...DEFAULT_COMPRESSION_CONFIG.layers,
            ...config.layers,
        },
        dictionary: {
            ...DEFAULT_COMPRESSION_CONFIG.dictionary,
            ...config.dictionary,
        },
    };
    const originalChars = calculateTotalChars(messages);
    // If compression disabled, return as-is
    if (!fullConfig.enabled) {
        return {
            messages,
            originalMessages: messages,
            originalChars,
            compressedChars: originalChars,
            compressionRatio: 1,
            stats: createEmptyCompressionStats(),
            codebook: {},
            pathMap: {},
            dynamicCodes: {},
        };
    }
    // Preserve originals for logging
    const originalMessages = fullConfig.preserveRaw
        ? cloneMessages(messages)
        : messages;
    const stats = createEmptyCompressionStats();
    let result = cloneMessages(messages);
    let usedCodes = new Set();
    let pathMap = {};
    let dynamicCodes = {};
    // Layer 1: Deduplication
    if (fullConfig.layers.deduplication) {
        const dedupResult = deduplicateMessages(result);
        result = dedupResult.messages;
        stats.duplicatesRemoved = dedupResult.duplicatesRemoved;
    }
    // Layer 2: Whitespace normalization
    if (fullConfig.layers.whitespace) {
        const wsResult = normalizeMessagesWhitespace(result);
        result = wsResult.messages;
        stats.whitespaceSavedChars = wsResult.charsSaved;
    }
    // Layer 3: Dictionary encoding
    if (fullConfig.layers.dictionary) {
        const dictResult = encodeMessages(result);
        result = dictResult.messages;
        stats.dictionarySubstitutions = dictResult.substitutionCount;
        usedCodes = dictResult.usedCodes;
    }
    // Layer 4: Path shortening
    if (fullConfig.layers.paths) {
        const pathResult = shortenPaths(result);
        result = pathResult.messages;
        pathMap = pathResult.pathMap;
        stats.pathsShortened = Object.keys(pathMap).length;
    }
    // Layer 5: JSON compaction
    if (fullConfig.layers.jsonCompact) {
        const jsonResult = compactMessagesJson(result);
        result = jsonResult.messages;
        stats.jsonCompactedChars = jsonResult.charsSaved;
    }
    // Layer 6: Observation compression (tool results)
    if (fullConfig.layers.observation) {
        const obsResult = compressObservations(result);
        result = obsResult.messages;
        stats.observationsCompressed = obsResult.observationsCompressed;
        stats.observationCharsSaved = obsResult.charsSaved;
    }
    // Layer 7: Dynamic codebook (learns from actual content)
    if (fullConfig.layers.dynamicCodebook) {
        const dynResult = applyDynamicCodebook(result);
        result = dynResult.messages;
        stats.dynamicSubstitutions = dynResult.substitutions;
        stats.dynamicCharsSaved = dynResult.charsSaved;
        dynamicCodes = dynResult.dynamicCodes;
    }
    // Add codebook headers if enabled and we have codes to include
    const hasDynamicCodes = Object.keys(dynamicCodes).length > 0;
    if (fullConfig.dictionary.includeCodebookHeader &&
        (usedCodes.size > 0 || Object.keys(pathMap).length > 0 || hasDynamicCodes)) {
        result = prependCodebookHeader(result, usedCodes, pathMap);
        if (hasDynamicCodes) {
            const dynHeader = generateDynamicCodebookHeader(dynamicCodes);
            if (dynHeader) {
                // Always attach the header somewhere: substituted codes without
                // their legend cannot be decoded by the model.
                result = prependDynamicCodebookHeader(result, dynHeader);
            }
        }
    }
    // Calculate final stats
    const compressedChars = calculateTotalChars(result);
    // Guard the division so empty input reports a neutral ratio, not NaN.
    const compressionRatio = originalChars > 0 ? compressedChars / originalChars : 1;
    // Build used codebook for logging
    const usedCodebook = {};
    usedCodes.forEach((code) => {
        usedCodebook[code] = STATIC_CODEBOOK[code];
    });
    return {
        messages: result,
        originalMessages,
        originalChars,
        compressedChars,
        compressionRatio,
        stats,
        codebook: usedCodebook,
        pathMap,
        dynamicCodes,
    };
}
|
|
246
|
+
/**
 * Cheap pre-check: is this conversation worth compressing?
 *
 * Conversations containing image_url parts are never compressed, because the
 * compression layers operate on string content and would corrupt those parts.
 * Otherwise the total size must exceed 5000 chars (roughly 1000 tokens).
 *
 * @returns true when compression should be attempted.
 */
export function shouldCompress(messages) {
    const MIN_CHARS = 5000;
    // The vision check runs first; size is only measured for text-only input.
    return !hasVisionContent(messages) && calculateTotalChars(messages) > MIN_CHARS;
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Layer 1: Message Deduplication
|
|
3
|
+
*
|
|
4
|
+
* Removes exact duplicate messages from conversation history.
|
|
5
|
+
* Common in heartbeat patterns and repeated tool calls.
|
|
6
|
+
*
|
|
7
|
+
* Safe for LLM: Identical messages add no new information.
|
|
8
|
+
* Expected savings: 2-5%
|
|
9
|
+
*/
|
|
10
|
+
import { NormalizedMessage } from "../types.js";
|
|
11
|
+
export interface DeduplicationResult {
|
|
12
|
+
messages: NormalizedMessage[];
|
|
13
|
+
duplicatesRemoved: number;
|
|
14
|
+
originalCount: number;
|
|
15
|
+
}
|
|
16
|
+
/**
|
|
17
|
+
* Remove exact duplicate messages from the conversation.
|
|
18
|
+
*
|
|
19
|
+
* Strategy:
|
|
20
|
+
* - Keep first occurrence of each unique message
|
|
21
|
+
* - Preserve order for semantic coherence
|
|
22
|
+
* - Never dedupe system messages (they set context)
|
|
23
|
+
* - Allow duplicate user messages (user might repeat intentionally)
|
|
24
|
+
* - CRITICAL: Never dedupe assistant messages with tool_calls that are
|
|
25
|
+
* referenced by subsequent tool messages (breaks Anthropic tool_use/tool_result pairing)
|
|
26
|
+
*/
|
|
27
|
+
export declare function deduplicateMessages(messages: NormalizedMessage[]): DeduplicationResult;
|