@blockrun/runcode 2.4.0 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/commands.js +37 -3
- package/dist/agent/context.js +3 -2
- package/dist/agent/loop.js +36 -13
- package/dist/agent/reduce.d.ts +42 -0
- package/dist/agent/reduce.js +258 -0
- package/dist/agent/types.d.ts +2 -0
- package/package.json +1 -1
- package/dist/compression/adapter.d.ts +0 -13
- package/dist/compression/adapter.js +0 -104
- package/dist/compression/codebook.d.ts +0 -23
- package/dist/compression/codebook.js +0 -118
- package/dist/compression/index.d.ts +0 -32
- package/dist/compression/index.js +0 -258
- package/dist/compression/layers/deduplication.d.ts +0 -27
- package/dist/compression/layers/deduplication.js +0 -97
- package/dist/compression/layers/dictionary.d.ts +0 -20
- package/dist/compression/layers/dictionary.js +0 -67
- package/dist/compression/layers/dynamic-codebook.d.ts +0 -25
- package/dist/compression/layers/dynamic-codebook.js +0 -145
- package/dist/compression/layers/json-compact.d.ts +0 -22
- package/dist/compression/layers/json-compact.js +0 -74
- package/dist/compression/layers/observation.d.ts +0 -20
- package/dist/compression/layers/observation.js +0 -126
- package/dist/compression/layers/paths.d.ts +0 -23
- package/dist/compression/layers/paths.js +0 -107
- package/dist/compression/layers/whitespace.d.ts +0 -26
- package/dist/compression/layers/whitespace.js +0 -57
- package/dist/compression/types.d.ts +0 -83
- package/dist/compression/types.js +0 -26
|
@@ -1,145 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* L7: Dynamic Codebook Builder
|
|
3
|
-
*
|
|
4
|
-
* Inspired by claw-compactor's frequency-based codebook.
|
|
5
|
-
* Builds codebook from actual content being compressed,
|
|
6
|
-
* rather than relying on static patterns.
|
|
7
|
-
*
|
|
8
|
-
* Finds phrases that appear 3+ times and replaces with short codes.
|
|
9
|
-
*/
|
|
10
|
-
// Config: tuning knobs for the dynamic codebook layer.
|
|
11
|
-
const MIN_PHRASE_LENGTH = 20; // Shortest trimmed segment/line (in characters) accepted as a candidate phrase
|
|
12
|
-
const MAX_PHRASE_LENGTH = 200; // Longest trimmed segment/line (in characters) accepted as a candidate phrase
|
|
13
|
-
const MIN_FREQUENCY = 3; // Minimum occurrence count a phrase needs before it is considered for the codebook
|
|
14
|
-
const MAX_ENTRIES = 100; // Upper bound on the number of codebook entries kept after sorting by savings
|
|
15
|
-
const CODE_PREFIX = "$D"; // Dynamic codes: $D01, $D02, etc.
|
|
16
|
-
/**
 * Find repeated phrases in content.
 *
 * Candidates come from two passes over the same text: sentence-like segments
 * (split on whitespace that follows `.`, `!`, `?` or a newline) and whole lines.
 * A candidate is kept when its trimmed length lies within
 * [MIN_PHRASE_LENGTH, MAX_PHRASE_LENGTH].
 *
 * Each occurrence is identified by the offset at which the trimmed phrase
 * starts, so a line that is also a complete sentence is counted once instead
 * of once per pass.
 *
 * @param allContent Concatenated message text to scan.
 * @returns Map from phrase to the number of distinct occurrences found.
 */
function findRepeatedPhrases(allContent) {
  // phrase -> set of start offsets at which the phrase was seen
  const occurrences = new Map();
  const record = (raw, offset) => {
    const trimmed = raw.trim();
    if (trimmed.length < MIN_PHRASE_LENGTH || trimmed.length > MAX_PHRASE_LENGTH) {
      return;
    }
    // Offset of the first non-whitespace character of this candidate.
    const start = offset + (raw.length - raw.trimStart().length);
    let starts = occurrences.get(trimmed);
    if (!starts) {
      starts = new Set();
      occurrences.set(trimmed, starts);
    }
    starts.add(start);
  };
  // Pass 1: sentence-like segments (same boundaries as splitting on the regex).
  const boundary = /(?<=[.!?\n])\s+/g;
  let cursor = 0;
  for (const separator of allContent.matchAll(boundary)) {
    record(allContent.slice(cursor, separator.index), cursor);
    cursor = separator.index + separator[0].length;
  }
  record(allContent.slice(cursor), cursor);
  // Pass 2: whole lines.
  let lineStart = 0;
  for (const line of allContent.split("\n")) {
    record(line, lineStart);
    lineStart += line.length + 1; // +1 for the "\n" removed by split
  }
  const phrases = new Map();
  for (const [phrase, starts] of occurrences) {
    phrases.set(phrase, starts.size);
  }
  return phrases;
}
|
|
41
|
-
/**
 * Build dynamic codebook from message content.
 *
 * Only string `content` is scanned: non-string content (e.g. content-part
 * arrays) is skipped, because the codebook is only ever applied to string
 * content and stringifying an array would feed "[object Object]" into the
 * phrase counter.
 *
 * @param messages Messages whose `content` is analysed.
 * @returns Object mapping code (CODE_PREFIX + zero-padded index) to phrase,
 *          ordered by estimated savings, at most MAX_ENTRIES entries.
 */
function buildDynamicCodebook(messages) {
  // Combine all string content into one buffer.
  let allContent = "";
  for (const msg of messages) {
    if (typeof msg.content === "string" && msg.content) {
      allContent += msg.content + "\n";
    }
  }
  const phrases = findRepeatedPhrases(allContent);
  // Estimated code length used for ranking, e.g. "$D01".
  const codeLength = CODE_PREFIX.length + 2;
  const candidates = [];
  for (const [phrase, count] of phrases) {
    if (count < MIN_FREQUENCY) {
      continue;
    }
    // Savings = (phrase length - code length) * occurrences
    const savings = (phrase.length - codeLength) * count;
    if (savings > 50) {
      candidates.push({ phrase, count, savings });
    }
  }
  // Highest savings first, capped at MAX_ENTRIES.
  candidates.sort((a, b) => b.savings - a.savings);
  const topCandidates = candidates.slice(0, MAX_ENTRIES);
  // All codes share one width so no code is a textual prefix of another
  // ("$D10" vs "$D100"). With fewer than 100 entries this yields the same
  // two-digit codes as before.
  const width = Math.max(2, String(topCandidates.length).length);
  const codebook = {};
  topCandidates.forEach((candidate, i) => {
    const code = `${CODE_PREFIX}${String(i + 1).padStart(width, "0")}`;
    codebook[code] = candidate.phrase;
  });
  return codebook;
}
|
|
77
|
-
/**
 * Backslash-escape every regular-expression metacharacter in `str` so the
 * result can be embedded in a RegExp and match the input literally.
 */
function escapeRegex(str) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  const escaped = str.replace(metacharacters, "\\$&");
  return escaped;
}
|
|
83
|
-
/**
 * Apply dynamic codebook to messages.
 *
 * Builds a codebook from the messages themselves and replaces every
 * occurrence of each phrase in string `content` with its code. Messages
 * without string content are returned untouched.
 *
 * @returns `messages` (rewritten copies), `charsSaved`, `dynamicCodes`
 *          (code -> phrase) and `substitutions` (number of replacements).
 */
export function applyDynamicCodebook(messages) {
  const codebook = buildDynamicCodebook(messages);
  if (Object.keys(codebook).length === 0) {
    return { messages, charsSaved: 0, dynamicCodes: {}, substitutions: 0 };
  }
  // Invert the codebook: phrase -> code.
  const phraseToCode = {};
  for (const [code, phrase] of Object.entries(codebook)) {
    phraseToCode[phrase] = code;
  }
  // Longest phrases first so a shorter phrase never eats part of a longer one.
  const replacements = Object.keys(phraseToCode)
    .sort((a, b) => b.length - a.length)
    .map((phrase) => ({
      phrase,
      code: phraseToCode[phrase],
      pattern: new RegExp(escapeRegex(phrase), "g"),
    }));
  let charsSaved = 0;
  let substitutions = 0;
  const rewritten = messages.map((msg) => {
    if (typeof msg.content !== "string" || !msg.content) {
      return msg;
    }
    let text = msg.content;
    for (const { phrase, code, pattern } of replacements) {
      const hits = text.match(pattern);
      if (!hits) {
        continue;
      }
      text = text.replace(pattern, code);
      charsSaved += (phrase.length - code.length) * hits.length;
      substitutions += hits.length;
    }
    return { ...msg, content: text };
  });
  return {
    messages: rewritten,
    charsSaved,
    dynamicCodes: codebook,
    substitutions,
  };
}
|
|
130
|
-
/**
 * Generate header for dynamic codes (to include in system message).
 *
 * Renders at most the first 20 entries as `code=phrase`, joined by ", " and
 * wrapped in `[DynDict: …]`. Phrases longer than 40 characters are shown as
 * their first 37 characters followed by "...". Returns "" for an empty codebook.
 *
 * NOTE(review): entries beyond the 20th and the cut-off tail of long phrases
 * do not appear in the header — confirm consumers do not need them to expand codes.
 */
export function generateDynamicCodebookHeader(codebook) {
  const pairs = Object.entries(codebook);
  if (pairs.length === 0) {
    return "";
  }
  const rendered = [];
  // Limit header size to the first 20 codes.
  for (const [code, phrase] of pairs.slice(0, 20)) {
    const shown = phrase.length <= 40 ? phrase : phrase.slice(0, 37) + "...";
    rendered.push(`${code}=${shown}`);
  }
  return `[DynDict: ${rendered.join(", ")}]`;
}
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Layer 5: JSON Compaction
|
|
3
|
-
*
|
|
4
|
-
* Minifies JSON in tool_call arguments and tool results.
|
|
5
|
-
* Removes pretty-print whitespace from JSON strings.
|
|
6
|
-
*
|
|
7
|
-
* Safe for LLM: JSON semantics unchanged.
|
|
8
|
-
* Expected savings: 2-4%
|
|
9
|
-
*/
|
|
10
|
-
import { NormalizedMessage } from "../types.js";
|
|
11
|
-
export interface JsonCompactResult {
|
|
12
|
-
messages: NormalizedMessage[];
|
|
13
|
-
charsSaved: number;
|
|
14
|
-
}
|
|
15
|
-
/**
|
|
16
|
-
* Apply JSON compaction to all messages.
|
|
17
|
-
*
|
|
18
|
-
* Targets:
|
|
19
|
-
* - tool_call arguments (in assistant messages)
|
|
20
|
-
* - tool message content (often JSON)
|
|
21
|
-
*/
|
|
22
|
-
export declare function compactMessagesJson(messages: NormalizedMessage[]): JsonCompactResult;
|
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Layer 5: JSON Compaction
|
|
3
|
-
*
|
|
4
|
-
* Minifies JSON in tool_call arguments and tool results.
|
|
5
|
-
* Removes pretty-print whitespace from JSON strings.
|
|
6
|
-
*
|
|
7
|
-
* Safe for LLM: JSON semantics unchanged.
|
|
8
|
-
* Expected savings: 2-4%
|
|
9
|
-
*/
|
|
10
|
-
/**
 * Compact a JSON string by removing insignificant whitespace.
 *
 * The input is validated with JSON.parse, but the output is produced from the
 * original text rather than by re-serialising the parsed value. Re-serialising
 * is lossy: integers beyond Number.MAX_SAFE_INTEGER (common in ids) lose
 * precision, and number/escape spellings are rewritten. Stripping whitespace
 * outside string literals keeps every token exactly as written.
 *
 * @param jsonString Candidate JSON text.
 * @returns The minified text, or the input unchanged when it is not valid JSON.
 */
function compactJson(jsonString) {
  try {
    // Validation only; the parsed value is intentionally discarded.
    JSON.parse(jsonString);
  }
  catch {
    // Not valid JSON, return as-is
    return jsonString;
  }
  const text = String(jsonString);
  let out = "";
  let inString = false;
  let escaped = false;
  for (let i = 0; i < text.length; i++) {
    const ch = text[i];
    if (inString) {
      out += ch;
      if (escaped) {
        escaped = false;
      }
      else if (ch === "\\") {
        escaped = true;
      }
      else if (ch === '"') {
        inString = false;
      }
      continue;
    }
    if (ch === '"') {
      inString = true;
      out += ch;
      continue;
    }
    // The four JSON whitespace characters are dropped outside strings.
    if (ch === " " || ch === "\t" || ch === "\n" || ch === "\r") {
      continue;
    }
    out += ch;
  }
  return out;
}
|
|
23
|
-
/**
 * Cheap shape check: after trimming, the string is wrapped in `{…}` or `[…]`.
 * Does not validate that the content actually parses as JSON.
 */
function looksLikeJson(str) {
  const body = str.trim();
  const objectShaped = body.startsWith("{") && body.endsWith("}");
  if (objectShaped) {
    return true;
  }
  return body.startsWith("[") && body.endsWith("]");
}
|
|
31
|
-
/**
 * Return copies of the given tool calls whose `function.arguments` string has
 * been passed through compactJson. The input objects are not mutated.
 */
function compactToolCalls(toolCalls) {
  return toolCalls.map((call) => {
    const compactedFunction = {
      ...call.function,
      arguments: compactJson(call.function.arguments),
    };
    return { ...call, function: compactedFunction };
  });
}
|
|
43
|
-
/**
 * Apply JSON compaction to all messages.
 *
 * Two places are compacted:
 * - `tool_calls[].function.arguments` on any message that has tool calls
 * - string `content` of `role: "tool"` messages when it looks like JSON
 *
 * @returns Shallow copies of the messages plus the number of characters saved
 *          (tool calls are measured on their JSON.stringify length).
 */
export function compactMessagesJson(messages) {
  let charsSaved = 0;
  const compactedMessages = messages.map((message) => {
    const next = { ...message };
    const calls = message.tool_calls;
    if (calls && calls.length > 0) {
      const before = JSON.stringify(calls).length;
      next.tool_calls = compactToolCalls(calls);
      charsSaved += before - JSON.stringify(next.tool_calls).length;
    }
    const { content } = message;
    if (message.role === "tool" &&
      typeof content === "string" &&
      content.length > 0 &&
      looksLikeJson(content)) {
      const compacted = compactJson(content);
      charsSaved += content.length - compacted.length;
      next.content = compacted;
    }
    return next;
  });
  return { messages: compactedMessages, charsSaved };
}
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* L6: Observation Compression (AGGRESSIVE)
|
|
3
|
-
*
|
|
4
|
-
* Inspired by claw-compactor's 97% compression on tool results.
|
|
5
|
-
* Tool call results (especially large ones) are summarized to key info only.
|
|
6
|
-
*
|
|
7
|
-
* This is the biggest compression win - tool outputs can be 10KB+ but
|
|
8
|
-
* only ~200 chars of actual useful information.
|
|
9
|
-
*/
|
|
10
|
-
import { NormalizedMessage } from "../types.js";
|
|
11
|
-
interface ObservationResult {
|
|
12
|
-
messages: NormalizedMessage[];
|
|
13
|
-
charsSaved: number;
|
|
14
|
-
observationsCompressed: number;
|
|
15
|
-
}
|
|
16
|
-
/**
|
|
17
|
-
* Compress tool results in messages.
|
|
18
|
-
*/
|
|
19
|
-
export declare function compressObservations(messages: NormalizedMessage[]): ObservationResult;
|
|
20
|
-
export {};
|
|
@@ -1,126 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* L6: Observation Compression (AGGRESSIVE)
|
|
3
|
-
*
|
|
4
|
-
* Inspired by claw-compactor's 97% compression on tool results.
|
|
5
|
-
* Tool call results (especially large ones) are summarized to key info only.
|
|
6
|
-
*
|
|
7
|
-
* This is the biggest compression win - tool outputs can be 10KB+ but
|
|
8
|
-
* only ~200 chars of actual useful information.
|
|
9
|
-
*/
|
|
10
|
-
// Tool results at or below this many characters are returned unchanged; only longer ones are compressed
|
|
11
|
-
const TOOL_RESULT_THRESHOLD = 500;
|
|
12
|
-
// Length cap (in characters) for a compressed tool result; longer summaries are cut and given a "[...truncated]" marker
|
|
13
|
-
const COMPRESSED_RESULT_MAX = 300;
|
|
14
|
-
/**
 * Summarise a long tool result down to its most informative pieces.
 *
 * Content at or below TOOL_RESULT_THRESHOLD (or empty) is returned unchanged.
 * Otherwise the summary is assembled, in order, from:
 *   1. up to 3 short lines that mention an error-like keyword (prefixed "[ERR] ")
 *   2. up to 3 short lines that mention a status-like keyword
 *   3. up to 5 `key: value` pairs for well-known JSON field names
 * If none of these match, the first line, an elided-line marker and the last
 * line are kept instead. The result is capped at COMPRESSED_RESULT_MAX.
 */
function compressToolResult(content) {
  if (!content || content.length <= TOOL_RESULT_THRESHOLD) {
    return content;
  }
  // Non-empty, trimmed lines.
  const lines = [];
  for (const raw of content.split("\n")) {
    const line = raw.trim();
    if (line) {
      lines.push(line);
    }
  }
  const errorPattern = /error|exception|failed|denied|refused|timeout|invalid/i;
  const statusPattern = /success|complete|created|updated|found|result|status|total|count/i;
  const errorLines = lines.filter((l) => errorPattern.test(l) && l.length < 200);
  const statusLines = lines.filter((l) => statusPattern.test(l) && l.length < 150);
  // Key JSON fields, each value clipped to 50 characters.
  const fieldPattern = /"(id|name|status|error|message|count|total|url|path)":\s*"?([^",}\n]+)"?/gi;
  const jsonMatches = Array.from(content.matchAll(fieldPattern), (m) => `${m[1]}: ${m[2].slice(0, 50)}`);
  const parts = [];
  if (errorLines.length > 0) {
    parts.push("[ERR] " + errorLines.slice(0, 3).join(" | "));
  }
  if (statusLines.length > 0) {
    parts.push(statusLines.slice(0, 3).join(" | "));
  }
  if (jsonMatches.length > 0) {
    parts.push(jsonMatches.slice(0, 5).join(", "));
  }
  if (parts.length === 0) {
    // Fallback: first/last lines with a marker for what was dropped.
    const firstLine = lines[0]?.slice(0, 100);
    const lastLine = lines.length > 1 ? lines[lines.length - 1]?.slice(0, 100) : "";
    parts.push(firstLine || "");
    if (lines.length > 2) {
      parts.push(`[...${lines.length - 2} lines...]`);
    }
    if (lastLine && lastLine !== firstLine) {
      parts.push(lastLine);
    }
  }
  const summary = parts.join("\n");
  // Final length cap
  return summary.length > COMPRESSED_RESULT_MAX
    ? summary.slice(0, COMPRESSED_RESULT_MAX - 20) + "\n[...truncated]"
    : summary;
}
|
|
67
|
-
/**
 * Replace repeated large content blocks with a pointer to their first occurrence.
 *
 * Only string content of at least 500 characters is considered. Two messages
 * are duplicates only when their entire content is identical; comparing just a
 * leading slice would discard messages that merely share a common header.
 *
 * @param messages Messages to scan, in conversation order.
 * @returns Messages (duplicates replaced by `[See message #N - same content]`,
 *          N being the 1-based position of the first occurrence) and the
 *          number of characters saved.
 */
function deduplicateLargeBlocks(messages) {
  const firstSeenAt = new Map(); // full content -> index of first occurrence
  let charsSaved = 0;
  const result = messages.map((msg, idx) => {
    const content = msg.content;
    if (typeof content !== "string" || content.length < 500) {
      return msg;
    }
    const firstIdx = firstSeenAt.get(content);
    if (firstIdx === undefined) {
      firstSeenAt.set(content, idx);
      return msg;
    }
    const reference = `[See message #${firstIdx + 1} - same content]`;
    charsSaved += content.length - reference.length;
    return { ...msg, content: reference };
  });
  return { messages: result, charsSaved };
}
|
|
92
|
-
/**
 * Compress tool results in messages.
 *
 * Pass 1 summarises every `role: "tool"` message whose string content exceeds
 * TOOL_RESULT_THRESHOLD, keeping the summary only when it saves more than 50
 * characters. Pass 2 replaces repeated large blocks across all messages.
 *
 * @returns The processed messages, total characters saved by both passes, and
 *          the number of tool results summarised in pass 1.
 */
export function compressObservations(messages) {
  let charsSaved = 0;
  let observationsCompressed = 0;
  const summarised = messages.map((msg) => {
    // Only tool-role messages with non-empty string content are candidates.
    const isToolText = msg.role === "tool" && typeof msg.content === "string" && msg.content;
    if (!isToolText) {
      return msg;
    }
    const original = msg.content;
    if (original.length <= TOOL_RESULT_THRESHOLD) {
      return msg;
    }
    const compressed = compressToolResult(original);
    const saved = original.length - compressed.length;
    if (saved <= 50) {
      return msg;
    }
    charsSaved += saved;
    observationsCompressed += 1;
    return { ...msg, content: compressed };
  });
  const { messages: deduplicated, charsSaved: dedupSaved } = deduplicateLargeBlocks(summarised);
  return {
    messages: deduplicated,
    charsSaved: charsSaved + dedupSaved,
    observationsCompressed,
  };
}
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Layer 4: Path Shortening
|
|
3
|
-
*
|
|
4
|
-
* Detects common filesystem path prefixes and replaces them with short codes.
|
|
5
|
-
* Common in coding assistant contexts with repeated file paths.
|
|
6
|
-
*
|
|
7
|
-
* Safe for LLM: Lossless abbreviation with path map header.
|
|
8
|
-
* Expected savings: 1-3%
|
|
9
|
-
*/
|
|
10
|
-
import { NormalizedMessage } from "../types.js";
|
|
11
|
-
export interface PathShorteningResult {
|
|
12
|
-
messages: NormalizedMessage[];
|
|
13
|
-
pathMap: Record<string, string>;
|
|
14
|
-
charsSaved: number;
|
|
15
|
-
}
|
|
16
|
-
/**
|
|
17
|
-
* Apply path shortening to all messages.
|
|
18
|
-
*/
|
|
19
|
-
export declare function shortenPaths(messages: NormalizedMessage[]): PathShorteningResult;
|
|
20
|
-
/**
|
|
21
|
-
* Generate the path map header for the codebook.
|
|
22
|
-
*/
|
|
23
|
-
export declare function generatePathMapHeader(pathMap: Record<string, string>): string;
|
|
@@ -1,107 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Layer 4: Path Shortening
|
|
3
|
-
*
|
|
4
|
-
* Detects common filesystem path prefixes and replaces them with short codes.
|
|
5
|
-
* Common in coding assistant contexts with repeated file paths.
|
|
6
|
-
*
|
|
7
|
-
* Safe for LLM: Lossless abbreviation with path map header.
|
|
8
|
-
* Expected savings: 1-3%
|
|
9
|
-
*/
|
|
10
|
-
// Regex to match filesystem paths: three or more consecutive "/segment" parts, each segment made of word characters, dots or hyphens (global, so match() returns every occurrence)
|
|
11
|
-
const PATH_REGEX = /(?:\/[\w.-]+){3,}/g;
|
|
12
|
-
/**
 * Collect every PATH_REGEX match from the string content of the given
 * messages, in message order. Messages without string content contribute nothing.
 */
function extractPaths(messages) {
  return messages.flatMap((message) => {
    const { content } = message;
    if (typeof content !== "string" || !content) {
      return [];
    }
    return content.match(PATH_REGEX) || [];
  });
}
|
|
27
|
-
/**
 * Tally directory prefixes across the given paths and return the frequent ones.
 *
 * For each path, every prefix made of at least two leading segments (and
 * excluding the final segment) is counted, rendered as "/seg1/seg2/…/".
 *
 * @returns Up to 5 prefixes that were counted 3 or more times, longest first.
 */
function findFrequentPrefixes(paths) {
  const tally = new Map();
  paths.forEach((path) => {
    const segments = path.split("/").filter((segment) => segment.length > 0);
    let prefix = "/";
    segments.forEach((segment, position) => {
      prefix += segment + "/";
      const depth = position + 1;
      // Depth 2 up to (but not including) the full path.
      if (depth >= 2 && depth < segments.length) {
        tally.set(prefix, (tally.get(prefix) || 0) + 1);
      }
    });
  });
  const frequent = [];
  for (const [prefix, count] of tally) {
    if (count >= 3) {
      frequent.push(prefix);
    }
  }
  frequent.sort((a, b) => b.length - a.length);
  return frequent.slice(0, 5); // Max 5 path codes
}
|
|
48
|
-
/**
 * Apply path shortening to all messages.
 *
 * When at least 5 paths are found and some prefix is frequent enough, each
 * frequent prefix is assigned a code `$P1`, `$P2`, … and every occurrence in
 * string content is replaced by `<code>/`. Otherwise the input is returned
 * as-is with an empty path map.
 *
 * @returns The (possibly rewritten) messages, the code -> prefix map, and the
 *          number of characters saved.
 */
export function shortenPaths(messages) {
  const unchanged = () => ({ messages, pathMap: {}, charsSaved: 0 });
  const allPaths = extractPaths(messages);
  if (allPaths.length < 5) {
    // Not enough paths to benefit from shortening
    return unchanged();
  }
  const prefixes = findFrequentPrefixes(allPaths);
  if (prefixes.length === 0) {
    return unchanged();
  }
  const pathMap = {};
  for (let i = 0; i < prefixes.length; i++) {
    pathMap[`$P${i + 1}`] = prefixes[i];
  }
  // Entries keep the prefix order (longest first), avoiding partial replacements.
  const substitutions = Object.entries(pathMap);
  let charsSaved = 0;
  const result = messages.map((message) => {
    if (typeof message.content !== "string" || !message.content) {
      return message;
    }
    const shortened = substitutions.reduce((text, [code, prefix]) => text.split(prefix).join(code + "/"), message.content);
    charsSaved += message.content.length - shortened.length;
    return { ...message, content: shortened };
  });
  return { messages: result, pathMap, charsSaved };
}
|
|
97
|
-
/**
 * Render the path map as a single header line of the form
 * `[Paths: code=prefix, code=prefix]`, or "" when the map is empty.
 */
export function generatePathMapHeader(pathMap) {
  const codes = Object.keys(pathMap);
  if (codes.length === 0) {
    return "";
  }
  const listing = codes.map((code) => `${code}=${pathMap[code]}`).join(", ");
  return `[Paths: ${listing}]`;
}
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Layer 2: Whitespace Normalization
|
|
3
|
-
*
|
|
4
|
-
* Reduces excessive whitespace without changing semantic meaning.
|
|
5
|
-
*
|
|
6
|
-
* Safe for LLM: Tokenizers normalize whitespace anyway.
|
|
7
|
-
* Expected savings: 3-8%
|
|
8
|
-
*/
|
|
9
|
-
import { NormalizedMessage } from "../types.js";
|
|
10
|
-
export interface WhitespaceResult {
|
|
11
|
-
messages: NormalizedMessage[];
|
|
12
|
-
charsSaved: number;
|
|
13
|
-
}
|
|
14
|
-
/**
|
|
15
|
-
* Normalize whitespace in a string.
|
|
16
|
-
*
|
|
17
|
-
* - Max 2 consecutive newlines
|
|
18
|
-
* - Remove trailing whitespace from lines
|
|
19
|
-
* - Normalize tabs to spaces
|
|
20
|
-
* - Trim start/end
|
|
21
|
-
*/
|
|
22
|
-
export declare function normalizeWhitespace(content: string): string;
|
|
23
|
-
/**
|
|
24
|
-
* Apply whitespace normalization to all messages.
|
|
25
|
-
*/
|
|
26
|
-
export declare function normalizeMessagesWhitespace(messages: NormalizedMessage[]): WhitespaceResult;
|
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Layer 2: Whitespace Normalization
|
|
3
|
-
*
|
|
4
|
-
* Reduces excessive whitespace without changing semantic meaning.
|
|
5
|
-
*
|
|
6
|
-
* Safe for LLM: Tokenizers normalize whitespace anyway.
|
|
7
|
-
* Expected savings: 3-8%
|
|
8
|
-
*/
|
|
9
|
-
/**
 * Normalize whitespace in a string.
 *
 * - Line endings become "\n"
 * - Trailing spaces/tabs are removed from every line
 * - Max 2 consecutive newlines (applied after trailing whitespace is removed,
 *   so whitespace-only lines collapse too)
 * - Runs of spaces inside a line collapse to one space; leading indentation
 *   is left untouched so nested code keeps its structure
 * - Tabs become 2 spaces
 * - Trim start/end
 *
 * Changes from the previous implementation: the space-collapsing pattern used
 * to match at the start of a line as well, flattening every indent of three or
 * more spaces to two, which also made the separate deep-indentation rewrite
 * unreachable. That rewrite is dropped rather than revived, because it would
 * place 8-space lines at or below 4-space siblings.
 *
 * @param content Text to normalize; falsy input is returned unchanged.
 */
export function normalizeWhitespace(content) {
  if (!content) {
    return content;
  }
  return content
    // Normalize line endings
    .replace(/\r\n?/g, "\n")
    // Remove trailing whitespace from each line
    .replace(/[ \t]+$/gm, "")
    // Max 2 consecutive newlines (preserve paragraph breaks)
    .replace(/\n{3,}/g, "\n\n")
    // Collapse space runs that follow a non-whitespace character (never indentation)
    .replace(/(\S) {2,}/g, "$1 ")
    // Normalize tabs to 2 spaces
    .replace(/\t/g, "  ")
    // Trim
    .trim();
}
|
|
37
|
-
/**
 * Run normalizeWhitespace over the string content of every message.
 *
 * Messages whose content is empty or not a string are passed through as-is.
 *
 * @returns Rewritten message copies and the total number of characters removed.
 */
export function normalizeMessagesWhitespace(messages) {
  let charsSaved = 0;
  const normalized = messages.map((message) => {
    const { content } = message;
    if (typeof content !== "string" || content === "") {
      return message;
    }
    const cleaned = normalizeWhitespace(content);
    charsSaved += content.length - cleaned.length;
    return { ...message, content: cleaned };
  });
  return { messages: normalized, charsSaved };
}
|
|
@@ -1,83 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* LLM-Safe Context Compression Types
|
|
3
|
-
*
|
|
4
|
-
* Types for the 7-layer compression system that reduces token usage
|
|
5
|
-
* while preserving semantic meaning for LLM queries.
|
|
6
|
-
*/
|
|
7
|
-
export type ContentPart = {
|
|
8
|
-
type: string;
|
|
9
|
-
text?: string;
|
|
10
|
-
image_url?: {
|
|
11
|
-
url: string;
|
|
12
|
-
detail?: string;
|
|
13
|
-
};
|
|
14
|
-
};
|
|
15
|
-
export interface NormalizedMessage {
|
|
16
|
-
role: "system" | "user" | "assistant" | "tool";
|
|
17
|
-
content: string | ContentPart[] | null;
|
|
18
|
-
tool_call_id?: string;
|
|
19
|
-
tool_calls?: ToolCall[];
|
|
20
|
-
name?: string;
|
|
21
|
-
}
|
|
22
|
-
export interface ToolCall {
|
|
23
|
-
id: string;
|
|
24
|
-
type: "function";
|
|
25
|
-
function: {
|
|
26
|
-
name: string;
|
|
27
|
-
arguments: string;
|
|
28
|
-
};
|
|
29
|
-
}
|
|
30
|
-
export interface CompressionConfig {
|
|
31
|
-
enabled: boolean;
|
|
32
|
-
preserveRaw: boolean;
|
|
33
|
-
layers: {
|
|
34
|
-
deduplication: boolean;
|
|
35
|
-
whitespace: boolean;
|
|
36
|
-
dictionary: boolean;
|
|
37
|
-
paths: boolean;
|
|
38
|
-
jsonCompact: boolean;
|
|
39
|
-
observation: boolean;
|
|
40
|
-
dynamicCodebook: boolean;
|
|
41
|
-
};
|
|
42
|
-
dictionary: {
|
|
43
|
-
maxEntries: number;
|
|
44
|
-
minPhraseLength: number;
|
|
45
|
-
includeCodebookHeader: boolean;
|
|
46
|
-
};
|
|
47
|
-
}
|
|
48
|
-
export interface CompressionStats {
|
|
49
|
-
duplicatesRemoved: number;
|
|
50
|
-
whitespaceSavedChars: number;
|
|
51
|
-
dictionarySubstitutions: number;
|
|
52
|
-
pathsShortened: number;
|
|
53
|
-
jsonCompactedChars: number;
|
|
54
|
-
observationsCompressed: number;
|
|
55
|
-
observationCharsSaved: number;
|
|
56
|
-
dynamicSubstitutions: number;
|
|
57
|
-
dynamicCharsSaved: number;
|
|
58
|
-
}
|
|
59
|
-
export interface CompressionResult {
|
|
60
|
-
messages: NormalizedMessage[];
|
|
61
|
-
originalMessages: NormalizedMessage[];
|
|
62
|
-
originalChars: number;
|
|
63
|
-
compressedChars: number;
|
|
64
|
-
compressionRatio: number;
|
|
65
|
-
stats: CompressionStats;
|
|
66
|
-
codebook: Record<string, string>;
|
|
67
|
-
pathMap: Record<string, string>;
|
|
68
|
-
dynamicCodes: Record<string, string>;
|
|
69
|
-
}
|
|
70
|
-
export interface CompressionLogData {
|
|
71
|
-
enabled: boolean;
|
|
72
|
-
ratio: number;
|
|
73
|
-
original_chars: number;
|
|
74
|
-
compressed_chars: number;
|
|
75
|
-
stats: {
|
|
76
|
-
duplicates_removed: number;
|
|
77
|
-
whitespace_saved: number;
|
|
78
|
-
dictionary_subs: number;
|
|
79
|
-
paths_shortened: number;
|
|
80
|
-
json_compacted: number;
|
|
81
|
-
};
|
|
82
|
-
}
|
|
83
|
-
export declare const DEFAULT_COMPRESSION_CONFIG: CompressionConfig;
|