@blockrun/runcode 2.3.0 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/loop.js +10 -1
- package/dist/compression/adapter.d.ts +13 -0
- package/dist/compression/adapter.js +104 -0
- package/dist/compression/codebook.d.ts +23 -0
- package/dist/compression/codebook.js +118 -0
- package/dist/compression/index.d.ts +32 -0
- package/dist/compression/index.js +258 -0
- package/dist/compression/layers/deduplication.d.ts +27 -0
- package/dist/compression/layers/deduplication.js +97 -0
- package/dist/compression/layers/dictionary.d.ts +20 -0
- package/dist/compression/layers/dictionary.js +67 -0
- package/dist/compression/layers/dynamic-codebook.d.ts +25 -0
- package/dist/compression/layers/dynamic-codebook.js +145 -0
- package/dist/compression/layers/json-compact.d.ts +22 -0
- package/dist/compression/layers/json-compact.js +74 -0
- package/dist/compression/layers/observation.d.ts +20 -0
- package/dist/compression/layers/observation.js +126 -0
- package/dist/compression/layers/paths.d.ts +23 -0
- package/dist/compression/layers/paths.js +107 -0
- package/dist/compression/layers/whitespace.d.ts +26 -0
- package/dist/compression/layers/whitespace.js +57 -0
- package/dist/compression/types.d.ts +83 -0
- package/dist/compression/types.js +26 -0
- package/package.json +1 -1
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Layer 4: Path Shortening
|
|
3
|
+
*
|
|
4
|
+
* Detects common filesystem path prefixes and replaces them with short codes.
|
|
5
|
+
* Common in coding assistant contexts with repeated file paths.
|
|
6
|
+
*
|
|
7
|
+
* Safe for LLM: Lossless abbreviation with path map header.
|
|
8
|
+
* Expected savings: 1-3%
|
|
9
|
+
*/
|
|
10
|
+
// Matches path-like text: three or more consecutive "/segment" groups, where a
// segment consists of word characters, dots and hyphens.
const PATH_REGEX = /(?:\/[\w.-]+){3,}/g;
/**
 * Collect every path-like substring found in the string contents of `messages`.
 *
 * Messages whose `content` is empty or not a string are skipped. Matches are
 * returned in order of appearance and duplicates are kept, because callers
 * count occurrences.
 *
 * @param messages List of message objects exposing a `content` property.
 * @returns Array of matched path strings (possibly empty).
 */
function extractPaths(messages) {
    const paths = [];
    for (const message of messages) {
        if (!message.content || typeof message.content !== "string")
            continue;
        const matches = message.content.match(PATH_REGEX);
        if (!matches)
            continue;
        // Append element by element: `paths.push(...matches)` passes every match
        // as a separate call argument and throws a RangeError once the match
        // list exceeds the engine's argument limit.
        for (const match of matches) {
            paths.push(match);
        }
    }
    return paths;
}
|
|
27
|
+
/**
 * Count directory prefixes across `paths` and pick the ones worth abbreviating.
 *
 * For each path, every prefix made of at least two segments and strictly fewer
 * segments than the whole path is tallied (written as "/seg/seg/" with a
 * trailing slash). Prefixes tallied three or more times are kept, ordered
 * longest first, and at most five are returned.
 *
 * @param paths Path strings; duplicates count as separate occurrences.
 * @returns Up to five prefix strings, longest first.
 */
function findFrequentPrefixes(paths) {
    const tally = new Map();
    paths.forEach((fullPath) => {
        const segments = fullPath.split("/").filter((segment) => segment.length > 0);
        let running = "/";
        segments.forEach((segment, index) => {
            running += segment + "/";
            const depth = index + 1;
            // Only prefixes of 2..(n-1) segments are candidates
            if (depth >= 2 && depth < segments.length) {
                tally.set(running, (tally.get(running) || 0) + 1);
            }
        });
    });
    const frequent = [];
    for (const [candidate, occurrences] of tally) {
        if (occurrences >= 3) {
            frequent.push(candidate);
        }
    }
    // Longest first; the sort is stable, so equal lengths keep first-seen order
    frequent.sort((left, right) => right.length - left.length);
    // Max 5 path codes
    return frequent.slice(0, 5);
}
|
|
48
|
+
/**
 * Apply path shortening to all messages.
 *
 * Frequent directory prefixes found in the string contents are replaced by
 * short codes of the form `$P<n>` followed by "/". Prefixes are processed
 * longest first so a short prefix never splits a longer one.
 *
 * Only codes that actually replace something are recorded in `pathMap`: once a
 * longer prefix has been substituted, its shorter ancestors frequently no
 * longer occur, and listing them would only inflate the path map header.
 * A code number is skipped when that token already appears literally in the
 * text, so every code in the output maps unambiguously back to one prefix.
 *
 * @param messages List of message objects; only non-empty string `content`
 *   values are rewritten, all other messages are returned as-is.
 * @returns `{ messages, pathMap, charsSaved }` where `pathMap` maps each used
 *   code to the prefix it replaced and `charsSaved` is the total reduction in
 *   content length (the header is not accounted for here).
 */
export function shortenPaths(messages) {
    const allPaths = extractPaths(messages);
    if (allPaths.length < 5) {
        // Not enough paths to benefit from shortening
        return { messages, pathMap: {}, charsSaved: 0 };
    }
    const prefixes = findFrequentPrefixes(allPaths);
    if (prefixes.length === 0) {
        return { messages, pathMap: {}, charsSaved: 0 };
    }
    // Working copy of each rewritable content string; null marks messages that
    // are passed through untouched.
    const contents = messages.map((message) => message.content && typeof message.content === "string" ? message.content : null);
    const occursInText = (needle) => contents.some((text) => text !== null && text.includes(needle));
    const pathMap = {};
    let charsSaved = 0;
    let nextCodeNumber = 1;
    // Prefixes arrive longest first
    for (const prefix of prefixes) {
        // Already fully covered by a longer prefix: no code needed
        if (!occursInText(prefix))
            continue;
        let code = `$P${nextCodeNumber++}`;
        // Never reuse a token the text already contains
        while (occursInText(code)) {
            code = `$P${nextCodeNumber++}`;
        }
        const replacement = code + "/";
        for (let i = 0; i < contents.length; i++) {
            const before = contents[i];
            if (before === null)
                continue;
            const after = before.split(prefix).join(replacement);
            charsSaved += before.length - after.length;
            contents[i] = after;
        }
        pathMap[code] = prefix;
    }
    const result = messages.map((message, i) => contents[i] === null ? message : { ...message, content: contents[i] });
    return {
        messages: result,
        pathMap,
        charsSaved,
    };
}
|
|
97
|
+
/**
 * Render a path map as a single-line header for the codebook.
 *
 * @param pathMap Object mapping each code to the prefix it stands for.
 * @returns "[Paths: code=prefix, code=prefix]" in the object's key order,
 *   or an empty string when the map has no entries.
 */
export function generatePathMapHeader(pathMap) {
    const pairs = [];
    for (const code of Object.keys(pathMap)) {
        pairs.push(`${code}=${pathMap[code]}`);
    }
    if (pairs.length === 0) {
        return "";
    }
    return `[Paths: ${pairs.join(", ")}]`;
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Layer 2: Whitespace Normalization
|
|
3
|
+
*
|
|
4
|
+
* Reduces excessive whitespace without changing semantic meaning.
|
|
5
|
+
*
|
|
6
|
+
* Safe for LLM: Tokenizers normalize whitespace anyway.
|
|
7
|
+
* Expected savings: 3-8%
|
|
8
|
+
*/
|
|
9
|
+
import { NormalizedMessage } from "../types.js";
|
|
10
|
+
/** Outcome of running whitespace normalization over a list of messages. */
export interface WhitespaceResult {
    /** The messages after normalization, in their original order. */
    messages: NormalizedMessage[];
    /** Total number of characters removed, summed over all rewritten messages. */
    charsSaved: number;
}
|
|
14
|
+
/**
 * Normalize whitespace in a string.
 *
 * - Convert "\r\n" and "\r" line endings to "\n"
 * - Max 2 consecutive newlines
 * - Remove trailing whitespace from lines
 * - Collapse runs of spaces inside a line to a single space
 * - Normalize tabs to spaces (two spaces per tab)
 * - Trim start/end
 *
 * @param content Text to normalize; an empty string is returned unchanged.
 * @returns The normalized text.
 */
export declare function normalizeWhitespace(content: string): string;
|
|
23
|
+
/**
 * Apply whitespace normalization to all messages.
 *
 * Only messages whose content is a non-empty string are rewritten; every other
 * message is passed through as-is.
 *
 * @param messages Messages to normalize.
 * @returns The resulting messages together with the total characters removed.
 */
export declare function normalizeMessagesWhitespace(messages: NormalizedMessage[]): WhitespaceResult;
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Layer 2: Whitespace Normalization
|
|
3
|
+
*
|
|
4
|
+
* Reduces excessive whitespace without changing semantic meaning.
|
|
5
|
+
*
|
|
6
|
+
* Safe for LLM: Tokenizers normalize whitespace anyway.
|
|
7
|
+
* Expected savings: 3-8%
|
|
8
|
+
*/
|
|
9
|
+
/**
 * Normalize whitespace in a string.
 *
 * - Convert "\r\n" and "\r" line endings to "\n"
 * - Remove trailing whitespace from lines
 * - Max 2 consecutive newlines
 * - Collapse runs of spaces inside a line to a single space
 * - Normalize tabs to 2 spaces
 * - Trim start/end
 *
 * Leading indentation of each line is preserved exactly (apart from tab
 * conversion and the overall trim), so the nesting of indentation-sensitive
 * text such as Python or YAML is not altered.
 *
 * @param content Text to normalize; falsy input is returned unchanged.
 * @returns The normalized text.
 */
export function normalizeWhitespace(content) {
    if (!content)
        return content;
    return content
        // Normalize line endings
        .replace(/\r\n?/g, "\n")
        // Remove trailing whitespace from each line. This runs before the
        // newline cap so that whitespace-only lines count as blank lines.
        .replace(/[ \t]+$/gm, "")
        // Max 2 consecutive newlines (preserve paragraph breaks)
        .replace(/\n{3,}/g, "\n\n")
        // Collapse multiple spaces to one, but only after a non-whitespace
        // character. Matching any non-newline character here would also match
        // a leading space and flatten every indentation of 3+ spaces to 2.
        // No separate indentation-shrinking step is applied: shrinking only
        // deep indents would make deeper levels narrower than shallow ones.
        .replace(/(\S) {2,}/g, "$1 ")
        // Normalize tabs to 2 spaces
        .replace(/\t/g, "  ")
        // Trim
        .trim();
}
|
|
37
|
+
/**
 * Run whitespace normalization over every message.
 *
 * A message is rewritten only when its content is a non-empty string; any
 * other message is returned as the same object. Rewritten messages are shallow
 * copies with the normalized content.
 *
 * @param messages Messages to process.
 * @returns `{ messages, charsSaved }` where `charsSaved` is the summed length
 *   reduction of all rewritten contents.
 */
export function normalizeMessagesWhitespace(messages) {
    let charsSaved = 0;
    const rewrite = (message) => {
        const text = message.content;
        const isNonEmptyText = typeof text === "string" && text.length > 0;
        if (!isNonEmptyText) {
            return message;
        }
        const compacted = normalizeWhitespace(text);
        charsSaved += text.length - compacted.length;
        return { ...message, content: compacted };
    };
    const normalized = messages.map((message) => rewrite(message));
    return { messages: normalized, charsSaved };
}
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM-Safe Context Compression Types
|
|
3
|
+
*
|
|
4
|
+
* Types for the 7-layer compression system that reduces token usage
|
|
5
|
+
* while preserving semantic meaning for LLM queries.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * One element of a multi-part message content array
 * (see `NormalizedMessage.content`).
 */
export type ContentPart = {
    /** Discriminator for the part; any string is accepted by this type. */
    type: string;
    /** Text payload, when the part carries text. */
    text?: string;
    /** Image reference, when the part carries an image. */
    image_url?: {
        url: string;
        // NOTE(review): presumably a provider-specific resolution hint; confirm against the caller.
        detail?: string;
    };
};
|
|
15
|
+
/**
 * Chat message shape consumed and produced by the compression layers.
 */
export interface NormalizedMessage {
    /** Speaker of the message. */
    role: "system" | "user" | "assistant" | "tool";
    /** Plain text, a list of content parts, or null when there is no content. */
    content: string | ContentPart[] | null;
    // NOTE(review): presumably links a "tool" message to the call it answers; confirm.
    tool_call_id?: string;
    /** Tool calls attached to this message, if any. */
    tool_calls?: ToolCall[];
    name?: string;
}
|
|
22
|
+
/** A single function-style tool invocation attached to a message. */
export interface ToolCall {
    /** Identifier of this call. */
    id: string;
    /** Only function calls are representable. */
    type: "function";
    function: {
        /** Name of the function being called. */
        name: string;
        // NOTE(review): arguments are carried as a string, presumably JSON-encoded; confirm.
        arguments: string;
    };
}
|
|
30
|
+
/** Settings controlling which compression layers run and how. */
export interface CompressionConfig {
    /** Master switch for compression. */
    enabled: boolean;
    // NOTE(review): presumably keeps the uncompressed messages alongside the result; confirm.
    preserveRaw: boolean;
    /** Per-layer on/off toggles. */
    layers: {
        deduplication: boolean;
        whitespace: boolean;
        dictionary: boolean;
        paths: boolean;
        jsonCompact: boolean;
        observation: boolean;
        dynamicCodebook: boolean;
    };
    /** Tuning for the dictionary layer. */
    dictionary: {
        maxEntries: number;
        minPhraseLength: number;
        includeCodebookHeader: boolean;
    };
}
|
|
48
|
+
/** Numeric counters reported by the compression layers. */
export interface CompressionStats {
    duplicatesRemoved: number;
    whitespaceSavedChars: number;
    dictionarySubstitutions: number;
    pathsShortened: number;
    jsonCompactedChars: number;
    observationsCompressed: number;
    observationCharsSaved: number;
    dynamicSubstitutions: number;
    dynamicCharsSaved: number;
}
|
|
59
|
+
/** Full outcome of a compression run. */
export interface CompressionResult {
    /** Messages after compression. */
    messages: NormalizedMessage[];
    /** Messages as they were before compression. */
    originalMessages: NormalizedMessage[];
    originalChars: number;
    compressedChars: number;
    // NOTE(review): direction of the ratio (compressed/original or the inverse) is not visible here; confirm.
    compressionRatio: number;
    /** Per-layer counters. */
    stats: CompressionStats;
    /** Code-to-text mappings produced by the dictionary, path and dynamic layers respectively. */
    codebook: Record<string, string>;
    pathMap: Record<string, string>;
    dynamicCodes: Record<string, string>;
}
|
|
70
|
+
/**
 * Snake-case summary of a compression run for logging.
 *
 * Note that `stats` carries only five counters; the observation and dynamic
 * codebook counters of `CompressionStats` have no counterpart here.
 */
export interface CompressionLogData {
    enabled: boolean;
    ratio: number;
    original_chars: number;
    compressed_chars: number;
    stats: {
        duplicates_removed: number;
        whitespace_saved: number;
        dictionary_subs: number;
        paths_shortened: number;
        json_compacted: number;
    };
}
|
|
83
|
+
/** Default compression settings exported by this module. */
export declare const DEFAULT_COMPRESSION_CONFIG: CompressionConfig;
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM-Safe Context Compression Types
|
|
3
|
+
*
|
|
4
|
+
* Types for the 7-layer compression system that reduces token usage
|
|
5
|
+
* while preserving semantic meaning for LLM queries.
|
|
6
|
+
*/
|
|
7
|
+
// Layer toggles for the default configuration. Only layers whose output the
// model can read without decoding anything are switched on.
const DEFAULT_LAYER_TOGGLES = {
    // Safe: removes duplicate messages
    deduplication: true,
    // Safe: normalizes whitespace
    whitespace: true,
    // DISABLED: requires model to understand codebook
    dictionary: false,
    // DISABLED: requires model to understand path codes
    paths: false,
    // Safe: just removes JSON whitespace
    jsonCompact: true,
    // DISABLED: may lose important context
    observation: false,
    // DISABLED: requires model to understand codes
    dynamicCodebook: false,
};
// Dictionary-layer tuning used by the default configuration.
const DEFAULT_DICTIONARY_SETTINGS = {
    maxEntries: 50,
    minPhraseLength: 15,
    // No codebook header needed
    includeCodebookHeader: false,
};
// Default configuration - CONSERVATIVE settings for model compatibility
export const DEFAULT_COMPRESSION_CONFIG = {
    enabled: true,
    preserveRaw: true,
    layers: DEFAULT_LAYER_TOGGLES,
    dictionary: DEFAULT_DICTIONARY_SETTINGS,
};
|