@blockrun/runcode 2.2.7 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -71,6 +71,42 @@ const DIRECT_COMMANDS = {
71
71
  ctx.onEvent({ kind: 'text_delta', text: 'Last commit undone. Changes preserved in staging.\n' });
72
72
  emitDone(ctx);
73
73
  },
74
+ '/tokens': (ctx) => {
75
+ const { estimated, apiAnchored } = getAnchoredTokenCount(ctx.history);
76
+ const contextWindow = getContextWindow(ctx.config.model);
77
+ const pct = (estimated / contextWindow) * 100;
78
+ // Count tool results and thinking blocks
79
+ let toolResults = 0;
80
+ let thinkingBlocks = 0;
81
+ let totalToolChars = 0;
82
+ for (const msg of ctx.history) {
83
+ if (typeof msg.content === 'string')
84
+ continue;
85
+ if (!Array.isArray(msg.content))
86
+ continue;
87
+ for (const part of msg.content) {
88
+ if ('type' in part) {
89
+ if (part.type === 'tool_result') {
90
+ toolResults++;
91
+ const c = typeof part.content === 'string' ? part.content : JSON.stringify(part.content);
92
+ totalToolChars += c.length;
93
+ }
94
+ if (part.type === 'thinking')
95
+ thinkingBlocks++;
96
+ }
97
+ }
98
+ }
99
+ ctx.onEvent({ kind: 'text_delta', text: `**Token Usage**\n` +
100
+ ` Estimated: ~${estimated.toLocaleString()} tokens ${apiAnchored ? '(API-anchored)' : '(estimated)'}\n` +
101
+ ` Context: ${(contextWindow / 1000).toFixed(0)}k window (${pct.toFixed(1)}% used)\n` +
102
+ ` Messages: ${ctx.history.length}\n` +
103
+ ` Tool results: ${toolResults} (${(totalToolChars / 1024).toFixed(0)}KB)\n` +
104
+ ` Thinking: ${thinkingBlocks} blocks\n` +
105
+ (pct > 80 ? ' ⚠ Near limit — run /compact\n' : '') +
106
+ (pct > 60 ? '' : ' ✓ Healthy\n')
107
+ });
108
+ emitDone(ctx);
109
+ },
74
110
  '/help': (ctx) => {
75
111
  ctx.onEvent({ kind: 'text_delta', text: `**RunCode Commands**\n\n` +
76
112
  ` **Coding:** /commit /review /test /fix /debug /explain /search /find /refactor /scaffold\n` +
@@ -204,15 +204,19 @@ function formatForSummarization(messages) {
204
204
  * Pick a cheaper/faster model for compaction to save cost.
205
205
  */
206
206
  function pickCompactionModel(primaryModel) {
207
- // Use a fast model for summarization no need for the expensive primary
208
- if (primaryModel.includes('opus') || primaryModel.includes('gpt-5.4-pro')) {
207
+ // Use cheapest capable model for summarization to save cost
208
+ // Tier down: opus/pro sonnet, sonnet → haiku, everything else → flash (cheapest capable)
209
+ if (primaryModel.includes('opus') || primaryModel.includes('pro')) {
209
210
  return 'anthropic/claude-sonnet-4.6';
210
211
  }
211
- if (primaryModel.includes('sonnet')) {
212
+ if (primaryModel.includes('sonnet') || primaryModel.includes('gpt-5.4') || primaryModel.includes('gemini-2.5-pro')) {
212
213
  return 'anthropic/claude-haiku-4.5-20251001';
213
214
  }
214
- // For cheaper models, just use the same one
215
- return primaryModel;
215
+ if (primaryModel.includes('haiku') || primaryModel.includes('mini') || primaryModel.includes('nano')) {
216
+ return 'google/gemini-2.5-flash'; // Cheapest capable model
217
+ }
218
+ // Free/unknown models — use flash
219
+ return 'google/gemini-2.5-flash';
216
220
  }
217
221
  /**
218
222
  * Emergency fallback: drop oldest messages until under threshold.
@@ -7,9 +7,10 @@ import { ModelClient } from './llm.js';
7
7
  import { autoCompactIfNeeded, microCompact } from './compact.js';
8
8
  import { estimateHistoryTokens, updateActualTokens, resetTokenAnchor } from './tokens.js';
9
9
  import { handleSlashCommand } from './commands.js';
10
+ import { compressHistory } from '../compression/adapter.js';
10
11
  import { PermissionManager } from './permissions.js';
11
12
  import { StreamingExecutor } from './streaming-executor.js';
12
- import { optimizeHistory, CAPPED_MAX_TOKENS, ESCALATED_MAX_TOKENS } from './optimize.js';
13
+ import { optimizeHistory, CAPPED_MAX_TOKENS, ESCALATED_MAX_TOKENS, getMaxOutputTokens } from './optimize.js';
13
14
  import { recordUsage } from '../stats/tracker.js';
14
15
  import { estimateCost } from '../pricing.js';
15
16
  import { createSessionId, appendToSession, updateSessionMeta, pruneOldSessions, } from '../session/storage.js';
@@ -237,13 +238,14 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
237
238
  onAbortReady?.(() => abort.abort());
238
239
  let loopCount = 0;
239
240
  let recoveryAttempts = 0;
241
+ let compactFailures = 0;
240
242
  let maxTokensOverride;
241
243
  let lastActivity = Date.now();
242
244
  // Agent loop for this user message
243
245
  while (loopCount < maxTurns) {
244
246
  loopCount++;
245
247
  // ── Token optimization pipeline ──
246
- // 1. Strip thinking, budget tool results, time-based cleanup
248
+ // 1. Strip thinking, budget tool results, time-based cleanup (always — cheap)
247
249
  const optimized = optimizeHistory(history, {
248
250
  debug: config.debug,
249
251
  lastActivityTimestamp: lastActivity,
@@ -252,24 +254,47 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
252
254
  history.length = 0;
253
255
  history.push(...optimized);
254
256
  }
255
- // 2. Microcompact: clear old tool results to save tokens
256
- const microCompacted = microCompact(history, 8);
257
- if (microCompacted !== history) {
258
- history.length = 0;
259
- history.push(...microCompacted);
257
+ // 2. Microcompact: only when history has >15 messages (skip for short conversations)
258
+ if (history.length > 15) {
259
+ const microCompacted = microCompact(history, 8);
260
+ if (microCompacted !== history) {
261
+ history.length = 0;
262
+ history.push(...microCompacted);
263
+ }
260
264
  }
261
- // Auto-compact: summarize history if approaching context limit
262
- const { history: compacted, compacted: didCompact } = await autoCompactIfNeeded(history, config.model, client, config.debug);
263
- if (didCompact) {
264
- history.length = 0;
265
- history.push(...compacted);
266
- resetTokenAnchor(); // Reset anchor after compaction — estimates will be used
267
- if (config.debug) {
268
- console.error(`[runcode] History compacted: ~${estimateHistoryTokens(history)} tokens`);
265
+ // 3. Context compression: multi-layer compression for 15-40% token savings
266
+ if (history.length > 10) {
267
+ const compressed = await compressHistory(history, config.debug);
268
+ if (compressed) {
269
+ history.length = 0;
270
+ history.push(...compressed.history);
271
+ }
272
+ }
273
+ // 4. Auto-compact: summarize history if approaching context limit
274
+ // Circuit breaker: stop retrying after 3 consecutive failures
275
+ if (compactFailures < 3) {
276
+ try {
277
+ const { history: compacted, compacted: didCompact } = await autoCompactIfNeeded(history, config.model, client, config.debug);
278
+ if (didCompact) {
279
+ history.length = 0;
280
+ history.push(...compacted);
281
+ resetTokenAnchor();
282
+ compactFailures = 0;
283
+ if (config.debug) {
284
+ console.error(`[runcode] History compacted: ~${estimateHistoryTokens(history)} tokens`);
285
+ }
286
+ }
287
+ }
288
+ catch (compactErr) {
289
+ compactFailures++;
290
+ if (config.debug) {
291
+ console.error(`[runcode] Compaction failed (${compactFailures}/3): ${compactErr.message}`);
292
+ }
269
293
  }
270
294
  }
271
295
  const systemPrompt = config.systemInstructions.join('\n\n');
272
- let maxTokens = maxTokensOverride ?? CAPPED_MAX_TOKENS;
296
+ const modelMaxOut = getMaxOutputTokens(config.model);
297
+ let maxTokens = Math.min(maxTokensOverride ?? CAPPED_MAX_TOKENS, modelMaxOut);
273
298
  let responseParts = [];
274
299
  let usage;
275
300
  let stopReason;
@@ -9,21 +9,18 @@
9
9
  * 5. Pre-compact stripping — remove images/docs before summarization
10
10
  */
11
11
  import type { Dialogue } from './types.js';
12
- /** Default max_tokens (low to save slot reservation) */
13
- export declare const CAPPED_MAX_TOKENS = 8192;
12
+ /** Default max_tokens (low to save output slot reservation) */
13
+ export declare const CAPPED_MAX_TOKENS = 16384;
14
14
  /** Escalated max_tokens after hitting the cap */
15
15
  export declare const ESCALATED_MAX_TOKENS = 65536;
16
+ /** Get max output tokens for a model */
17
+ export declare function getMaxOutputTokens(model: string): number;
16
18
  /**
17
19
  * Cap tool result sizes to prevent context bloat.
18
20
  * Large results (>50K chars) are truncated with a preview.
19
21
  * Per-message aggregate is also capped at 200K chars.
20
22
  */
21
23
  export declare function budgetToolResults(history: Dialogue[]): Dialogue[];
22
- /**
23
- * Remove thinking blocks from older assistant messages.
24
- * Keeps thinking only in the most recent assistant message.
25
- * Thinking blocks are large and not needed for context after the decision is made.
26
- */
27
24
  export declare function stripOldThinking(history: Dialogue[]): Dialogue[];
28
25
  /**
29
26
  * After an idle gap (>60 min), clear old tool results.
@@ -15,10 +15,25 @@ const MAX_TOOL_RESULT_CHARS = 50_000;
15
15
  const MAX_TOOL_RESULTS_PER_MESSAGE_CHARS = 200_000;
16
16
  /** Preview size when truncating */
17
17
  const PREVIEW_CHARS = 2_000;
18
- /** Default max_tokens (low to save slot reservation) */
19
- export const CAPPED_MAX_TOKENS = 8_192;
18
+ /** Default max_tokens (low to save output slot reservation) */
19
+ export const CAPPED_MAX_TOKENS = 16_384;
20
20
  /** Escalated max_tokens after hitting the cap */
21
21
  export const ESCALATED_MAX_TOKENS = 65_536;
22
/** Per-model max output tokens — prevents requesting more than the model supports */
// A Map (not a plain object) so arbitrary model strings such as "toString"
// or "constructor" can never collide with Object.prototype and return a
// non-numeric value from the lookup.
const MODEL_MAX_OUTPUT = new Map([
    ['anthropic/claude-opus-4.6', 32_000],
    ['anthropic/claude-sonnet-4.6', 64_000],
    ['anthropic/claude-haiku-4.5-20251001', 16_384],
    ['openai/gpt-5.4', 32_768],
    ['openai/gpt-5-mini', 16_384],
    ['google/gemini-2.5-pro', 65_536],
    ['google/gemini-2.5-flash', 65_536],
    ['deepseek/deepseek-chat', 8_192],
]);
/**
 * Get max output tokens for a model.
 * Unknown models fall back to a conservative 16_384.
 */
export function getMaxOutputTokens(model) {
    return MODEL_MAX_OUTPUT.get(model) ?? 16_384;
}
22
37
  /** Idle gap (minutes) after which old tool results are cleared */
23
38
  const IDLE_GAP_THRESHOLD_MINUTES = 60;
24
39
  /** Number of recent tool results to keep during time-based cleanup */
@@ -86,26 +101,29 @@ export function budgetToolResults(history) {
86
101
  // ─── 2. Thinking Block Stripping ───────────────────────────────────────────
87
102
  /**
88
103
  * Remove thinking blocks from older assistant messages.
89
- * Keeps thinking only in the most recent assistant message.
90
- * Thinking blocks are large and not needed for context after the decision is made.
104
+ * Keeps thinking only in the most recent N assistant messages (default: last 2 turns).
105
+ * Older thinking blocks are large and not needed after the decision is made.
91
106
  */
107
+ const KEEP_THINKING_TURNS = 2;
92
108
  export function stripOldThinking(history) {
93
- // Find the last assistant message index
94
- let lastAssistantIdx = -1;
109
+ // Find the last N assistant message indices to preserve their thinking
110
+ const assistantIndices = [];
95
111
  for (let i = history.length - 1; i >= 0; i--) {
96
112
  if (history[i].role === 'assistant') {
97
- lastAssistantIdx = i;
98
- break;
113
+ assistantIndices.push(i);
114
+ if (assistantIndices.length >= KEEP_THINKING_TURNS)
115
+ break;
99
116
  }
100
117
  }
101
- if (lastAssistantIdx <= 0)
118
+ if (assistantIndices.length === 0)
102
119
  return history;
120
+ const keepSet = new Set(assistantIndices);
103
121
  const result = [];
104
122
  let modified = false;
105
123
  for (let i = 0; i < history.length; i++) {
106
124
  const msg = history[i];
107
- // Only strip from older assistant messages (not the latest)
108
- if (msg.role === 'assistant' && i < lastAssistantIdx && Array.isArray(msg.content)) {
125
+ // Strip thinking from assistant messages NOT in the keep set
126
+ if (msg.role === 'assistant' && !keepSet.has(i) && Array.isArray(msg.content)) {
109
127
  const filtered = msg.content.filter((part) => part.type !== 'thinking');
110
128
  if (filtered.length < msg.content.length) {
111
129
  modified = true;
@@ -64,7 +64,8 @@ export function resetTokenAnchor() {
64
64
  * JSON-heavy content uses 2 bytes/token; general text uses 4.
65
65
  */
66
66
  export function estimateTokens(text, bytesPerToken = DEFAULT_BYTES_PER_TOKEN) {
67
- return Math.ceil(Buffer.byteLength(text, 'utf-8') / bytesPerToken);
67
+ // Pad by 4/3 (~33%) for conservative estimation — better to over-count than under-count
68
+ return Math.ceil(Buffer.byteLength(text, 'utf-8') / bytesPerToken * 1.33);
68
69
  }
69
70
  /**
70
71
  * Estimate tokens for a content part.
@@ -0,0 +1,13 @@
1
+ /**
2
+ * Adapter between brcc's Dialogue type and the compression lib's NormalizedMessage type.
3
+ */
4
+ import type { Dialogue } from '../agent/types.js';
5
+ /**
6
+ * Compress conversation history to reduce token usage.
7
+ * Returns compressed Dialogue[] with stats.
8
+ */
9
+ export declare function compressHistory(history: Dialogue[], debug?: boolean): Promise<{
10
+ history: Dialogue[];
11
+ saved: number;
12
+ ratio: number;
13
+ } | null>;
@@ -0,0 +1,104 @@
1
+ /**
2
+ * Adapter between brcc's Dialogue type and the compression lib's NormalizedMessage type.
3
+ */
4
+ import { compressContext, shouldCompress } from './index.js';
5
+ /**
6
+ * Convert brcc Dialogue[] to NormalizedMessage[] for compression.
7
+ */
8
/**
 * Convert brcc Dialogue[] to NormalizedMessage[] for compression.
 * String content passes through as-is; structured parts are flattened:
 * text parts are kept, tool results become "[Tool result: …]" strings,
 * tool_use parts become OpenAI-style tool_calls, thinking parts are dropped.
 */
function dialogueToNormalized(history) {
    return history.map((msg) => {
        if (typeof msg.content === 'string') {
            return { role: msg.role, content: msg.content };
        }
        const textPieces = [];
        const calls = [];
        for (const part of msg.content) {
            // Parts without a 'type' discriminator are ignored entirely.
            if (!('type' in part)) {
                continue;
            }
            switch (part.type) {
                case 'text':
                    textPieces.push(part.text);
                    break;
                case 'tool_use': {
                    const inv = part;
                    calls.push({
                        id: inv.id,
                        type: 'function',
                        function: { name: inv.name, arguments: JSON.stringify(inv.input) },
                    });
                    break;
                }
                case 'tool_result': {
                    const res = part;
                    const body = typeof res.content === 'string' ? res.content : JSON.stringify(res.content);
                    textPieces.push(`[Tool result: ${body}]`);
                    break;
                }
                default:
                    // 'thinking' (and anything unknown) is intentionally skipped —
                    // thinking is already handled by stripOldThinking.
                    break;
            }
        }
        const normalized = {
            role: msg.role,
            // An empty parts list normalizes to null content.
            content: textPieces.join('\n') || null,
        };
        if (calls.length > 0) {
            normalized.tool_calls = calls;
        }
        return normalized;
    });
}
49
/**
 * Compress conversation history to reduce token usage.
 * Returns compressed Dialogue[] with stats, or null when compression is
 * skipped (shouldCompress says no) or the compression lib throws — callers
 * treat null as "no change".
 */
export async function compressHistory(history, debug) {
    // Convert to NormalizedMessage format
    const normalized = dialogueToNormalized(history);
    // Check if compression is worthwhile
    if (!shouldCompress(normalized)) {
        return null;
    }
    try {
        const result = await compressContext(normalized);
        // presumably compressionRatio = compressed/original, so (1 - ratio) is
        // the fraction saved — verify against the compression lib's types
        const savedPct = Math.round((1 - result.compressionRatio) * 100);
        if (debug) {
            console.error(`[runcode] Compressed context: ${result.originalChars} → ${result.compressedChars} chars (${savedPct}% saved)`);
            if (result.stats) {
                // Log only layers that actually did something (numeric and > 0).
                const layers = Object.entries(result.stats)
                    .filter(([, v]) => typeof v === 'number' && v > 0)
                    .map(([k, v]) => `${k}: ${v}`)
                    .join(', ');
                if (layers)
                    console.error(`[runcode] Compression layers: ${layers}`);
            }
        }
        // Convert compressed messages back to Dialogue format.
        // We only adopt compressed content for plain-string messages, keeping
        // structured messages (tool_use/tool_result) exactly as they were.
        // NOTE(review): this positional zip assumes compressContext returns
        // messages 1:1 in the same order as its input; if the lib ever merges,
        // drops, or reorders messages, history[i] and result.messages[i] would
        // mis-pair — confirm against the compression lib's contract.
        const compressed = [];
        for (let i = 0; i < history.length && i < result.messages.length; i++) {
            const original = history[i];
            const comp = result.messages[i];
            if (typeof original.content === 'string' && typeof comp.content === 'string') {
                compressed.push({ role: original.role, content: comp.content });
            }
            else {
                // Keep complex content as-is (tool_use/tool_result structure can't be modified)
                compressed.push(original);
            }
        }
        // Append any remaining original messages (when the lib returned fewer
        // messages than it was given).
        for (let i = result.messages.length; i < history.length; i++) {
            compressed.push(history[i]);
        }
        return {
            history: compressed,
            saved: result.originalChars - result.compressedChars,
            ratio: result.compressionRatio,
        };
    }
    catch (err) {
        // Best-effort: compression failure is never fatal — fall back to the
        // uncompressed history by returning null.
        if (debug) {
            console.error(`[runcode] Compression failed: ${err.message}`);
        }
        return null;
    }
}
@@ -0,0 +1,23 @@
1
+ /**
2
+ * Dictionary Codebook
3
+ *
4
+ * Static dictionary of frequently repeated phrases observed in LLM prompts.
5
+ * Built from analysis of BlockRun production logs.
6
+ *
7
+ * Format: Short code ($XX) -> Long phrase
8
+ * The LLM receives a codebook header and decodes in-context.
9
+ */
10
+ export declare const STATIC_CODEBOOK: Record<string, string>;
11
+ /**
12
+ * Get the inverse codebook (phrase -> code), used on the compression side.
13
+ */
14
+ export declare function getInverseCodebook(): Record<string, string>;
15
+ /**
16
+ * Generate the codebook header for inclusion in system message.
17
+ * LLMs can decode in-context using this header.
18
+ */
19
+ export declare function generateCodebookHeader(usedCodes: Set<string>, pathMap?: Record<string, string>): string;
20
+ /**
21
+ * Decompress a string using the codebook (for logging).
22
+ */
23
+ export declare function decompressContent(content: string, codebook?: Record<string, string>): string;
@@ -0,0 +1,118 @@
1
+ /**
2
+ * Dictionary Codebook
3
+ *
4
+ * Static dictionary of frequently repeated phrases observed in LLM prompts.
5
+ * Built from analysis of BlockRun production logs.
6
+ *
7
+ * Format: Short code ($XX) -> Long phrase
8
+ * The LLM receives a codebook header and decodes in-context.
9
+ */
10
// Static codebook - common patterns from system prompts
// Ordered by expected frequency and impact
// NOTE(review): decompressContent does plain substring replacement, so these
// codes must never occur literally in genuine content — confirm strings like
// "$T01" cannot appear verbatim in user/tool text.
export const STATIC_CODEBOOK = {
    // High-impact: OpenClaw/Agent system prompt patterns (very common)
    "$OC01": "unbrowse_", // Common prefix in tool names
    "$OC02": "<location>",
    "$OC03": "</location>",
    "$OC04": "<name>",
    "$OC05": "</name>",
    "$OC06": "<description>",
    "$OC07": "</description>",
    "$OC08": "(may need login)",
    "$OC09": "API skill for OpenClaw",
    "$OC10": "endpoints",
    // Skill/tool markers
    "$SK01": "<available_skills>",
    "$SK02": "</available_skills>",
    "$SK03": "<skill>",
    "$SK04": "</skill>",
    // Schema patterns (very common in tool definitions)
    "$T01": 'type: "function"',
    "$T02": '"type": "function"',
    "$T03": '"type": "string"',
    "$T04": '"type": "object"',
    "$T05": '"type": "array"',
    "$T06": '"type": "boolean"',
    "$T07": '"type": "number"',
    // Common descriptions
    "$D01": "description:",
    "$D02": '"description":',
    // Common instructions
    "$I01": "You are a personal assistant",
    "$I02": "Tool names are case-sensitive",
    "$I03": "Call tools exactly as listed",
    "$I04": "Use when",
    "$I05": "without asking",
    // Safety phrases
    "$S01": "Do not manipulate or persuade",
    "$S02": "Prioritize safety and human oversight",
    "$S03": "unless explicitly requested",
    // JSON patterns
    "$J01": '"required": ["',
    "$J02": '"properties": {',
    "$J03": '"additionalProperties": false',
    // Heartbeat patterns
    "$H01": "HEARTBEAT_OK",
    "$H02": "Read HEARTBEAT.md if it exists",
    // Role markers
    "$R01": '"role": "system"',
    "$R02": '"role": "user"',
    "$R03": '"role": "assistant"',
    "$R04": '"role": "tool"',
    // Common endings/phrases
    "$E01": "would you like to",
    "$E02": "Let me know if you",
    "$E03": "internal APIs",
    "$E04": "session cookies",
    // BlockRun model aliases (common in prompts)
    "$M01": "blockrun/",
    "$M02": "openai/",
    "$M03": "anthropic/",
    "$M04": "google/",
    "$M05": "xai/",
};
74
/**
 * Build the inverse mapping (phrase -> code) of STATIC_CODEBOOK.
 * This is the compression-side lookup — finding the short code for a known
 * phrase. (Decompression uses STATIC_CODEBOOK directly; see decompressContent.)
 */
export function getInverseCodebook() {
    const inverse = {};
    for (const [code, phrase] of Object.entries(STATIC_CODEBOOK)) {
        inverse[phrase] = code;
    }
    return inverse;
}
84
/**
 * Generate the codebook header for inclusion in system message.
 * LLMs can decode in-context using this header.
 * Returns "" when there is nothing to declare.
 */
export function generateCodebookHeader(usedCodes, pathMap = {}) {
    const pathCodes = Object.keys(pathMap);
    if (usedCodes.size === 0 && pathCodes.length === 0) {
        return "";
    }
    const sections = [];
    // Declare only the dictionary codes actually used in this prompt.
    if (usedCodes.size > 0) {
        const dictBody = [...usedCodes]
            .map((code) => `${code}=${STATIC_CODEBOOK[code]}`)
            .join(", ");
        sections.push(`[Dict: ${dictBody}]`);
    }
    // Declare the shortened file-path aliases, if any.
    if (pathCodes.length > 0) {
        const pathBody = pathCodes
            .map((code) => `${code}=${pathMap[code]}`)
            .join(", ");
        sections.push(`[Paths: ${pathBody}]`);
    }
    return sections.join("\n");
}
109
/**
 * Decompress a string using the codebook (for logging).
 * Every occurrence of each code is replaced with its full phrase.
 */
export function decompressContent(content, codebook = STATIC_CODEBOOK) {
    return Object.entries(codebook).reduce((acc, [code, phrase]) => acc.split(code).join(phrase), content);
}
@@ -0,0 +1,32 @@
1
+ /**
2
+ * LLM-Safe Context Compression
3
+ *
4
+ * Reduces token usage by 15-40% while preserving semantic meaning.
5
+ * Implements 7 compression layers inspired by claw-compactor.
6
+ *
7
+ * Usage:
8
+ * const result = await compressContext(messages);
9
+ * // result.messages -> compressed version to send to provider
10
+ * // result.originalMessages -> original for logging
11
+ */
12
+ import { NormalizedMessage, CompressionConfig, CompressionResult } from "./types.js";
13
+ export * from "./types.js";
14
+ export { STATIC_CODEBOOK } from "./codebook.js";
15
+ /**
16
+ * Main compression function.
17
+ *
18
+ * Applies 5 layers in sequence:
19
+ * 1. Deduplication - Remove exact duplicate messages
20
+ * 2. Whitespace - Normalize excessive whitespace
21
+ * 3. Dictionary - Replace common phrases with codes
22
+ * 4. Paths - Shorten repeated file paths
23
+ * 5. JSON - Compact JSON in tool calls
24
+ *
25
+ * Then prepends a codebook header for the LLM to decode in-context.
26
+ */
27
+ export declare function compressContext(messages: NormalizedMessage[], config?: Partial<CompressionConfig>): Promise<CompressionResult>;
28
+ /**
29
+ * Quick check if compression would benefit these messages.
30
+ * Returns true if messages are large enough to warrant compression.
31
+ */
32
+ export declare function shouldCompress(messages: NormalizedMessage[]): boolean;