@blockrun/runcode 2.4.0 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -108,13 +108,16 @@ const DIRECT_COMMANDS = {
108
108
  emitDone(ctx);
109
109
  },
110
110
  '/help': (ctx) => {
111
+ const ultrathinkOn = ctx.config.ultrathink;
111
112
  ctx.onEvent({ kind: 'text_delta', text: `**RunCode Commands**\n\n` +
112
113
  ` **Coding:** /commit /review /test /fix /debug /explain /search /find /refactor /scaffold\n` +
113
114
  ` **Git:** /push /pr /undo /status /diff /log /branch /stash /unstash\n` +
114
115
  ` **Analysis:** /security /lint /optimize /todo /deps /clean /migrate /doc\n` +
115
- ` **Session:** /plan /execute /compact /retry /sessions /resume /context /tasks\n` +
116
- ` **Info:** /model /wallet /cost /mcp /doctor /version /bug /help\n` +
117
- ` **UI:** /clear /exit\n`
116
+ ` **Session:** /plan /ultraplan /execute /compact /retry /sessions /resume /context /tasks\n` +
117
+ ` **Power:** /ultrathink [query] /ultraplan /dump\n` +
118
+ ` **Info:** /model /wallet /cost /tokens /mcp /doctor /version /bug /help\n` +
119
+ ` **UI:** /clear /exit\n` +
120
+ (ultrathinkOn ? `\n Ultrathink: ON\n` : '')
118
121
  });
119
122
  emitDone(ctx);
120
123
  },
@@ -201,6 +204,27 @@ const DIRECT_COMMANDS = {
201
204
  }
202
205
  emitDone(ctx);
203
206
  },
207
+ '/ultrathink': (ctx) => {
208
+ const cfg = ctx.config;
209
+ cfg.ultrathink = !cfg.ultrathink;
210
+ if (cfg.ultrathink) {
211
+ ctx.onEvent({ kind: 'text_delta', text: '**Ultrathink mode ON.** Extended reasoning active — the model will think deeply before responding.\n' +
212
+ 'Use `/ultrathink` again to disable, or `/ultrathink <query>` to send a one-shot deep analysis.\n'
213
+ });
214
+ }
215
+ else {
216
+ ctx.onEvent({ kind: 'text_delta', text: '**Ultrathink mode OFF.** Normal response mode restored.\n' });
217
+ }
218
+ emitDone(ctx);
219
+ },
220
+ '/dump': (ctx) => {
221
+ const instructions = ctx.config.systemInstructions;
222
+ const joined = instructions.join('\n\n---\n\n');
223
+ ctx.onEvent({ kind: 'text_delta', text: `**System Prompt** (${instructions.length} section${instructions.length !== 1 ? 's' : ''}):\n\n` +
224
+ `\`\`\`\n${joined.slice(0, 4000)}${joined.length > 4000 ? `\n... (${joined.length - 4000} chars truncated)` : ''}\n\`\`\`\n`
225
+ });
226
+ emitDone(ctx);
227
+ },
204
228
  '/execute': (ctx) => {
205
229
  if (ctx.config.permissionMode !== 'plan') {
206
230
  ctx.onEvent({ kind: 'text_delta', text: 'Not in plan mode. Use /plan to enter.\n' });
@@ -264,9 +288,19 @@ const REWRITE_COMMANDS = {
264
288
  '/migrate': 'Check for pending database migrations, outdated dependencies, or breaking changes that need addressing. List required migration steps.',
265
289
  '/clean': 'Find and remove dead code: unused imports, unreachable code, commented-out blocks, unused variables and functions. Show what would be removed before making changes.',
266
290
  '/tasks': 'List all current tasks using the Task tool.',
291
+ '/ultraplan': 'Enter ultraplan mode: create a detailed, step-by-step implementation plan before writing any code. ' +
292
+ 'First, thoroughly read ALL relevant files. Map out every dependency and potential side effect. ' +
293
+ 'Identify edge cases, security considerations, and performance implications. ' +
294
+ 'Then produce a numbered implementation plan with specific file paths, function names, and code changes. ' +
295
+ 'Do NOT write any code yet — only the plan.',
267
296
  };
268
297
  // Commands with arguments (prefix match → rewrite)
269
298
  const ARG_COMMANDS = [
299
+ { prefix: '/ultrathink ', rewrite: (a) => `Think deeply, carefully, and thoroughly before responding. ` +
300
+ `Consider multiple approaches, check edge cases, reason through implications step by step, ` +
301
+ `and challenge your initial assumptions. Take your time — quality of reasoning matters more than speed. ` +
302
+ `Now respond to: ${a}`
303
+ },
270
304
  { prefix: '/explain ', rewrite: (a) => `Read and explain the code in ${a}. Cover: what it does, key functions/classes, how it connects to the rest of the codebase.` },
271
305
  { prefix: '/search ', rewrite: (a) => `Search the codebase for "${a}" using Grep. Show the matching files and relevant code context.` },
272
306
  { prefix: '/find ', rewrite: (a) => `Find files matching the pattern "${a}" using Glob. Show the results.` },
@@ -50,8 +50,9 @@ You have access to tools for reading, writing, editing files, running shell comm
50
50
  # Slash Commands Available
51
51
  The user can type these shortcuts: /commit, /review, /test, /fix, /debug, /explain <file>,
52
52
  /search <query>, /find <pattern>, /refactor <desc>, /init, /todo, /deps, /diff, /status,
53
- /log, /branch, /stash, /plan, /execute, /compact, /retry, /sessions, /resume, /tasks,
54
- /context, /doctor, /model, /cost, /clear, /help, /exit.`;
53
+ /log, /branch, /stash, /plan, /ultraplan, /execute, /compact, /retry, /sessions, /resume,
54
+ /tasks, /context, /doctor, /tokens, /model, /cost, /dump, /ultrathink [query], /clear,
55
+ /help, /exit.`;
55
56
  /**
56
57
  * Build the full system instructions array for a session.
57
58
  */
@@ -5,9 +5,9 @@
5
5
  */
6
6
  import { ModelClient } from './llm.js';
7
7
  import { autoCompactIfNeeded, microCompact } from './compact.js';
8
- import { estimateHistoryTokens, updateActualTokens, resetTokenAnchor } from './tokens.js';
8
+ import { estimateHistoryTokens, updateActualTokens, resetTokenAnchor, getAnchoredTokenCount, getContextWindow } from './tokens.js';
9
9
  import { handleSlashCommand } from './commands.js';
10
- import { compressHistory } from '../compression/adapter.js';
10
+ import { reduceTokens } from './reduce.js';
11
11
  import { PermissionManager } from './permissions.js';
12
12
  import { StreamingExecutor } from './streaming-executor.js';
13
13
  import { optimizeHistory, CAPPED_MAX_TOKENS, ESCALATED_MAX_TOKENS, getMaxOutputTokens } from './optimize.js';
@@ -214,6 +214,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
214
214
  // Session persistence
215
215
  const sessionId = createSessionId();
216
216
  let turnCount = 0;
217
+ let tokenBudgetWarned = false; // Emit token budget warning at most once per session
217
218
  pruneOldSessions(sessionId); // Cleanup old sessions on start, protect current
218
219
  while (true) {
219
220
  let input = await getUserInput();
@@ -254,7 +255,13 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
254
255
  history.length = 0;
255
256
  history.push(...optimized);
256
257
  }
257
- // 2. Microcompact: only when history has >15 messages (skip for short conversations)
258
+ // 2. Token reduction: age old results, normalize whitespace, trim verbose messages
259
+ const reduced = reduceTokens(history, config.debug);
260
+ if (reduced !== history) {
261
+ history.length = 0;
262
+ history.push(...reduced);
263
+ }
264
+ // 3. Microcompact: only when history has >15 messages (skip for short conversations)
258
265
  if (history.length > 15) {
259
266
  const microCompacted = microCompact(history, 8);
260
267
  if (microCompacted !== history) {
@@ -262,15 +269,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
262
269
  history.push(...microCompacted);
263
270
  }
264
271
  }
265
- // 3. Context compression: 7-layer compression for 15-40% token savings
266
- if (history.length > 10) {
267
- const compressed = await compressHistory(history, config.debug);
268
- if (compressed) {
269
- history.length = 0;
270
- history.push(...compressed.history);
271
- }
272
- }
273
- // 4. Auto-compact: summarize history if approaching context limit
272
+ // 4. Auto-compact: summarize history if approaching context limit
274
273
  // Circuit breaker: stop retrying after 3 consecutive failures
275
274
  if (compactFailures < 3) {
276
275
  try {
@@ -292,7 +291,18 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
292
291
  }
293
292
  }
294
293
  }
295
- const systemPrompt = config.systemInstructions.join('\n\n');
294
+ // Inject ultrathink instruction when mode is active
295
+ const systemParts = [...config.systemInstructions];
296
+ if (config.ultrathink) {
297
+ systemParts.push('# Ultrathink Mode\n' +
298
+ 'You are in deep reasoning mode. Before responding to any request:\n' +
299
+ '1. Thoroughly analyze the problem from multiple angles\n' +
300
+ '2. Consider edge cases, failure modes, and second-order effects\n' +
301
+ '3. Challenge your initial assumptions before committing to an approach\n' +
302
+ '4. Think step by step — show your reasoning explicitly when it adds value\n' +
303
+ 'Prioritize correctness and thoroughness over speed.');
304
+ }
305
+ const systemPrompt = systemParts.join('\n\n');
296
306
  const modelMaxOut = getMaxOutputTokens(config.model);
297
307
  let maxTokens = Math.min(maxTokensOverride ?? CAPPED_MAX_TOKENS, modelMaxOut);
298
308
  let responseParts = [];
@@ -435,6 +445,19 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
435
445
  turnCount,
436
446
  messageCount: history.length,
437
447
  });
448
+ // Token budget warning — emit once per session when crossing 70%
449
+ if (!tokenBudgetWarned) {
450
+ const { estimated } = getAnchoredTokenCount(history);
451
+ const contextWindow = getContextWindow(config.model);
452
+ const pct = (estimated / contextWindow) * 100;
453
+ if (pct >= 70) {
454
+ tokenBudgetWarned = true;
455
+ onEvent({
456
+ kind: 'text_delta',
457
+ text: `\n\n> **Token budget: ${pct.toFixed(0)}% used** (~${estimated.toLocaleString()} / ${(contextWindow / 1000).toFixed(0)}k tokens). Run \`/compact\` to free up space.\n`,
458
+ });
459
+ }
460
+ }
438
461
  onEvent({ kind: 'turn_done', reason: 'completed' });
439
462
  break;
440
463
  }
@@ -0,0 +1,42 @@
1
+ /**
2
+ * Token Reduction for runcode.
3
+ * Original implementation — reduces context size through intelligent pruning.
4
+ *
5
+ * Strategy: instead of compression/encoding, we PRUNE redundant content.
6
+ * The model doesn't need verbose tool outputs from 20 turns ago.
7
+ *
8
+ * Four reduction passes:
9
+ * 1. Tool result aging — progressively shorten old tool results
10
+ * 2. Whitespace normalization — remove excessive blank lines and indentation
11
+ * 3. Assistant message trimming — truncate long assistant messages from old turns
+ * 4. Deduplication — remove consecutive duplicate messages
12
+ */
13
+ import type { Dialogue } from './types.js';
14
+ /**
15
+ * Progressively shorten tool results based on age.
16
+ * Recent results: keep full. Older results: keep summary. Very old: keep one line.
17
+ *
18
+ * This is the biggest token saver — a 10KB bash output from 20 turns ago
19
+ * can be reduced to "✓ Bash: ran npm test (exit 0)" saving ~2500 tokens.
20
+ */
21
+ export declare function ageToolResults(history: Dialogue[]): Dialogue[];
22
+ /**
23
+ * Normalize whitespace in text messages.
24
+ * - Collapse 3+ blank lines to 2
25
+ * - Remove trailing spaces
26
+ * - Reduce indentation beyond 8 spaces to 8
27
+ */
28
+ export declare function normalizeWhitespace(history: Dialogue[]): Dialogue[];
29
+ /**
30
+ * Trim very long assistant text messages from old turns.
31
+ * Recent messages: keep full. Old long messages: keep roughly the first 1500 chars (500 per text part for multi-part content).
32
+ */
33
+ export declare function trimOldAssistantMessages(history: Dialogue[]): Dialogue[];
34
+ /**
35
+ * Remove consecutive duplicate messages (same role + same content).
36
+ */
37
+ export declare function deduplicateMessages(history: Dialogue[]): Dialogue[];
38
+ /**
39
+ * Run all token reduction passes on conversation history.
40
+ * Returns same reference if nothing changed (cheap identity check).
41
+ */
42
+ export declare function reduceTokens(history: Dialogue[], debug?: boolean): Dialogue[];
@@ -0,0 +1,258 @@
1
+ /**
2
+ * Token Reduction for runcode.
3
+ * Original implementation — reduces context size through intelligent pruning.
4
+ *
5
+ * Strategy: instead of compression/encoding, we PRUNE redundant content.
6
+ * The model doesn't need verbose tool outputs from 20 turns ago.
7
+ *
8
+ * Four reduction passes:
9
+ * 1. Tool result aging — progressively shorten old tool results
10
+ * 2. Whitespace normalization — remove excessive blank lines and indentation
11
+ * 3. Assistant message trimming — truncate long assistant messages from old turns
+ * 4. Deduplication — remove consecutive duplicate messages
12
+ */
13
+ // ─── 1. Tool Result Aging ─────────────────────────────────────────────────
14
/**
 * Progressively shorten tool results based on age.
 *
 * "Age" is counted in tool-result messages, newest first: the most recent
 * user message that carries a tool_result has age 1, the one before it age 2,
 * and so on.
 *
 *   - age 1-3:  kept in full
 *   - age 4-8:  results longer than 500 chars are cut to ~500 chars
 *   - age 9-15: results longer than 200 chars are cut to their first line (max 150 chars)
 *   - age 16+:  results longer than 80 chars are cut to a one-line summary (max 60 chars)
 *
 * Results at or below the threshold of their own tier are left untouched.
 *
 * @param history Conversation history; never mutated.
 * @returns The same `history` reference when nothing was shortened, otherwise
 *          a shallow copy in which the affected messages are replaced.
 */
export function ageToolResults(history) {
    // Indices of user messages that carry at least one tool_result part.
    const toolPositions = [];
    for (let i = 0; i < history.length; i++) {
        const msg = history[i];
        if (msg.role === 'user' &&
            Array.isArray(msg.content) &&
            msg.content.some(p => p.type === 'tool_result')) {
            toolPositions.push(i);
        }
    }
    if (toolPositions.length <= 3)
        return history; // Everything is "recent" — nothing to age
    const totalResults = toolPositions.length;
    // Copy lazily so callers can rely on reference identity to detect "no change".
    let result = history;
    // The last three positions have age <= 3 and are always kept in full.
    for (let idx = 0; idx < totalResults - 3; idx++) {
        const pos = toolPositions[idx];
        const age = totalResults - idx; // Higher = older
        const msg = history[pos];
        let modified = false;
        const aged = msg.content.map(part => {
            if (part.type !== 'tool_result')
                return part;
            const shortened = shortenAgedToolResult(part, age);
            if (shortened !== part)
                modified = true;
            return shortened;
        });
        if (modified) {
            if (result === history)
                result = [...history];
            // Spread the original message so any extra fields survive.
            result[pos] = { ...msg, content: aged };
        }
    }
    return result;
}
/**
 * Shorten a single tool_result part according to its age tier.
 * Returns the very same `part` object when no shortening applies.
 */
function shortenAgedToolResult(part, age) {
    // JSON.stringify(undefined) yields undefined, so fall back to an empty string.
    const content = typeof part.content === 'string'
        ? part.content
        : (JSON.stringify(part.content) ?? '');
    const charLen = content.length;
    if (age <= 3)
        return part;
    // Each tier is exclusive: a result under its own tier's threshold must not
    // fall through to the harsher rule of an older tier.
    if (age <= 8) {
        if (charLen <= 500)
            return part;
        const truncated = content.slice(0, 500);
        const lastNl = truncated.lastIndexOf('\n');
        // Prefer cutting at a line boundary, unless that would discard more than half.
        const clean = lastNl > 250 ? truncated.slice(0, lastNl) : truncated;
        return {
            ...part,
            content: `${clean}\n... (${charLen - clean.length} chars omitted, ${age} turns ago)`,
        };
    }
    if (age <= 15) {
        if (charLen <= 200)
            return part;
        const firstLine = content.split('\n')[0].slice(0, 150);
        return {
            ...part,
            content: `${firstLine}\n... (${charLen} chars, ${age} turns ago)`,
        };
    }
    if (charLen <= 80)
        return part;
    const summary = content.split('\n')[0].slice(0, 60);
    return {
        ...part,
        content: part.is_error
            ? `[Error: ${summary}...]`
            : `[Result: ${summary}...]`,
    };
}
93
+ // ─── 2. Whitespace Normalization ──────────────────────────────────────────
94
/**
 * Normalize whitespace in messages whose content is a plain string.
 * - Remove trailing spaces and tabs on every line
 * - Collapse 3+ blank lines to 2 (at most three consecutive newlines)
 * - Cap leading indentation at 8 spaces
 *
 * Messages with non-string content are passed through untouched.
 *
 * @param history Conversation history; never mutated.
 * @returns The same `history` reference when no message changed, otherwise a
 *          new array in which changed messages are replaced by copies.
 */
export function normalizeWhitespace(history) {
    // Built with repeat() so the width of the cap is unambiguous.
    const MAX_INDENT = ' '.repeat(8);
    let modified = false;
    const result = history.map(msg => {
        if (typeof msg.content !== 'string')
            return msg;
        const cleaned = msg.content
            .replace(/[ \t]+$/gm, '') // Trailing spaces
            .replace(/\n{4,}/g, '\n\n\n') // Max 3 consecutive newlines
            .replace(/^ {9,}/gm, MAX_INDENT); // Cap indentation at 8 spaces
        if (cleaned === msg.content)
            return msg;
        modified = true;
        return { ...msg, content: cleaned };
    });
    return modified ? result : history;
}
118
+ // ─── 3. Verbose Assistant Message Trimming ────────────────────────────────
119
/**
 * Trim very long assistant messages from old turns.
 *
 * The last 4 assistant messages are always kept in full. For older ones:
 * - string content longer than 1500 chars is cut to at most 1500 chars
 *   (at a line boundary when one exists in the second half) and suffixed
 *   with a truncation marker;
 * - array content whose text parts total more than 1500 chars has every
 *   text part longer than 500 chars cut to 500 chars plus a marker.
 *   Non-text parts are never touched.
 *
 * @param history Conversation history; never mutated.
 * @returns The same `history` reference when nothing was actually trimmed,
 *          otherwise a new array with the trimmed messages replaced by copies.
 */
export function trimOldAssistantMessages(history) {
    const MAX_OLD_ASSISTANT_CHARS = 1500;
    const MAX_OLD_PART_CHARS = 500;
    const KEEP_RECENT = 4; // Keep last 4 assistant messages full
    const assistantCount = history.filter(m => m.role === 'assistant').length;
    if (assistantCount <= KEEP_RECENT)
        return history;
    let seenAssistant = 0;
    let modified = false;
    const result = history.map(msg => {
        if (msg.role !== 'assistant')
            return msg;
        seenAssistant++;
        // Keep recent messages full
        if (assistantCount - seenAssistant < KEEP_RECENT)
            return msg;
        if (typeof msg.content === 'string') {
            if (msg.content.length <= MAX_OLD_ASSISTANT_CHARS)
                return msg;
            modified = true;
            const truncated = msg.content.slice(0, MAX_OLD_ASSISTANT_CHARS);
            const lastNl = truncated.lastIndexOf('\n');
            // Prefer a line boundary unless that would discard more than half.
            const clean = lastNl > MAX_OLD_ASSISTANT_CHARS / 2 ? truncated.slice(0, lastNl) : truncated;
            return { ...msg, content: clean + '\n... (response truncated)' };
        }
        if (!Array.isArray(msg.content))
            return msg;
        let totalChars = 0;
        for (const p of msg.content) {
            if (p.type === 'text')
                totalChars += p.text.length;
        }
        if (totalChars <= MAX_OLD_ASSISTANT_CHARS)
            return msg;
        // Only report a change when at least one part was really shortened;
        // many medium-sized parts can exceed the total without any being trimmed.
        let partTrimmed = false;
        const trimmedParts = msg.content.map(p => {
            if (p.type !== 'text' || p.text.length <= MAX_OLD_PART_CHARS)
                return p;
            partTrimmed = true;
            return { ...p, text: p.text.slice(0, MAX_OLD_PART_CHARS) + '\n... (trimmed)' };
        });
        if (!partTrimmed)
            return msg;
        modified = true;
        return { ...msg, content: trimmedParts };
    });
    return modified ? result : history;
}
171
+ // ─── 4. Deduplication ────────────���────────────────────────────────────────
172
/**
 * Remove consecutive duplicate messages (same role + same string content).
 *
 * Only messages whose content is a plain string are compared; messages with
 * array content are always kept.
 *
 * @param history Conversation history; never mutated.
 * @returns The same `history` reference when no duplicate was found,
 *          otherwise a new array without the repeated messages.
 */
export function deduplicateMessages(history) {
    // A duplicate needs at least two messages; shorter histories cannot have one.
    if (history.length < 2)
        return history;
    const result = [history[0]];
    for (let i = 1; i < history.length; i++) {
        const prev = history[i - 1];
        const curr = history[i];
        const isRepeat = curr.role === prev.role &&
            typeof curr.content === 'string' &&
            curr.content === prev.content;
        if (!isRepeat)
            result.push(curr);
    }
    // Fewer kept messages means at least one duplicate was dropped.
    return result.length === history.length ? history : result;
}
191
+ // ─── Pipeline ───────���───────────────────���─────────────────────────────────
192
/**
 * Run all token reduction passes on conversation history.
 *
 * Passes, in order: tool result aging, whitespace normalization, trimming of
 * old verbose assistant messages, removal of consecutive duplicates. Each pass
 * receives the output of the previous one. Histories shorter than 8 messages
 * are returned as-is.
 *
 * @param history Conversation history; never mutated.
 * @param debug   When truthy, logs the estimated token saving to stderr if
 *                more than 500 characters were removed.
 * @returns The reduced history. This is the same reference as `history` when
 *          every pass returned its input unchanged.
 */
export function reduceTokens(history, debug) {
    if (history.length < 8)
        return history; // Skip for short conversations
    const passes = [
        ageToolResults, // Pass 1: Age old tool results
        normalizeWhitespace, // Pass 2: Normalize whitespace
        trimOldAssistantMessages, // Pass 3: Trim old verbose assistant messages
        deduplicateMessages, // Pass 4: Remove consecutive duplicate messages
    ];
    let current = history;
    for (const pass of passes) {
        current = pass(current);
    }
    // Character accounting walks (and JSON-serializes) the whole history, so it
    // is only done when the figure is actually going to be reported. The
    // overall before/after difference equals the sum of the per-pass savings.
    if (debug && current !== history) {
        const totalSaved = estimateChars(history) - estimateChars(current);
        if (totalSaved > 500) {
            const tokensSaved = Math.round(totalSaved / 4);
            console.error(`[runcode] Token reduction: ~${tokensSaved} tokens saved`);
        }
    }
    return current;
}
236
/**
 * Estimate the character count of a conversation history.
 *
 * Counts string content directly. For array content it counts the text of
 * `text` parts, the (string or JSON-serialized) content of `tool_result`
 * parts, and the JSON-serialized input of `tool_use` parts. Other part types
 * and missing values contribute 0.
 *
 * @param history Conversation history.
 * @returns Total number of characters counted.
 */
function estimateChars(history) {
    // JSON.stringify(undefined) yields undefined, so guard before reading length.
    const jsonLength = (value) => (JSON.stringify(value) ?? '').length;
    let total = 0;
    for (const msg of history) {
        if (typeof msg.content === 'string') {
            total += msg.content.length;
            continue;
        }
        if (!Array.isArray(msg.content))
            continue;
        for (const p of msg.content) {
            // Skip anything that is not an object part (the `in`-style check would throw on primitives).
            if (p === null || typeof p !== 'object')
                continue;
            if (p.type === 'text') {
                total += (p.text ?? '').length;
            }
            else if (p.type === 'tool_result') {
                total += typeof p.content === 'string' ? p.content.length : jsonLength(p.content);
            }
            else if (p.type === 'tool_use') {
                total += jsonLength(p.input);
            }
        }
    }
    return total;
}
@@ -101,4 +101,6 @@ export interface AgentConfig {
101
101
  permissionMode?: 'default' | 'trust' | 'deny-all' | 'plan';
102
102
  onEvent?: (event: StreamEvent) => void;
103
103
  debug?: boolean;
104
+ /** Ultrathink mode: inject deep-reasoning instruction into every prompt */
105
+ ultrathink?: boolean;
104
106
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/runcode",
3
- "version": "2.4.0",
3
+ "version": "2.5.0",
4
4
  "description": "RunCode — AI coding agent powered by 41+ models. Pay per use with USDC.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,13 +0,0 @@
1
- /**
2
- * Adapter between brcc's Dialogue type and the compression lib's NormalizedMessage type.
3
- */
4
- import type { Dialogue } from '../agent/types.js';
5
- /**
6
- * Compress conversation history to reduce token usage.
7
- * Returns compressed Dialogue[] with stats.
8
- */
9
- export declare function compressHistory(history: Dialogue[], debug?: boolean): Promise<{
10
- history: Dialogue[];
11
- saved: number;
12
- ratio: number;
13
- } | null>;
@@ -1,104 +0,0 @@
1
- /**
2
- * Adapter between brcc's Dialogue type and the compression lib's NormalizedMessage type.
3
- */
4
- import { compressContext, shouldCompress } from './index.js';
5
- /**
6
- * Convert brcc Dialogue[] to NormalizedMessage[] for compression.
7
- */
8
- function dialogueToNormalized(history) {
9
- return history.map(msg => {
10
- if (typeof msg.content === 'string') {
11
- return { role: msg.role, content: msg.content };
12
- }
13
- // Convert content parts to string representation
14
- const parts = [];
15
- const toolCalls = [];
16
- for (const part of msg.content) {
17
- if ('type' in part) {
18
- if (part.type === 'text') {
19
- parts.push(part.text);
20
- }
21
- else if (part.type === 'tool_use') {
22
- const inv = part;
23
- toolCalls.push({
24
- id: inv.id,
25
- type: 'function',
26
- function: { name: inv.name, arguments: JSON.stringify(inv.input) },
27
- });
28
- }
29
- else if (part.type === 'tool_result') {
30
- const res = part;
31
- const content = typeof res.content === 'string' ? res.content : JSON.stringify(res.content);
32
- parts.push(`[Tool result: ${content}]`);
33
- }
34
- else if (part.type === 'thinking') {
35
- // Skip thinking in compression (already handled by stripOldThinking)
36
- }
37
- }
38
- }
39
- const normalized = {
40
- role: msg.role,
41
- content: parts.join('\n') || null,
42
- };
43
- if (toolCalls.length > 0) {
44
- normalized.tool_calls = toolCalls;
45
- }
46
- return normalized;
47
- });
48
- }
49
- /**
50
- * Compress conversation history to reduce token usage.
51
- * Returns compressed Dialogue[] with stats.
52
- */
53
- export async function compressHistory(history, debug) {
54
- // Convert to NormalizedMessage format
55
- const normalized = dialogueToNormalized(history);
56
- // Check if compression is worthwhile
57
- if (!shouldCompress(normalized)) {
58
- return null;
59
- }
60
- try {
61
- const result = await compressContext(normalized);
62
- const savedPct = Math.round((1 - result.compressionRatio) * 100);
63
- if (debug) {
64
- console.error(`[runcode] Compressed context: ${result.originalChars} → ${result.compressedChars} chars (${savedPct}% saved)`);
65
- if (result.stats) {
66
- const layers = Object.entries(result.stats)
67
- .filter(([, v]) => typeof v === 'number' && v > 0)
68
- .map(([k, v]) => `${k}: ${v}`)
69
- .join(', ');
70
- if (layers)
71
- console.error(`[runcode] Compression layers: ${layers}`);
72
- }
73
- }
74
- // Convert compressed messages back to Dialogue format
75
- // We only compress the string content, keeping the original structure
76
- const compressed = [];
77
- for (let i = 0; i < history.length && i < result.messages.length; i++) {
78
- const original = history[i];
79
- const comp = result.messages[i];
80
- if (typeof original.content === 'string' && typeof comp.content === 'string') {
81
- compressed.push({ role: original.role, content: comp.content });
82
- }
83
- else {
84
- // Keep complex content as-is (tool_use/tool_result structure can't be modified)
85
- compressed.push(original);
86
- }
87
- }
88
- // Append any remaining original messages
89
- for (let i = result.messages.length; i < history.length; i++) {
90
- compressed.push(history[i]);
91
- }
92
- return {
93
- history: compressed,
94
- saved: result.originalChars - result.compressedChars,
95
- ratio: result.compressionRatio,
96
- };
97
- }
98
- catch (err) {
99
- if (debug) {
100
- console.error(`[runcode] Compression failed: ${err.message}`);
101
- }
102
- return null;
103
- }
104
- }
@@ -1,23 +0,0 @@
1
- /**
2
- * Dictionary Codebook
3
- *
4
- * Static dictionary of frequently repeated phrases observed in LLM prompts.
5
- * Built from analysis of BlockRun production logs.
6
- *
7
- * Format: Short code ($XX) -> Long phrase
8
- * The LLM receives a codebook header and decodes in-context.
9
- */
10
- export declare const STATIC_CODEBOOK: Record<string, string>;
11
- /**
12
- * Get the inverse codebook for decompression.
13
- */
14
- export declare function getInverseCodebook(): Record<string, string>;
15
- /**
16
- * Generate the codebook header for inclusion in system message.
17
- * LLMs can decode in-context using this header.
18
- */
19
- export declare function generateCodebookHeader(usedCodes: Set<string>, pathMap?: Record<string, string>): string;
20
- /**
21
- * Decompress a string using the codebook (for logging).
22
- */
23
- export declare function decompressContent(content: string, codebook?: Record<string, string>): string;