@blockrun/franklin 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. package/LICENSE +190 -0
  2. package/README.md +256 -0
  3. package/dist/agent/commands.d.ts +27 -0
  4. package/dist/agent/commands.js +659 -0
  5. package/dist/agent/compact.d.ts +31 -0
  6. package/dist/agent/compact.js +366 -0
  7. package/dist/agent/context.d.ts +11 -0
  8. package/dist/agent/context.js +184 -0
  9. package/dist/agent/error-classifier.d.ts +10 -0
  10. package/dist/agent/error-classifier.js +61 -0
  11. package/dist/agent/llm.d.ts +63 -0
  12. package/dist/agent/llm.js +448 -0
  13. package/dist/agent/loop.d.ts +12 -0
  14. package/dist/agent/loop.js +346 -0
  15. package/dist/agent/optimize.d.ts +53 -0
  16. package/dist/agent/optimize.js +262 -0
  17. package/dist/agent/permissions.d.ts +39 -0
  18. package/dist/agent/permissions.js +226 -0
  19. package/dist/agent/reduce.d.ts +49 -0
  20. package/dist/agent/reduce.js +317 -0
  21. package/dist/agent/streaming-executor.d.ts +36 -0
  22. package/dist/agent/streaming-executor.js +149 -0
  23. package/dist/agent/tokens.d.ts +53 -0
  24. package/dist/agent/tokens.js +185 -0
  25. package/dist/agent/types.d.ts +125 -0
  26. package/dist/agent/types.js +5 -0
  27. package/dist/banner.d.ts +1 -0
  28. package/dist/banner.js +27 -0
  29. package/dist/commands/balance.d.ts +1 -0
  30. package/dist/commands/balance.js +40 -0
  31. package/dist/commands/config.d.ts +14 -0
  32. package/dist/commands/config.js +107 -0
  33. package/dist/commands/daemon.d.ts +3 -0
  34. package/dist/commands/daemon.js +117 -0
  35. package/dist/commands/history.d.ts +5 -0
  36. package/dist/commands/history.js +31 -0
  37. package/dist/commands/init.d.ts +3 -0
  38. package/dist/commands/init.js +92 -0
  39. package/dist/commands/logs.d.ts +5 -0
  40. package/dist/commands/logs.js +89 -0
  41. package/dist/commands/models.d.ts +1 -0
  42. package/dist/commands/models.js +56 -0
  43. package/dist/commands/plugin.d.ts +14 -0
  44. package/dist/commands/plugin.js +176 -0
  45. package/dist/commands/proxy.d.ts +13 -0
  46. package/dist/commands/proxy.js +106 -0
  47. package/dist/commands/setup.d.ts +1 -0
  48. package/dist/commands/setup.js +49 -0
  49. package/dist/commands/start.d.ts +8 -0
  50. package/dist/commands/start.js +292 -0
  51. package/dist/commands/stats.d.ts +10 -0
  52. package/dist/commands/stats.js +94 -0
  53. package/dist/commands/uninit.d.ts +1 -0
  54. package/dist/commands/uninit.js +63 -0
  55. package/dist/config.d.ts +9 -0
  56. package/dist/config.js +41 -0
  57. package/dist/index.d.ts +2 -0
  58. package/dist/index.js +179 -0
  59. package/dist/mcp/client.d.ts +44 -0
  60. package/dist/mcp/client.js +147 -0
  61. package/dist/mcp/config.d.ts +20 -0
  62. package/dist/mcp/config.js +138 -0
  63. package/dist/plugin-sdk/channel.d.ts +100 -0
  64. package/dist/plugin-sdk/channel.js +10 -0
  65. package/dist/plugin-sdk/index.d.ts +14 -0
  66. package/dist/plugin-sdk/index.js +9 -0
  67. package/dist/plugin-sdk/plugin.d.ts +87 -0
  68. package/dist/plugin-sdk/plugin.js +7 -0
  69. package/dist/plugin-sdk/search.d.ts +13 -0
  70. package/dist/plugin-sdk/search.js +4 -0
  71. package/dist/plugin-sdk/tracker.d.ts +27 -0
  72. package/dist/plugin-sdk/tracker.js +5 -0
  73. package/dist/plugin-sdk/workflow.d.ts +126 -0
  74. package/dist/plugin-sdk/workflow.js +11 -0
  75. package/dist/plugins/registry.d.ts +33 -0
  76. package/dist/plugins/registry.js +155 -0
  77. package/dist/plugins/runner.d.ts +21 -0
  78. package/dist/plugins/runner.js +453 -0
  79. package/dist/plugins-bundled/social/index.d.ts +10 -0
  80. package/dist/plugins-bundled/social/index.js +363 -0
  81. package/dist/plugins-bundled/social/plugin.json +14 -0
  82. package/dist/plugins-bundled/social/prompts.d.ts +19 -0
  83. package/dist/plugins-bundled/social/prompts.js +67 -0
  84. package/dist/plugins-bundled/social/types.d.ts +58 -0
  85. package/dist/plugins-bundled/social/types.js +16 -0
  86. package/dist/pricing.d.ts +21 -0
  87. package/dist/pricing.js +91 -0
  88. package/dist/proxy/fallback.d.ts +38 -0
  89. package/dist/proxy/fallback.js +144 -0
  90. package/dist/proxy/server.d.ts +18 -0
  91. package/dist/proxy/server.js +576 -0
  92. package/dist/proxy/sse-translator.d.ts +29 -0
  93. package/dist/proxy/sse-translator.js +270 -0
  94. package/dist/router/index.d.ts +22 -0
  95. package/dist/router/index.js +269 -0
  96. package/dist/session/search.d.ts +33 -0
  97. package/dist/session/search.js +229 -0
  98. package/dist/session/storage.d.ts +48 -0
  99. package/dist/session/storage.js +173 -0
  100. package/dist/stats/insights.d.ts +55 -0
  101. package/dist/stats/insights.js +195 -0
  102. package/dist/stats/tracker.d.ts +54 -0
  103. package/dist/stats/tracker.js +165 -0
  104. package/dist/tools/askuser.d.ts +6 -0
  105. package/dist/tools/askuser.js +76 -0
  106. package/dist/tools/bash.d.ts +5 -0
  107. package/dist/tools/bash.js +336 -0
  108. package/dist/tools/edit.d.ts +5 -0
  109. package/dist/tools/edit.js +148 -0
  110. package/dist/tools/glob.d.ts +5 -0
  111. package/dist/tools/glob.js +158 -0
  112. package/dist/tools/grep.d.ts +5 -0
  113. package/dist/tools/grep.js +194 -0
  114. package/dist/tools/imagegen.d.ts +6 -0
  115. package/dist/tools/imagegen.js +172 -0
  116. package/dist/tools/index.d.ts +17 -0
  117. package/dist/tools/index.js +30 -0
  118. package/dist/tools/read.d.ts +11 -0
  119. package/dist/tools/read.js +90 -0
  120. package/dist/tools/subagent.d.ts +5 -0
  121. package/dist/tools/subagent.js +116 -0
  122. package/dist/tools/task.d.ts +5 -0
  123. package/dist/tools/task.js +91 -0
  124. package/dist/tools/webfetch.d.ts +5 -0
  125. package/dist/tools/webfetch.js +166 -0
  126. package/dist/tools/websearch.d.ts +5 -0
  127. package/dist/tools/websearch.js +103 -0
  128. package/dist/tools/write.d.ts +5 -0
  129. package/dist/tools/write.js +114 -0
  130. package/dist/ui/app.d.ts +26 -0
  131. package/dist/ui/app.js +545 -0
  132. package/dist/ui/model-picker.d.ts +14 -0
  133. package/dist/ui/model-picker.js +161 -0
  134. package/dist/ui/terminal.d.ts +35 -0
  135. package/dist/ui/terminal.js +337 -0
  136. package/dist/wallet/manager.d.ts +10 -0
  137. package/dist/wallet/manager.js +23 -0
  138. package/package.json +79 -0
@@ -0,0 +1,346 @@
1
+ /**
2
+ * runcode Agent Loop
3
+ * The core reasoning-action cycle: prompt → model → extract capabilities → execute → repeat.
4
+ * Original implementation with different architecture from any reference codebase.
5
+ */
6
+ import { ModelClient } from './llm.js';
7
+ import { autoCompactIfNeeded, microCompact } from './compact.js';
8
+ import { estimateHistoryTokens, updateActualTokens, resetTokenAnchor, getAnchoredTokenCount, getContextWindow } from './tokens.js';
9
+ import { handleSlashCommand } from './commands.js';
10
+ import { reduceTokens } from './reduce.js';
11
+ import { PermissionManager } from './permissions.js';
12
+ import { StreamingExecutor } from './streaming-executor.js';
13
+ import { optimizeHistory, CAPPED_MAX_TOKENS, ESCALATED_MAX_TOKENS, getMaxOutputTokens } from './optimize.js';
14
+ import { classifyAgentError } from './error-classifier.js';
15
+ import { recordUsage } from '../stats/tracker.js';
16
+ import { estimateCost } from '../pricing.js';
17
+ import { createSessionId, appendToSession, updateSessionMeta, pruneOldSessions, } from '../session/storage.js';
18
+ // ─── Interactive Session ───────────────────────────────────────────────────
19
+ /**
20
+ * Run a multi-turn interactive session.
21
+ * Each user message triggers a full agent loop.
22
+ * Returns the accumulated conversation history.
23
+ */
24
+ export async function interactiveSession(config, getUserInput, onEvent, onAbortReady) {
25
+ const client = new ModelClient({
26
+ apiUrl: config.apiUrl,
27
+ chain: config.chain,
28
+ debug: config.debug,
29
+ });
30
+ const capabilityMap = new Map();
31
+ for (const cap of config.capabilities) {
32
+ capabilityMap.set(cap.spec.name, cap);
33
+ }
34
+ const toolDefs = config.capabilities.map((c) => c.spec);
35
+ const maxTurns = config.maxTurns ?? 100;
36
+ const workDir = config.workingDir ?? process.cwd();
37
+ const permissions = new PermissionManager(config.permissionMode ?? 'default', config.permissionPromptFn);
38
+ const history = [];
39
+ let lastUserInput = ''; // For /retry
40
+ const failedModels = new Set(); // Models that failed payment/rate-limit (session-level)
41
+ // Session persistence
42
+ const sessionId = createSessionId();
43
+ let turnCount = 0;
44
+ let tokenBudgetWarned = false; // Emit token budget warning at most once per session
45
+ pruneOldSessions(sessionId); // Cleanup old sessions on start, protect current
46
+ while (true) {
47
+ let input = await getUserInput();
48
+ if (input === null)
49
+ break; // User wants to exit
50
+ if (input === '')
51
+ continue; // Empty input → re-prompt
52
+ // ── Slash command dispatch ──
53
+ if (input.startsWith('/')) {
54
+ // /retry re-sends the last user message
55
+ if (input === '/retry') {
56
+ if (!lastUserInput) {
57
+ onEvent({ kind: 'text_delta', text: 'No previous message to retry.\n' });
58
+ onEvent({ kind: 'turn_done', reason: 'completed' });
59
+ continue;
60
+ }
61
+ input = lastUserInput;
62
+ }
63
+ else {
64
+ const cmdResult = await handleSlashCommand(input, {
65
+ history, config, client, sessionId, onEvent,
66
+ });
67
+ if (cmdResult.handled)
68
+ continue;
69
+ if (cmdResult.rewritten)
70
+ input = cmdResult.rewritten;
71
+ }
72
+ }
73
+ lastUserInput = input;
74
+ history.push({ role: 'user', content: input });
75
+ appendToSession(sessionId, { role: 'user', content: input });
76
+ turnCount++;
77
+ const abort = new AbortController();
78
+ onAbortReady?.(() => abort.abort());
79
+ let loopCount = 0;
80
+ let recoveryAttempts = 0;
81
+ let compactFailures = 0;
82
+ let maxTokensOverride;
83
+ let lastActivity = Date.now();
84
+ // Agent loop for this user message
85
+ while (loopCount < maxTurns) {
86
+ loopCount++;
87
+ // ── Token optimization pipeline ──
88
+ // 1. Strip thinking, budget tool results, time-based cleanup (always — cheap)
89
+ const optimized = optimizeHistory(history, {
90
+ debug: config.debug,
91
+ lastActivityTimestamp: lastActivity,
92
+ });
93
+ if (optimized !== history) {
94
+ history.length = 0;
95
+ history.push(...optimized);
96
+ }
97
+ // 2. Token reduction: age old results, normalize whitespace, trim verbose messages
98
+ const reduced = reduceTokens(history, config.debug);
99
+ if (reduced !== history) {
100
+ history.length = 0;
101
+ history.push(...reduced);
102
+ }
103
+ // 3. Microcompact: clear old tool results to prevent context snowball
104
+ if (history.length > 6) {
105
+ const microCompacted = microCompact(history, 3);
106
+ if (microCompacted !== history) {
107
+ history.length = 0;
108
+ history.push(...microCompacted);
109
+ resetTokenAnchor(); // History shrunk — resync token tracking
110
+ }
111
+ }
112
+ // 3. Auto-compact: summarize history if approaching context limit
113
+ // Circuit breaker: stop retrying after 3 consecutive failures
114
+ if (compactFailures < 3) {
115
+ try {
116
+ const { history: compacted, compacted: didCompact } = await autoCompactIfNeeded(history, config.model, client, config.debug);
117
+ if (didCompact) {
118
+ history.length = 0;
119
+ history.push(...compacted);
120
+ resetTokenAnchor();
121
+ compactFailures = 0;
122
+ if (config.debug) {
123
+ console.error(`[runcode] History compacted: ~${estimateHistoryTokens(history)} tokens`);
124
+ }
125
+ }
126
+ }
127
+ catch (compactErr) {
128
+ compactFailures++;
129
+ if (config.debug) {
130
+ console.error(`[runcode] Compaction failed (${compactFailures}/3): ${compactErr.message}`);
131
+ }
132
+ }
133
+ }
134
+ // Inject ultrathink instruction when mode is active
135
+ const systemParts = [...config.systemInstructions];
136
+ if (config.ultrathink) {
137
+ systemParts.push('# Ultrathink Mode\n' +
138
+ 'You are in deep reasoning mode. Before responding to any request:\n' +
139
+ '1. Thoroughly analyze the problem from multiple angles\n' +
140
+ '2. Consider edge cases, failure modes, and second-order effects\n' +
141
+ '3. Challenge your initial assumptions before committing to an approach\n' +
142
+ '4. Think step by step — show your reasoning explicitly when it adds value\n' +
143
+ 'Prioritize correctness and thoroughness over speed.');
144
+ }
145
+ const systemPrompt = systemParts.join('\n\n');
146
+ const modelMaxOut = getMaxOutputTokens(config.model);
147
+ let maxTokens = Math.min(maxTokensOverride ?? CAPPED_MAX_TOKENS, modelMaxOut);
148
+ let responseParts = [];
149
+ let usage;
150
+ let stopReason;
151
+ // Create streaming executor for concurrent tool execution
152
+ const streamExec = new StreamingExecutor({
153
+ handlers: capabilityMap,
154
+ scope: { workingDir: workDir, abortSignal: abort.signal, onAskUser: config.onAskUser },
155
+ permissions,
156
+ onStart: (id, name, preview) => onEvent({ kind: 'capability_start', id, name, preview }),
157
+ onProgress: (id, text) => onEvent({ kind: 'capability_progress', id, text }),
158
+ });
159
+ try {
160
+ const result = await client.complete({
161
+ model: config.model,
162
+ messages: history,
163
+ system: systemPrompt,
164
+ tools: toolDefs,
165
+ max_tokens: maxTokens,
166
+ stream: true,
167
+ }, abort.signal,
168
+ // Start concurrent tools as soon as their input is fully received
169
+ (tool) => streamExec.onToolReceived(tool),
170
+ // Stream text/thinking deltas to UI in real-time
171
+ (delta) => {
172
+ if (delta.type === 'text') {
173
+ onEvent({ kind: 'text_delta', text: delta.text });
174
+ }
175
+ else if (delta.type === 'thinking') {
176
+ onEvent({ kind: 'thinking_delta', text: delta.text });
177
+ }
178
+ });
179
+ responseParts = result.content;
180
+ usage = result.usage;
181
+ stopReason = result.stopReason;
182
+ }
183
+ catch (err) {
184
+ // ── User abort (Esc key) ──
185
+ if (err.name === 'AbortError' || abort.signal.aborted) {
186
+ // Save any partial response that was streamed before abort
187
+ if (responseParts && responseParts.length > 0) {
188
+ history.push({ role: 'assistant', content: responseParts });
189
+ appendToSession(sessionId, { role: 'assistant', content: responseParts });
190
+ }
191
+ onEvent({ kind: 'turn_done', reason: 'aborted' });
192
+ break;
193
+ }
194
+ const errMsg = err.message || '';
195
+ const classified = classifyAgentError(errMsg);
196
+ // ── Prompt too long recovery ──
197
+ if (classified.category === 'context_limit' && recoveryAttempts < 3) {
198
+ recoveryAttempts++;
199
+ if (config.debug) {
200
+ console.error(`[runcode] Prompt too long — forcing compact (attempt ${recoveryAttempts})`);
201
+ }
202
+ const { history: compactedAgain } = await autoCompactIfNeeded(history, config.model, client, config.debug);
203
+ history.length = 0;
204
+ history.push(...compactedAgain);
205
+ continue; // Retry
206
+ }
207
+ // ── Transient error recovery (network, rate limit, server errors) ──
208
+ if (classified.isTransient && recoveryAttempts < 3) {
209
+ recoveryAttempts++;
210
+ const backoffMs = Math.pow(2, recoveryAttempts) * 1000;
211
+ if (config.debug) {
212
+ console.error(`[runcode] ${classified.label} error — retrying in ${backoffMs / 1000}s (attempt ${recoveryAttempts}): ${errMsg.slice(0, 100)}`);
213
+ }
214
+ onEvent({
215
+ kind: 'text_delta',
216
+ text: `\n*Retrying (${recoveryAttempts}/3) after ${classified.label} error...*\n`,
217
+ });
218
+ await new Promise(r => setTimeout(r, backoffMs));
219
+ continue;
220
+ }
221
+ // Add recovery suggestions based on error type
222
+ let suggestion = '';
223
+ if (classified.category === 'rate_limit') {
224
+ suggestion = '\nTip: Try /model to switch to a different model, or wait a moment and /retry.';
225
+ }
226
+ else if (classified.category === 'payment') {
227
+ // Auto-fallback to free models on payment/rate limit failure
228
+ // Track failed models at session level to prevent ping-pong loops
229
+ failedModels.add(config.model);
230
+ const FREE_MODELS = ['nvidia/qwen3-coder-480b', 'nvidia/nemotron-ultra-253b', 'nvidia/devstral-2-123b'];
231
+ const nextFree = FREE_MODELS.find(m => !failedModels.has(m));
232
+ if (nextFree) {
233
+ const oldModel = config.model;
234
+ config.model = nextFree;
235
+ config.onModelChange?.(nextFree);
236
+ onEvent({ kind: 'text_delta', text: `\n*${oldModel} failed — switching to ${nextFree}*\n` });
237
+ continue; // Retry with next model
238
+ }
239
+ suggestion = '\nTip: Run `runcode balance` to check funds. Try /model free for free models.';
240
+ }
241
+ else if (classified.category === 'timeout' || classified.category === 'network') {
242
+ suggestion = '\nTip: Check your network connection. Use /retry to try again.';
243
+ }
244
+ else if (classified.category === 'context_limit') {
245
+ suggestion = '\nTip: Run /compact to compress conversation history.';
246
+ }
247
+ onEvent({
248
+ kind: 'turn_done',
249
+ reason: 'error',
250
+ error: `[${classified.label}] ${errMsg}${suggestion}`,
251
+ });
252
+ break;
253
+ }
254
+ // When API doesn't return input tokens (some models return 0), estimate from history
255
+ const inputTokens = usage.inputTokens > 0
256
+ ? usage.inputTokens
257
+ : estimateHistoryTokens(history);
258
+ // Anchor token tracking to actual API counts
259
+ updateActualTokens(inputTokens, usage.outputTokens, history.length);
260
+ onEvent({
261
+ kind: 'usage',
262
+ inputTokens,
263
+ outputTokens: usage.outputTokens,
264
+ model: config.model,
265
+ calls: 1,
266
+ });
267
+ // Record usage for stats tracking (runcode stats command)
268
+ const costEstimate = estimateCost(config.model, inputTokens, usage.outputTokens, 1);
269
+ recordUsage(config.model, inputTokens, usage.outputTokens, costEstimate, 0);
270
+ // ── Max output tokens recovery ──
271
+ if (stopReason === 'max_tokens' && recoveryAttempts < 3) {
272
+ recoveryAttempts++;
273
+ if (maxTokensOverride === undefined) {
274
+ // First hit: escalate to 64K
275
+ maxTokensOverride = ESCALATED_MAX_TOKENS;
276
+ if (config.debug) {
277
+ console.error(`[runcode] Max tokens hit — escalating to ${maxTokensOverride}`);
278
+ }
279
+ }
280
+ // Append what we got + a continuation prompt (text already streamed)
281
+ history.push({ role: 'assistant', content: responseParts });
282
+ history.push({
283
+ role: 'user',
284
+ content: 'Continue where you left off. Do not repeat what you already said.',
285
+ });
286
+ continue; // Retry with higher limit
287
+ }
288
+ // Reset recovery counter on successful completion
289
+ recoveryAttempts = 0;
290
+ // Extract tool invocations (text/thinking already streamed in real-time)
291
+ const invocations = [];
292
+ for (const part of responseParts) {
293
+ if (part.type === 'tool_use') {
294
+ invocations.push(part);
295
+ }
296
+ }
297
+ history.push({ role: 'assistant', content: responseParts });
298
+ // No more capabilities → done with this user message
299
+ if (invocations.length === 0) {
300
+ // Save session on completed turn
301
+ appendToSession(sessionId, { role: 'assistant', content: responseParts });
302
+ updateSessionMeta(sessionId, {
303
+ model: config.model,
304
+ workDir: config.workingDir || process.cwd(),
305
+ turnCount,
306
+ messageCount: history.length,
307
+ });
308
+ // Token budget warning — emit once per session when crossing 70%
309
+ if (!tokenBudgetWarned) {
310
+ const { estimated } = getAnchoredTokenCount(history);
311
+ const contextWindow = getContextWindow(config.model);
312
+ const pct = (estimated / contextWindow) * 100;
313
+ if (pct >= 70) {
314
+ tokenBudgetWarned = true;
315
+ onEvent({
316
+ kind: 'text_delta',
317
+ text: `\n\n> **Token budget: ${pct.toFixed(0)}% used** (~${estimated.toLocaleString()} / ${(contextWindow / 1000).toFixed(0)}k tokens). Run \`/compact\` to free up space.\n`,
318
+ });
319
+ }
320
+ }
321
+ onEvent({ kind: 'turn_done', reason: 'completed' });
322
+ break;
323
+ }
324
+ // Collect results — concurrent tools may already be running from streaming
325
+ const results = await streamExec.collectResults(invocations);
326
+ for (const [inv, result] of results) {
327
+ onEvent({ kind: 'capability_done', id: inv.id, result });
328
+ }
329
+ // Refresh activity timestamp after tool execution
330
+ lastActivity = Date.now();
331
+ // Append outcomes
332
+ const outcomeContent = results.map(([inv, result]) => ({
333
+ type: 'tool_result',
334
+ tool_use_id: inv.id,
335
+ content: result.output,
336
+ is_error: result.isError,
337
+ }));
338
+ history.push({ role: 'user', content: outcomeContent });
339
+ }
340
+ if (loopCount >= maxTurns) {
341
+ onEvent({ kind: 'turn_done', reason: 'max_turns' });
342
+ }
343
+ }
344
+ return history;
345
+ }
346
+ // Cost estimation now uses shared pricing from src/pricing.ts
@@ -0,0 +1,53 @@
1
/**
 * Token optimization strategies for runcode.
 *
 * Five layers of optimization to minimize token usage:
 * 1. Tool result size budgeting — cap large outputs, keep preview
 * 2. Thinking block stripping — remove old thinking from history
 * 3. Time-based cleanup — clear stale tool results after idle gap
 * 4. Adaptive max_tokens — start low (16K), escalate on hit
 * 5. Pre-compact stripping — remove images/docs before summarization
 */
import type { Dialogue } from './types.js';
/** Default max_tokens (low to save output slot reservation) */
export declare const CAPPED_MAX_TOKENS = 16384;
/** Escalated max_tokens after hitting the cap */
export declare const ESCALATED_MAX_TOKENS = 65536;
/** Get max output tokens for a model (falls back to 16384 for unknown models) */
export declare function getMaxOutputTokens(model: string): number;
/**
 * Cap tool result sizes to prevent context bloat.
 * Large results (>32K chars) are truncated with a preview.
 * Per-message aggregate is also capped at 100K chars.
 */
export declare function budgetToolResults(history: Dialogue[]): Dialogue[];
/**
 * Remove thinking blocks from assistant messages older than the most
 * recent two; returns the same reference when nothing was stripped.
 */
export declare function stripOldThinking(history: Dialogue[]): Dialogue[];
/**
 * After an idle gap (currently 5 min — see IDLE_GAP_THRESHOLD_MINUTES in
 * optimize.js), clear old tool results.
 * When the user comes back after being away, old results are stale anyway.
 */
export declare function timeBasedCleanup(history: Dialogue[], lastActivityTimestamp?: number): {
    history: Dialogue[];
    cleaned: boolean;
};
/**
 * Strip heavy content before sending to compaction model.
 * Removes image/document references since the summarizer can't see them anyway.
 */
export declare function stripHeavyContent(history: Dialogue[]): Dialogue[];
export interface OptimizeOptions {
    debug?: boolean;
    lastActivityTimestamp?: number;
}
/**
 * Run the full optimization pipeline on conversation history.
 * Called before each model request to minimize token usage.
 *
 * Pipeline order (cheapest first):
 * 1. Strip old thinking blocks (free, local)
 * 2. Budget tool results (free, local)
 * 3. Time-based cleanup (free, local, only after idle)
 *
 * Returns the optimized history (may be same reference if no changes).
 */
export declare function optimizeHistory(history: Dialogue[], opts?: OptimizeOptions): Dialogue[];
@@ -0,0 +1,262 @@
1
+ /**
2
+ * Token optimization strategies for runcode.
3
+ *
4
+ * Five layers of optimization to minimize token usage:
5
+ * 1. Tool result size budgeting — cap large outputs, keep preview
6
+ * 2. Thinking block stripping — remove old thinking from history
7
+ * 3. Time-based cleanup — clear stale tool results after idle gap
8
+ * 4. Adaptive max_tokens — start low (16K), escalate on hit
9
+ * 5. Pre-compact stripping — remove images/docs before summarization
10
+ */
11
// ─── Constants ─────────────────────────────────────────────────────────────
/** Max chars per individual tool result before truncation (history-level safety net) */
const MAX_TOOL_RESULT_CHARS = 32_000;
/** Max aggregate tool result chars per user message */
const MAX_TOOL_RESULTS_PER_MESSAGE_CHARS = 100_000;
/** Preview size when truncating */
const PREVIEW_CHARS = 2_000;
/** Default max_tokens (low to save output slot reservation) */
export const CAPPED_MAX_TOKENS = 16_384;
/** Escalated max_tokens after hitting the cap */
export const ESCALATED_MAX_TOKENS = 65_536;
/** Per-model max output tokens — prevents requesting more than the model supports */
const MODEL_MAX_OUTPUT = {
    'anthropic/claude-opus-4.6': 32_000,
    'anthropic/claude-sonnet-4.6': 64_000,
    'anthropic/claude-haiku-4.5-20251001': 16_384,
    'openai/gpt-5.4': 32_768,
    'openai/gpt-5-mini': 16_384,
    'google/gemini-2.5-pro': 65_536,
    'google/gemini-2.5-flash': 65_536,
    'deepseek/deepseek-chat': 8_192,
};
/**
 * Get max output tokens for a model.
 * @param {string} model - Model identifier (e.g. 'openai/gpt-5-mini').
 * @returns {number} The model's output cap, or 16384 for unknown models.
 */
export function getMaxOutputTokens(model) {
    return MODEL_MAX_OUTPUT[model] ?? 16_384;
}
/** Idle gap (minutes) after which old tool results are cleared */
const IDLE_GAP_THRESHOLD_MINUTES = 5;
/** Number of recent tool results to keep during time-based cleanup */
const KEEP_RECENT_TOOL_RESULTS = 3;
// ─── 1. Tool Result Size Budgeting ─────────────────────────────────────────
/**
 * Cap tool result sizes to prevent context bloat.
 * Large results (>32K chars) are truncated down to a 2K-char preview.
 * Per-message aggregate is also capped at 100K chars; once a message's
 * budget is exhausted, its remaining results are omitted entirely.
 *
 * Returns the SAME array reference when nothing was modified, so callers
 * (e.g. optimizeHistory) can detect "no change" by identity comparison.
 *
 * @param {Array} history - Conversation messages (Dialogue[]).
 * @returns {Array} History with oversized tool results truncated, or the
 *   original reference if no result exceeded a budget.
 */
export function budgetToolResults(history) {
    const result = [];
    let anyModified = false; // Track whether ANY message changed (see return below)
    for (const msg of history) {
        // Only user messages with structured content can carry tool results.
        if (msg.role !== 'user' || typeof msg.content === 'string' || !Array.isArray(msg.content)) {
            result.push(msg);
            continue;
        }
        let messageTotal = 0;
        let modified = false;
        const budgeted = [];
        for (const part of msg.content) {
            if (part.type !== 'tool_result') {
                budgeted.push(part);
                continue;
            }
            const content = typeof part.content === 'string' ? part.content : JSON.stringify(part.content);
            const size = content.length;
            // Per-tool cap
            if (size > MAX_TOOL_RESULT_CHARS) {
                modified = true;
                // Truncate at line boundary for cleaner output
                let preview = content.slice(0, PREVIEW_CHARS);
                const lastNewline = preview.lastIndexOf('\n');
                if (lastNewline > PREVIEW_CHARS * 0.5) {
                    preview = preview.slice(0, lastNewline);
                }
                budgeted.push({
                    type: 'tool_result',
                    tool_use_id: part.tool_use_id,
                    // Bug fix: report the true omitted count — preview may be shorter
                    // than PREVIEW_CHARS after the line-boundary trim above.
                    content: `[Output truncated: ${size.toLocaleString()} chars → ${PREVIEW_CHARS} preview]\n\n${preview}\n\n... (${size - preview.length} chars omitted)`,
                    is_error: part.is_error,
                });
                messageTotal += PREVIEW_CHARS + 200; // Approximate wrapper overhead
                continue;
            }
            // Per-message aggregate cap — once exceeded, truncate remaining results
            if (messageTotal + size > MAX_TOOL_RESULTS_PER_MESSAGE_CHARS) {
                modified = true;
                budgeted.push({
                    type: 'tool_result',
                    tool_use_id: part.tool_use_id,
                    content: `[Output omitted: message budget exceeded (${MAX_TOOL_RESULTS_PER_MESSAGE_CHARS / 1000}K chars/msg)]`,
                    is_error: part.is_error,
                });
                // Pin the total at the cap so all later results in this message are omitted too.
                messageTotal = MAX_TOOL_RESULTS_PER_MESSAGE_CHARS;
                continue;
            }
            budgeted.push(part);
            messageTotal += size;
        }
        if (modified) {
            anyModified = true;
        }
        result.push(modified ? { role: 'user', content: budgeted } : msg);
    }
    // Bug fix: previously this ALWAYS returned a freshly built array, so
    // optimizeHistory's identity check (`budgeted !== result`) reported a
    // change — and emitted its debug log — on every single call. Return the
    // original reference when nothing was touched, per the documented contract.
    return anyModified ? result : history;
}
101
+ // ─── 2. Thinking Block Stripping ───────────────────────────────────────────
102
/**
 * Drop thinking blocks from all but the most recent assistant messages.
 * The last KEEP_THINKING_TURNS assistant turns keep their thinking intact;
 * anything older has its thinking parts filtered out (they are bulky and
 * no longer needed once the decision was made). A message whose content
 * becomes empty is replaced with a '[thinking omitted]' placeholder.
 * Returns the original array reference when nothing was stripped.
 */
const KEEP_THINKING_TURNS = 2;
export function stripOldThinking(history) {
    // Walk backwards to mark the newest assistant turns as protected.
    const protectedIdx = new Set();
    for (let i = history.length - 1; i >= 0 && protectedIdx.size < KEEP_THINKING_TURNS; i--) {
        if (history[i].role === 'assistant') {
            protectedIdx.add(i);
        }
    }
    if (protectedIdx.size === 0) {
        return history; // No assistant messages at all — nothing to do
    }
    let touched = false;
    const out = history.map((msg, idx) => {
        // Only unprotected assistant messages with structured content are candidates.
        if (msg.role !== 'assistant' || protectedIdx.has(idx) || !Array.isArray(msg.content)) {
            return msg;
        }
        const kept = msg.content.filter((part) => part.type !== 'thinking');
        if (kept.length === msg.content.length) {
            return msg; // No thinking parts present — keep original object
        }
        touched = true;
        return {
            role: 'assistant',
            content: kept.length > 0 ? kept : [{ type: 'text', text: '[thinking omitted]' }],
        };
    });
    return touched ? out : history;
}
141
+ // ─── 3. Time-Based Cleanup ─────────────────────────────────────────────────
142
/**
 * Clear old tool results after an idle gap (threshold is
 * IDLE_GAP_THRESHOLD_MINUTES). When the user comes back after being away,
 * old tool results are stale anyway — keep only the most recent
 * KEEP_RECENT_TOOL_RESULTS and blank out the rest.
 *
 * @param {Array} history - Conversation messages.
 * @param {number} [lastActivityTimestamp] - Epoch ms of last activity; when
 *   absent, cleanup is skipped entirely.
 * @returns {{history: Array, cleaned: boolean}} Possibly-modified history and
 *   a flag indicating whether anything was cleared.
 */
export function timeBasedCleanup(history, lastActivityTimestamp) {
    const untouched = { history, cleaned: false };
    if (!lastActivityTimestamp) {
        return untouched;
    }
    const elapsedMs = Date.now() - lastActivityTimestamp;
    if (elapsedMs < 0) {
        return untouched; // Clock skew protection
    }
    if (elapsedMs / 60_000 < IDLE_GAP_THRESHOLD_MINUTES) {
        return untouched; // Not idle long enough
    }
    // Locate every user message whose first part is a tool_result.
    const resultIdx = [];
    history.forEach((msg, idx) => {
        if (msg.role !== 'user' || !Array.isArray(msg.content) || msg.content.length === 0) {
            return;
        }
        const head = msg.content[0];
        if (typeof head === 'string' || !('type' in head) || head.type !== 'tool_result') {
            return;
        }
        resultIdx.push(idx);
    });
    if (resultIdx.length <= KEEP_RECENT_TOOL_RESULTS) {
        return untouched;
    }
    // Blank out everything except the newest N tool-result messages.
    const staleIdx = resultIdx.slice(0, resultIdx.length - KEEP_RECENT_TOOL_RESULTS);
    const next = [...history];
    for (const idx of staleIdx) {
        const msg = next[idx];
        if (!Array.isArray(msg.content)) {
            continue;
        }
        next[idx] = {
            role: 'user',
            content: msg.content.map((part) => part.type === 'tool_result'
                ? {
                    type: 'tool_result',
                    tool_use_id: part.tool_use_id,
                    content: '[Stale tool result cleared after idle gap]',
                    is_error: part.is_error,
                }
                : part),
        };
    }
    return { history: next, cleaned: true };
}
195
+ // ─── 4. Pre-Compact Stripping ──────────────────────────────────────────────
196
/**
 * Strip heavy content before sending to the compaction model.
 * Image and document blocks are swapped for tiny text placeholders, since
 * the summarizer can't see them anyway. Messages without any heavy blocks
 * are returned as the same object reference.
 */
export function stripHeavyContent(history) {
    return history.map((msg) => {
        const parts = msg.content;
        // String content and anything non-array passes straight through.
        if (!Array.isArray(parts)) {
            return msg;
        }
        let replaced = false;
        const slim = parts.map((part) => {
            if ('type' in part && part.type === 'image') {
                replaced = true;
                return { type: 'text', text: '[image]' };
            }
            if ('type' in part && part.type === 'document') {
                replaced = true;
                return { type: 'text', text: '[document]' };
            }
            return part;
        });
        return replaced ? { ...msg, content: slim } : msg;
    });
}
223
/**
 * Run the full optimization pipeline on conversation history.
 * Called before each model request to minimize token usage.
 *
 * Pipeline order (cheapest first):
 * 1. Strip old thinking blocks (free, local)
 * 2. Budget tool results (free, local)
 * 3. Time-based cleanup (free, local, only after idle)
 *
 * Each stage signals "no change" by returning the same reference (or a
 * `cleaned: false` flag), so unchanged stages cost nothing downstream.
 *
 * @param {Array} history - Conversation messages (Dialogue[]).
 * @param {{debug?: boolean, lastActivityTimestamp?: number}} [opts]
 * @returns {Array} The optimized history (same reference if no changes).
 */
export function optimizeHistory(history, opts) {
    // Fix: removed the unused `changed` accumulator — it was set but never read.
    let result = history;
    // 1. Strip old thinking
    const stripped = stripOldThinking(result);
    if (stripped !== result) {
        result = stripped;
        if (opts?.debug)
            console.error('[runcode] Stripped old thinking blocks');
    }
    // 2. Budget tool results
    const budgeted = budgetToolResults(result);
    if (budgeted !== result) {
        result = budgeted;
        if (opts?.debug)
            console.error('[runcode] Budgeted oversized tool results');
    }
    // 3. Time-based cleanup
    const { history: cleaned, cleaned: didClean } = timeBasedCleanup(result, opts?.lastActivityTimestamp);
    if (didClean) {
        result = cleaned;
        if (opts?.debug)
            console.error('[runcode] Cleared stale tool results after idle gap');
    }
    return result;
}