@blockrun/franklin 3.3.3 → 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. package/README.md +55 -4
  2. package/dist/agent/commands.d.ts +1 -1
  3. package/dist/agent/commands.js +128 -17
  4. package/dist/agent/compact.d.ts +2 -2
  5. package/dist/agent/compact.js +148 -22
  6. package/dist/agent/context.d.ts +8 -3
  7. package/dist/agent/context.js +301 -108
  8. package/dist/agent/error-classifier.d.ts +11 -2
  9. package/dist/agent/error-classifier.js +64 -10
  10. package/dist/agent/llm.d.ts +8 -1
  11. package/dist/agent/llm.js +114 -19
  12. package/dist/agent/loop.d.ts +1 -2
  13. package/dist/agent/loop.js +509 -61
  14. package/dist/agent/optimize.d.ts +2 -2
  15. package/dist/agent/optimize.js +9 -7
  16. package/dist/agent/permissions.d.ts +1 -1
  17. package/dist/agent/permissions.js +1 -1
  18. package/dist/agent/planner.d.ts +42 -0
  19. package/dist/agent/planner.js +110 -0
  20. package/dist/agent/reduce.d.ts +7 -1
  21. package/dist/agent/reduce.js +85 -3
  22. package/dist/agent/streaming-executor.d.ts +6 -1
  23. package/dist/agent/streaming-executor.js +83 -5
  24. package/dist/agent/tokens.d.ts +11 -2
  25. package/dist/agent/tokens.js +38 -5
  26. package/dist/agent/tool-guard.d.ts +27 -0
  27. package/dist/agent/tool-guard.js +324 -0
  28. package/dist/agent/types.d.ts +7 -1
  29. package/dist/agent/types.js +1 -1
  30. package/dist/brain/extract.d.ts +11 -0
  31. package/dist/brain/extract.js +154 -0
  32. package/dist/brain/index.d.ts +3 -0
  33. package/dist/brain/index.js +2 -0
  34. package/dist/brain/store.d.ts +42 -0
  35. package/dist/brain/store.js +225 -0
  36. package/dist/brain/types.d.ts +45 -0
  37. package/dist/brain/types.js +5 -0
  38. package/dist/commands/daemon.js +2 -1
  39. package/dist/commands/start.js +16 -3
  40. package/dist/config.js +1 -1
  41. package/dist/index.js +27 -2
  42. package/dist/learnings/extractor.d.ts +13 -0
  43. package/dist/learnings/extractor.js +69 -8
  44. package/dist/learnings/index.d.ts +1 -1
  45. package/dist/learnings/index.js +1 -1
  46. package/dist/learnings/store.js +42 -13
  47. package/dist/learnings/types.d.ts +1 -1
  48. package/dist/mcp/client.d.ts +1 -1
  49. package/dist/mcp/client.js +5 -5
  50. package/dist/mcp/config.d.ts +1 -1
  51. package/dist/mcp/config.js +1 -1
  52. package/dist/panel/html.d.ts +2 -0
  53. package/dist/panel/html.js +409 -146
  54. package/dist/panel/server.js +19 -0
  55. package/dist/pricing.js +3 -2
  56. package/dist/proxy/fallback.d.ts +3 -1
  57. package/dist/proxy/fallback.js +4 -4
  58. package/dist/proxy/server.js +29 -11
  59. package/dist/proxy/sse-translator.js +1 -1
  60. package/dist/router/categories.d.ts +21 -0
  61. package/dist/router/categories.js +96 -0
  62. package/dist/router/index.d.ts +9 -2
  63. package/dist/router/index.js +106 -27
  64. package/dist/router/local-elo.d.ts +32 -0
  65. package/dist/router/local-elo.js +107 -0
  66. package/dist/router/selector.d.ts +46 -0
  67. package/dist/router/selector.js +106 -0
  68. package/dist/session/storage.d.ts +5 -1
  69. package/dist/session/storage.js +24 -2
  70. package/dist/social/a11y.d.ts +1 -1
  71. package/dist/social/a11y.js +5 -1
  72. package/dist/social/browser.d.ts +5 -0
  73. package/dist/social/browser.js +22 -0
  74. package/dist/social/preflight.d.ts +4 -0
  75. package/dist/social/preflight.js +42 -3
  76. package/dist/stats/failures.d.ts +20 -0
  77. package/dist/stats/failures.js +63 -0
  78. package/dist/stats/format.d.ts +6 -0
  79. package/dist/stats/format.js +23 -0
  80. package/dist/stats/insights.js +1 -21
  81. package/dist/stats/session-tracker.d.ts +21 -0
  82. package/dist/stats/session-tracker.js +28 -0
  83. package/dist/stats/tracker.d.ts +1 -1
  84. package/dist/stats/tracker.js +1 -1
  85. package/dist/tools/bash.d.ts +14 -1
  86. package/dist/tools/bash.js +132 -7
  87. package/dist/tools/edit.js +77 -14
  88. package/dist/tools/glob.js +13 -3
  89. package/dist/tools/grep.js +30 -12
  90. package/dist/tools/imagegen.js +3 -3
  91. package/dist/tools/index.d.ts +1 -1
  92. package/dist/tools/index.js +5 -1
  93. package/dist/tools/read.d.ts +16 -2
  94. package/dist/tools/read.js +36 -8
  95. package/dist/tools/searchx.d.ts +6 -2
  96. package/dist/tools/searchx.js +221 -44
  97. package/dist/tools/subagent.js +37 -3
  98. package/dist/tools/task.js +43 -7
  99. package/dist/tools/validate.d.ts +11 -0
  100. package/dist/tools/validate.js +42 -0
  101. package/dist/tools/webfetch.js +18 -7
  102. package/dist/tools/websearch.js +41 -7
  103. package/dist/tools/write.js +26 -6
  104. package/dist/ui/app.js +31 -6
  105. package/dist/ui/model-picker.d.ts +1 -1
  106. package/dist/ui/model-picker.js +1 -1
  107. package/dist/ui/terminal.d.ts +1 -1
  108. package/dist/ui/terminal.js +1 -1
  109. package/package.json +2 -2
@@ -1,20 +1,201 @@
1
1
  /**
2
- * runcode Agent Loop
2
+ * Franklin Agent Loop
3
3
  * The core reasoning-action cycle: prompt → model → extract capabilities → execute → repeat.
4
- * Original implementation with different architecture from any reference codebase.
5
4
  */
6
5
  import { ModelClient } from './llm.js';
7
- import { autoCompactIfNeeded, microCompact } from './compact.js';
8
- import { estimateHistoryTokens, updateActualTokens, resetTokenAnchor, getAnchoredTokenCount, getContextWindow } from './tokens.js';
6
+ import { autoCompactIfNeeded, forceCompact, microCompact } from './compact.js';
7
+ import { estimateHistoryTokens, updateActualTokens, resetTokenAnchor, getAnchoredTokenCount, getContextWindow, setEstimationModel } from './tokens.js';
9
8
  import { handleSlashCommand } from './commands.js';
10
9
  import { reduceTokens } from './reduce.js';
11
10
  import { PermissionManager } from './permissions.js';
12
11
  import { StreamingExecutor } from './streaming-executor.js';
13
12
  import { optimizeHistory, CAPPED_MAX_TOKENS, ESCALATED_MAX_TOKENS, getMaxOutputTokens } from './optimize.js';
14
13
  import { classifyAgentError } from './error-classifier.js';
14
+ import { SessionToolGuard } from './tool-guard.js';
15
15
  import { recordUsage } from '../stats/tracker.js';
16
- import { estimateCost } from '../pricing.js';
16
+ import { recordSessionUsage } from '../stats/session-tracker.js';
17
+ import { estimateCost, OPUS_PRICING } from '../pricing.js';
18
+ import { maybeMidSessionExtract } from '../learnings/extractor.js';
19
+ import { routeRequest, parseRoutingProfile } from '../router/index.js';
20
+ import { recordOutcome } from '../router/local-elo.js';
21
+ import { shouldPlan, getPlanningPrompt, getExecutorModel, isExecutorStuck, toolCallSignature } from './planner.js';
17
22
  import { createSessionId, appendToSession, updateSessionMeta, pruneOldSessions, } from '../session/storage.js';
23
+ /**
24
+ * Atomically replace all elements in a history array.
25
+ * Safer than `history.length = 0; history.push(...)` because if push throws
26
+ * (e.g., OOM), the array is already in its new state — not empty.
27
+ * Uses splice to do a single atomic operation on the array.
28
+ */
29
+ function replaceHistory(target, replacement) {
30
+ target.splice(0, target.length, ...replacement);
31
+ }
32
+ /**
33
+ * Sanitize history: fix orphaned tool results AND inject missing results.
34
+ * Inspired by Claude Code's yieldMissingToolResultBlocks + Hermes _sanitize_api_messages().
35
+ *
36
+ * Two problems this solves:
37
+ * 1. Orphaned tool_results — results without matching tool_use calls (remove them)
38
+ * 2. Missing tool_results — tool_use calls without matching results (inject stubs)
39
+ * This happens when the model response includes tool calls that weren't executed
40
+ * (e.g., abort mid-stream, error before tool execution). The API requires every
41
+ * tool_use to have a corresponding tool_result or it rejects the request.
42
+ */
43
+ function sanitizeHistory(history) {
44
+ // Collect all tool_use IDs from assistant messages
45
+ const callIds = new Set();
46
+ // Collect all tool_result IDs from user messages
47
+ const resultIds = new Set();
48
+ for (const msg of history) {
49
+ if (msg.role === 'assistant' && Array.isArray(msg.content)) {
50
+ for (const part of msg.content) {
51
+ if (part.type === 'tool_use' && part.id) {
52
+ callIds.add(part.id);
53
+ }
54
+ }
55
+ }
56
+ if (msg.role === 'user' && Array.isArray(msg.content)) {
57
+ for (const part of msg.content) {
58
+ if (part.type === 'tool_result' && part.tool_use_id) {
59
+ resultIds.add(part.tool_use_id);
60
+ }
61
+ }
62
+ }
63
+ }
64
+ // 1. Remove orphaned tool results (results without matching calls)
65
+ const orphanedResults = new Set([...resultIds].filter(id => !callIds.has(id)));
66
+ // 2. Find missing tool results (calls without matching results)
67
+ const missingResults = new Set([...callIds].filter(id => !resultIds.has(id)));
68
+ if (orphanedResults.size === 0 && missingResults.size === 0)
69
+ return history;
70
+ const result = [];
71
+ for (let i = 0; i < history.length; i++) {
72
+ const msg = history[i];
73
+ if (msg.role === 'user' && Array.isArray(msg.content)) {
74
+ // Remove orphaned tool results
75
+ if (orphanedResults.size > 0) {
76
+ const filtered = msg.content.filter(p => !(p.type === 'tool_result' && orphanedResults.has(p.tool_use_id)));
77
+ if (filtered.length === 0)
78
+ continue; // Skip empty messages
79
+ result.push({ ...msg, content: filtered });
80
+ }
81
+ else {
82
+ result.push(msg);
83
+ }
84
+ continue;
85
+ }
86
+ result.push(msg);
87
+ // After each assistant message with tool_use, check if the next message
88
+ // contains all the required tool_results. If not, inject stubs.
89
+ if (msg.role === 'assistant' && Array.isArray(msg.content) && missingResults.size > 0) {
90
+ const toolUseIds = [];
91
+ for (const part of msg.content) {
92
+ if (part.type === 'tool_use' && missingResults.has(part.id)) {
93
+ toolUseIds.push(part.id);
94
+ }
95
+ }
96
+ if (toolUseIds.length > 0) {
97
+ // Check if the next message already has some of these results
98
+ const nextMsg = history[i + 1];
99
+ const nextResultIds = new Set();
100
+ if (nextMsg?.role === 'user' && Array.isArray(nextMsg.content)) {
101
+ for (const part of nextMsg.content) {
102
+ if (part.type === 'tool_result') {
103
+ nextResultIds.add(part.tool_use_id);
104
+ }
105
+ }
106
+ }
107
+ // Inject stub results for any tool_use IDs that are truly missing
108
+ const stubParts = [];
109
+ for (const id of toolUseIds) {
110
+ if (!nextResultIds.has(id)) {
111
+ stubParts.push({
112
+ type: 'tool_result',
113
+ tool_use_id: id,
114
+ content: '[Tool execution was interrupted — result not available]',
115
+ is_error: true,
116
+ });
117
+ missingResults.delete(id); // Don't inject twice
118
+ }
119
+ }
120
+ if (stubParts.length > 0) {
121
+ // If next message is a user message, prepend stubs to it
122
+ if (nextMsg?.role === 'user' && Array.isArray(nextMsg.content)) {
123
+ // Will be handled when we process that message next
124
+ const existingContent = orphanedResults.size > 0
125
+ ? nextMsg.content.filter(p => !(p.type === 'tool_result' && orphanedResults.has(p.tool_use_id)))
126
+ : [...nextMsg.content];
127
+ // Replace the next message with merged content
128
+ history[i + 1] = { role: 'user', content: [...stubParts, ...existingContent] };
129
+ }
130
+ else {
131
+ // No user message follows — insert a new one with the stubs
132
+ result.push({ role: 'user', content: stubParts });
133
+ }
134
+ }
135
+ }
136
+ }
137
+ }
138
+ return result;
139
+ }
140
+ /**
141
+ * Detect media-related errors (image too large, too many images, PDF too large).
142
+ * These can be recovered by stripping media blocks and retrying.
143
+ */
144
+ function isMediaSizeError(msg) {
145
+ return ((msg.includes('image exceeds') && msg.includes('maximum')) ||
146
+ (msg.includes('image dimensions exceed')) ||
147
+ /maximum of \d+ PDF pages/.test(msg) ||
148
+ (msg.includes('image') && msg.includes('too large')) ||
149
+ (msg.includes('PDF') && msg.includes('too large')));
150
+ }
151
+ /**
152
+ * Strip image and document blocks from history, replacing with text placeholders.
153
+ * Used for media error recovery — retry without the oversized media.
154
+ */
155
+ function stripMediaFromHistory(history) {
156
+ let stripped = false;
157
+ const result = history.map(msg => {
158
+ if (typeof msg.content === 'string' || !Array.isArray(msg.content))
159
+ return msg;
160
+ let modified = false;
161
+ const cleaned = msg.content.map((part) => {
162
+ if (part.type === 'image') {
163
+ modified = true;
164
+ stripped = true;
165
+ return { type: 'text', text: '[image removed — too large for context]' };
166
+ }
167
+ if (part.type === 'document') {
168
+ modified = true;
169
+ stripped = true;
170
+ return { type: 'text', text: '[document removed — too large for context]' };
171
+ }
172
+ // Also strip media nested inside tool_result content arrays
173
+ if (part.type === 'tool_result' && Array.isArray(part.content)) {
174
+ const cleanedContent = part.content.map((c) => {
175
+ if (c.type === 'image' || c.type === 'document') {
176
+ modified = true;
177
+ stripped = true;
178
+ return { type: 'text', text: `[${c.type} removed — too large for context]` };
179
+ }
180
+ return c;
181
+ });
182
+ return modified ? { ...part, content: cleanedContent } : part;
183
+ }
184
+ return part;
185
+ });
186
+ return modified ? { ...msg, content: cleaned } : msg;
187
+ });
188
+ return { history: stripped ? result : history, stripped };
189
+ }
190
+ /**
191
+ * Calculate backoff delay with jitter to avoid thundering herd.
192
+ * Base: exponential (2^attempt * 1000ms), jitter: ±25%.
193
+ */
194
+ function getBackoffDelay(attempt, maxDelayMs = 32_000) {
195
+ const base = Math.min(Math.pow(2, attempt) * 1000, maxDelayMs);
196
+ const jitter = base * 0.25 * (Math.random() * 2 - 1); // ±25%
197
+ return Math.max(500, Math.round(base + jitter));
198
+ }
18
199
  // ─── Interactive Session ───────────────────────────────────────────────────
19
200
  /**
20
201
  * Run a multi-turn interactive session.
@@ -37,18 +218,35 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
37
218
  const permissions = new PermissionManager(config.permissionMode ?? 'default', config.permissionPromptFn);
38
219
  const history = [];
39
220
  let lastUserInput = ''; // For /retry
40
- const failedModels = new Set(); // Models that failed payment/rate-limit (session-level)
221
+ const originalModel = config.model; // Preserve original model/routing profile for recovery
222
+ let turnFailedModels = new Set(); // Models that failed this turn (cleared each new turn)
223
+ // Track models that failed with 402 (payment required) across turns.
224
+ // These persist until the session ends — unlike transient errors, payment failures
225
+ // will keep failing until the user adds funds. Map stores failure timestamp for future TTL.
226
+ const paymentFailedModels = new Map(); // model → timestamp
227
+ // Plan-then-execute: session-level disable flag lives on config (set by /noplan command)
41
228
  // Session persistence
42
229
  const sessionId = createSessionId();
43
230
  let turnCount = 0;
44
231
  let tokenBudgetWarned = false; // Emit token budget warning at most once per session
45
232
  let lastSessionActivity = Date.now();
233
+ let lastRoutedModel = ''; // last model chosen by router (for local elo)
234
+ let lastRoutedCategory = ''; // last category detected (for local elo)
235
+ let sessionInputTokens = 0;
236
+ let sessionOutputTokens = 0;
237
+ let sessionCostUsd = 0;
238
+ let sessionSavedVsOpus = 0;
239
+ const toolGuard = new SessionToolGuard();
46
240
  const persistSessionMeta = () => {
47
241
  updateSessionMeta(sessionId, {
48
242
  model: config.model,
49
243
  workDir,
50
244
  turnCount,
51
245
  messageCount: history.length,
246
+ inputTokens: sessionInputTokens,
247
+ outputTokens: sessionOutputTokens,
248
+ costUsd: sessionCostUsd,
249
+ savedVsOpusUsd: sessionSavedVsOpus,
52
250
  });
53
251
  };
54
252
  const persistSessionMessage = (message) => {
@@ -67,6 +265,10 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
67
265
  if (input.startsWith('/')) {
68
266
  // /retry re-sends the last user message
69
267
  if (input === '/retry') {
268
+ // Record retry as negative signal for local elo
269
+ if (lastRoutedCategory && lastRoutedModel) {
270
+ recordOutcome(lastRoutedCategory, lastRoutedModel, 'retried');
271
+ }
70
272
  if (!lastUserInput) {
71
273
  onEvent({ kind: 'text_delta', text: 'No previous message to retry.\n' });
72
274
  onEvent({ kind: 'turn_done', reason: 'completed' });
@@ -87,15 +289,38 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
87
289
  lastUserInput = input;
88
290
  history.push({ role: 'user', content: input });
89
291
  turnCount++;
292
+ toolGuard.startTurn();
90
293
  persistSessionMessage({ role: 'user', content: input });
294
+ // ── Model recovery: try original model at the start of each new turn ──
295
+ // If we fell back to a free model last turn due to a transient error, try original again.
296
+ // But DON'T reset if the original model had a payment failure — it will just fail again.
297
+ if (config.model !== originalModel && !paymentFailedModels.has(originalModel)) {
298
+ config.model = originalModel;
299
+ config.onModelChange?.(originalModel);
300
+ }
301
+ turnFailedModels = new Set(); // Fresh slate for transient failures this turn
91
302
  const abort = new AbortController();
92
303
  onAbortReady?.(() => abort.abort());
93
304
  let loopCount = 0;
94
305
  let recoveryAttempts = 0;
306
+ const MAX_RECOVERY_ATTEMPTS = 5; // Up from 3 — Claude Code uses 10, we split the difference
95
307
  let compactFailures = 0;
96
308
  let maxTokensOverride;
97
309
  const turnIdleReference = lastSessionActivity;
98
310
  lastSessionActivity = Date.now();
311
+ // ── Plan-then-execute state (per turn) ──
312
+ let planActive = false;
313
+ let planPlannerModel = '';
314
+ let planExecutorModel = '';
315
+ let planEscalationCount = 0;
316
+ let planConsecutiveErrors = 0;
317
+ let lastToolSig = ''; // For same-tool repeat detection
318
+ // ── Tool call guardrails (inspired by hermes-agent) ──
319
+ let turnToolCalls = 0; // Total tool calls this user turn
320
+ const turnToolCounts = new Map(); // Per-tool-name counts this turn
321
+ const readFileCache = new Set(); // Files already read (dedup)
322
+ const MAX_TOOL_CALLS_PER_TURN = 25; // Hard cap per user turn
323
+ const SAME_TOOL_WARN_THRESHOLD = 5; // Warn after N calls to same tool
99
324
  // Agent loop for this user message
100
325
  while (loopCount < maxTurns) {
101
326
  loopCount++;
@@ -110,21 +335,18 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
110
335
  lastActivityTimestamp: loopCount === 1 ? turnIdleReference : lastSessionActivity,
111
336
  });
112
337
  if (optimized !== history) {
113
- history.length = 0;
114
- history.push(...optimized);
338
+ replaceHistory(history, optimized);
115
339
  }
116
340
  // 2. Token reduction: age old results, normalize whitespace, trim verbose messages
117
341
  const reduced = reduceTokens(history, config.debug);
118
342
  if (reduced !== history) {
119
- history.length = 0;
120
- history.push(...reduced);
343
+ replaceHistory(history, reduced);
121
344
  }
122
345
  // 3. Microcompact: clear old tool results to prevent context snowball
123
346
  if (history.length > 6) {
124
347
  const microCompacted = microCompact(history, 3);
125
348
  if (microCompacted !== history) {
126
- history.length = 0;
127
- history.push(...microCompacted);
349
+ replaceHistory(history, microCompacted);
128
350
  resetTokenAnchor(); // History shrunk — resync token tracking
129
351
  }
130
352
  }
@@ -134,19 +356,18 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
134
356
  try {
135
357
  const { history: compacted, compacted: didCompact } = await autoCompactIfNeeded(history, config.model, client, config.debug);
136
358
  if (didCompact) {
137
- history.length = 0;
138
- history.push(...compacted);
359
+ replaceHistory(history, compacted);
139
360
  resetTokenAnchor();
140
361
  compactFailures = 0;
141
362
  if (config.debug) {
142
- console.error(`[runcode] History compacted: ~${estimateHistoryTokens(history)} tokens`);
363
+ console.error(`[franklin] History compacted: ~${estimateHistoryTokens(history)} tokens`);
143
364
  }
144
365
  }
145
366
  }
146
367
  catch (compactErr) {
147
368
  compactFailures++;
148
369
  if (config.debug) {
149
- console.error(`[runcode] Compaction failed (${compactFailures}/3): ${compactErr.message}`);
370
+ console.error(`[franklin] Compaction failed (${compactFailures}/3): ${compactErr.message}`);
150
371
  }
151
372
  }
152
373
  }
@@ -161,6 +382,20 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
161
382
  '4. Think step by step — show your reasoning explicitly when it adds value\n' +
162
383
  'Prioritize correctness and thoroughness over speed.');
163
384
  }
385
+ // ── Context awareness injection ──
386
+ // Tell the model how full its context window is so it can self-regulate.
387
+ // At high usage, nudge it to be concise and avoid unnecessary tool calls.
388
+ const { contextUsagePct: preCallPct } = getAnchoredTokenCount(history);
389
+ if (preCallPct > 50) {
390
+ let contextNote = `# Context Window Status\nYou have used approximately ${Math.round(preCallPct)}% of your context window.`;
391
+ if (preCallPct > 80) {
392
+ contextNote += ' Context is critically full. Be extremely concise. Avoid re-reading files already in context. Prioritize completing the current task over exploring new questions.';
393
+ }
394
+ else if (preCallPct > 65) {
395
+ contextNote += ' Be concise in responses. Avoid unnecessary tool calls. Do not re-read files you already have in context.';
396
+ }
397
+ systemParts.push(contextNote);
398
+ }
164
399
  const systemPrompt = systemParts.join('\n\n');
165
400
  const modelMaxOut = getMaxOutputTokens(config.model);
166
401
  let maxTokens = Math.min(maxTokensOverride ?? CAPPED_MAX_TOKENS, modelMaxOut);
@@ -172,16 +407,73 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
172
407
  handlers: capabilityMap,
173
408
  scope: { workingDir: workDir, abortSignal: abort.signal, onAskUser: config.onAskUser },
174
409
  permissions,
410
+ guard: toolGuard,
175
411
  onStart: (id, name, preview) => onEvent({ kind: 'capability_start', id, name, preview }),
176
412
  onProgress: (id, text) => onEvent({ kind: 'capability_progress', id, text }),
413
+ sessionId,
177
414
  });
415
+ // ── Router: resolve routing profiles to concrete models ──
416
+ const routingProfile = parseRoutingProfile(config.model);
417
+ let resolvedModel = config.model;
418
+ let routingTier;
419
+ let routingConfidence;
420
+ let routingSavings;
421
+ if (routingProfile) {
422
+ // Extract latest user text for classification
423
+ const lastUser = [...history].reverse().find((m) => m.role === 'user');
424
+ const userText = typeof lastUser?.content === 'string'
425
+ ? lastUser.content
426
+ : Array.isArray(lastUser?.content)
427
+ ? lastUser.content
428
+ .filter(p => p.type === 'text')
429
+ .map(p => p.text ?? '')
430
+ .join(' ')
431
+ : '';
432
+ const routing = routeRequest(userText, routingProfile);
433
+ resolvedModel = routing.model;
434
+ routingTier = routing.tier;
435
+ routingConfidence = routing.confidence;
436
+ routingSavings = routing.savings;
437
+ lastRoutedModel = routing.model;
438
+ lastRoutedCategory = routing.signals[0] || '';
439
+ }
440
+ // Update token estimation model for more accurate byte-per-token ratio
441
+ setEstimationModel(resolvedModel);
442
+ // ── Plan-then-execute: detect and activate ──
443
+ if (loopCount === 1 && !planActive && routingProfile &&
444
+ shouldPlan(routingTier, routingProfile, lastUserInput, !!config.ultrathink, !!config.planDisabled)) {
445
+ planActive = true;
446
+ planPlannerModel = resolvedModel;
447
+ planExecutorModel = getExecutorModel(routingProfile);
448
+ onEvent({ kind: 'text_delta', text: '\n*Planning...*\n' });
449
+ }
450
+ // Plan-then-execute: override model on execution iterations
451
+ if (planActive && loopCount > 1) {
452
+ resolvedModel = planExecutorModel;
453
+ }
454
+ // Build per-call tool defs, max_tokens, and system prompt
455
+ // (planning calls get no tools + short output + planning prompt)
456
+ let callToolDefs = toolDefs;
457
+ let callMaxTokens = maxTokens;
458
+ let callSystemPrompt = systemPrompt;
459
+ if (planActive && loopCount === 1) {
460
+ callToolDefs = []; // No tools during planning
461
+ callMaxTokens = 2048; // Short plan output
462
+ callSystemPrompt = systemPrompt + '\n\n' + getPlanningPrompt();
463
+ }
464
+ // Safety net: handled in llm.ts resolveVirtualModel()
465
+ // Sanitize: remove orphaned tool results that could confuse the API
466
+ const sanitized = sanitizeHistory(history);
467
+ if (sanitized.length !== history.length) {
468
+ replaceHistory(history, sanitized);
469
+ }
178
470
  try {
179
471
  const result = await client.complete({
180
- model: config.model,
472
+ model: resolvedModel,
181
473
  messages: history,
182
- system: systemPrompt,
183
- tools: toolDefs,
184
- max_tokens: maxTokens,
474
+ system: callSystemPrompt,
475
+ tools: callToolDefs,
476
+ max_tokens: callMaxTokens,
185
477
  stream: true,
186
478
  }, abort.signal,
187
479
  // Start concurrent tools as soon as their input is fully received
@@ -198,6 +490,18 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
198
490
  responseParts = result.content;
199
491
  usage = result.usage;
200
492
  stopReason = result.stopReason;
493
+ // ── Empty response recovery (inspired by Hermes _empty_content_retries) ──
494
+ const hasText = responseParts.some(p => p.type === 'text' && p.text?.trim());
495
+ const hasTools = responseParts.some(p => p.type === 'tool_use');
496
+ const hasThinking = responseParts.some(p => p.type === 'thinking');
497
+ if (!hasText && !hasTools && !hasThinking && recoveryAttempts < MAX_RECOVERY_ATTEMPTS) {
498
+ recoveryAttempts++;
499
+ if (config.debug) {
500
+ console.error(`[franklin] Empty response — retrying (${recoveryAttempts}/${MAX_RECOVERY_ATTEMPTS})`);
501
+ }
502
+ onEvent({ kind: 'text_delta', text: `\n*Empty response — retrying (${recoveryAttempts}/${MAX_RECOVERY_ATTEMPTS})...*\n` });
503
+ continue;
504
+ }
201
505
  }
202
506
  catch (err) {
203
507
  // ── User abort (Esc key) ──
@@ -215,42 +519,63 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
215
519
  }
216
520
  const errMsg = err.message || '';
217
521
  const classified = classifyAgentError(errMsg);
218
- // ── Prompt too long recovery ──
219
- if (classified.category === 'context_limit' && recoveryAttempts < 3) {
522
+ // ── Media size error recovery (strip images/PDFs + retry) ──
523
+ if (isMediaSizeError(errMsg) && recoveryAttempts < MAX_RECOVERY_ATTEMPTS) {
220
524
  recoveryAttempts++;
221
525
  if (config.debug) {
222
- console.error(`[runcode] Prompt too long — forcing compact (attempt ${recoveryAttempts})`);
526
+ console.error(`[franklin] Media too large — stripping and retrying (attempt ${recoveryAttempts})`);
223
527
  }
224
- const { history: compactedAgain } = await autoCompactIfNeeded(history, config.model, client, config.debug);
225
- history.length = 0;
226
- history.push(...compactedAgain);
528
+ const { history: stripped, stripped: didStrip } = stripMediaFromHistory(history);
529
+ if (didStrip) {
530
+ replaceHistory(history, stripped);
531
+ onEvent({ kind: 'text_delta', text: '\n*Media too large — retrying without images/documents...*\n' });
532
+ continue;
533
+ }
534
+ // No media to strip — fall through to other error handling
535
+ }
536
+ // ── Prompt too long recovery (reactive compaction) ──
537
+ // Use forceCompact instead of autoCompactIfNeeded — the API already told us
538
+ // the prompt is too long, so we must compact regardless of our threshold estimate.
539
+ // This is the key insight from Claude Code: reactive compaction must FORCE compress.
540
+ if (classified.category === 'context_limit' && recoveryAttempts < MAX_RECOVERY_ATTEMPTS) {
541
+ recoveryAttempts++;
542
+ if (config.debug) {
543
+ console.error(`[franklin] Prompt too long — force compacting (attempt ${recoveryAttempts})`);
544
+ }
545
+ onEvent({ kind: 'text_delta', text: '\n*Context limit hit — compacting conversation...*\n' });
546
+ const { history: compactedAgain } = await forceCompact(history, config.model, client, config.debug);
547
+ replaceHistory(history, compactedAgain);
548
+ resetTokenAnchor(); // History mutated — resync tracking
227
549
  continue; // Retry
228
550
  }
229
551
  // ── Transient error recovery (network, rate limit, server errors) ──
230
- if (classified.isTransient && recoveryAttempts < 3) {
552
+ // Respect per-error maxRetries (e.g., 529/overloaded gets only 3 retries)
553
+ const effectiveMaxRetries = classified.maxRetries ?? MAX_RECOVERY_ATTEMPTS;
554
+ if (classified.isTransient && recoveryAttempts < effectiveMaxRetries) {
231
555
  recoveryAttempts++;
232
- const backoffMs = Math.pow(2, recoveryAttempts) * 1000;
556
+ const backoffMs = getBackoffDelay(recoveryAttempts);
233
557
  if (config.debug) {
234
- console.error(`[runcode] ${classified.label} error — retrying in ${backoffMs / 1000}s (attempt ${recoveryAttempts}): ${errMsg.slice(0, 100)}`);
558
+ console.error(`[franklin] ${classified.label} error — retrying in ${(backoffMs / 1000).toFixed(1)}s (attempt ${recoveryAttempts}/${effectiveMaxRetries}): ${errMsg.slice(0, 100)}`);
235
559
  }
236
560
  onEvent({
237
561
  kind: 'text_delta',
238
- text: `\n*Retrying (${recoveryAttempts}/3) after ${classified.label} error...*\n`,
562
+ text: `\n*Retrying (${recoveryAttempts}/${effectiveMaxRetries}) after ${classified.label} error...*\n`,
239
563
  });
240
564
  await new Promise(r => setTimeout(r, backoffMs));
241
565
  continue;
242
566
  }
243
- // Add recovery suggestions based on error type
244
- let suggestion = '';
245
- if (classified.category === 'rate_limit') {
246
- suggestion = '\nTip: Try /model to switch to a different model, or wait a moment and /retry.';
247
- }
248
- else if (classified.category === 'payment') {
249
- // Auto-fallback to free models on payment/rate limit failure
250
- // Track failed models at session level to prevent ping-pong loops
251
- failedModels.add(config.model);
567
+ // ── Payment failure: auto-fallback to free models ──
568
+ // Track payment-failed models for the entire session — unlike transient errors,
569
+ // 402s will keep failing until the user adds funds.
570
+ if (classified.category === 'payment') {
571
+ turnFailedModels.add(config.model);
572
+ paymentFailedModels.set(config.model, Date.now());
573
+ // Record to local Elo so the router learns to avoid this model
574
+ if (lastRoutedCategory) {
575
+ recordOutcome(lastRoutedCategory, config.model, 'payment');
576
+ }
252
577
  const FREE_MODELS = ['nvidia/qwen3-coder-480b', 'nvidia/nemotron-ultra-253b', 'nvidia/devstral-2-123b'];
253
- const nextFree = FREE_MODELS.find(m => !failedModels.has(m));
578
+ const nextFree = FREE_MODELS.find(m => !turnFailedModels.has(m));
254
579
  if (nextFree) {
255
580
  const oldModel = config.model;
256
581
  config.model = nextFree;
@@ -258,14 +583,9 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
258
583
  onEvent({ kind: 'text_delta', text: `\n*${oldModel} failed — switching to ${nextFree}*\n` });
259
584
  continue; // Retry with next model
260
585
  }
261
- suggestion = '\nTip: Run `runcode balance` to check funds. Try /model free for free models.';
262
- }
263
- else if (classified.category === 'timeout' || classified.category === 'network') {
264
- suggestion = '\nTip: Check your network connection. Use /retry to try again.';
265
- }
266
- else if (classified.category === 'context_limit') {
267
- suggestion = '\nTip: Run /compact to compress conversation history.';
268
586
  }
587
+ // ── Unrecoverable: show error with suggestion from classifier ──
588
+ const suggestion = classified.suggestion ? `\nTip: ${classified.suggestion}` : '';
269
589
  onEvent({
270
590
  kind: 'turn_done',
271
591
  reason: 'error',
@@ -281,31 +601,51 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
281
601
  : estimateHistoryTokens(history);
282
602
  // Anchor token tracking to actual API counts
283
603
  updateActualTokens(inputTokens, usage.outputTokens, history.length);
604
+ const { contextUsagePct } = getAnchoredTokenCount(history);
284
605
  onEvent({
285
606
  kind: 'usage',
286
607
  inputTokens,
287
608
  outputTokens: usage.outputTokens,
288
- model: config.model,
609
+ model: resolvedModel,
289
610
  calls: 1,
611
+ tier: routingTier,
612
+ confidence: routingConfidence,
613
+ savings: routingSavings,
614
+ contextPct: Math.round(contextUsagePct),
290
615
  });
291
- // Record usage for stats tracking (runcode stats command)
292
- const costEstimate = estimateCost(config.model, inputTokens, usage.outputTokens, 1);
293
- recordUsage(config.model, inputTokens, usage.outputTokens, costEstimate, 0);
616
+ // Record usage for stats tracking (franklin stats command)
617
+ const costEstimate = estimateCost(resolvedModel, inputTokens, usage.outputTokens, 1);
618
+ recordUsage(resolvedModel, inputTokens, usage.outputTokens, costEstimate, 0);
619
+ recordSessionUsage(resolvedModel, inputTokens, usage.outputTokens, costEstimate, routingTier);
620
+ // Accumulate session-level totals for session meta
621
+ sessionInputTokens += inputTokens;
622
+ sessionOutputTokens += usage.outputTokens;
623
+ sessionCostUsd += costEstimate;
624
+ const opusCost = (inputTokens / 1_000_000) * OPUS_PRICING.input
625
+ + (usage.outputTokens / 1_000_000) * OPUS_PRICING.output;
626
+ sessionSavedVsOpus += Math.max(0, opusCost - costEstimate);
294
627
  // ── Max output tokens recovery ──
295
- if (stopReason === 'max_tokens' && recoveryAttempts < 3) {
628
+ if (stopReason === 'max_tokens' && recoveryAttempts < MAX_RECOVERY_ATTEMPTS) {
296
629
  recoveryAttempts++;
297
630
  if (maxTokensOverride === undefined) {
298
631
  // First hit: escalate to 64K
299
632
  maxTokensOverride = ESCALATED_MAX_TOKENS;
300
633
  if (config.debug) {
301
- console.error(`[runcode] Max tokens hit — escalating to ${maxTokensOverride}`);
634
+ console.error(`[franklin] Max tokens hit — escalating to ${maxTokensOverride}`);
302
635
  }
303
636
  }
304
637
  // Append what we got + a continuation prompt (text already streamed)
305
638
  const partialAssistant = { role: 'assistant', content: responseParts };
306
639
  const continuationPrompt = {
307
640
  role: 'user',
308
- content: 'Continue where you left off. Do not repeat what you already said.',
641
+ content: [
642
+ 'Output token limit hit. Continue with these rules:',
643
+ '1. Resume directly — no apology, no recap of what you already said. Pick up mid-sentence if that is where the cut happened.',
644
+ '2. Do NOT repeat any text or code that was already output above.',
645
+ '3. Break remaining work into smaller pieces — use multiple tool calls if needed instead of one large output.',
646
+ '4. Skip extended reasoning for the continuation — focus on executing.',
647
+ '5. If you were in the middle of outputting code, finish the code block first.',
648
+ ].join('\n'),
309
649
  };
310
650
  history.push(partialAssistant);
311
651
  persistSessionMessage(partialAssistant);
@@ -326,6 +666,18 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
326
666
  const assistantMessage = { role: 'assistant', content: responseParts };
327
667
  history.push(assistantMessage);
328
668
  persistSessionMessage(assistantMessage);
669
+ // ── Plan-then-execute: transition from planning to execution ──
670
+ if (planActive && loopCount === 1 && invocations.length === 0) {
671
+ // Planning call completed — inject execution kickoff
672
+ const execKickoff = {
673
+ role: 'user',
674
+ content: 'Execute the plan above step by step. Use tools to complete each step. After each step, briefly state what you did and move to the next.',
675
+ };
676
+ history.push(execKickoff);
677
+ persistSessionMessage(execKickoff);
678
+ onEvent({ kind: 'text_delta', text: `\n*Executing with ${planExecutorModel}...*\n` });
679
+ continue; // Next iteration uses the cheap executor model
680
+ }
329
681
  // No more capabilities → done with this user message
330
682
  if (invocations.length === 0) {
331
683
  lastSessionActivity = Date.now();
@@ -343,6 +695,10 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
343
695
  });
344
696
  }
345
697
  }
698
+ // Record success for local Elo learning (include tool call count for efficiency)
699
+ if (lastRoutedCategory && lastRoutedModel) {
700
+ recordOutcome(lastRoutedCategory, lastRoutedModel, 'continued', turnToolCalls);
701
+ }
346
702
  onEvent({ kind: 'turn_done', reason: 'completed' });
347
703
  break;
348
704
  }
@@ -351,22 +707,114 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
351
707
  for (const [inv, result] of results) {
352
708
  onEvent({ kind: 'capability_done', id: inv.id, result });
353
709
  }
710
+ // ── Tool call guardrails ──
711
+ turnToolCalls += results.length;
712
+ for (const [inv] of results) {
713
+ const name = inv.name;
714
+ turnToolCounts.set(name, (turnToolCounts.get(name) || 0) + 1);
715
+ // Read file dedup: track paths already read
716
+ if (name === 'Read' && inv.input.file_path) {
717
+ readFileCache.add(inv.input.file_path);
718
+ }
719
+ }
354
720
  // Refresh activity timestamp after tool execution
355
721
  lastSessionActivity = Date.now();
356
- // Append outcomes
357
- const outcomeContent = results.map(([inv, result]) => ({
358
- type: 'tool_result',
359
- tool_use_id: inv.id,
360
- content: result.output,
361
- is_error: result.isError,
362
- }));
722
+ // Mid-session learning extraction (like Claude Code's SessionMemory)
723
+ // Runs in the background — never blocks the conversation
724
+ const { estimated: currentTokens } = getAnchoredTokenCount(history);
725
+ maybeMidSessionExtract(history, currentTokens, turnToolCalls, sessionId, client);
726
+ // Append outcomes (with guardrail injections)
727
+ const outcomeContent = results.map(([inv, result]) => {
728
+ // Read file dedup: if this file was already read earlier in this turn,
729
+ // replace content with a stub to save tokens
730
+ if (inv.name === 'Read' && !result.isError) {
731
+ const fp = inv.input.file_path;
732
+ const count = results.filter(([i]) => i.name === 'Read' && i.input.file_path === fp).length;
733
+ if (count > 1 && inv !== results.filter(([i]) => i.name === 'Read' && i.input.file_path === fp).pop()?.[0]) {
734
+ return {
735
+ type: 'tool_result',
736
+ tool_use_id: inv.id,
737
+ content: `File already read in this turn. Refer to the other Read result for ${fp}.`,
738
+ is_error: false,
739
+ };
740
+ }
741
+ }
742
+ return {
743
+ type: 'tool_result',
744
+ tool_use_id: inv.id,
745
+ content: result.output,
746
+ is_error: result.isError,
747
+ };
748
+ });
749
+ // ── Guardrail injections ──
750
+ // Warn about same-tool repetition
751
+ for (const [name, count] of turnToolCounts) {
752
+ if (count === SAME_TOOL_WARN_THRESHOLD) {
753
+ outcomeContent.push({
754
+ type: 'tool_result',
755
+ tool_use_id: `guardrail-warn-${name}`,
756
+ content: `[SYSTEM] You have called ${name} ${count} times this turn. Stop and present your results now. Do not make more ${name} calls.`,
757
+ is_error: true,
758
+ });
759
+ }
760
+ }
761
+ // Hard cap: once the per-turn tool call limit is reached, inject a stop instruction for the model
762
+ if (turnToolCalls >= MAX_TOOL_CALLS_PER_TURN) {
763
+ outcomeContent.push({
764
+ type: 'tool_result',
765
+ tool_use_id: 'guardrail-cap',
766
+ content: `[SYSTEM] Tool call limit reached (${MAX_TOOL_CALLS_PER_TURN}). Present your results to the user NOW. Do not make any more tool calls.`,
767
+ is_error: true,
768
+ });
769
+ }
363
770
  const toolResultMessage = { role: 'user', content: outcomeContent };
364
771
  history.push(toolResultMessage);
365
772
  persistSessionMessage(toolResultMessage);
773
+ // ── Plan-then-execute: stuck detection ──
774
+ if (planActive && loopCount > 1) {
775
+ const hasErrors = results.some(([, r]) => r.isError);
776
+ planConsecutiveErrors = hasErrors ? planConsecutiveErrors + 1 : 0;
777
+ // Check for same-tool repeat (model calling the exact same thing twice)
778
+ const currentSig = results.length === 1
779
+ ? toolCallSignature(results[0][0].name, results[0][0].input)
780
+ : '';
781
+ const sameToolRepeat = currentSig !== '' && currentSig === lastToolSig;
782
+ lastToolSig = currentSig;
783
+ if (isExecutorStuck(planConsecutiveErrors, sameToolRepeat)) {
784
+ if (planEscalationCount < 2) {
785
+ planEscalationCount++;
786
+ // One-shot escalation: next iteration uses the planner model
787
+ resolvedModel = planPlannerModel;
788
+ const escalation = {
789
+ role: 'user',
790
+ content: '[ESCALATION] The executor got stuck on repeated errors. You are a stronger model. Review what happened and either fix the approach or continue from where execution stopped.',
791
+ };
792
+ history.push(escalation);
793
+ persistSessionMessage(escalation);
794
+ onEvent({ kind: 'text_delta', text: '\n*Escalating to stronger model...*\n' });
795
+ }
796
+ else {
797
+ // Abandon plan — strong model finishes the task directly
798
+ planActive = false;
799
+ onEvent({ kind: 'text_delta', text: '\n*Plan abandoned — switching to full model...*\n' });
800
+ }
801
+ }
802
+ }
803
+ // Tool call cap reached: only log it (debug mode) — the loop is deliberately not broken here
804
+ if (turnToolCalls >= MAX_TOOL_CALLS_PER_TURN) {
805
+ if (config.debug) {
806
+ console.error(`[franklin] Tool call cap hit: ${turnToolCalls} calls this turn`);
807
+ }
808
+ // Don't break — let the model respond one more time to summarize,
809
+ // but inject the stop signal above so it knows to finish up.
810
+ }
366
811
  }
367
812
  if (loopCount >= maxTurns) {
368
813
  lastSessionActivity = Date.now();
369
814
  persistSessionMeta();
815
+ if (lastRoutedCategory && lastRoutedModel) {
816
+ recordOutcome(lastRoutedCategory, lastRoutedModel, 'max_turns', turnToolCalls);
817
+ }
370
818
  onEvent({ kind: 'turn_done', reason: 'max_turns' });
371
819
  }
372
820
  }