@blockrun/franklin 3.3.3 → 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +55 -4
- package/dist/agent/commands.d.ts +1 -1
- package/dist/agent/commands.js +128 -17
- package/dist/agent/compact.d.ts +2 -2
- package/dist/agent/compact.js +148 -22
- package/dist/agent/context.d.ts +8 -3
- package/dist/agent/context.js +301 -108
- package/dist/agent/error-classifier.d.ts +11 -2
- package/dist/agent/error-classifier.js +64 -10
- package/dist/agent/llm.d.ts +8 -1
- package/dist/agent/llm.js +114 -19
- package/dist/agent/loop.d.ts +1 -2
- package/dist/agent/loop.js +509 -61
- package/dist/agent/optimize.d.ts +2 -2
- package/dist/agent/optimize.js +9 -7
- package/dist/agent/permissions.d.ts +1 -1
- package/dist/agent/permissions.js +1 -1
- package/dist/agent/planner.d.ts +42 -0
- package/dist/agent/planner.js +110 -0
- package/dist/agent/reduce.d.ts +7 -1
- package/dist/agent/reduce.js +85 -3
- package/dist/agent/streaming-executor.d.ts +6 -1
- package/dist/agent/streaming-executor.js +83 -5
- package/dist/agent/tokens.d.ts +11 -2
- package/dist/agent/tokens.js +38 -5
- package/dist/agent/tool-guard.d.ts +27 -0
- package/dist/agent/tool-guard.js +324 -0
- package/dist/agent/types.d.ts +7 -1
- package/dist/agent/types.js +1 -1
- package/dist/brain/extract.d.ts +11 -0
- package/dist/brain/extract.js +154 -0
- package/dist/brain/index.d.ts +3 -0
- package/dist/brain/index.js +2 -0
- package/dist/brain/store.d.ts +42 -0
- package/dist/brain/store.js +225 -0
- package/dist/brain/types.d.ts +45 -0
- package/dist/brain/types.js +5 -0
- package/dist/commands/daemon.js +2 -1
- package/dist/commands/start.js +16 -3
- package/dist/config.js +1 -1
- package/dist/index.js +27 -2
- package/dist/learnings/extractor.d.ts +13 -0
- package/dist/learnings/extractor.js +69 -8
- package/dist/learnings/index.d.ts +1 -1
- package/dist/learnings/index.js +1 -1
- package/dist/learnings/store.js +42 -13
- package/dist/learnings/types.d.ts +1 -1
- package/dist/mcp/client.d.ts +1 -1
- package/dist/mcp/client.js +5 -5
- package/dist/mcp/config.d.ts +1 -1
- package/dist/mcp/config.js +1 -1
- package/dist/panel/html.d.ts +2 -0
- package/dist/panel/html.js +409 -146
- package/dist/panel/server.js +19 -0
- package/dist/pricing.js +3 -2
- package/dist/proxy/fallback.d.ts +3 -1
- package/dist/proxy/fallback.js +4 -4
- package/dist/proxy/server.js +29 -11
- package/dist/proxy/sse-translator.js +1 -1
- package/dist/router/categories.d.ts +21 -0
- package/dist/router/categories.js +96 -0
- package/dist/router/index.d.ts +9 -2
- package/dist/router/index.js +106 -27
- package/dist/router/local-elo.d.ts +32 -0
- package/dist/router/local-elo.js +107 -0
- package/dist/router/selector.d.ts +46 -0
- package/dist/router/selector.js +106 -0
- package/dist/session/storage.d.ts +5 -1
- package/dist/session/storage.js +24 -2
- package/dist/social/a11y.d.ts +1 -1
- package/dist/social/a11y.js +5 -1
- package/dist/social/browser.d.ts +5 -0
- package/dist/social/browser.js +22 -0
- package/dist/social/preflight.d.ts +4 -0
- package/dist/social/preflight.js +42 -3
- package/dist/stats/failures.d.ts +20 -0
- package/dist/stats/failures.js +63 -0
- package/dist/stats/format.d.ts +6 -0
- package/dist/stats/format.js +23 -0
- package/dist/stats/insights.js +1 -21
- package/dist/stats/session-tracker.d.ts +21 -0
- package/dist/stats/session-tracker.js +28 -0
- package/dist/stats/tracker.d.ts +1 -1
- package/dist/stats/tracker.js +1 -1
- package/dist/tools/bash.d.ts +14 -1
- package/dist/tools/bash.js +132 -7
- package/dist/tools/edit.js +77 -14
- package/dist/tools/glob.js +13 -3
- package/dist/tools/grep.js +30 -12
- package/dist/tools/imagegen.js +3 -3
- package/dist/tools/index.d.ts +1 -1
- package/dist/tools/index.js +5 -1
- package/dist/tools/read.d.ts +16 -2
- package/dist/tools/read.js +36 -8
- package/dist/tools/searchx.d.ts +6 -2
- package/dist/tools/searchx.js +221 -44
- package/dist/tools/subagent.js +37 -3
- package/dist/tools/task.js +43 -7
- package/dist/tools/validate.d.ts +11 -0
- package/dist/tools/validate.js +42 -0
- package/dist/tools/webfetch.js +18 -7
- package/dist/tools/websearch.js +41 -7
- package/dist/tools/write.js +26 -6
- package/dist/ui/app.js +31 -6
- package/dist/ui/model-picker.d.ts +1 -1
- package/dist/ui/model-picker.js +1 -1
- package/dist/ui/terminal.d.ts +1 -1
- package/dist/ui/terminal.js +1 -1
- package/package.json +2 -2
package/dist/agent/loop.js
CHANGED
|
@@ -1,20 +1,201 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* Franklin Agent Loop
|
|
3
3
|
* The core reasoning-action cycle: prompt → model → extract capabilities → execute → repeat.
|
|
4
|
-
* Original implementation with different architecture from any reference codebase.
|
|
5
4
|
*/
|
|
6
5
|
import { ModelClient } from './llm.js';
|
|
7
|
-
import { autoCompactIfNeeded, microCompact } from './compact.js';
|
|
8
|
-
import { estimateHistoryTokens, updateActualTokens, resetTokenAnchor, getAnchoredTokenCount, getContextWindow } from './tokens.js';
|
|
6
|
+
import { autoCompactIfNeeded, forceCompact, microCompact } from './compact.js';
|
|
7
|
+
import { estimateHistoryTokens, updateActualTokens, resetTokenAnchor, getAnchoredTokenCount, getContextWindow, setEstimationModel } from './tokens.js';
|
|
9
8
|
import { handleSlashCommand } from './commands.js';
|
|
10
9
|
import { reduceTokens } from './reduce.js';
|
|
11
10
|
import { PermissionManager } from './permissions.js';
|
|
12
11
|
import { StreamingExecutor } from './streaming-executor.js';
|
|
13
12
|
import { optimizeHistory, CAPPED_MAX_TOKENS, ESCALATED_MAX_TOKENS, getMaxOutputTokens } from './optimize.js';
|
|
14
13
|
import { classifyAgentError } from './error-classifier.js';
|
|
14
|
+
import { SessionToolGuard } from './tool-guard.js';
|
|
15
15
|
import { recordUsage } from '../stats/tracker.js';
|
|
16
|
-
import {
|
|
16
|
+
import { recordSessionUsage } from '../stats/session-tracker.js';
|
|
17
|
+
import { estimateCost, OPUS_PRICING } from '../pricing.js';
|
|
18
|
+
import { maybeMidSessionExtract } from '../learnings/extractor.js';
|
|
19
|
+
import { routeRequest, parseRoutingProfile } from '../router/index.js';
|
|
20
|
+
import { recordOutcome } from '../router/local-elo.js';
|
|
21
|
+
import { shouldPlan, getPlanningPrompt, getExecutorModel, isExecutorStuck, toolCallSignature } from './planner.js';
|
|
17
22
|
import { createSessionId, appendToSession, updateSessionMeta, pruneOldSessions, } from '../session/storage.js';
|
|
23
|
+
/**
 * Replace every element of a history array in place.
 *
 * The caller's array reference stays valid; only its contents change.
 * The replacement is snapshotted before `target` is touched, so passing an
 * array that aliases `target` is safe, and a failure while copying the
 * replacement leaves `target` unmodified.
 *
 * Elements are copied by index instead of being spread into a `splice` call:
 * spreading a very large array into an argument list can exceed the engine's
 * argument limit and throw a RangeError.
 *
 * @param {Array} target - Array to overwrite (mutated in place).
 * @param {Iterable} replacement - New contents for `target`.
 * @returns {void}
 */
function replaceHistory(target, replacement) {
    const next = Array.from(replacement);
    target.length = next.length;
    for (let i = 0; i < next.length; i++) {
        target[i] = next[i];
    }
}
|
|
32
|
+
/**
 * Repair tool_use / tool_result pairing in a conversation history.
 *
 * Two inconsistencies are fixed:
 *   1. Orphaned tool_results — a `tool_result` part in a user message whose
 *      `tool_use_id` matches no assistant `tool_use` id anywhere in the
 *      history. These parts are removed; a user message left with no parts
 *      is dropped entirely.
 *   2. Missing tool_results — an assistant `tool_use` whose id has no
 *      `tool_result` anywhere in the history (e.g. the turn was interrupted
 *      before the tool ran). A stub error result is injected right after
 *      that assistant message, one stub per id.
 *
 * Matching is global by id: a result counts as present if it appears in any
 * user message, not only in the one that directly follows the call.
 *
 * Side effect: when stubs are merged into an existing follow-up user message,
 * `history[i + 1]` is replaced in place with the merged message. This is kept
 * from the original implementation because a caller that only swaps in the
 * returned array when its length differs would otherwise lose the stubs.
 * Prefer comparing the return value to `history` by identity.
 *
 * @param {Array<{role: string, content: unknown}>} history - Conversation messages.
 * @returns {Array} `history` itself when it is already consistent; otherwise
 *   a new array with orphans removed and stub results injected.
 */
function sanitizeHistory(history) {
    // Pass 1: collect every tool_use id (assistant) and tool_result id (user).
    const callIds = new Set();
    const resultIds = new Set();
    for (const msg of history) {
        if (!Array.isArray(msg.content))
            continue;
        for (const part of msg.content) {
            if (msg.role === 'assistant' && part?.type === 'tool_use' && part.id) {
                callIds.add(part.id);
            }
            else if (msg.role === 'user' && part?.type === 'tool_result' && part.tool_use_id) {
                resultIds.add(part.tool_use_id);
            }
        }
    }
    const orphanedResults = new Set([...resultIds].filter(id => !callIds.has(id)));
    const missingResults = new Set([...callIds].filter(id => !resultIds.has(id)));
    if (orphanedResults.size === 0 && missingResults.size === 0)
        return history;
    const isOrphan = (part) => part?.type === 'tool_result' && orphanedResults.has(part.tool_use_id);
    // Pass 2: rebuild the history.
    const result = [];
    for (let i = 0; i < history.length; i++) {
        const msg = history[i];
        if (msg.role === 'user' && Array.isArray(msg.content)) {
            if (orphanedResults.size === 0) {
                result.push(msg);
                continue;
            }
            const kept = msg.content.filter(p => !isOrphan(p));
            if (kept.length === 0)
                continue; // Nothing left — drop the message
            // Only clone the message when something was actually removed.
            result.push(kept.length === msg.content.length ? msg : { ...msg, content: kept });
            continue;
        }
        result.push(msg);
        if (msg.role !== 'assistant' || !Array.isArray(msg.content) || missingResults.size === 0)
            continue;
        // Build one stub per unanswered tool_use in this assistant message.
        // Deleting the id as soon as it is seen guarantees a single stub even
        // if the same id is repeated in this or a later message.
        const stubParts = [];
        for (const part of msg.content) {
            if (part?.type === 'tool_use' && missingResults.has(part.id)) {
                missingResults.delete(part.id);
                stubParts.push({
                    type: 'tool_result',
                    tool_use_id: part.id,
                    content: '[Tool execution was interrupted — result not available]',
                    is_error: true,
                });
            }
        }
        if (stubParts.length === 0)
            continue;
        const nextMsg = history[i + 1];
        if (nextMsg?.role === 'user' && Array.isArray(nextMsg.content)) {
            // Merge stubs into the following user message (stubs first). The
            // merged message is written back into `history` and picked up by
            // the next loop iteration; other fields of the message are kept.
            const existingContent = orphanedResults.size > 0
                ? nextMsg.content.filter(p => !isOrphan(p))
                : nextMsg.content;
            history[i + 1] = { ...nextMsg, content: [...stubParts, ...existingContent] };
        }
        else {
            // No structured user message follows — insert a new one with the stubs
            result.push({ role: 'user', content: stubParts });
        }
    }
    return result;
}
|
|
140
|
+
/**
 * Report whether an error message describes media that is too big.
 *
 * Recognised patterns (all case-sensitive substring / regex checks):
 *   - "image exceeds" together with "maximum"
 *   - "image dimensions exceed"
 *   - "maximum of <N> PDF pages"
 *   - "image" together with "too large"
 *   - "PDF" together with "too large"
 *
 * @param {string} msg - Error message text to inspect.
 * @returns {boolean} True when any of the patterns above matches.
 */
function isMediaSizeError(msg) {
    const mentionsAll = (...needles) => needles.every((needle) => msg.includes(needle));
    if (mentionsAll('image exceeds', 'maximum'))
        return true;
    if (mentionsAll('image dimensions exceed'))
        return true;
    if (/maximum of \d+ PDF pages/.test(msg))
        return true;
    if (mentionsAll('image', 'too large'))
        return true;
    return mentionsAll('PDF', 'too large');
}
|
|
151
|
+
/**
 * Replace image and document blocks in a history with text placeholders.
 *
 * Handles media that appears directly in a message's content array as well as
 * media nested inside the content array of a `tool_result` part. The input is
 * never mutated: messages and parts that contain no media are returned by
 * reference, and only the ones that actually change are shallow-copied.
 *
 * @param {Array<{role: string, content: unknown}>} history - Conversation messages.
 * @returns {{history: Array, stripped: boolean}} `stripped` is true when at
 *   least one media block was replaced; `history` is then a new array,
 *   otherwise it is the input array itself.
 */
function stripMediaFromHistory(history) {
    const isMedia = (part) => part.type === 'image' || part.type === 'document';
    const placeholder = (kind) => ({ type: 'text', text: `[${kind} removed — too large for context]` });
    let stripped = false;
    const result = history.map((msg) => {
        // String (or otherwise non-array) content cannot hold media blocks.
        if (!Array.isArray(msg.content))
            return msg;
        let msgChanged = false;
        const cleaned = msg.content.map((part) => {
            if (isMedia(part)) {
                msgChanged = true;
                return placeholder(part.type);
            }
            // Also strip media nested inside tool_result content arrays
            if (part.type === 'tool_result' && Array.isArray(part.content)) {
                // Track changes per part so an untouched tool_result is not
                // cloned just because an earlier sibling contained media.
                let partChanged = false;
                const inner = part.content.map((c) => {
                    if (!isMedia(c))
                        return c;
                    partChanged = true;
                    return placeholder(c.type);
                });
                if (!partChanged)
                    return part;
                msgChanged = true;
                return { ...part, content: inner };
            }
            return part;
        });
        if (!msgChanged)
            return msg;
        stripped = true;
        return { ...msg, content: cleaned };
    });
    return { history: stripped ? result : history, stripped };
}
|
|
190
|
+
/**
 * Compute a retry delay using capped exponential backoff with random jitter.
 *
 * The base delay is `2^attempt * 1000` ms, capped at `maxDelayMs`. A uniformly
 * random jitter of ±25% of the base is then added so that concurrent clients
 * do not all retry at the same instant. The cap applies to the base only, so
 * the returned value may exceed `maxDelayMs` by up to 25%. The result is
 * rounded to whole milliseconds and is never below 500.
 *
 * @param {number} attempt - Retry attempt number. A value that is not a
 *   number (NaN, undefined) is treated as 0 so the delay is always a usable
 *   timer value rather than NaN.
 * @param {number} [maxDelayMs=32000] - Upper bound for the pre-jitter delay.
 * @returns {number} Delay in milliseconds.
 */
function getBackoffDelay(attempt, maxDelayMs = 32_000) {
    const numericAttempt = Number(attempt);
    const exponent = Number.isNaN(numericAttempt) ? 0 : numericAttempt;
    const base = Math.min(Math.pow(2, exponent) * 1000, maxDelayMs);
    const jitter = base * 0.25 * (Math.random() * 2 - 1); // ±25%
    return Math.max(500, Math.round(base + jitter));
}
|
|
18
199
|
// ─── Interactive Session ───────────────────────────────────────────────────
|
|
19
200
|
/**
|
|
20
201
|
* Run a multi-turn interactive session.
|
|
@@ -37,18 +218,35 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
37
218
|
const permissions = new PermissionManager(config.permissionMode ?? 'default', config.permissionPromptFn);
|
|
38
219
|
const history = [];
|
|
39
220
|
let lastUserInput = ''; // For /retry
|
|
40
|
-
const
|
|
221
|
+
const originalModel = config.model; // Preserve original model/routing profile for recovery
|
|
222
|
+
let turnFailedModels = new Set(); // Models that failed this turn (cleared each new turn)
|
|
223
|
+
// Track models that failed with 402 (payment required) across turns.
|
|
224
|
+
// These persist until the session ends — unlike transient errors, payment failures
|
|
225
|
+
// will keep failing until the user adds funds. Map stores failure timestamp for future TTL.
|
|
226
|
+
const paymentFailedModels = new Map(); // model → timestamp
|
|
227
|
+
// Plan-then-execute: session-level disable flag lives on config (set by /noplan command)
|
|
41
228
|
// Session persistence
|
|
42
229
|
const sessionId = createSessionId();
|
|
43
230
|
let turnCount = 0;
|
|
44
231
|
let tokenBudgetWarned = false; // Emit token budget warning at most once per session
|
|
45
232
|
let lastSessionActivity = Date.now();
|
|
233
|
+
let lastRoutedModel = ''; // last model chosen by router (for local elo)
|
|
234
|
+
let lastRoutedCategory = ''; // last category detected (for local elo)
|
|
235
|
+
let sessionInputTokens = 0;
|
|
236
|
+
let sessionOutputTokens = 0;
|
|
237
|
+
let sessionCostUsd = 0;
|
|
238
|
+
let sessionSavedVsOpus = 0;
|
|
239
|
+
const toolGuard = new SessionToolGuard();
|
|
46
240
|
const persistSessionMeta = () => {
|
|
47
241
|
updateSessionMeta(sessionId, {
|
|
48
242
|
model: config.model,
|
|
49
243
|
workDir,
|
|
50
244
|
turnCount,
|
|
51
245
|
messageCount: history.length,
|
|
246
|
+
inputTokens: sessionInputTokens,
|
|
247
|
+
outputTokens: sessionOutputTokens,
|
|
248
|
+
costUsd: sessionCostUsd,
|
|
249
|
+
savedVsOpusUsd: sessionSavedVsOpus,
|
|
52
250
|
});
|
|
53
251
|
};
|
|
54
252
|
const persistSessionMessage = (message) => {
|
|
@@ -67,6 +265,10 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
67
265
|
if (input.startsWith('/')) {
|
|
68
266
|
// /retry re-sends the last user message
|
|
69
267
|
if (input === '/retry') {
|
|
268
|
+
// Record retry as negative signal for local elo
|
|
269
|
+
if (lastRoutedCategory && lastRoutedModel) {
|
|
270
|
+
recordOutcome(lastRoutedCategory, lastRoutedModel, 'retried');
|
|
271
|
+
}
|
|
70
272
|
if (!lastUserInput) {
|
|
71
273
|
onEvent({ kind: 'text_delta', text: 'No previous message to retry.\n' });
|
|
72
274
|
onEvent({ kind: 'turn_done', reason: 'completed' });
|
|
@@ -87,15 +289,38 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
87
289
|
lastUserInput = input;
|
|
88
290
|
history.push({ role: 'user', content: input });
|
|
89
291
|
turnCount++;
|
|
292
|
+
toolGuard.startTurn();
|
|
90
293
|
persistSessionMessage({ role: 'user', content: input });
|
|
294
|
+
// ── Model recovery: try original model at the start of each new turn ──
|
|
295
|
+
// If we fell back to a free model last turn due to a transient error, try original again.
|
|
296
|
+
// But DON'T reset if the original model had a payment failure — it will just fail again.
|
|
297
|
+
if (config.model !== originalModel && !paymentFailedModels.has(originalModel)) {
|
|
298
|
+
config.model = originalModel;
|
|
299
|
+
config.onModelChange?.(originalModel);
|
|
300
|
+
}
|
|
301
|
+
turnFailedModels = new Set(); // Fresh slate for transient failures this turn
|
|
91
302
|
const abort = new AbortController();
|
|
92
303
|
onAbortReady?.(() => abort.abort());
|
|
93
304
|
let loopCount = 0;
|
|
94
305
|
let recoveryAttempts = 0;
|
|
306
|
+
const MAX_RECOVERY_ATTEMPTS = 5; // Up from 3 — Claude Code uses 10, we split the difference
|
|
95
307
|
let compactFailures = 0;
|
|
96
308
|
let maxTokensOverride;
|
|
97
309
|
const turnIdleReference = lastSessionActivity;
|
|
98
310
|
lastSessionActivity = Date.now();
|
|
311
|
+
// ── Plan-then-execute state (per turn) ──
|
|
312
|
+
let planActive = false;
|
|
313
|
+
let planPlannerModel = '';
|
|
314
|
+
let planExecutorModel = '';
|
|
315
|
+
let planEscalationCount = 0;
|
|
316
|
+
let planConsecutiveErrors = 0;
|
|
317
|
+
let lastToolSig = ''; // For same-tool repeat detection
|
|
318
|
+
// ── Tool call guardrails (inspired by hermes-agent) ──
|
|
319
|
+
let turnToolCalls = 0; // Total tool calls this user turn
|
|
320
|
+
const turnToolCounts = new Map(); // Per-tool-name counts this turn
|
|
321
|
+
const readFileCache = new Set(); // Files already read (dedup)
|
|
322
|
+
const MAX_TOOL_CALLS_PER_TURN = 25; // Hard cap per user turn
|
|
323
|
+
const SAME_TOOL_WARN_THRESHOLD = 5; // Warn after N calls to same tool
|
|
99
324
|
// Agent loop for this user message
|
|
100
325
|
while (loopCount < maxTurns) {
|
|
101
326
|
loopCount++;
|
|
@@ -110,21 +335,18 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
110
335
|
lastActivityTimestamp: loopCount === 1 ? turnIdleReference : lastSessionActivity,
|
|
111
336
|
});
|
|
112
337
|
if (optimized !== history) {
|
|
113
|
-
history
|
|
114
|
-
history.push(...optimized);
|
|
338
|
+
replaceHistory(history, optimized);
|
|
115
339
|
}
|
|
116
340
|
// 2. Token reduction: age old results, normalize whitespace, trim verbose messages
|
|
117
341
|
const reduced = reduceTokens(history, config.debug);
|
|
118
342
|
if (reduced !== history) {
|
|
119
|
-
history
|
|
120
|
-
history.push(...reduced);
|
|
343
|
+
replaceHistory(history, reduced);
|
|
121
344
|
}
|
|
122
345
|
// 3. Microcompact: clear old tool results to prevent context snowball
|
|
123
346
|
if (history.length > 6) {
|
|
124
347
|
const microCompacted = microCompact(history, 3);
|
|
125
348
|
if (microCompacted !== history) {
|
|
126
|
-
history
|
|
127
|
-
history.push(...microCompacted);
|
|
349
|
+
replaceHistory(history, microCompacted);
|
|
128
350
|
resetTokenAnchor(); // History shrunk — resync token tracking
|
|
129
351
|
}
|
|
130
352
|
}
|
|
@@ -134,19 +356,18 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
134
356
|
try {
|
|
135
357
|
const { history: compacted, compacted: didCompact } = await autoCompactIfNeeded(history, config.model, client, config.debug);
|
|
136
358
|
if (didCompact) {
|
|
137
|
-
history
|
|
138
|
-
history.push(...compacted);
|
|
359
|
+
replaceHistory(history, compacted);
|
|
139
360
|
resetTokenAnchor();
|
|
140
361
|
compactFailures = 0;
|
|
141
362
|
if (config.debug) {
|
|
142
|
-
console.error(`[
|
|
363
|
+
console.error(`[franklin] History compacted: ~${estimateHistoryTokens(history)} tokens`);
|
|
143
364
|
}
|
|
144
365
|
}
|
|
145
366
|
}
|
|
146
367
|
catch (compactErr) {
|
|
147
368
|
compactFailures++;
|
|
148
369
|
if (config.debug) {
|
|
149
|
-
console.error(`[
|
|
370
|
+
console.error(`[franklin] Compaction failed (${compactFailures}/3): ${compactErr.message}`);
|
|
150
371
|
}
|
|
151
372
|
}
|
|
152
373
|
}
|
|
@@ -161,6 +382,20 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
161
382
|
'4. Think step by step — show your reasoning explicitly when it adds value\n' +
|
|
162
383
|
'Prioritize correctness and thoroughness over speed.');
|
|
163
384
|
}
|
|
385
|
+
// ── Context awareness injection ──
|
|
386
|
+
// Tell the model how full its context window is so it can self-regulate.
|
|
387
|
+
// At high usage, nudge it to be concise and avoid unnecessary tool calls.
|
|
388
|
+
const { contextUsagePct: preCallPct } = getAnchoredTokenCount(history);
|
|
389
|
+
if (preCallPct > 50) {
|
|
390
|
+
let contextNote = `# Context Window Status\nYou have used approximately ${Math.round(preCallPct)}% of your context window.`;
|
|
391
|
+
if (preCallPct > 80) {
|
|
392
|
+
contextNote += ' Context is critically full. Be extremely concise. Avoid re-reading files already in context. Prioritize completing the current task over exploring new questions.';
|
|
393
|
+
}
|
|
394
|
+
else if (preCallPct > 65) {
|
|
395
|
+
contextNote += ' Be concise in responses. Avoid unnecessary tool calls. Do not re-read files you already have in context.';
|
|
396
|
+
}
|
|
397
|
+
systemParts.push(contextNote);
|
|
398
|
+
}
|
|
164
399
|
const systemPrompt = systemParts.join('\n\n');
|
|
165
400
|
const modelMaxOut = getMaxOutputTokens(config.model);
|
|
166
401
|
let maxTokens = Math.min(maxTokensOverride ?? CAPPED_MAX_TOKENS, modelMaxOut);
|
|
@@ -172,16 +407,73 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
172
407
|
handlers: capabilityMap,
|
|
173
408
|
scope: { workingDir: workDir, abortSignal: abort.signal, onAskUser: config.onAskUser },
|
|
174
409
|
permissions,
|
|
410
|
+
guard: toolGuard,
|
|
175
411
|
onStart: (id, name, preview) => onEvent({ kind: 'capability_start', id, name, preview }),
|
|
176
412
|
onProgress: (id, text) => onEvent({ kind: 'capability_progress', id, text }),
|
|
413
|
+
sessionId,
|
|
177
414
|
});
|
|
415
|
+
// ── Router: resolve routing profiles to concrete models ──
|
|
416
|
+
const routingProfile = parseRoutingProfile(config.model);
|
|
417
|
+
let resolvedModel = config.model;
|
|
418
|
+
let routingTier;
|
|
419
|
+
let routingConfidence;
|
|
420
|
+
let routingSavings;
|
|
421
|
+
if (routingProfile) {
|
|
422
|
+
// Extract latest user text for classification
|
|
423
|
+
const lastUser = [...history].reverse().find((m) => m.role === 'user');
|
|
424
|
+
const userText = typeof lastUser?.content === 'string'
|
|
425
|
+
? lastUser.content
|
|
426
|
+
: Array.isArray(lastUser?.content)
|
|
427
|
+
? lastUser.content
|
|
428
|
+
.filter(p => p.type === 'text')
|
|
429
|
+
.map(p => p.text ?? '')
|
|
430
|
+
.join(' ')
|
|
431
|
+
: '';
|
|
432
|
+
const routing = routeRequest(userText, routingProfile);
|
|
433
|
+
resolvedModel = routing.model;
|
|
434
|
+
routingTier = routing.tier;
|
|
435
|
+
routingConfidence = routing.confidence;
|
|
436
|
+
routingSavings = routing.savings;
|
|
437
|
+
lastRoutedModel = routing.model;
|
|
438
|
+
lastRoutedCategory = routing.signals[0] || '';
|
|
439
|
+
}
|
|
440
|
+
// Update token estimation model for more accurate byte-per-token ratio
|
|
441
|
+
setEstimationModel(resolvedModel);
|
|
442
|
+
// ── Plan-then-execute: detect and activate ──
|
|
443
|
+
if (loopCount === 1 && !planActive && routingProfile &&
|
|
444
|
+
shouldPlan(routingTier, routingProfile, lastUserInput, !!config.ultrathink, !!config.planDisabled)) {
|
|
445
|
+
planActive = true;
|
|
446
|
+
planPlannerModel = resolvedModel;
|
|
447
|
+
planExecutorModel = getExecutorModel(routingProfile);
|
|
448
|
+
onEvent({ kind: 'text_delta', text: '\n*Planning...*\n' });
|
|
449
|
+
}
|
|
450
|
+
// Plan-then-execute: override model on execution iterations
|
|
451
|
+
if (planActive && loopCount > 1) {
|
|
452
|
+
resolvedModel = planExecutorModel;
|
|
453
|
+
}
|
|
454
|
+
// Build per-call tool defs, max_tokens, and system prompt
|
|
455
|
+
// (planning calls get no tools + short output + planning prompt)
|
|
456
|
+
let callToolDefs = toolDefs;
|
|
457
|
+
let callMaxTokens = maxTokens;
|
|
458
|
+
let callSystemPrompt = systemPrompt;
|
|
459
|
+
if (planActive && loopCount === 1) {
|
|
460
|
+
callToolDefs = []; // No tools during planning
|
|
461
|
+
callMaxTokens = 2048; // Short plan output
|
|
462
|
+
callSystemPrompt = systemPrompt + '\n\n' + getPlanningPrompt();
|
|
463
|
+
}
|
|
464
|
+
// Safety net: handled in llm.ts resolveVirtualModel()
|
|
465
|
+
// Sanitize: remove orphaned tool results that could confuse the API
|
|
466
|
+
const sanitized = sanitizeHistory(history);
|
|
467
|
+
if (sanitized.length !== history.length) {
|
|
468
|
+
replaceHistory(history, sanitized);
|
|
469
|
+
}
|
|
178
470
|
try {
|
|
179
471
|
const result = await client.complete({
|
|
180
|
-
model:
|
|
472
|
+
model: resolvedModel,
|
|
181
473
|
messages: history,
|
|
182
|
-
system:
|
|
183
|
-
tools:
|
|
184
|
-
max_tokens:
|
|
474
|
+
system: callSystemPrompt,
|
|
475
|
+
tools: callToolDefs,
|
|
476
|
+
max_tokens: callMaxTokens,
|
|
185
477
|
stream: true,
|
|
186
478
|
}, abort.signal,
|
|
187
479
|
// Start concurrent tools as soon as their input is fully received
|
|
@@ -198,6 +490,18 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
198
490
|
responseParts = result.content;
|
|
199
491
|
usage = result.usage;
|
|
200
492
|
stopReason = result.stopReason;
|
|
493
|
+
// ── Empty response recovery (inspired by Hermes _empty_content_retries) ──
|
|
494
|
+
const hasText = responseParts.some(p => p.type === 'text' && p.text?.trim());
|
|
495
|
+
const hasTools = responseParts.some(p => p.type === 'tool_use');
|
|
496
|
+
const hasThinking = responseParts.some(p => p.type === 'thinking');
|
|
497
|
+
if (!hasText && !hasTools && !hasThinking && recoveryAttempts < MAX_RECOVERY_ATTEMPTS) {
|
|
498
|
+
recoveryAttempts++;
|
|
499
|
+
if (config.debug) {
|
|
500
|
+
console.error(`[franklin] Empty response — retrying (${recoveryAttempts}/${MAX_RECOVERY_ATTEMPTS})`);
|
|
501
|
+
}
|
|
502
|
+
onEvent({ kind: 'text_delta', text: `\n*Empty response — retrying (${recoveryAttempts}/${MAX_RECOVERY_ATTEMPTS})...*\n` });
|
|
503
|
+
continue;
|
|
504
|
+
}
|
|
201
505
|
}
|
|
202
506
|
catch (err) {
|
|
203
507
|
// ── User abort (Esc key) ──
|
|
@@ -215,42 +519,63 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
215
519
|
}
|
|
216
520
|
const errMsg = err.message || '';
|
|
217
521
|
const classified = classifyAgentError(errMsg);
|
|
218
|
-
// ──
|
|
219
|
-
if (
|
|
522
|
+
// ── Media size error recovery (strip images/PDFs + retry) ──
|
|
523
|
+
if (isMediaSizeError(errMsg) && recoveryAttempts < MAX_RECOVERY_ATTEMPTS) {
|
|
220
524
|
recoveryAttempts++;
|
|
221
525
|
if (config.debug) {
|
|
222
|
-
console.error(`[
|
|
526
|
+
console.error(`[franklin] Media too large — stripping and retrying (attempt ${recoveryAttempts})`);
|
|
223
527
|
}
|
|
224
|
-
const { history:
|
|
225
|
-
|
|
226
|
-
|
|
528
|
+
const { history: stripped, stripped: didStrip } = stripMediaFromHistory(history);
|
|
529
|
+
if (didStrip) {
|
|
530
|
+
replaceHistory(history, stripped);
|
|
531
|
+
onEvent({ kind: 'text_delta', text: '\n*Media too large — retrying without images/documents...*\n' });
|
|
532
|
+
continue;
|
|
533
|
+
}
|
|
534
|
+
// No media to strip — fall through to other error handling
|
|
535
|
+
}
|
|
536
|
+
// ── Prompt too long recovery (reactive compaction) ──
|
|
537
|
+
// Use forceCompact instead of autoCompactIfNeeded — the API already told us
|
|
538
|
+
// the prompt is too long, so we must compact regardless of our threshold estimate.
|
|
539
|
+
// This is the key insight from Claude Code: reactive compaction must FORCE compress.
|
|
540
|
+
if (classified.category === 'context_limit' && recoveryAttempts < MAX_RECOVERY_ATTEMPTS) {
|
|
541
|
+
recoveryAttempts++;
|
|
542
|
+
if (config.debug) {
|
|
543
|
+
console.error(`[franklin] Prompt too long — force compacting (attempt ${recoveryAttempts})`);
|
|
544
|
+
}
|
|
545
|
+
onEvent({ kind: 'text_delta', text: '\n*Context limit hit — compacting conversation...*\n' });
|
|
546
|
+
const { history: compactedAgain } = await forceCompact(history, config.model, client, config.debug);
|
|
547
|
+
replaceHistory(history, compactedAgain);
|
|
548
|
+
resetTokenAnchor(); // History mutated — resync tracking
|
|
227
549
|
continue; // Retry
|
|
228
550
|
}
|
|
229
551
|
// ── Transient error recovery (network, rate limit, server errors) ──
|
|
230
|
-
|
|
552
|
+
// Respect per-error maxRetries (e.g., 529/overloaded gets only 3 retries)
|
|
553
|
+
const effectiveMaxRetries = classified.maxRetries ?? MAX_RECOVERY_ATTEMPTS;
|
|
554
|
+
if (classified.isTransient && recoveryAttempts < effectiveMaxRetries) {
|
|
231
555
|
recoveryAttempts++;
|
|
232
|
-
const backoffMs =
|
|
556
|
+
const backoffMs = getBackoffDelay(recoveryAttempts);
|
|
233
557
|
if (config.debug) {
|
|
234
|
-
console.error(`[
|
|
558
|
+
console.error(`[franklin] ${classified.label} error — retrying in ${(backoffMs / 1000).toFixed(1)}s (attempt ${recoveryAttempts}/${effectiveMaxRetries}): ${errMsg.slice(0, 100)}`);
|
|
235
559
|
}
|
|
236
560
|
onEvent({
|
|
237
561
|
kind: 'text_delta',
|
|
238
|
-
text: `\n*Retrying (${recoveryAttempts}
|
|
562
|
+
text: `\n*Retrying (${recoveryAttempts}/${effectiveMaxRetries}) after ${classified.label} error...*\n`,
|
|
239
563
|
});
|
|
240
564
|
await new Promise(r => setTimeout(r, backoffMs));
|
|
241
565
|
continue;
|
|
242
566
|
}
|
|
243
|
-
//
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
//
|
|
250
|
-
|
|
251
|
-
|
|
567
|
+
// ── Payment failure: auto-fallback to free models ──
|
|
568
|
+
// Track payment-failed models for the entire session — unlike transient errors,
|
|
569
|
+
// 402s will keep failing until the user adds funds.
|
|
570
|
+
if (classified.category === 'payment') {
|
|
571
|
+
turnFailedModels.add(config.model);
|
|
572
|
+
paymentFailedModels.set(config.model, Date.now());
|
|
573
|
+
// Record to local Elo so the router learns to avoid this model
|
|
574
|
+
if (lastRoutedCategory) {
|
|
575
|
+
recordOutcome(lastRoutedCategory, config.model, 'payment');
|
|
576
|
+
}
|
|
252
577
|
const FREE_MODELS = ['nvidia/qwen3-coder-480b', 'nvidia/nemotron-ultra-253b', 'nvidia/devstral-2-123b'];
|
|
253
|
-
const nextFree = FREE_MODELS.find(m => !
|
|
578
|
+
const nextFree = FREE_MODELS.find(m => !turnFailedModels.has(m));
|
|
254
579
|
if (nextFree) {
|
|
255
580
|
const oldModel = config.model;
|
|
256
581
|
config.model = nextFree;
|
|
@@ -258,14 +583,9 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
258
583
|
onEvent({ kind: 'text_delta', text: `\n*${oldModel} failed — switching to ${nextFree}*\n` });
|
|
259
584
|
continue; // Retry with next model
|
|
260
585
|
}
|
|
261
|
-
suggestion = '\nTip: Run `runcode balance` to check funds. Try /model free for free models.';
|
|
262
|
-
}
|
|
263
|
-
else if (classified.category === 'timeout' || classified.category === 'network') {
|
|
264
|
-
suggestion = '\nTip: Check your network connection. Use /retry to try again.';
|
|
265
|
-
}
|
|
266
|
-
else if (classified.category === 'context_limit') {
|
|
267
|
-
suggestion = '\nTip: Run /compact to compress conversation history.';
|
|
268
586
|
}
|
|
587
|
+
// ── Unrecoverable: show error with suggestion from classifier ──
|
|
588
|
+
const suggestion = classified.suggestion ? `\nTip: ${classified.suggestion}` : '';
|
|
269
589
|
onEvent({
|
|
270
590
|
kind: 'turn_done',
|
|
271
591
|
reason: 'error',
|
|
@@ -281,31 +601,51 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
281
601
|
: estimateHistoryTokens(history);
|
|
282
602
|
// Anchor token tracking to actual API counts
|
|
283
603
|
updateActualTokens(inputTokens, usage.outputTokens, history.length);
|
|
604
|
+
const { contextUsagePct } = getAnchoredTokenCount(history);
|
|
284
605
|
onEvent({
|
|
285
606
|
kind: 'usage',
|
|
286
607
|
inputTokens,
|
|
287
608
|
outputTokens: usage.outputTokens,
|
|
288
|
-
model:
|
|
609
|
+
model: resolvedModel,
|
|
289
610
|
calls: 1,
|
|
611
|
+
tier: routingTier,
|
|
612
|
+
confidence: routingConfidence,
|
|
613
|
+
savings: routingSavings,
|
|
614
|
+
contextPct: Math.round(contextUsagePct),
|
|
290
615
|
});
|
|
291
|
-
// Record usage for stats tracking (
|
|
292
|
-
const costEstimate = estimateCost(
|
|
293
|
-
recordUsage(
|
|
616
|
+
// Record usage for stats tracking (franklin stats command)
|
|
617
|
+
const costEstimate = estimateCost(resolvedModel, inputTokens, usage.outputTokens, 1);
|
|
618
|
+
recordUsage(resolvedModel, inputTokens, usage.outputTokens, costEstimate, 0);
|
|
619
|
+
recordSessionUsage(resolvedModel, inputTokens, usage.outputTokens, costEstimate, routingTier);
|
|
620
|
+
// Accumulate session-level totals for session meta
|
|
621
|
+
sessionInputTokens += inputTokens;
|
|
622
|
+
sessionOutputTokens += usage.outputTokens;
|
|
623
|
+
sessionCostUsd += costEstimate;
|
|
624
|
+
const opusCost = (inputTokens / 1_000_000) * OPUS_PRICING.input
|
|
625
|
+
+ (usage.outputTokens / 1_000_000) * OPUS_PRICING.output;
|
|
626
|
+
sessionSavedVsOpus += Math.max(0, opusCost - costEstimate);
|
|
294
627
|
// ── Max output tokens recovery ──
|
|
295
|
-
if (stopReason === 'max_tokens' && recoveryAttempts <
|
|
628
|
+
if (stopReason === 'max_tokens' && recoveryAttempts < MAX_RECOVERY_ATTEMPTS) {
|
|
296
629
|
recoveryAttempts++;
|
|
297
630
|
if (maxTokensOverride === undefined) {
|
|
298
631
|
// First hit: escalate to 64K
|
|
299
632
|
maxTokensOverride = ESCALATED_MAX_TOKENS;
|
|
300
633
|
if (config.debug) {
|
|
301
|
-
console.error(`[
|
|
634
|
+
console.error(`[franklin] Max tokens hit — escalating to ${maxTokensOverride}`);
|
|
302
635
|
}
|
|
303
636
|
}
|
|
304
637
|
// Append what we got + a continuation prompt (text already streamed)
|
|
305
638
|
const partialAssistant = { role: 'assistant', content: responseParts };
|
|
306
639
|
const continuationPrompt = {
|
|
307
640
|
role: 'user',
|
|
308
|
-
content:
|
|
641
|
+
content: [
|
|
642
|
+
'Output token limit hit. Continue with these rules:',
|
|
643
|
+
'1. Resume directly — no apology, no recap of what you already said. Pick up mid-sentence if that is where the cut happened.',
|
|
644
|
+
'2. Do NOT repeat any text or code that was already output above.',
|
|
645
|
+
'3. Break remaining work into smaller pieces — use multiple tool calls if needed instead of one large output.',
|
|
646
|
+
'4. Skip extended reasoning for the continuation — focus on executing.',
|
|
647
|
+
'5. If you were in the middle of outputting code, finish the code block first.',
|
|
648
|
+
].join('\n'),
|
|
309
649
|
};
|
|
310
650
|
history.push(partialAssistant);
|
|
311
651
|
persistSessionMessage(partialAssistant);
|
|
@@ -326,6 +666,18 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
326
666
|
const assistantMessage = { role: 'assistant', content: responseParts };
|
|
327
667
|
history.push(assistantMessage);
|
|
328
668
|
persistSessionMessage(assistantMessage);
|
|
669
|
+
// ── Plan-then-execute: transition from planning to execution ──
|
|
670
|
+
if (planActive && loopCount === 1 && invocations.length === 0) {
|
|
671
|
+
// Planning call completed — inject execution kickoff
|
|
672
|
+
const execKickoff = {
|
|
673
|
+
role: 'user',
|
|
674
|
+
content: 'Execute the plan above step by step. Use tools to complete each step. After each step, briefly state what you did and move to the next.',
|
|
675
|
+
};
|
|
676
|
+
history.push(execKickoff);
|
|
677
|
+
persistSessionMessage(execKickoff);
|
|
678
|
+
onEvent({ kind: 'text_delta', text: `\n*Executing with ${planExecutorModel}...*\n` });
|
|
679
|
+
continue; // Next iteration uses the cheap executor model
|
|
680
|
+
}
|
|
329
681
|
// No more capabilities → done with this user message
|
|
330
682
|
if (invocations.length === 0) {
|
|
331
683
|
lastSessionActivity = Date.now();
|
|
@@ -343,6 +695,10 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
343
695
|
});
|
|
344
696
|
}
|
|
345
697
|
}
|
|
698
|
+
// Record success for local Elo learning (include tool call count for efficiency)
|
|
699
|
+
if (lastRoutedCategory && lastRoutedModel) {
|
|
700
|
+
recordOutcome(lastRoutedCategory, lastRoutedModel, 'continued', turnToolCalls);
|
|
701
|
+
}
|
|
346
702
|
onEvent({ kind: 'turn_done', reason: 'completed' });
|
|
347
703
|
break;
|
|
348
704
|
}
|
|
@@ -351,22 +707,114 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
351
707
|
for (const [inv, result] of results) {
|
|
352
708
|
onEvent({ kind: 'capability_done', id: inv.id, result });
|
|
353
709
|
}
|
|
710
|
+
// ── Tool call guardrails ──
|
|
711
|
+
turnToolCalls += results.length;
|
|
712
|
+
for (const [inv] of results) {
|
|
713
|
+
const name = inv.name;
|
|
714
|
+
turnToolCounts.set(name, (turnToolCounts.get(name) || 0) + 1);
|
|
715
|
+
// Read file dedup: track paths already read
|
|
716
|
+
if (name === 'Read' && inv.input.file_path) {
|
|
717
|
+
readFileCache.add(inv.input.file_path);
|
|
718
|
+
}
|
|
719
|
+
}
|
|
354
720
|
// Refresh activity timestamp after tool execution
|
|
355
721
|
lastSessionActivity = Date.now();
|
|
356
|
-
//
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
722
|
+
// Mid-session learning extraction (like Claude Code's SessionMemory)
|
|
723
|
+
// Runs in background — never blocks the conversation
|
|
724
|
+
const { estimated: currentTokens } = getAnchoredTokenCount(history);
|
|
725
|
+
maybeMidSessionExtract(history, currentTokens, turnToolCalls, sessionId, client);
|
|
726
|
+
// Append outcomes (with guardrail injections)
|
|
727
|
+
const outcomeContent = results.map(([inv, result]) => {
|
|
728
|
+
// Read file dedup: if the same file is read more than once in this batch of results,
|
|
729
|
+
// replace every copy except the last one with a stub to save tokens
|
|
730
|
+
if (inv.name === 'Read' && !result.isError) {
|
|
731
|
+
const fp = inv.input.file_path;
|
|
732
|
+
const count = results.filter(([i]) => i.name === 'Read' && i.input.file_path === fp).length;
|
|
733
|
+
if (count > 1 && inv !== results.filter(([i]) => i.name === 'Read' && i.input.file_path === fp).pop()?.[0]) {
|
|
734
|
+
return {
|
|
735
|
+
type: 'tool_result',
|
|
736
|
+
tool_use_id: inv.id,
|
|
737
|
+
content: `File already read in this turn. Refer to the other Read result for ${fp}.`,
|
|
738
|
+
is_error: false,
|
|
739
|
+
};
|
|
740
|
+
}
|
|
741
|
+
}
|
|
742
|
+
return {
|
|
743
|
+
type: 'tool_result',
|
|
744
|
+
tool_use_id: inv.id,
|
|
745
|
+
content: result.output,
|
|
746
|
+
is_error: result.isError,
|
|
747
|
+
};
|
|
748
|
+
});
|
|
749
|
+
// ── Guardrail injections ──
|
|
750
|
+
// Warn about same-tool repetition
|
|
751
|
+
for (const [name, count] of turnToolCounts) {
|
|
752
|
+
if (count === SAME_TOOL_WARN_THRESHOLD) {
|
|
753
|
+
outcomeContent.push({
|
|
754
|
+
type: 'tool_result',
|
|
755
|
+
tool_use_id: `guardrail-warn-${name}`,
|
|
756
|
+
content: `[SYSTEM] You have called ${name} ${count} times this turn. Stop and present your results now. Do not make more ${name} calls.`,
|
|
757
|
+
is_error: true,
|
|
758
|
+
});
|
|
759
|
+
}
|
|
760
|
+
}
|
|
761
|
+
// Hard cap: once the turn reaches the tool-call limit, inject a stop instruction for the model
|
|
762
|
+
if (turnToolCalls >= MAX_TOOL_CALLS_PER_TURN) {
|
|
763
|
+
outcomeContent.push({
|
|
764
|
+
type: 'tool_result',
|
|
765
|
+
tool_use_id: 'guardrail-cap',
|
|
766
|
+
content: `[SYSTEM] Tool call limit reached (${MAX_TOOL_CALLS_PER_TURN}). Present your results to the user NOW. Do not make any more tool calls.`,
|
|
767
|
+
is_error: true,
|
|
768
|
+
});
|
|
769
|
+
}
|
|
363
770
|
const toolResultMessage = { role: 'user', content: outcomeContent };
|
|
364
771
|
history.push(toolResultMessage);
|
|
365
772
|
persistSessionMessage(toolResultMessage);
|
|
773
|
+
// ── Plan-then-execute: stuck detection ──
|
|
774
|
+
if (planActive && loopCount > 1) {
|
|
775
|
+
const hasErrors = results.some(([, r]) => r.isError);
|
|
776
|
+
planConsecutiveErrors = hasErrors ? planConsecutiveErrors + 1 : 0;
|
|
777
|
+
// Check for same-tool repeat (model calling the exact same thing twice)
|
|
778
|
+
const currentSig = results.length === 1
|
|
779
|
+
? toolCallSignature(results[0][0].name, results[0][0].input)
|
|
780
|
+
: '';
|
|
781
|
+
const sameToolRepeat = currentSig !== '' && currentSig === lastToolSig;
|
|
782
|
+
lastToolSig = currentSig;
|
|
783
|
+
if (isExecutorStuck(planConsecutiveErrors, sameToolRepeat)) {
|
|
784
|
+
if (planEscalationCount < 2) {
|
|
785
|
+
planEscalationCount++;
|
|
786
|
+
// One-shot escalation: next iteration uses the planner model
|
|
787
|
+
resolvedModel = planPlannerModel;
|
|
788
|
+
const escalation = {
|
|
789
|
+
role: 'user',
|
|
790
|
+
content: '[ESCALATION] The executor got stuck on repeated errors. You are a stronger model. Review what happened and either fix the approach or continue from where execution stopped.',
|
|
791
|
+
};
|
|
792
|
+
history.push(escalation);
|
|
793
|
+
persistSessionMessage(escalation);
|
|
794
|
+
onEvent({ kind: 'text_delta', text: '\n*Escalating to stronger model...*\n' });
|
|
795
|
+
}
|
|
796
|
+
else {
|
|
797
|
+
// Abandon plan — strong model finishes the task directly
|
|
798
|
+
planActive = false;
|
|
799
|
+
onEvent({ kind: 'text_delta', text: '\n*Plan abandoned — switching to full model...*\n' });
|
|
800
|
+
}
|
|
801
|
+
}
|
|
802
|
+
}
|
|
803
|
+
// Tool-call cap reached: only logs in debug mode; the loop is deliberately NOT ended here
|
|
804
|
+
if (turnToolCalls >= MAX_TOOL_CALLS_PER_TURN) {
|
|
805
|
+
if (config.debug) {
|
|
806
|
+
console.error(`[franklin] Tool call cap hit: ${turnToolCalls} calls this turn`);
|
|
807
|
+
}
|
|
808
|
+
// Don't break — let the model respond one more time to summarize,
|
|
809
|
+
// but inject the stop signal above so it knows to finish up.
|
|
810
|
+
}
|
|
366
811
|
}
|
|
367
812
|
if (loopCount >= maxTurns) {
|
|
368
813
|
lastSessionActivity = Date.now();
|
|
369
814
|
persistSessionMeta();
|
|
815
|
+
if (lastRoutedCategory && lastRoutedModel) {
|
|
816
|
+
recordOutcome(lastRoutedCategory, lastRoutedModel, 'max_turns', turnToolCalls);
|
|
817
|
+
}
|
|
370
818
|
onEvent({ kind: 'turn_done', reason: 'max_turns' });
|
|
371
819
|
}
|
|
372
820
|
}
|