@blockrun/franklin 3.3.2 → 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +58 -7
- package/dist/agent/commands.d.ts +1 -1
- package/dist/agent/commands.js +128 -17
- package/dist/agent/compact.d.ts +2 -2
- package/dist/agent/compact.js +148 -22
- package/dist/agent/context.d.ts +8 -3
- package/dist/agent/context.js +301 -108
- package/dist/agent/error-classifier.d.ts +11 -2
- package/dist/agent/error-classifier.js +64 -10
- package/dist/agent/llm.d.ts +8 -1
- package/dist/agent/llm.js +114 -19
- package/dist/agent/loop.d.ts +1 -2
- package/dist/agent/loop.js +509 -61
- package/dist/agent/optimize.d.ts +2 -2
- package/dist/agent/optimize.js +9 -7
- package/dist/agent/permissions.d.ts +1 -1
- package/dist/agent/permissions.js +1 -1
- package/dist/agent/planner.d.ts +42 -0
- package/dist/agent/planner.js +110 -0
- package/dist/agent/reduce.d.ts +7 -1
- package/dist/agent/reduce.js +85 -3
- package/dist/agent/streaming-executor.d.ts +6 -1
- package/dist/agent/streaming-executor.js +83 -5
- package/dist/agent/tokens.d.ts +11 -2
- package/dist/agent/tokens.js +38 -5
- package/dist/agent/tool-guard.d.ts +27 -0
- package/dist/agent/tool-guard.js +324 -0
- package/dist/agent/types.d.ts +7 -1
- package/dist/agent/types.js +1 -1
- package/dist/banner.js +27 -40
- package/dist/brain/extract.d.ts +11 -0
- package/dist/brain/extract.js +154 -0
- package/dist/brain/index.d.ts +3 -0
- package/dist/brain/index.js +2 -0
- package/dist/brain/store.d.ts +42 -0
- package/dist/brain/store.js +225 -0
- package/dist/brain/types.d.ts +45 -0
- package/dist/brain/types.js +5 -0
- package/dist/commands/daemon.js +2 -1
- package/dist/commands/start.js +16 -3
- package/dist/config.js +1 -1
- package/dist/index.js +27 -2
- package/dist/learnings/extractor.d.ts +13 -0
- package/dist/learnings/extractor.js +69 -8
- package/dist/learnings/index.d.ts +1 -1
- package/dist/learnings/index.js +1 -1
- package/dist/learnings/store.js +42 -13
- package/dist/learnings/types.d.ts +1 -1
- package/dist/mcp/client.d.ts +1 -1
- package/dist/mcp/client.js +5 -5
- package/dist/mcp/config.d.ts +1 -1
- package/dist/mcp/config.js +1 -1
- package/dist/panel/html.d.ts +2 -0
- package/dist/panel/html.js +409 -146
- package/dist/panel/server.js +19 -0
- package/dist/pricing.js +3 -2
- package/dist/proxy/fallback.d.ts +3 -1
- package/dist/proxy/fallback.js +4 -4
- package/dist/proxy/server.js +29 -11
- package/dist/proxy/sse-translator.js +1 -1
- package/dist/router/categories.d.ts +21 -0
- package/dist/router/categories.js +96 -0
- package/dist/router/index.d.ts +9 -2
- package/dist/router/index.js +106 -27
- package/dist/router/local-elo.d.ts +32 -0
- package/dist/router/local-elo.js +107 -0
- package/dist/router/selector.d.ts +46 -0
- package/dist/router/selector.js +106 -0
- package/dist/session/storage.d.ts +5 -1
- package/dist/session/storage.js +24 -2
- package/dist/social/a11y.d.ts +1 -1
- package/dist/social/a11y.js +5 -1
- package/dist/social/browser.d.ts +5 -0
- package/dist/social/browser.js +22 -0
- package/dist/social/preflight.d.ts +4 -0
- package/dist/social/preflight.js +42 -3
- package/dist/stats/failures.d.ts +20 -0
- package/dist/stats/failures.js +63 -0
- package/dist/stats/format.d.ts +6 -0
- package/dist/stats/format.js +23 -0
- package/dist/stats/insights.js +1 -21
- package/dist/stats/session-tracker.d.ts +21 -0
- package/dist/stats/session-tracker.js +28 -0
- package/dist/stats/tracker.d.ts +1 -1
- package/dist/stats/tracker.js +1 -1
- package/dist/tools/bash.d.ts +14 -1
- package/dist/tools/bash.js +132 -7
- package/dist/tools/edit.js +77 -14
- package/dist/tools/glob.js +13 -3
- package/dist/tools/grep.js +30 -12
- package/dist/tools/imagegen.js +3 -3
- package/dist/tools/index.d.ts +1 -1
- package/dist/tools/index.js +5 -1
- package/dist/tools/read.d.ts +16 -2
- package/dist/tools/read.js +36 -8
- package/dist/tools/searchx.d.ts +6 -2
- package/dist/tools/searchx.js +221 -44
- package/dist/tools/subagent.js +37 -3
- package/dist/tools/task.js +43 -7
- package/dist/tools/validate.d.ts +11 -0
- package/dist/tools/validate.js +42 -0
- package/dist/tools/webfetch.js +18 -7
- package/dist/tools/websearch.js +41 -7
- package/dist/tools/write.js +26 -6
- package/dist/ui/app.js +31 -6
- package/dist/ui/model-picker.d.ts +1 -1
- package/dist/ui/model-picker.js +1 -1
- package/dist/ui/terminal.d.ts +1 -1
- package/dist/ui/terminal.js +1 -1
- package/package.json +2 -2
package/dist/agent/optimize.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Token optimization strategies for
|
|
2
|
+
* Token optimization strategies for Franklin.
|
|
3
3
|
*
|
|
4
4
|
* Five layers of optimization to minimize token usage:
|
|
5
5
|
* 1. Tool result size budgeting — cap large outputs, keep preview
|
|
@@ -23,7 +23,7 @@ export declare function getMaxOutputTokens(model: string): number;
|
|
|
23
23
|
export declare function budgetToolResults(history: Dialogue[]): Dialogue[];
|
|
24
24
|
export declare function stripOldThinking(history: Dialogue[]): Dialogue[];
|
|
25
25
|
/**
|
|
26
|
-
* After an idle gap (>
|
|
26
|
+
* After an idle gap (>30 min), clear old tool results.
|
|
27
27
|
* When the user comes back after being away, old results are stale anyway.
|
|
28
28
|
*/
|
|
29
29
|
export declare function timeBasedCleanup(history: Dialogue[], lastActivityTimestamp?: number): {
|
package/dist/agent/optimize.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Token optimization strategies for
|
|
2
|
+
* Token optimization strategies for Franklin.
|
|
3
3
|
*
|
|
4
4
|
* Five layers of optimization to minimize token usage:
|
|
5
5
|
* 1. Tool result size budgeting — cap large outputs, keep preview
|
|
@@ -34,8 +34,10 @@ const MODEL_MAX_OUTPUT = {
|
|
|
34
34
|
export function getMaxOutputTokens(model) {
|
|
35
35
|
return MODEL_MAX_OUTPUT[model] ?? 16_384;
|
|
36
36
|
}
|
|
37
|
-
/** Idle gap (minutes) after which old tool results are cleared
|
|
38
|
-
|
|
37
|
+
/** Idle gap (minutes) after which old tool results are cleared.
|
|
38
|
+
* Set to 30 min — a coffee break shouldn't lose tool context.
|
|
39
|
+
* Was 5 min which was too aggressive (comment said 60, code said 5). */
|
|
40
|
+
const IDLE_GAP_THRESHOLD_MINUTES = 30;
|
|
39
41
|
/** Number of recent tool results to keep during time-based cleanup */
|
|
40
42
|
const KEEP_RECENT_TOOL_RESULTS = 3;
|
|
41
43
|
// ─── 1. Tool Result Size Budgeting ─────────────────────────────────────────
|
|
@@ -140,7 +142,7 @@ export function stripOldThinking(history) {
|
|
|
140
142
|
}
|
|
141
143
|
// ─── 3. Time-Based Cleanup ─────────────────────────────────────────────────
|
|
142
144
|
/**
|
|
143
|
-
* After an idle gap (>
|
|
145
|
+
* After an idle gap (>30 min), clear old tool results.
|
|
144
146
|
* When the user comes back after being away, old results are stale anyway.
|
|
145
147
|
*/
|
|
146
148
|
export function timeBasedCleanup(history, lastActivityTimestamp) {
|
|
@@ -240,7 +242,7 @@ export function optimizeHistory(history, opts) {
|
|
|
240
242
|
result = stripped;
|
|
241
243
|
changed = true;
|
|
242
244
|
if (opts?.debug)
|
|
243
|
-
console.error('[
|
|
245
|
+
console.error('[franklin] Stripped old thinking blocks');
|
|
244
246
|
}
|
|
245
247
|
// 2. Budget tool results
|
|
246
248
|
const budgeted = budgetToolResults(result);
|
|
@@ -248,7 +250,7 @@ export function optimizeHistory(history, opts) {
|
|
|
248
250
|
result = budgeted;
|
|
249
251
|
changed = true;
|
|
250
252
|
if (opts?.debug)
|
|
251
|
-
console.error('[
|
|
253
|
+
console.error('[franklin] Budgeted oversized tool results');
|
|
252
254
|
}
|
|
253
255
|
// 3. Time-based cleanup
|
|
254
256
|
const { history: cleaned, cleaned: didClean } = timeBasedCleanup(result, opts?.lastActivityTimestamp);
|
|
@@ -256,7 +258,7 @@ export function optimizeHistory(history, opts) {
|
|
|
256
258
|
result = cleaned;
|
|
257
259
|
changed = true;
|
|
258
260
|
if (opts?.debug)
|
|
259
|
-
console.error('[
|
|
261
|
+
console.error('[franklin] Cleared stale tool results after idle gap');
|
|
260
262
|
}
|
|
261
263
|
return result;
|
|
262
264
|
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Planner-Executor for Franklin
|
|
3
|
+
*
|
|
4
|
+
* Uses expensive models (Opus/Sonnet) for planning, then cheap/free models
|
|
5
|
+
* for execution. Saves 40-70% on complex tasks while maintaining quality.
|
|
6
|
+
*
|
|
7
|
+
* Flow: detect complexity → plan with strong model → execute with cheap model
|
|
8
|
+
* → escalate back to strong model if executor gets stuck
|
|
9
|
+
*/
|
|
10
|
+
import type { Tier, RoutingProfile } from '../router/index.js';
|
|
11
|
+
/**
|
|
12
|
+
* Should this task use plan-then-execute?
|
|
13
|
+
* Returns true only for complex, multi-step tasks where the savings justify
|
|
14
|
+
* the overhead of an extra planning call.
|
|
15
|
+
*/
|
|
16
|
+
export declare function shouldPlan(tier: Tier | undefined, profile: RoutingProfile | undefined, userText: string, ultrathink: boolean, planDisabled: boolean): boolean;
|
|
17
|
+
/**
|
|
18
|
+
* Returns the planning system prompt section.
|
|
19
|
+
* Injected alongside the normal system prompt during the planning call.
|
|
20
|
+
*/
|
|
21
|
+
export declare function getPlanningPrompt(): string;
|
|
22
|
+
/**
|
|
23
|
+
* Pick the cheap executor model for a given routing profile.
|
|
24
|
+
* These models are good at following structured instructions (the plan)
|
|
25
|
+
* but much cheaper than the planning model.
|
|
26
|
+
*/
|
|
27
|
+
export declare function getExecutorModel(profile: RoutingProfile): string;
|
|
28
|
+
/**
|
|
29
|
+
* Extract numbered steps from plan text.
|
|
30
|
+
* Handles formats like "1. Do X", "1) Do X", "Step 1: Do X".
|
|
31
|
+
*/
|
|
32
|
+
export declare function parsePlanSteps(text: string): string[];
|
|
33
|
+
/**
|
|
34
|
+
* Detect if the executor model is stuck.
|
|
35
|
+
* Triggers when the model hits repeated errors or repeats the same tool call.
|
|
36
|
+
*/
|
|
37
|
+
export declare function isExecutorStuck(consecutiveErrors: number, sameToolRepeat: boolean): boolean;
|
|
38
|
+
/**
|
|
39
|
+
* Build a signature for a tool call (name + first 100 chars of input JSON).
|
|
40
|
+
* Used to detect when the executor repeats the exact same call.
|
|
41
|
+
*/
|
|
42
|
+
export declare function toolCallSignature(name: string, input: unknown): string;
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Planner-Executor for Franklin
|
|
3
|
+
*
|
|
4
|
+
* Uses expensive models (Opus/Sonnet) for planning, then cheap/free models
|
|
5
|
+
* for execution. Saves 40-70% on complex tasks while maintaining quality.
|
|
6
|
+
*
|
|
7
|
+
* Flow: detect complexity → plan with strong model → execute with cheap model
|
|
8
|
+
* → escalate back to strong model if executor gets stuck
|
|
9
|
+
*/
|
|
10
|
+
// ─── Agentic keywords that suggest multi-step work ───────────────────────
|
|
11
|
+
const AGENTIC_KEYWORDS = /\b(implement|refactor|build|fix|debug|migrate|deploy|create|add|remove|update|restructure|extract|rewrite|optimize|convert|integrate|setup|configure)\b/i;
|
|
12
|
+
const MULTI_STEP_PATTERN = /first.*then|step\s+\d|\d+\.\s|and\s+then|after\s+that|next\s*,|finally\b/i;
|
|
13
|
+
// ─── Detection ───────────────────────────────────────────────────────────
|
|
14
|
+
/**
 * Decide whether a request should go through plan-then-execute.
 *
 * Every gate below must pass; the first failing gate short-circuits to false:
 *   1. tier is COMPLEX or REASONING
 *   2. profile is auto or premium
 *   3. the request text is at least 80 characters long
 *   4. ultrathink is off
 *   5. planning has not been disabled
 *   6. the text contains an agentic keyword or a multi-step phrase
 *
 * @param tier Routing tier chosen for the request (may be undefined).
 * @param profile Active routing profile (may be undefined).
 * @param userText Raw user request text.
 * @param ultrathink Whether ultrathink mode is active.
 * @param planDisabled Whether planning was turned off.
 * @returns true when the request should be planned first.
 */
export function shouldPlan(tier, profile, userText, ultrathink, planDisabled) {
    const tierEligible = tier === 'COMPLEX' || tier === 'REASONING';
    if (!tierEligible)
        return false;
    const profileEligible = profile === 'auto' || profile === 'premium';
    if (!profileEligible)
        return false;
    // Short requests are not worth the extra planning call.
    if (userText.length < 80)
        return false;
    if (ultrathink || planDisabled)
        return false;
    // Neither pattern uses the g/y flag, so test() is stateless and safe to short-circuit.
    return AGENTIC_KEYWORDS.test(userText) || MULTI_STEP_PATTERN.test(userText);
}
|
|
40
|
+
// ─── Planning Prompt ─────────────────────────────────────────────────────
|
|
41
|
+
/**
 * Build the system-prompt section that switches the model into planning mode.
 *
 * @returns A fixed prompt telling the model to emit only a numbered plan
 *          (at most 15 steps, ending with a verification step).
 */
export function getPlanningPrompt() {
    const planningPrompt = `# Planning Mode — Active
You are in planning mode. Produce a structured execution plan for the user's request.

Rules:
- Output a numbered list of concrete steps. Each step = one action.
- Include specific file paths, function names, or shell commands when known.
- If you need to explore the codebase first, make it step 1.
- Mark steps that can run in parallel with [PARALLEL].
- Keep the plan to 15 steps max.
- End with a verification step (run tests, check output, etc.).
- Output ONLY the numbered plan. No code blocks, no explanations, no preamble.`;
    return planningPrompt;
}
|
|
58
|
+
// ─── Executor Model Selection ────────────────────────────────────────────
|
|
59
|
+
/**
 * Choose the executor model id for a routing profile.
 *
 * @param profile Routing profile name.
 * @returns 'moonshot/kimi-k2.5' for the 'premium' profile; 'google/gemini-2.5-flash'
 *          for 'auto' and for every other value.
 */
export function getExecutorModel(profile) {
    if (profile === 'premium') {
        // Medium-tier, reliable execution
        return 'moonshot/kimi-k2.5';
    }
    // Cheap, fast, good at instructions
    return 'google/gemini-2.5-flash';
}
|
|
73
|
+
// ─── Plan Parsing ────────────────────────────────────────────────────────
|
|
74
|
+
/**
 * Matches one plan-step line. An optional list bullet ("- ", "* ", "• ") is
 * allowed before the step marker; group 1 captures the step without the bullet.
 * Step markers: "1.", "1)", "1:" or "Step 1" (case-insensitive).
 */
const PLAN_STEP_LINE = /^(?:[-*•]\s+)?((?:\d+[.):]|step\s+\d)[\s\S]*)$/i;
/**
 * Extract numbered steps from plan text.
 * Handles formats like "1. Do X", "1) Do X", "Step 1: Do X", and the same
 * forms behind a list bullet ("- 1. Do X"), which models frequently emit.
 *
 * @param text Raw plan text, one step per line.
 * @returns The matching lines in document order, trimmed and with any leading
 *          bullet removed. Lines that are not steps are dropped.
 */
export function parsePlanSteps(text) {
    const steps = [];
    for (const line of text.split('\n')) {
        // trim() also removes a trailing "\r" from CRLF input.
        const match = PLAN_STEP_LINE.exec(line.trim());
        if (match) {
            steps.push(match[1]);
        }
    }
    return steps;
}
|
|
90
|
+
// ─── Stuck Detection ─────────────────────────────────────────────────────
|
|
91
|
+
/** Consecutive tool-error count at which the executor is considered stuck. */
const MAX_CONSECUTIVE_ERRORS = 3;
/**
 * Report whether the executor model appears to be stuck.
 *
 * @param consecutiveErrors Number of tool errors in a row so far.
 * @param sameToolRepeat Whether the executor just repeated the same tool call.
 * @returns true when the error streak has reached MAX_CONSECUTIVE_ERRORS or
 *          the same call was repeated; false otherwise.
 */
export function isExecutorStuck(consecutiveErrors, sameToolRepeat) {
    const tooManyErrors = consecutiveErrors >= MAX_CONSECUTIVE_ERRORS;
    return tooManyErrors || Boolean(sameToolRepeat);
}
|
|
104
|
+
/**
 * Build a signature for a tool call (name + first 100 chars of input JSON).
 * Used to detect when the executor repeats the exact same call.
 *
 * JSON.stringify yields undefined for undefined/function/symbol input and
 * throws on circular structures or BigInt. In those cases the signature falls
 * back to a `<typeof input>` tag instead of crashing the caller.
 *
 * @param name Tool name.
 * @param input Tool input; any value.
 * @returns `${name}::` followed by at most 100 characters describing the input.
 */
export function toolCallSignature(name, input) {
    let serialized;
    try {
        serialized = JSON.stringify(input);
    }
    catch {
        // Circular reference or BigInt — handled by the fallback below.
        serialized = undefined;
    }
    const inputText = serialized ?? `<${typeof input}>`;
    return `${name}::${inputText.slice(0, 100)}`;
}
|
package/dist/agent/reduce.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Token Reduction for
|
|
2
|
+
* Token Reduction for Franklin.
|
|
3
3
|
* Original implementation — reduces context size through intelligent pruning.
|
|
4
4
|
*
|
|
5
5
|
* Strategy: instead of compression/encoding, we PRUNE redundant content.
|
|
@@ -42,6 +42,12 @@ export declare function deduplicateMessages(history: Dialogue[]): Dialogue[];
|
|
|
42
42
|
* RTK-inspired: dedup_lines + strip_ansi pipeline stages.
|
|
43
43
|
*/
|
|
44
44
|
export declare function deduplicateToolResultLines(history: Dialogue[]): Dialogue[];
|
|
45
|
+
/**
|
|
46
|
+
* When the same tool (WebSearch, Grep, etc.) is called 6+ times,
|
|
47
|
+
* collapse all but the last 3 results to one-line summaries.
|
|
48
|
+
* Prevents context snowball from search spam (e.g. 96 WebSearches).
|
|
49
|
+
*/
|
|
50
|
+
export declare function collapseRepetitiveTools(history: Dialogue[]): Dialogue[];
|
|
45
51
|
/**
|
|
46
52
|
* Run all token reduction passes on conversation history.
|
|
47
53
|
* Returns same reference if nothing changed (cheap identity check).
|
package/dist/agent/reduce.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Token Reduction for
|
|
2
|
+
* Token Reduction for Franklin.
|
|
3
3
|
* Original implementation — reduces context size through intelligent pruning.
|
|
4
4
|
*
|
|
5
5
|
* Strategy: instead of compression/encoding, we PRUNE redundant content.
|
|
@@ -240,7 +240,82 @@ export function deduplicateToolResultLines(history) {
|
|
|
240
240
|
});
|
|
241
241
|
return modified ? result : history;
|
|
242
242
|
}
|
|
243
|
-
// ───
|
|
243
|
+
// ─── 6. Repetitive Tool Collapse ─────────────────────────────────────────
|
|
244
|
+
/**
 * Collapse stale results of heavily repeated tools to one-line summaries.
 * Prevents context snowball from search spam (e.g. 96 WebSearches).
 *
 * A tool name is repetitive once it appears in 6 or more assistant `tool_use`
 * parts. For each such tool, every call except its 3 most recent has its
 * matching `tool_result` content replaced by `[<first 60 chars of line 1>...]`,
 * provided the content is longer than 80 characters.
 *
 * @param history Conversation messages; only array-valued `content` is inspected.
 * @returns A new array when at least one result was collapsed, otherwise the
 *          same `history` reference (cheap identity check for callers).
 */
export function collapseRepetitiveTools(history) {
    // One pass: tool name → tool_use ids in call order.
    const callOrder = new Map();
    for (const msg of history) {
        if (msg.role !== 'assistant' || !Array.isArray(msg.content))
            continue;
        for (const part of msg.content) {
            if (part.type !== 'tool_use')
                continue;
            const name = part.name ?? '';
            const ids = callOrder.get(name);
            if (ids)
                ids.push(part.id);
            else
                callOrder.set(name, [part.id]);
        }
    }
    // For tools called 6+ times, everything but the last 3 calls is "old".
    const oldIds = new Set();
    for (const ids of callOrder.values()) {
        if (ids.length < 6)
            continue;
        for (const id of ids.slice(0, -3)) {
            oldIds.add(id);
        }
    }
    if (oldIds.size === 0)
        return history;
    // Collapse old results; untouched messages and parts keep their identity.
    let modified = false;
    const result = history.map(msg => {
        if (msg.role !== 'user' || !Array.isArray(msg.content))
            return msg;
        let changed = false;
        const parts = msg.content.map(part => {
            if (part.type !== 'tool_result' || !oldIds.has(part.tool_use_id))
                return part;
            // JSON.stringify(undefined) is undefined — treat a missing content as empty
            // instead of throwing on `.length`.
            const content = typeof part.content === 'string'
                ? part.content
                : (JSON.stringify(part.content) ?? '');
            if (content.length <= 80)
                return part;
            changed = true;
            const first = content.split('\n')[0].slice(0, 60);
            return { ...part, content: `[${first}...]` };
        });
        if (!changed)
            return msg;
        modified = true;
        return { ...msg, content: parts };
    });
    return modified ? result : history;
}
|
|
318
|
+
// ─── Pipeline ────────────────────────────────────────────────────────────
|
|
244
319
|
/**
|
|
245
320
|
* Run all token reduction passes on conversation history.
|
|
246
321
|
* Returns same reference if nothing changed (cheap identity check).
|
|
@@ -250,6 +325,13 @@ export function reduceTokens(history, debug) {
|
|
|
250
325
|
return history; // Skip for short conversations
|
|
251
326
|
let current = history;
|
|
252
327
|
let totalSaved = 0;
|
|
328
|
+
// Pass 0: Collapse repetitive tool results (e.g. 96 WebSearches with similar queries)
|
|
329
|
+
const collapsed = collapseRepetitiveTools(current);
|
|
330
|
+
if (collapsed !== current) {
|
|
331
|
+
const before = estimateChars(current);
|
|
332
|
+
current = collapsed;
|
|
333
|
+
totalSaved += before - estimateChars(current);
|
|
334
|
+
}
|
|
253
335
|
// Pass 1: Age old tool results
|
|
254
336
|
const aged = ageToolResults(current);
|
|
255
337
|
if (aged !== current) {
|
|
@@ -288,7 +370,7 @@ export function reduceTokens(history, debug) {
|
|
|
288
370
|
}
|
|
289
371
|
if (debug && totalSaved > 500) {
|
|
290
372
|
const tokensSaved = Math.round(totalSaved / 4);
|
|
291
|
-
console.error(`[
|
|
373
|
+
console.error(`[franklin] Token reduction: ~${tokensSaved} tokens saved`);
|
|
292
374
|
}
|
|
293
375
|
return current;
|
|
294
376
|
}
|
|
@@ -1,23 +1,28 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Streaming Tool Executor for
|
|
2
|
+
* Streaming Tool Executor for Franklin.
|
|
3
3
|
* Starts executing concurrent-safe tools while the model is still streaming.
|
|
4
4
|
* Non-concurrent tools wait until the full response is received.
|
|
5
5
|
*/
|
|
6
6
|
import type { CapabilityHandler, CapabilityInvocation, CapabilityResult, ExecutionScope } from './types.js';
|
|
7
7
|
import type { PermissionManager } from './permissions.js';
|
|
8
|
+
import type { SessionToolGuard } from './tool-guard.js';
|
|
8
9
|
export declare class StreamingExecutor {
|
|
9
10
|
private handlers;
|
|
10
11
|
private scope;
|
|
11
12
|
private permissions?;
|
|
13
|
+
private guard?;
|
|
12
14
|
private onStart;
|
|
13
15
|
private onProgress?;
|
|
14
16
|
private pending;
|
|
17
|
+
private sessionId;
|
|
15
18
|
constructor(opts: {
|
|
16
19
|
handlers: Map<string, CapabilityHandler>;
|
|
17
20
|
scope: ExecutionScope;
|
|
18
21
|
permissions?: PermissionManager;
|
|
22
|
+
guard?: SessionToolGuard;
|
|
19
23
|
onStart: (id: string, name: string, preview?: string) => void;
|
|
20
24
|
onProgress?: (id: string, text: string) => void;
|
|
25
|
+
sessionId?: string;
|
|
21
26
|
});
|
|
22
27
|
/**
|
|
23
28
|
* Called when a tool_use block is fully received from the stream.
|
|
@@ -1,21 +1,53 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Streaming Tool Executor for
|
|
2
|
+
* Streaming Tool Executor for Franklin.
|
|
3
3
|
* Starts executing concurrent-safe tools while the model is still streaming.
|
|
4
4
|
* Non-concurrent tools wait until the full response is received.
|
|
5
5
|
*/
|
|
6
|
+
import { mkdirSync, writeFileSync } from 'node:fs';
|
|
7
|
+
import { join } from 'node:path';
|
|
8
|
+
import { recordFailure } from '../stats/failures.js';
|
|
9
|
+
import { BLOCKRUN_DIR } from '../config.js';
|
|
10
|
+
/** Tool outputs longer than this many characters are persisted to disk; also the size of the fallback truncation. */
const PERSIST_THRESHOLD = 50_000;
/** Maximum number of output characters included in the inline preview. */
const PREVIEW_SIZE = 2_000;
/**
 * Persist a large tool result to disk and return a preview string.
 * Inspired by Claude Code's toolResultStorage.ts.
 *
 * The full output is written once to
 * `<BLOCKRUN_DIR>/tool-results/<sessionId>/<toolUseId>.txt`. If that file
 * already exists it is left untouched and the preview is still returned.
 *
 * @param sessionId Session identifier, used as the directory name.
 * @param toolUseId Tool invocation id, used as the file name.
 * @param output Full tool output text.
 * @returns A `<persisted-output>` preview pointing at the saved file, or — if
 *          the directory or file cannot be written — the first
 *          PERSIST_THRESHOLD characters followed by a truncation note.
 */
function persistLargeResult(sessionId, toolUseId, output) {
    // Ids become path segments: neutralize separators and dot-segments so a
    // malformed id cannot point outside the tool-results directory.
    const toSafeSegment = (id) => {
        const cleaned = String(id).replace(/[\\/\0]/g, '_');
        return cleaned === '' || cleaned === '.' || cleaned === '..' ? '_' : cleaned;
    };
    const dir = join(BLOCKRUN_DIR, 'tool-results', toSafeSegment(sessionId));
    try {
        mkdirSync(dir, { recursive: true });
        const filePath = join(dir, `${toSafeSegment(toolUseId)}.txt`);
        try {
            // 'wx' fails with EEXIST when the file is already there (write-once).
            writeFileSync(filePath, output, { flag: 'wx' });
        }
        catch (err) {
            // Already persisted: keep the existing file and still return the preview.
            if (err?.code !== 'EEXIST')
                throw err;
        }
        // Generate preview — truncate at line boundary for clean output
        let preview = output.slice(0, PREVIEW_SIZE);
        const lastNl = preview.lastIndexOf('\n');
        if (lastNl > PREVIEW_SIZE * 0.5) {
            preview = preview.slice(0, lastNl);
        }
        return `<persisted-output>\nOutput too large (${(output.length / 1024).toFixed(1)}KB). Full output saved to: ${filePath}\n\nPreview (first ${PREVIEW_SIZE / 1000}KB):\n${preview}\n...\n</persisted-output>`;
    }
    catch {
        // Fallback: simple truncation if disk write fails
        return output.slice(0, PERSIST_THRESHOLD) +
            `\n\n[Truncated: original was ${output.length.toLocaleString()} chars]`;
    }
}
|
|
6
34
|
export class StreamingExecutor {
|
|
7
35
|
handlers;
|
|
8
36
|
scope;
|
|
9
37
|
permissions;
|
|
38
|
+
guard;
|
|
10
39
|
onStart;
|
|
11
40
|
onProgress;
|
|
12
41
|
pending = [];
|
|
42
|
+
sessionId;
|
|
13
43
|
constructor(opts) {
|
|
14
44
|
this.handlers = opts.handlers;
|
|
15
45
|
this.scope = opts.scope;
|
|
16
46
|
this.permissions = opts.permissions;
|
|
47
|
+
this.guard = opts.guard;
|
|
17
48
|
this.onStart = opts.onStart;
|
|
18
49
|
this.onProgress = opts.onProgress;
|
|
50
|
+
this.sessionId = opts.sessionId || 'default';
|
|
19
51
|
}
|
|
20
52
|
/**
|
|
21
53
|
* Called when a tool_use block is fully received from the stream.
|
|
@@ -24,7 +56,10 @@ export class StreamingExecutor {
|
|
|
24
56
|
*/
|
|
25
57
|
onToolReceived(invocation) {
|
|
26
58
|
const handler = this.handlers.get(invocation.name);
|
|
27
|
-
|
|
59
|
+
// Dynamic concurrency check (e.g., Bash is concurrent only for read-only commands)
|
|
60
|
+
const isConcurrent = handler?.isConcurrentSafe
|
|
61
|
+
? handler.isConcurrentSafe(invocation.input)
|
|
62
|
+
: (handler?.concurrent ?? false);
|
|
28
63
|
if (isConcurrent) {
|
|
29
64
|
// Concurrent tools are auto-allowed — start immediately and time from here
|
|
30
65
|
const preview = this.inputPreview(invocation);
|
|
@@ -78,10 +113,17 @@ export class StreamingExecutor {
|
|
|
78
113
|
}
|
|
79
114
|
async executeWithPermissions(invocation, pendingCount = 1, callStart = true // false for concurrent tools (already called in onToolReceived)
|
|
80
115
|
) {
|
|
116
|
+
const guardResult = this.guard
|
|
117
|
+
? await this.guard.beforeExecute(invocation, this.scope)
|
|
118
|
+
: null;
|
|
119
|
+
if (guardResult) {
|
|
120
|
+
return guardResult;
|
|
121
|
+
}
|
|
81
122
|
// Permission check
|
|
82
123
|
if (this.permissions) {
|
|
83
124
|
const decision = await this.permissions.check(invocation.name, invocation.input);
|
|
84
125
|
if (decision.behavior === 'deny') {
|
|
126
|
+
this.guard?.cancelInvocation(invocation.id);
|
|
85
127
|
return {
|
|
86
128
|
output: `Permission denied for ${invocation.name}: ${decision.reason || 'denied by policy'}. Do not retry — explain to the user what you were trying to do and ask how they'd like to proceed.`,
|
|
87
129
|
isError: true,
|
|
@@ -90,6 +132,7 @@ export class StreamingExecutor {
|
|
|
90
132
|
if (decision.behavior === 'ask') {
|
|
91
133
|
const allowed = await this.permissions.promptUser(invocation.name, invocation.input, pendingCount);
|
|
92
134
|
if (!allowed) {
|
|
135
|
+
this.guard?.cancelInvocation(invocation.id);
|
|
93
136
|
return {
|
|
94
137
|
output: `User denied permission for ${invocation.name}. Do not retry — ask the user what they'd like to do instead.`,
|
|
95
138
|
isError: true,
|
|
@@ -102,9 +145,26 @@ export class StreamingExecutor {
|
|
|
102
145
|
const preview = this.inputPreview(invocation);
|
|
103
146
|
this.onStart(invocation.id, invocation.name, preview);
|
|
104
147
|
}
|
|
105
|
-
|
|
148
|
+
let handler = this.handlers.get(invocation.name);
|
|
106
149
|
if (!handler) {
|
|
107
|
-
|
|
150
|
+
// Attempt repair: lowercase, normalize hyphens/spaces → match
|
|
151
|
+
const attempted = invocation.name;
|
|
152
|
+
const lower = attempted.toLowerCase();
|
|
153
|
+
for (const [name, h] of this.handlers) {
|
|
154
|
+
if (name.toLowerCase() === lower || name.toLowerCase().replace(/[-_ ]/g, '') === lower.replace(/[-_ ]/g, '')) {
|
|
155
|
+
handler = h;
|
|
156
|
+
invocation = { ...invocation, name };
|
|
157
|
+
break;
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
if (!handler) {
|
|
161
|
+
this.guard?.cancelInvocation(invocation.id);
|
|
162
|
+
const available = [...this.handlers.keys()].join(', ');
|
|
163
|
+
return {
|
|
164
|
+
output: `Unknown tool "${attempted}". Available tools: ${available}. Check spelling and try again.`,
|
|
165
|
+
isError: true,
|
|
166
|
+
};
|
|
167
|
+
}
|
|
108
168
|
}
|
|
109
169
|
// Wire per-invocation progress to onProgress callback
|
|
110
170
|
const progressScope = this.onProgress
|
|
@@ -114,9 +174,27 @@ export class StreamingExecutor {
|
|
|
114
174
|
}
|
|
115
175
|
: this.scope;
|
|
116
176
|
try {
|
|
117
|
-
|
|
177
|
+
let result = await handler.execute(invocation.input, progressScope);
|
|
178
|
+
this.guard?.afterExecute(invocation, result);
|
|
179
|
+
// Persist large results to disk with preview (inspired by Claude Code toolResultStorage)
|
|
180
|
+
// Instead of just truncating, save the full result to disk so it can be re-read later.
|
|
181
|
+
if (result.output.length > PERSIST_THRESHOLD) {
|
|
182
|
+
result = {
|
|
183
|
+
output: persistLargeResult(this.sessionId, invocation.id, result.output),
|
|
184
|
+
isError: result.isError,
|
|
185
|
+
};
|
|
186
|
+
}
|
|
187
|
+
return result;
|
|
118
188
|
}
|
|
119
189
|
catch (err) {
|
|
190
|
+
this.guard?.cancelInvocation(invocation.id);
|
|
191
|
+
recordFailure({
|
|
192
|
+
timestamp: Date.now(),
|
|
193
|
+
model: '', // not available at tool level
|
|
194
|
+
failureType: 'tool_error',
|
|
195
|
+
toolName: invocation.name,
|
|
196
|
+
errorMessage: err.message,
|
|
197
|
+
});
|
|
120
198
|
return {
|
|
121
199
|
output: `Error executing ${invocation.name}: ${err.message}`,
|
|
122
200
|
isError: true,
|
package/dist/agent/tokens.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Token estimation for
|
|
2
|
+
* Token estimation for Franklin.
|
|
3
3
|
* Uses byte-based heuristic (no external tokenizer dependency).
|
|
4
4
|
* Anchors to actual API counts when available, estimates on top for new messages.
|
|
5
5
|
*/
|
|
@@ -22,9 +22,18 @@ export declare function getAnchoredTokenCount(history: Dialogue[]): {
|
|
|
22
22
|
* Reset anchor (e.g., after compaction).
|
|
23
23
|
*/
|
|
24
24
|
export declare function resetTokenAnchor(): void;
|
|
25
|
+
/**
|
|
26
|
+
* Set the current model for token estimation context.
|
|
27
|
+
* Called when the model is resolved in the agent loop.
|
|
28
|
+
*/
|
|
29
|
+
export declare function setEstimationModel(model: string): void;
|
|
25
30
|
/**
|
|
26
31
|
* Estimate token count for a string using byte-length heuristic.
|
|
27
|
-
* JSON-heavy content uses 2 bytes/token; general text uses
|
|
32
|
+
* JSON-heavy content uses 2 bytes/token; general text uses model-specific ratio.
|
|
33
|
+
*
|
|
34
|
+
* Padding reduced from 1.33x to 1.15x to prevent premature compaction.
|
|
35
|
+
* The old 1.33x + ceil() combo caused ~36% overestimation, triggering
|
|
36
|
+
* auto-compact when context was still 15-20% below the actual limit.
|
|
28
37
|
*/
|
|
29
38
|
export declare function estimateTokens(text: string, bytesPerToken?: number): number;
|
|
30
39
|
/**
|
package/dist/agent/tokens.js
CHANGED
|
@@ -1,9 +1,30 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Token estimation for
|
|
2
|
+
* Token estimation for Franklin.
|
|
3
3
|
* Uses byte-based heuristic (no external tokenizer dependency).
|
|
4
4
|
* Anchors to actual API counts when available, estimates on top for new messages.
|
|
5
5
|
*/
|
|
6
6
|
/** Fallback ratio used when the model is empty or its provider has no specific entry. */
const DEFAULT_BYTES_PER_TOKEN = 4;
/**
 * Provider-specific bytes-per-token ratios for more accurate estimation.
 * Approximate averages: Claude ~3.5 bytes/token, GPT ~4, Gemini ~3.
 * A lower ratio produces a higher token estimate for the same text.
 */
const MODEL_BYTES_PER_TOKEN = {
    'anthropic': 3.5,
    'openai': 4,
    'google': 3,
    'deepseek': 3.5,
    'xai': 4,
    'zai': 4,
};
/**
 * Get bytes-per-token ratio for a model. Falls back to DEFAULT_BYTES_PER_TOKEN.
 *
 * @param model Model id; the text before the first '/' is used as the provider key.
 * @returns The provider's ratio, or DEFAULT_BYTES_PER_TOKEN when the model is
 *   empty or the provider has no entry in MODEL_BYTES_PER_TOKEN.
 */
function getModelBytesPerToken(model) {
    if (!model)
        return DEFAULT_BYTES_PER_TOKEN;
    const provider = model.split('/')[0];
    // Own-property check: a bare `MODEL_BYTES_PER_TOKEN[provider]` lookup also
    // resolves inherited keys ('constructor', 'toString', '__proto__', ...) to
    // non-numeric values, which would turn every downstream estimate into NaN.
    return Object.prototype.hasOwnProperty.call(MODEL_BYTES_PER_TOKEN, provider)
        ? MODEL_BYTES_PER_TOKEN[provider]
        : DEFAULT_BYTES_PER_TOKEN;
}
|
|
26
|
+
// Store current model for token estimation context
|
|
27
|
+
let _currentModel;
|
|
7
28
|
// ─── API-anchored token tracking ──────────────────────────────────────────
|
|
8
29
|
/** Last known actual token count from API response */
|
|
9
30
|
let lastApiInputTokens = 0;
|
|
@@ -59,13 +80,25 @@ export function resetTokenAnchor() {
|
|
|
59
80
|
lastApiOutputTokens = 0;
|
|
60
81
|
lastApiMessageCount = 0;
|
|
61
82
|
}
|
|
83
|
+
/**
 * Remember which model is active so token estimates can use its ratio.
 * Called when the model is resolved in the agent loop; the value is kept in
 * the module-level `_currentModel`.
 *
 * @param model Model id to store for later estimation calls.
 */
export function setEstimationModel(model) {
    _currentModel = model;
}
|
|
62
90
|
/**
|
|
63
91
|
* Estimate token count for a string using byte-length heuristic.
|
|
64
|
-
* JSON-heavy content uses 2 bytes/token; general text uses
|
|
92
|
+
* JSON-heavy content uses 2 bytes/token; general text uses a model-specific ratio.
|
|
93
|
+
*
|
|
94
|
+
* Padding reduced from 1.33x to 1.15x to prevent premature compaction.
|
|
95
|
+
* The old 1.33x + ceil() combo caused ~36% overestimation, triggering
|
|
96
|
+
* auto-compact when context was still 15-20% below the actual limit.
|
|
65
97
|
*/
|
|
66
|
-
export function estimateTokens(text, bytesPerToken
|
|
67
|
-
|
|
68
|
-
|
|
98
|
+
export function estimateTokens(text, bytesPerToken) {
    // Safety margin applied on top of the raw bytes / ratio estimate (15%).
    const SAFETY_PADDING = 1.15;
    // An explicit ratio wins; null/undefined falls back to the ratio of the
    // model last registered through setEstimationModel.
    const ratio = bytesPerToken ?? getModelBytesPerToken(_currentModel);
    // Size is measured in UTF-8 bytes, not in string length.
    const byteCount = Buffer.byteLength(text, 'utf-8');
    return Math.ceil(byteCount / ratio * SAFETY_PADDING);
}
|
|
70
103
|
/**
|
|
71
104
|
* Estimate tokens for a content part.
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import type { CapabilityInvocation, CapabilityResult, ExecutionScope } from './types.js';
|
|
2
|
+
/**
 * Takes a search query string and returns both a single normalized string
 * and a list of string tokens.
 * NOTE(review): the normalization rules are not visible in this declaration —
 * confirm against tool-guard.js.
 */
export declare function normalizeSearchQuery(query: string): {
|
|
3
|
+
/** Normalized form of the query, as one string. */
normalized: string;
|
|
4
|
+
/** Tokens of the query, as an array of strings. */
tokens: string[];
|
|
5
|
+
};
|
|
6
|
+
/**
 * Guard object exposing turn and invocation lifecycle hooks:
 * startTurn, beforeExecute, afterExecute and cancelInvocation.
 * Private members appear here without types, as declaration files emit them.
 * NOTE(review): member names suggest tracking of web searches, file reads and
 * web fetches — confirm the actual behavior against tool-guard.js.
 */
export declare class SessionToolGuard {
|
|
7
|
+
// Private state (types are not emitted for private members in a declaration file).
private turn;
|
|
8
|
+
private webSearchesThisTurn;
|
|
9
|
+
private searchFamilies;
|
|
10
|
+
private searchCache;
|
|
11
|
+
private pendingSearches;
|
|
12
|
+
private recentReads;
|
|
13
|
+
private pendingReads;
|
|
14
|
+
private recentFetches;
|
|
15
|
+
private pendingFetches;
|
|
16
|
+
private toolErrorCounts;
|
|
17
|
+
/**
 * Takes no arguments and returns nothing.
 * NOTE(review): presumably marks the start of a new turn and resets per-turn state — confirm.
 */
startTurn(): void;
|
|
18
|
+
/**
 * Asynchronous hook that receives the invocation and its execution scope and
 * resolves to either a CapabilityResult or null.
 * NOTE(review): presumably a non-null result is used instead of running the tool — confirm.
 */
beforeExecute(invocation: CapabilityInvocation, scope: ExecutionScope): Promise<CapabilityResult | null>;
|
|
19
|
+
/** Synchronous hook that receives the invocation together with its result; returns nothing. */
afterExecute(invocation: CapabilityInvocation, result: CapabilityResult): void;
|
|
20
|
+
/**
 * Takes the id of an invocation; returns nothing.
 * NOTE(review): presumably drops pending state for an invocation that did not complete — confirm.
 */
cancelInvocation(invocationId: string): void;
|
|
21
|
+
// Private helpers; their signatures are not emitted in the declaration file.
// NOTE(review): names suggest per-tool before/after hooks for web search, read and web fetch — confirm.
private beforeWebSearch;
|
|
22
|
+
private beforeRead;
|
|
23
|
+
private beforeWebFetch;
|
|
24
|
+
private afterWebSearch;
|
|
25
|
+
private afterRead;
|
|
26
|
+
private afterWebFetch;
|
|
27
|
+
}
|