npm - @blockrun/franklin - Versions diffs - 3.3.2 → 3.5.0 - Mend

@blockrun/franklin 3.3.2 → 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (110) hide show

package/README.md +58 -7
package/dist/agent/commands.d.ts +1 -1
package/dist/agent/commands.js +128 -17
package/dist/agent/compact.d.ts +2 -2
package/dist/agent/compact.js +148 -22
package/dist/agent/context.d.ts +8 -3
package/dist/agent/context.js +301 -108
package/dist/agent/error-classifier.d.ts +11 -2
package/dist/agent/error-classifier.js +64 -10
package/dist/agent/llm.d.ts +8 -1
package/dist/agent/llm.js +114 -19
package/dist/agent/loop.d.ts +1 -2
package/dist/agent/loop.js +509 -61
package/dist/agent/optimize.d.ts +2 -2
package/dist/agent/optimize.js +9 -7
package/dist/agent/permissions.d.ts +1 -1
package/dist/agent/permissions.js +1 -1
package/dist/agent/planner.d.ts +42 -0
package/dist/agent/planner.js +110 -0
package/dist/agent/reduce.d.ts +7 -1
package/dist/agent/reduce.js +85 -3
package/dist/agent/streaming-executor.d.ts +6 -1
package/dist/agent/streaming-executor.js +83 -5
package/dist/agent/tokens.d.ts +11 -2
package/dist/agent/tokens.js +38 -5
package/dist/agent/tool-guard.d.ts +27 -0
package/dist/agent/tool-guard.js +324 -0
package/dist/agent/types.d.ts +7 -1
package/dist/agent/types.js +1 -1
package/dist/banner.js +27 -40
package/dist/brain/extract.d.ts +11 -0
package/dist/brain/extract.js +154 -0
package/dist/brain/index.d.ts +3 -0
package/dist/brain/index.js +2 -0
package/dist/brain/store.d.ts +42 -0
package/dist/brain/store.js +225 -0
package/dist/brain/types.d.ts +45 -0
package/dist/brain/types.js +5 -0
package/dist/commands/daemon.js +2 -1
package/dist/commands/start.js +16 -3
package/dist/config.js +1 -1
package/dist/index.js +27 -2
package/dist/learnings/extractor.d.ts +13 -0
package/dist/learnings/extractor.js +69 -8
package/dist/learnings/index.d.ts +1 -1
package/dist/learnings/index.js +1 -1
package/dist/learnings/store.js +42 -13
package/dist/learnings/types.d.ts +1 -1
package/dist/mcp/client.d.ts +1 -1
package/dist/mcp/client.js +5 -5
package/dist/mcp/config.d.ts +1 -1
package/dist/mcp/config.js +1 -1
package/dist/panel/html.d.ts +2 -0
package/dist/panel/html.js +409 -146
package/dist/panel/server.js +19 -0
package/dist/pricing.js +3 -2
package/dist/proxy/fallback.d.ts +3 -1
package/dist/proxy/fallback.js +4 -4
package/dist/proxy/server.js +29 -11
package/dist/proxy/sse-translator.js +1 -1
package/dist/router/categories.d.ts +21 -0
package/dist/router/categories.js +96 -0
package/dist/router/index.d.ts +9 -2
package/dist/router/index.js +106 -27
package/dist/router/local-elo.d.ts +32 -0
package/dist/router/local-elo.js +107 -0
package/dist/router/selector.d.ts +46 -0
package/dist/router/selector.js +106 -0
package/dist/session/storage.d.ts +5 -1
package/dist/session/storage.js +24 -2
package/dist/social/a11y.d.ts +1 -1
package/dist/social/a11y.js +5 -1
package/dist/social/browser.d.ts +5 -0
package/dist/social/browser.js +22 -0
package/dist/social/preflight.d.ts +4 -0
package/dist/social/preflight.js +42 -3
package/dist/stats/failures.d.ts +20 -0
package/dist/stats/failures.js +63 -0
package/dist/stats/format.d.ts +6 -0
package/dist/stats/format.js +23 -0
package/dist/stats/insights.js +1 -21
package/dist/stats/session-tracker.d.ts +21 -0
package/dist/stats/session-tracker.js +28 -0
package/dist/stats/tracker.d.ts +1 -1
package/dist/stats/tracker.js +1 -1
package/dist/tools/bash.d.ts +14 -1
package/dist/tools/bash.js +132 -7
package/dist/tools/edit.js +77 -14
package/dist/tools/glob.js +13 -3
package/dist/tools/grep.js +30 -12
package/dist/tools/imagegen.js +3 -3
package/dist/tools/index.d.ts +1 -1
package/dist/tools/index.js +5 -1
package/dist/tools/read.d.ts +16 -2
package/dist/tools/read.js +36 -8
package/dist/tools/searchx.d.ts +6 -2
package/dist/tools/searchx.js +221 -44
package/dist/tools/subagent.js +37 -3
package/dist/tools/task.js +43 -7
package/dist/tools/validate.d.ts +11 -0
package/dist/tools/validate.js +42 -0
package/dist/tools/webfetch.js +18 -7
package/dist/tools/websearch.js +41 -7
package/dist/tools/write.js +26 -6
package/dist/ui/app.js +31 -6
package/dist/ui/model-picker.d.ts +1 -1
package/dist/ui/model-picker.js +1 -1
package/dist/ui/terminal.d.ts +1 -1
package/dist/ui/terminal.js +1 -1
package/package.json +2 -2

package/dist/agent/optimize.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * Token optimization strategies for runcode.
+ * Token optimization strategies for Franklin.
  *
  * Five layers of optimization to minimize token usage:
  * 1. Tool result size budgeting — cap large outputs, keep preview
@@ -23,7 +23,7 @@ export declare function getMaxOutputTokens(model: string): number;
 export declare function budgetToolResults(history: Dialogue[]): Dialogue[];
 export declare function stripOldThinking(history: Dialogue[]): Dialogue[];
 /**
- * After an idle gap (>60 min), clear old tool results.
+ * After an idle gap (>30 min), clear old tool results.
  * When the user comes back after being away, old results are stale anyway.
  */
 export declare function timeBasedCleanup(history: Dialogue[], lastActivityTimestamp?: number): {

package/dist/agent/optimize.js CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * Token optimization strategies for runcode.
+ * Token optimization strategies for Franklin.
  *
  * Five layers of optimization to minimize token usage:
  * 1. Tool result size budgeting — cap large outputs, keep preview
@@ -34,8 +34,10 @@ const MODEL_MAX_OUTPUT = {
 export function getMaxOutputTokens(model) {
     return MODEL_MAX_OUTPUT[model] ?? 16_384;
 }
-/** Idle gap (minutes) after which old tool results are cleared */
-const IDLE_GAP_THRESHOLD_MINUTES = 5;
+/** Idle gap (minutes) after which old tool results are cleared.
+ * Set to 30 min — a coffee break shouldn't lose tool context.
+ * Was 5 min which was too aggressive (comment said 60, code said 5). */
+const IDLE_GAP_THRESHOLD_MINUTES = 30;
 /** Number of recent tool results to keep during time-based cleanup */
 const KEEP_RECENT_TOOL_RESULTS = 3;
 // ─── 1. Tool Result Size Budgeting ─────────────────────────────────────────
@@ -140,7 +142,7 @@ export function stripOldThinking(history) {
 }
 // ─── 3. Time-Based Cleanup ─────────────────────────────────────────────────
 /**
- * After an idle gap (>60 min), clear old tool results.
+ * After an idle gap (>30 min), clear old tool results.
  * When the user comes back after being away, old results are stale anyway.
  */
 export function timeBasedCleanup(history, lastActivityTimestamp) {
@@ -240,7 +242,7 @@ export function optimizeHistory(history, opts) {
         result = stripped;
         changed = true;
         if (opts?.debug)
-            console.error('[runcode] Stripped old thinking blocks');
+            console.error('[franklin] Stripped old thinking blocks');
     }
     // 2. Budget tool results
     const budgeted = budgetToolResults(result);
@@ -248,7 +250,7 @@ export function optimizeHistory(history, opts) {
         result = budgeted;
         changed = true;
         if (opts?.debug)
-            console.error('[runcode] Budgeted oversized tool results');
+            console.error('[franklin] Budgeted oversized tool results');
     }
     // 3. Time-based cleanup
     const { history: cleaned, cleaned: didClean } = timeBasedCleanup(result, opts?.lastActivityTimestamp);
@@ -256,7 +258,7 @@ export function optimizeHistory(history, opts) {
         result = cleaned;
         changed = true;
         if (opts?.debug)
-            console.error('[runcode] Cleared stale tool results after idle gap');
+            console.error('[franklin] Cleared stale tool results after idle gap');
     }
     return result;
 }

package/dist/agent/permissions.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * Permission system for runcode.
+ * Permission system for Franklin.
  * Controls which tools can execute automatically vs. require user approval.
  */
 export type PermissionBehavior = 'allow' | 'deny' | 'ask';

package/dist/agent/permissions.js CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * Permission system for runcode.
+ * Permission system for Franklin.
  * Controls which tools can execute automatically vs. require user approval.
  */
 import fs from 'node:fs';

package/dist/agent/planner.d.ts ADDED Viewed

@@ -0,0 +1,42 @@
+/**
+ * Planner-Executor for Franklin
+ *
+ * Uses expensive models (Opus/Sonnet) for planning, then cheap/free models
+ * for execution. Saves 40-70% on complex tasks while maintaining quality.
+ *
+ * Flow: detect complexity → plan with strong model → execute with cheap model
+ *       → escalate back to strong model if executor gets stuck
+ */
+import type { Tier, RoutingProfile } from '../router/index.js';
+/**
+ * Should this task use plan-then-execute?
+ * Returns true only for complex, multi-step tasks where the savings justify
+ * the overhead of an extra planning call.
+ */
+export declare function shouldPlan(tier: Tier | undefined, profile: RoutingProfile | undefined, userText: string, ultrathink: boolean, planDisabled: boolean): boolean;
+/**
+ * Returns the planning system prompt section.
+ * Injected alongside the normal system prompt during the planning call.
+ */
+export declare function getPlanningPrompt(): string;
+/**
+ * Pick the cheap executor model for a given routing profile.
+ * These models are good at following structured instructions (the plan)
+ * but much cheaper than the planning model.
+ */
+export declare function getExecutorModel(profile: RoutingProfile): string;
+/**
+ * Extract numbered steps from plan text.
+ * Handles formats like "1. Do X", "1) Do X", "Step 1: Do X".
+ */
+export declare function parsePlanSteps(text: string): string[];
+/**
+ * Detect if the executor model is stuck.
+ * Triggers when the model hits repeated errors or repeats the same tool call.
+ */
+export declare function isExecutorStuck(consecutiveErrors: number, sameToolRepeat: boolean): boolean;
+/**
+ * Build a signature for a tool call (name + first 100 chars of input JSON).
+ * Used to detect when the executor repeats the exact same call.
+ */
+export declare function toolCallSignature(name: string, input: unknown): string;

package/dist/agent/planner.js ADDED Viewed

@@ -0,0 +1,110 @@
+/**
+ * Planner-Executor for Franklin
+ *
+ * Uses expensive models (Opus/Sonnet) for planning, then cheap/free models
+ * for execution. Saves 40-70% on complex tasks while maintaining quality.
+ *
+ * Flow: detect complexity → plan with strong model → execute with cheap model
+ *       → escalate back to strong model if executor gets stuck
+ */
+// ─── Agentic keywords that suggest multi-step work ───────────────────────
+const AGENTIC_KEYWORDS = /\b(implement|refactor|build|fix|debug|migrate|deploy|create|add|remove|update|restructure|extract|rewrite|optimize|convert|integrate|setup|configure)\b/i;
+const MULTI_STEP_PATTERN = /first.*then|step\s+\d|\d+\.\s|and\s+then|after\s+that|next\s*,|finally\b/i;
+// ─── Detection ───────────────────────────────────────────────────────────
+/**
+ * Should this task use plan-then-execute?
+ * Returns true only for complex, multi-step tasks where the savings justify
+ * the overhead of an extra planning call.
+ */
+export function shouldPlan(tier, profile, userText, ultrathink, planDisabled) {
+    // Gate 1: only COMPLEX or REASONING tiers benefit from planning
+    if (tier !== 'COMPLEX' && tier !== 'REASONING')
+        return false;
+    // Gate 2: only auto or premium profiles (eco/free already cost-optimized)
+    if (profile !== 'auto' && profile !== 'premium')
+        return false;
+    // Gate 3: skip short queries — planning overhead not worth it
+    if (userText.length < 80)
+        return false;
+    // Gate 4: ultrathink already provides deep reasoning
+    if (ultrathink)
+        return false;
+    // Gate 5: user disabled planning for this session
+    if (planDisabled)
+        return false;
+    // Gate 6: must have agentic or multi-step signals
+    const hasAgenticKeyword = AGENTIC_KEYWORDS.test(userText);
+    const hasMultiStep = MULTI_STEP_PATTERN.test(userText);
+    return hasAgenticKeyword || hasMultiStep;
+}
+// ─── Planning Prompt ─────────────────────────────────────────────────────
+/**
+ * Returns the planning system prompt section.
+ * Injected alongside the normal system prompt during the planning call.
+ */
+export function getPlanningPrompt() {
+    return `# Planning Mode — Active
+You are in planning mode. Produce a structured execution plan for the user's request.
+Rules:
+- Output a numbered list of concrete steps. Each step = one action.
+- Include specific file paths, function names, or shell commands when known.
+- If you need to explore the codebase first, make it step 1.
+- Mark steps that can run in parallel with [PARALLEL].
+- Keep the plan to 15 steps max.
+- End with a verification step (run tests, check output, etc.).
+- Output ONLY the numbered plan. No code blocks, no explanations, no preamble.`;
+}
+// ─── Executor Model Selection ────────────────────────────────────────────
+/**
+ * Pick the cheap executor model for a given routing profile.
+ * These models are good at following structured instructions (the plan)
+ * but much cheaper than the planning model.
+ */
+export function getExecutorModel(profile) {
+    switch (profile) {
+        case 'premium':
+            return 'moonshot/kimi-k2.5'; // Medium-tier, reliable execution
+        case 'auto':
+        default:
+            return 'google/gemini-2.5-flash'; // Cheap, fast, good at instructions
+    }
+}
+// ─── Plan Parsing ────────────────────────────────────────────────────────
+/**
+ * Extract numbered steps from plan text.
+ * Handles formats like "1. Do X", "1) Do X", "Step 1: Do X".
+ */
+export function parsePlanSteps(text) {
+    const lines = text.split('\n');
+    const steps = [];
+    for (const line of lines) {
+        const trimmed = line.trim();
+        // Match: "1. ...", "1) ...", "Step 1: ...", "- 1. ..."
+        if (/^(?:\d+[\.\):]|step\s+\d)/i.test(trimmed)) {
+            steps.push(trimmed);
+        }
+    }
+    return steps;
+}
+// ─── Stuck Detection ─────────────────────────────────────────────────────
+/** Max consecutive tool errors before escalation */
+const MAX_CONSECUTIVE_ERRORS = 3;
+/**
+ * Detect if the executor model is stuck.
+ * Triggers when the model hits repeated errors or repeats the same tool call.
+ */
+export function isExecutorStuck(consecutiveErrors, sameToolRepeat) {
+    if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS)
+        return true;
+    if (sameToolRepeat)
+        return true;
+    return false;
+}
+/**
+ * Build a signature for a tool call (name + first 100 chars of input JSON).
+ * Used to detect when the executor repeats the exact same call.
+ */
+export function toolCallSignature(name, input) {
+    return `${name}::${JSON.stringify(input).slice(0, 100)}`;
+}

package/dist/agent/reduce.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * Token Reduction for runcode.
+ * Token Reduction for Franklin.
  * Original implementation — reduces context size through intelligent pruning.
  *
  * Strategy: instead of compression/encoding, we PRUNE redundant content.
@@ -42,6 +42,12 @@ export declare function deduplicateMessages(history: Dialogue[]): Dialogue[];
  * RTK-inspired: dedup_lines + strip_ansi pipeline stages.
  */
 export declare function deduplicateToolResultLines(history: Dialogue[]): Dialogue[];
+/**
+ * When the same tool (WebSearch, Grep, etc.) is called 6+ times,
+ * collapse all but the last 3 results to one-line summaries.
+ * Prevents context snowball from search spam (e.g. 96 WebSearches).
+ */
+export declare function collapseRepetitiveTools(history: Dialogue[]): Dialogue[];
 /**
  * Run all token reduction passes on conversation history.
  * Returns same reference if nothing changed (cheap identity check).

package/dist/agent/reduce.js CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * Token Reduction for runcode.
+ * Token Reduction for Franklin.
  * Original implementation — reduces context size through intelligent pruning.
  *
  * Strategy: instead of compression/encoding, we PRUNE redundant content.
@@ -240,7 +240,82 @@ export function deduplicateToolResultLines(history) {
     });
     return modified ? result : history;
 }
-// ─── Pipeline ─────────────────────────────────────────────────────────────
+// ─── 6. Repetitive Tool Collapse ─────────────────────────────────────────
+/**
+ * When the same tool (WebSearch, Grep, etc.) is called 6+ times,
+ * collapse all but the last 3 results to one-line summaries.
+ * Prevents context snowball from search spam (e.g. 96 WebSearches).
+ */
+export function collapseRepetitiveTools(history) {
+    // Count tool_use by name
+    const toolCounts = new Map();
+    for (const msg of history) {
+        if (msg.role !== 'assistant' || !Array.isArray(msg.content))
+            continue;
+        for (const part of msg.content) {
+            if (part.type === 'tool_use') {
+                const name = part.name ?? '';
+                toolCounts.set(name, (toolCounts.get(name) || 0) + 1);
+            }
+        }
+    }
+    // Only for tools called 6+ times
+    const repetitive = new Set();
+    for (const [name, count] of toolCounts) {
+        if (count >= 6)
+            repetitive.add(name);
+    }
+    if (repetitive.size === 0)
+        return history;
+    // Map tool_use_id → name, track call order per tool
+    const idToName = new Map();
+    const callOrder = new Map(); // name → [tool_use_id, ...]
+    for (const msg of history) {
+        if (msg.role !== 'assistant' || !Array.isArray(msg.content))
+            continue;
+        for (const part of msg.content) {
+            if (part.type === 'tool_use' && repetitive.has(part.name ?? '')) {
+                const name = part.name ?? '';
+                idToName.set(part.id, name);
+                if (!callOrder.has(name))
+                    callOrder.set(name, []);
+                callOrder.get(name).push(part.id);
+            }
+        }
+    }
+    // Mark old IDs (all but last 3 per tool)
+    const oldIds = new Set();
+    for (const [, ids] of callOrder) {
+        for (let i = 0; i < ids.length - 3; i++) {
+            oldIds.add(ids[i]);
+        }
+    }
+    if (oldIds.size === 0)
+        return history;
+    // Collapse old results
+    let modified = false;
+    const result = history.map(msg => {
+        if (msg.role !== 'user' || !Array.isArray(msg.content))
+            return msg;
+        let changed = false;
+        const parts = msg.content.map(part => {
+            if (part.type !== 'tool_result' || !oldIds.has(part.tool_use_id))
+                return part;
+            const content = typeof part.content === 'string' ? part.content : JSON.stringify(part.content);
+            if (content.length <= 80)
+                return part;
+            changed = true;
+            const first = content.split('\n')[0].slice(0, 60);
+            return { ...part, content: `[${first}...]` };
+        });
+        if (!changed)
+            return msg;
+        modified = true;
+        return { ...msg, content: parts };
+    });
+    return modified ? result : history;
+}
+// ─── Pipeline ────────────────────────────────────────────────────────────
 /**
  * Run all token reduction passes on conversation history.
  * Returns same reference if nothing changed (cheap identity check).
@@ -250,6 +325,13 @@ export function reduceTokens(history, debug) {
         return history; // Skip for short conversations
     let current = history;
     let totalSaved = 0;
+    // Pass 0: Collapse repetitive tool results (e.g. 96 WebSearches with similar queries)
+    const collapsed = collapseRepetitiveTools(current);
+    if (collapsed !== current) {
+        const before = estimateChars(current);
+        current = collapsed;
+        totalSaved += before - estimateChars(current);
+    }
     // Pass 1: Age old tool results
     const aged = ageToolResults(current);
     if (aged !== current) {
@@ -288,7 +370,7 @@ export function reduceTokens(history, debug) {
     }
     if (debug && totalSaved > 500) {
         const tokensSaved = Math.round(totalSaved / 4);
-        console.error(`[runcode] Token reduction: ~${tokensSaved} tokens saved`);
+        console.error(`[franklin] Token reduction: ~${tokensSaved} tokens saved`);
     }
     return current;
 }

package/dist/agent/streaming-executor.d.ts CHANGED Viewed

@@ -1,23 +1,28 @@
 /**
- * Streaming Tool Executor for runcode.
+ * Streaming Tool Executor for Franklin.
  * Starts executing concurrent-safe tools while the model is still streaming.
  * Non-concurrent tools wait until the full response is received.
  */
 import type { CapabilityHandler, CapabilityInvocation, CapabilityResult, ExecutionScope } from './types.js';
 import type { PermissionManager } from './permissions.js';
+import type { SessionToolGuard } from './tool-guard.js';
 export declare class StreamingExecutor {
     private handlers;
     private scope;
     private permissions?;
+    private guard?;
     private onStart;
     private onProgress?;
     private pending;
+    private sessionId;
     constructor(opts: {
         handlers: Map<string, CapabilityHandler>;
         scope: ExecutionScope;
         permissions?: PermissionManager;
+        guard?: SessionToolGuard;
         onStart: (id: string, name: string, preview?: string) => void;
         onProgress?: (id: string, text: string) => void;
+        sessionId?: string;
     });
     /**
      * Called when a tool_use block is fully received from the stream.

package/dist/agent/streaming-executor.js CHANGED Viewed

@@ -1,21 +1,53 @@
 /**
- * Streaming Tool Executor for runcode.
+ * Streaming Tool Executor for Franklin.
  * Starts executing concurrent-safe tools while the model is still streaming.
  * Non-concurrent tools wait until the full response is received.
  */
+import { mkdirSync, writeFileSync } from 'node:fs';
+import { join } from 'node:path';
+import { recordFailure } from '../stats/failures.js';
+import { BLOCKRUN_DIR } from '../config.js';
+/** Persist a large tool result to disk and return a preview string.
+ * Inspired by Claude Code's toolResultStorage.ts. */
+const PERSIST_THRESHOLD = 50_000;
+const PREVIEW_SIZE = 2_000;
+function persistLargeResult(sessionId, toolUseId, output) {
+    const dir = join(BLOCKRUN_DIR, 'tool-results', sessionId);
+    try {
+        mkdirSync(dir, { recursive: true });
+        const filePath = join(dir, `${toolUseId}.txt`);
+        writeFileSync(filePath, output, { flag: 'wx' }); // write-once (skip if exists)
+        // Generate preview — truncate at line boundary for clean output
+        let preview = output.slice(0, PREVIEW_SIZE);
+        const lastNl = preview.lastIndexOf('\n');
+        if (lastNl > PREVIEW_SIZE * 0.5) {
+            preview = preview.slice(0, lastNl);
+        }
+        return `<persisted-output>\nOutput too large (${(output.length / 1024).toFixed(1)}KB). Full output saved to: ${filePath}\n\nPreview (first ${PREVIEW_SIZE / 1000}KB):\n${preview}\n...\n</persisted-output>`;
+    }
+    catch {
+        // Fallback: simple truncation if disk write fails
+        return output.slice(0, PERSIST_THRESHOLD) +
+            `\n\n[Truncated: original was ${output.length.toLocaleString()} chars]`;
+    }
+}
 export class StreamingExecutor {
     handlers;
     scope;
     permissions;
+    guard;
     onStart;
     onProgress;
     pending = [];
+    sessionId;
     constructor(opts) {
         this.handlers = opts.handlers;
         this.scope = opts.scope;
         this.permissions = opts.permissions;
+        this.guard = opts.guard;
         this.onStart = opts.onStart;
         this.onProgress = opts.onProgress;
+        this.sessionId = opts.sessionId || 'default';
     }
     /**
      * Called when a tool_use block is fully received from the stream.
@@ -24,7 +56,10 @@ export class StreamingExecutor {
      */
     onToolReceived(invocation) {
         const handler = this.handlers.get(invocation.name);
-        const isConcurrent = handler?.concurrent ?? false;
+        // Dynamic concurrency check (e.g., Bash is concurrent only for read-only commands)
+        const isConcurrent = handler?.isConcurrentSafe
+            ? handler.isConcurrentSafe(invocation.input)
+            : (handler?.concurrent ?? false);
         if (isConcurrent) {
             // Concurrent tools are auto-allowed — start immediately and time from here
             const preview = this.inputPreview(invocation);
@@ -78,10 +113,17 @@ export class StreamingExecutor {
     }
     async executeWithPermissions(invocation, pendingCount = 1, callStart = true // false for concurrent tools (already called in onToolReceived)
     ) {
+        const guardResult = this.guard
+            ? await this.guard.beforeExecute(invocation, this.scope)
+            : null;
+        if (guardResult) {
+            return guardResult;
+        }
         // Permission check
         if (this.permissions) {
             const decision = await this.permissions.check(invocation.name, invocation.input);
             if (decision.behavior === 'deny') {
+                this.guard?.cancelInvocation(invocation.id);
                 return {
                     output: `Permission denied for ${invocation.name}: ${decision.reason || 'denied by policy'}. Do not retry — explain to the user what you were trying to do and ask how they'd like to proceed.`,
                     isError: true,
@@ -90,6 +132,7 @@ export class StreamingExecutor {
             if (decision.behavior === 'ask') {
                 const allowed = await this.permissions.promptUser(invocation.name, invocation.input, pendingCount);
                 if (!allowed) {
+                    this.guard?.cancelInvocation(invocation.id);
                     return {
                         output: `User denied permission for ${invocation.name}. Do not retry — ask the user what they'd like to do instead.`,
                         isError: true,
@@ -102,9 +145,26 @@ export class StreamingExecutor {
             const preview = this.inputPreview(invocation);
             this.onStart(invocation.id, invocation.name, preview);
         }
-        const handler = this.handlers.get(invocation.name);
+        let handler = this.handlers.get(invocation.name);
         if (!handler) {
-            return { output: `Unknown capability: ${invocation.name}`, isError: true };
+            // Attempt repair: lowercase, normalize hyphens/spaces → match
+            const attempted = invocation.name;
+            const lower = attempted.toLowerCase();
+            for (const [name, h] of this.handlers) {
+                if (name.toLowerCase() === lower || name.toLowerCase().replace(/[-_ ]/g, '') === lower.replace(/[-_ ]/g, '')) {
+                    handler = h;
+                    invocation = { ...invocation, name };
+                    break;
+                }
+            }
+            if (!handler) {
+                this.guard?.cancelInvocation(invocation.id);
+                const available = [...this.handlers.keys()].join(', ');
+                return {
+                    output: `Unknown tool "${attempted}". Available tools: ${available}. Check spelling and try again.`,
+                    isError: true,
+                };
+            }
         }
         // Wire per-invocation progress to onProgress callback
         const progressScope = this.onProgress
@@ -114,9 +174,27 @@ export class StreamingExecutor {
             }
             : this.scope;
         try {
-            return await handler.execute(invocation.input, progressScope);
+            let result = await handler.execute(invocation.input, progressScope);
+            this.guard?.afterExecute(invocation, result);
+            // Persist large results to disk with preview (inspired by Claude Code toolResultStorage)
+            // Instead of just truncating, save the full result to disk so it can be re-read later.
+            if (result.output.length > PERSIST_THRESHOLD) {
+                result = {
+                    output: persistLargeResult(this.sessionId, invocation.id, result.output),
+                    isError: result.isError,
+                };
+            }
+            return result;
         }
         catch (err) {
+            this.guard?.cancelInvocation(invocation.id);
+            recordFailure({
+                timestamp: Date.now(),
+                model: '', // not available at tool level
+                failureType: 'tool_error',
+                toolName: invocation.name,
+                errorMessage: err.message,
+            });
             return {
                 output: `Error executing ${invocation.name}: ${err.message}`,
                 isError: true,

package/dist/agent/tokens.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * Token estimation for runcode.
+ * Token estimation for Franklin.
  * Uses byte-based heuristic (no external tokenizer dependency).
  * Anchors to actual API counts when available, estimates on top for new messages.
  */
@@ -22,9 +22,18 @@ export declare function getAnchoredTokenCount(history: Dialogue[]): {
  * Reset anchor (e.g., after compaction).
  */
 export declare function resetTokenAnchor(): void;
+/**
+ * Set the current model for token estimation context.
+ * Called when the model is resolved in the agent loop.
+ */
+export declare function setEstimationModel(model: string): void;
 /**
  * Estimate token count for a string using byte-length heuristic.
- * JSON-heavy content uses 2 bytes/token; general text uses 4.
+ * JSON-heavy content uses 2 bytes/token; general text uses model-specific ratio.
+ *
+ * Padding reduced from 1.33x to 1.15x to prevent premature compaction.
+ * The old 1.33x + ceil() combo caused ~36% overestimation, triggering
+ * auto-compact when context was still 15-20% below the actual limit.
  */
 export declare function estimateTokens(text: string, bytesPerToken?: number): number;
 /**

package/dist/agent/tokens.js CHANGED Viewed

@@ -1,9 +1,30 @@
 /**
- * Token estimation for runcode.
+ * Token estimation for Franklin.
  * Uses byte-based heuristic (no external tokenizer dependency).
  * Anchors to actual API counts when available, estimates on top for new messages.
  */
 const DEFAULT_BYTES_PER_TOKEN = 4;
+/**
+ * Model-specific bytes-per-token ratios for more accurate estimation.
+ * Claude tokenizes more efficiently (~3.5 bytes/token), GPT at ~4, Gemini at ~3.
+ */
+const MODEL_BYTES_PER_TOKEN = {
+    'anthropic': 3.5,
+    'openai': 4,
+    'google': 3,
+    'deepseek': 3.5,
+    'xai': 4,
+    'zai': 4,
+};
+/** Get bytes-per-token ratio for a model. Falls back to DEFAULT_BYTES_PER_TOKEN. */
+function getModelBytesPerToken(model) {
+    if (!model)
+        return DEFAULT_BYTES_PER_TOKEN;
+    const provider = model.split('/')[0];
+    return MODEL_BYTES_PER_TOKEN[provider] ?? DEFAULT_BYTES_PER_TOKEN;
+}
+// Store current model for token estimation context
+let _currentModel;
 // ─── API-anchored token tracking ──────────────────────────────────────────
 /** Last known actual token count from API response */
 let lastApiInputTokens = 0;
@@ -59,13 +80,25 @@ export function resetTokenAnchor() {
     lastApiOutputTokens = 0;
     lastApiMessageCount = 0;
 }
+/**
+ * Set the current model for token estimation context.
+ * Called when the model is resolved in the agent loop.
+ */
+export function setEstimationModel(model) {
+    _currentModel = model;
+}
 /**
  * Estimate token count for a string using byte-length heuristic.
- * JSON-heavy content uses 2 bytes/token; general text uses 4.
+ * JSON-heavy content uses 2 bytes/token; general text uses model-specific ratio.
+ *
+ * Padding reduced from 1.33x to 1.15x to prevent premature compaction.
+ * The old 1.33x + ceil() combo caused ~36% overestimation, triggering
+ * auto-compact when context was still 15-20% below the actual limit.
  */
-export function estimateTokens(text, bytesPerToken = DEFAULT_BYTES_PER_TOKEN) {
-    // Pad by 4/3 (~33%) for conservative estimation — better to over-count than under-count
-    return Math.ceil(Buffer.byteLength(text, 'utf-8') / bytesPerToken * 1.33);
+export function estimateTokens(text, bytesPerToken) {
+    const effectiveBPT = bytesPerToken ?? getModelBytesPerToken(_currentModel);
+    // Pad by 15% for safety margin — still conservative but not premature
+    return Math.ceil(Buffer.byteLength(text, 'utf-8') / effectiveBPT * 1.15);
 }
 /**
  * Estimate tokens for a content part.

package/dist/agent/tool-guard.d.ts ADDED Viewed

@@ -0,0 +1,27 @@
+import type { CapabilityInvocation, CapabilityResult, ExecutionScope } from './types.js';
+export declare function normalizeSearchQuery(query: string): {
+    normalized: string;
+    tokens: string[];
+};
+export declare class SessionToolGuard {
+    private turn;
+    private webSearchesThisTurn;
+    private searchFamilies;
+    private searchCache;
+    private pendingSearches;
+    private recentReads;
+    private pendingReads;
+    private recentFetches;
+    private pendingFetches;
+    private toolErrorCounts;
+    startTurn(): void;
+    beforeExecute(invocation: CapabilityInvocation, scope: ExecutionScope): Promise<CapabilityResult | null>;
+    afterExecute(invocation: CapabilityInvocation, result: CapabilityResult): void;
+    cancelInvocation(invocationId: string): void;
+    private beforeWebSearch;
+    private beforeRead;
+    private beforeWebFetch;
+    private afterWebSearch;
+    private afterRead;
+    private afterWebFetch;
+}