npm - @blockrun/franklin - Versions diffs - 3.10.2 → 3.10.4 - Mend

@blockrun/franklin 3.10.2 → 3.10.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/agent/error-classifier.js +8 -1
package/dist/agent/loop.d.ts +18 -1
package/dist/agent/loop.js +77 -0
package/dist/router/local-elo.d.ts +1 -1
package/dist/router/local-elo.js +7 -0
package/dist/ui/app.js +25 -1
package/package.json +2 -2

package/dist/agent/error-classifier.js CHANGED Viewed

@@ -44,9 +44,16 @@ export function classifyAgentError(message) {
         '429',
         'rate limit',
         'too many requests',
+        'too many tokens', // Anthropic per-day TPM cap leak via gateway
+        'tokens per day',
+        'please wait before trying',
+        'quota exceeded',
     ])) {
+        // 1 retry is plenty: a per-second rate limit clears in seconds (one
+        // backoff covers it), but a per-day TPM quota won't clear in this
+        // session at all — caller falls back to a different provider after.
         return {
-            category: 'rate_limit', label: 'RateLimit', isTransient: true,
+            category: 'rate_limit', label: 'RateLimit', isTransient: true, maxRetries: 1,
             suggestion: 'Try /model to switch to a different model, or wait a moment and /retry.',
         };
     }

package/dist/agent/loop.d.ts CHANGED Viewed

@@ -2,7 +2,24 @@
  * Franklin Agent Loop
  * The core reasoning-action cycle: prompt → model → extract capabilities → execute → repeat.
  */
-import type { AgentConfig, Dialogue, StreamEvent } from './types.js';
+import type { AgentConfig, ContentPart, Dialogue, StreamEvent } from './types.js';
+/**
+ * Detect when the gateway leaked an upstream rate-limit / quota error as a
+ * 200-OK text content block instead of a real HTTP error. The Anthropic
+ * provider in particular surfaces per-day TPM exhaustion as a bracketed
+ * "[Error: Too many tokens per day, please wait before trying again.]"
+ * message glued into the assistant text channel, which then poisons grounding
+ * checks and gets persisted to session history as if it were a real reply.
+ *
+ * Matches when the joined, trimmed text parts are exactly one bracketed
+ * `[Error: ...]` and no tool_use part is present; `message` is the bracket's
+ * inner text ('' when `match` is false). NOTE(review): thinking parts are
+ * ignored, not rejected. The caller throws so the classifier + retry path runs.
+ */
+export declare function looksLikeGatewayErrorAsText(parts: ContentPart[]): {
+    match: boolean;
+    message: string;
+};
 /**
  * Identify models known to hallucinate tool calls (invented names, literal
  * `[TOOLCALL]` / `<tool_call>` text in answers) — they need the explicit

package/dist/agent/loop.js CHANGED Viewed

@@ -206,6 +206,42 @@ function stripMediaFromHistory(history) {
     });
     return { history: stripped ? result : history, stripped };
 }
+/**
+ * Detect when the gateway leaked an upstream rate-limit / quota error as a
+ * 200-OK text content block instead of a real HTTP error. The Anthropic
+ * provider in particular surfaces per-day TPM exhaustion as a bracketed
+ * "[Error: Too many tokens per day, please wait before trying again.]"
+ * message glued into the assistant text channel, which then poisons grounding
+ * checks and gets persisted to session history as if it were a real reply.
+ *
+ * Matches when the joined, trimmed text parts are exactly one bracketed
+ * `[Error: ...]` and no tool_use part is present; `message` is the bracket's
+ * inner text ('' when `match` is false). NOTE(review): thinking parts are
+ * ignored, not rejected. The caller throws so the classifier + retry path runs.
+ */
+export function looksLikeGatewayErrorAsText(parts) {
+    if (parts.length === 0)
+        return { match: false, message: '' };
+    // Bail out on any tool_use part; other non-text parts (e.g. thinking) are skipped, not rejected.
+    const textParts = [];
+    for (const p of parts) {
+        if (p.type === 'tool_use')
+            return { match: false, message: '' };
+        if (p.type === 'text' && typeof p.text === 'string') {
+            textParts.push(p.text);
+        }
+    }
+    const joined = textParts.join('').trim();
+    if (!joined)
+        return { match: false, message: '' };
+    // Pattern: `[Error: ...]` taking up the entire text payload. The regex is
+    // anchored at both ends, so only whitespace may follow the closing bracket
+    // (covers a stray trailing newline); the inner text cannot contain `]`.
+    const m = /^\[Error:\s*([^\]]+?)\]\s*$/.exec(joined);
+    if (!m)
+        return { match: false, message: '' };
+    return { match: true, message: m[1].trim() };
+}
 /**
  * Calculate backoff delay with jitter to avoid thundering herd.
  * Base: exponential (2^attempt * 1000ms), jitter: ±25%.
@@ -1024,6 +1060,33 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                         continue; // Retry with next model
                     }
                 }
+                // ── Rate-limit / quota: auto-fallback to a different provider ──
+                // Per-day TPM caps (Anthropic) won't clear in this session; per-second
+                // limits already had their backoff retry above and still failed. In
+                // both cases, the productive next move is to run the same turn on a
+                // model from a different provider rather than thrash on the failing
+                // one. Mirror the payment fallback shape: mark the model as failed
+                // for this turn and pick the next free model that hasn't failed yet.
+                if (classified.category === 'rate_limit') {
+                    turnFailedModels.add(config.model);
+                    if (lastRoutedCategory) {
+                        recordOutcome(lastRoutedCategory, config.model, 'rate_limit');
+                    }
+                    const FREE_MODELS = ['nvidia/qwen3-coder-480b', 'nvidia/llama-4-maverick', 'nvidia/glm-4.7'];
+                    const nextFree = FREE_MODELS.find(m => !turnFailedModels.has(m));
+                    if (nextFree) {
+                        const oldModel = config.model;
+                        config.model = nextFree;
+                        config.onModelChange?.(nextFree, 'system');
+                        // Reset retry counter — the new model gets its own retry budget.
+                        recoveryAttempts = 0;
+                        onEvent({
+                            kind: 'text_delta',
+                            text: `\n*${oldModel} rate-limited — switching to ${nextFree}*\n`,
+                        });
+                        continue;
+                    }
+                }
                 // ── Unrecoverable: show error with suggestion from classifier ──
                 const suggestion = classified.suggestion ? `\nTip: ${classified.suggestion}` : '';
                 onEvent({
@@ -1165,6 +1228,20 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                 lastSessionActivity = Date.now();
                 continue; // Retry with higher limit
             }
+            // ── Gateway error masquerading as text (BlockRun → Anthropic TPM) ──
+            // Some upstreams swallow rate-limit / quota errors and emit them as a
+            // single bracketed text block on a 200 OK. Persisting that as a real
+            // assistant reply poisons history (the next turn sees an "answer" that
+            // is actually a transport error) and triggers grounding-check retries
+            // that hit the same wall. Detect, throw into the classifier, and let
+            // the existing recovery flow handle it.
+            const gatewayErr = looksLikeGatewayErrorAsText(responseParts);
+            if (gatewayErr.match) {
+                if (config.debug) {
+                    console.error(`[franklin] Gateway returned an error text in lieu of an answer (${resolvedModel}): ${gatewayErr.message}`);
+                }
+                throw new Error(gatewayErr.message);
+            }
             // Reset recovery counter on successful completion
             recoveryAttempts = 0;
             // Extract tool invocations (text/thinking already streamed in real-time)

package/dist/router/local-elo.d.ts CHANGED Viewed

@@ -5,7 +5,7 @@
  * Storage: ~/.blockrun/router-history.jsonl (append-only, capped 2000 records)
  * Never uploaded — purely local personalization.
  */
-export type Outcome = 'continued' | 'switched' | 'retried' | 'error' | 'max_turns' | 'payment';
+export type Outcome = 'continued' | 'switched' | 'retried' | 'error' | 'max_turns' | 'payment' | 'rate_limit';
 /**
  * Record a model outcome for local learning.
  */

package/dist/router/local-elo.js CHANGED Viewed

@@ -92,6 +92,13 @@ export function computeLocalElo() {
                     case 'payment':
                         delta = -K_FACTOR * 1.5;
                         break;
+                    // Rate-limited: provider isn't broken, just exhausted right now.
+                    // Penalize less than payment (which won't clear without action) but
+                    // more than a generic error so the router avoids the same provider
+                    // for the rest of the session.
+                    case 'rate_limit':
+                        delta = -K_FACTOR * 1.2;
+                        break;
                     case 'max_turns':
                         delta = -K_FACTOR * 0.3;
                         break;

package/dist/ui/app.js CHANGED Viewed

@@ -15,6 +15,25 @@ import { estimateCost } from '../pricing.js';
 import { formatTokens, shortModelName } from '../stats/format.js';
 import { mouse, forceDisableMouseTracking } from './mouse.js';
 // ─── Full-width input box ──────────────────────────────────────────────────
+const DISABLE_AUTO_WRAP = '\x1b[?7l';
+const ENABLE_AUTO_WRAP = '\x1b[?7h';
+function disableTerminalAutoWrap() { // Writes DISABLE_AUTO_WRAP to stdout; returns a restore callback, or undefined when stdout is not a TTY.
+    if (!process.stdout.isTTY)
+        return undefined;
+    let restored = false; // guards against writing ENABLE_AUTO_WRAP more than once
+    const restore = () => {
+        if (restored || !process.stdout.writable) // already restored, or stdout can no longer be written to
+            return;
+        restored = true;
+        process.stdout.write(ENABLE_AUTO_WRAP);
+    };
+    process.stdout.write(DISABLE_AUTO_WRAP);
+    process.once('exit', restore); // fallback: restore on process exit if the returned callback is never invoked
+    return () => {
+        process.off('exit', restore); // drop the exit hook before restoring manually
+        restore();
+    };
+}
 // Subscribe to terminal resize so React re-renders with fresh dimensions.
 // Without this, useStdout() returns a stable ref and children that read
 // stdout.columns on each render still need React to re-execute them — which
@@ -775,6 +794,7 @@ export function launchInkUI(opts) {
     let pendingInput = null; // Queue for inputs that arrive before waitForInput
     let exiting = false;
     let abortCallback = null;
+    const restoreTerminalAutoWrap = disableTerminalAutoWrap();
     const instance = render(_jsx(RunCodeApp, { initialModel: opts.model, workDir: opts.workDir, walletAddress: opts.walletAddress || 'not set — run: franklin setup', walletBalance: opts.walletBalance || 'unknown', chain: opts.chain || 'base', startWithPicker: opts.showPicker, onSubmit: (value) => {
             if (resolveInput) {
                 resolveInput(value);
@@ -820,7 +840,11 @@ export function launchInkUI(opts) {
             return new Promise((resolve) => { resolveInput = resolve; });
         },
         onAbort: (cb) => { abortCallback = cb; },
-        cleanup: () => { mouse.disable(); instance.unmount(); },
+        cleanup: () => {
+            mouse.disable();
+            instance.unmount();
+            restoreTerminalAutoWrap?.();
+        },
         requestPermission: (toolName, description) => {
             const ui = globalThis.__franklin_ui;
             return ui?.requestPermission(toolName, description) ?? Promise.resolve('no');

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@blockrun/franklin",
-  "version": "3.10.2",
+  "version": "3.10.4",
   "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
   "type": "module",
   "exports": {
@@ -66,7 +66,7 @@
     "node": ">=20"
   },
   "dependencies": {
-    "@blockrun/llm": "^1.4.2",
+    "@blockrun/llm": "^1.13.0",
     "@modelcontextprotocol/sdk": "^1.29.0",
     "@solana/spl-token": "^0.4.14",
     "@solana/web3.js": "^1.98.4",