npm - @blockrun/franklin - Versions diffs - 3.15.6 → 3.15.7 - Mend

@blockrun/franklin 3.15.6 → 3.15.7

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (2)

package/dist/agent/loop.js +41 -2
package/package.json +1 -1

package/dist/agent/loop.js CHANGED Viewed

@@ -22,7 +22,7 @@ import { appendAudit, extractLastUserPrompt } from '../stats/audit.js';
 import { estimateCost, OPUS_PRICING } from '../pricing.js';
 import { maybeMidSessionExtract } from '../learnings/extractor.js';
 import { extractMentions, buildEntityContext, loadEntities } from '../brain/store.js';
-import { routeRequestAsync, resolveTierToModel, parseRoutingProfile } from '../router/index.js';
+import { routeRequestAsync, resolveTierToModel, parseRoutingProfile, getFallbackChain } from '../router/index.js';
 import { recordOutcome } from '../router/local-elo.js';
 import { shouldPlan, getPlanningPrompt, getExecutorModel, isExecutorStuck, toolCallSignature } from './planner.js';
 import { shouldVerify, runVerification } from './verification.js';
@@ -505,6 +505,11 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
         let recoveryAttempts = 0;
         let autoContinuationCount = 0;
         const MAX_RECOVERY_ATTEMPTS = 5;
+        // Track per-model server-error streak so we can break out of a stuck
+        // upstream and try the next model in the routing fallback chain instead
+        // of burning all MAX_RECOVERY_ATTEMPTS retries on the same failure.
+        const serverErrorsByModel = new Map();
+        const SERVER_ERROR_STREAK_BEFORE_SWITCH = 2;
         let compactFailures = 0;
         let maxTokensOverride;
         const turnIdleReference = lastSessionActivity;
@@ -993,14 +998,48 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                     }
                 }
                 if (classified.isTransient && recoveryAttempts < effectiveMaxRetries) {
+                    // Server-error streak guard: if the same model 5xx's twice in a row
+                    // it's almost always an upstream incident, not a blip. Switch to
+                    // the next routing fallback instead of waiting out 5 backoffs on a
+                    // dead provider — same idea as the payment-failure auto-fallback
+                    // below, but for transient server errors. Skipped for non-server
+                    // transients (rate limits, network blips) where retry is the right
+                    // call. Also skipped when the user picked a concrete model — they
+                    // explicitly chose this one, so we shouldn't silently swap.
+                    if (classified.category === 'server' && parseRoutingProfile(config.model)) {
+                        const streak = (serverErrorsByModel.get(resolvedModel) ?? 0) + 1;
+                        serverErrorsByModel.set(resolvedModel, streak);
+                        if (streak >= SERVER_ERROR_STREAK_BEFORE_SWITCH) {
+                            const fallbackChain = getFallbackChain(routingTier ?? 'MEDIUM', parseRoutingProfile(config.model) ?? 'auto');
+                            const nextModel = fallbackChain.find(m => m !== resolvedModel && (serverErrorsByModel.get(m) ?? 0) < SERVER_ERROR_STREAK_BEFORE_SWITCH);
+                            if (nextModel) {
+                                config.model = nextModel;
+                                config.onModelChange?.(nextModel, 'system');
+                                recoveryAttempts = 0;
+                                onEvent({
+                                    kind: 'text_delta',
+                                    text: `\n*${resolvedModel} keeps 5xx'ing (${streak} in a row) — switching to ${nextModel}*\n`,
+                                });
+                                continue;
+                            }
+                            // No alternative left in the fallback chain — fall through to
+                            // the normal retry path so we at least exhaust attempts before
+                            // surrender.
+                        }
+                    }
                     recoveryAttempts++;
                     const backoffMs = getBackoffDelay(recoveryAttempts);
                     if (config.debug) {
                         console.error(`[franklin] ${classified.label} error — retrying in ${(backoffMs / 1000).toFixed(1)}s (attempt ${recoveryAttempts}/${effectiveMaxRetries}): ${errMsg.slice(0, 100)}`);
                     }
+                    // Surface the actual error + model so the user can see which model
+                    // is failing and what the upstream said. Old "Retrying after Server
+                    // error" was uninformative — users couldn't tell whether to wait,
+                    // /retry, or /model-switch.
+                    const errSnippet = errMsg.replace(/\s+/g, ' ').slice(0, 100);
                     onEvent({
                         kind: 'text_delta',
-                        text: `\n*Retrying (${recoveryAttempts}/${effectiveMaxRetries}) after ${classified.label} error...*\n`,
+                        text: `\n*Retrying ${recoveryAttempts}/${effectiveMaxRetries} on ${resolvedModel} — ${classified.label}: ${errSnippet}*\n`,
                     });
                     await new Promise(r => setTimeout(r, backoffMs));
                     continue;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@blockrun/franklin",
-  "version": "3.15.6",
+  "version": "3.15.7",
   "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
   "type": "module",
   "exports": {