@blockrun/franklin 3.15.5 → 3.15.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/llm.d.ts +5 -3
- package/dist/agent/llm.js +10 -7
- package/dist/agent/loop.js +41 -2
- package/dist/agent/optimize.js +5 -1
- package/dist/agent/planner.d.ts +1 -1
- package/dist/agent/planner.js +8 -10
- package/dist/agent/tokens.js +5 -3
- package/dist/pricing.js +10 -6
- package/dist/proxy/fallback.d.ts +1 -1
- package/dist/proxy/fallback.js +2 -2
- package/dist/proxy/server.js +5 -3
- package/dist/router/index.d.ts +1 -1
- package/dist/router/index.js +35 -99
- package/dist/ui/model-picker.js +45 -11
- package/package.json +1 -1
package/dist/agent/llm.d.ts
CHANGED
|
@@ -107,10 +107,12 @@ export declare class ModelClient {
|
|
|
107
107
|
* Handles x402 payment automatically on 402 responses.
|
|
108
108
|
*/
|
|
109
109
|
/**
|
|
110
|
-
* Resolve virtual routing profiles (blockrun/auto, blockrun/
|
|
111
|
-
*
|
|
110
|
+
* Resolve virtual routing profiles (blockrun/auto, blockrun/free) to
|
|
111
|
+
* concrete models. This is the final safety net — if the router in
|
|
112
112
|
* loop.ts didn't resolve it (e.g. old global install without router),
|
|
113
|
-
* we resolve it here before hitting the API.
|
|
113
|
+
* we resolve it here before hitting the API. Legacy blockrun/eco and
|
|
114
|
+
* blockrun/premium fall through the unknown-key path to the same
|
|
115
|
+
* default model.
|
|
114
116
|
*/
|
|
115
117
|
private resolveVirtualModel;
|
|
116
118
|
streamCompletion(request: ModelRequest, signal?: AbortSignal): AsyncGenerator<StreamChunk>;
|
package/dist/agent/llm.js
CHANGED
|
@@ -260,10 +260,12 @@ export class ModelClient {
|
|
|
260
260
|
* Handles x402 payment automatically on 402 responses.
|
|
261
261
|
*/
|
|
262
262
|
/**
|
|
263
|
-
* Resolve virtual routing profiles (blockrun/auto, blockrun/
|
|
264
|
-
*
|
|
263
|
+
* Resolve virtual routing profiles (blockrun/auto, blockrun/free) to
|
|
264
|
+
* concrete models. This is the final safety net — if the router in
|
|
265
265
|
* loop.ts didn't resolve it (e.g. old global install without router),
|
|
266
|
-
* we resolve it here before hitting the API.
|
|
266
|
+
* we resolve it here before hitting the API. Legacy blockrun/eco and
|
|
267
|
+
* blockrun/premium fall through the unknown-key path to the same
|
|
268
|
+
* default model.
|
|
267
269
|
*/
|
|
268
270
|
resolveVirtualModel(model) {
|
|
269
271
|
if (!model.startsWith('blockrun/'))
|
|
@@ -280,12 +282,13 @@ export class ModelClient {
|
|
|
280
282
|
catch {
|
|
281
283
|
// Router not available (e.g. old build) — use hardcoded fallback table
|
|
282
284
|
}
|
|
283
|
-
// Static fallback
|
|
284
|
-
// users aren't silently charged
|
|
285
|
+
// Static fallback when the router module isn't loadable. Defaults to a
|
|
286
|
+
// FREE model so users aren't silently charged. The unknown-key path also
|
|
287
|
+
// falls through to qwen, so legacy `blockrun/eco` / `blockrun/premium`
|
|
288
|
+
// strings (now retired routing profiles) end up at the same place
|
|
289
|
+
// without needing dedicated entries.
|
|
285
290
|
const FALLBACKS = {
|
|
286
291
|
'blockrun/auto': 'nvidia/qwen3-coder-480b',
|
|
287
|
-
'blockrun/eco': 'nvidia/qwen3-coder-480b',
|
|
288
|
-
'blockrun/premium': 'anthropic/claude-sonnet-4.6',
|
|
289
292
|
'blockrun/free': 'nvidia/qwen3-coder-480b',
|
|
290
293
|
};
|
|
291
294
|
return FALLBACKS[model] || 'nvidia/qwen3-coder-480b';
|
package/dist/agent/loop.js
CHANGED
|
@@ -22,7 +22,7 @@ import { appendAudit, extractLastUserPrompt } from '../stats/audit.js';
|
|
|
22
22
|
import { estimateCost, OPUS_PRICING } from '../pricing.js';
|
|
23
23
|
import { maybeMidSessionExtract } from '../learnings/extractor.js';
|
|
24
24
|
import { extractMentions, buildEntityContext, loadEntities } from '../brain/store.js';
|
|
25
|
-
import { routeRequestAsync, resolveTierToModel, parseRoutingProfile } from '../router/index.js';
|
|
25
|
+
import { routeRequestAsync, resolveTierToModel, parseRoutingProfile, getFallbackChain } from '../router/index.js';
|
|
26
26
|
import { recordOutcome } from '../router/local-elo.js';
|
|
27
27
|
import { shouldPlan, getPlanningPrompt, getExecutorModel, isExecutorStuck, toolCallSignature } from './planner.js';
|
|
28
28
|
import { shouldVerify, runVerification } from './verification.js';
|
|
@@ -505,6 +505,11 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
505
505
|
let recoveryAttempts = 0;
|
|
506
506
|
let autoContinuationCount = 0;
|
|
507
507
|
const MAX_RECOVERY_ATTEMPTS = 5;
|
|
508
|
+
// Track per-model server-error streak so we can break out of a stuck
|
|
509
|
+
// upstream and try the next model in the routing fallback chain instead
|
|
510
|
+
// of burning all MAX_RECOVERY_ATTEMPTS retries on the same failure.
|
|
511
|
+
const serverErrorsByModel = new Map();
|
|
512
|
+
const SERVER_ERROR_STREAK_BEFORE_SWITCH = 2;
|
|
508
513
|
let compactFailures = 0;
|
|
509
514
|
let maxTokensOverride;
|
|
510
515
|
const turnIdleReference = lastSessionActivity;
|
|
@@ -993,14 +998,48 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
993
998
|
}
|
|
994
999
|
}
|
|
995
1000
|
if (classified.isTransient && recoveryAttempts < effectiveMaxRetries) {
|
|
1001
|
+
// Server-error streak guard: if the same model 5xx's twice in a row
|
|
1002
|
+
// it's almost always an upstream incident, not a blip. Switch to
|
|
1003
|
+
// the next routing fallback instead of waiting out 5 backoffs on a
|
|
1004
|
+
// dead provider — same idea as the payment-failure auto-fallback
|
|
1005
|
+
// below, but for transient server errors. Skipped for non-server
|
|
1006
|
+
// transients (rate limits, network blips) where retry is the right
|
|
1007
|
+
// call. Also skipped when the user picked a concrete model — they
|
|
1008
|
+
// explicitly chose this one, so we shouldn't silently swap.
|
|
1009
|
+
if (classified.category === 'server' && parseRoutingProfile(config.model)) {
|
|
1010
|
+
const streak = (serverErrorsByModel.get(resolvedModel) ?? 0) + 1;
|
|
1011
|
+
serverErrorsByModel.set(resolvedModel, streak);
|
|
1012
|
+
if (streak >= SERVER_ERROR_STREAK_BEFORE_SWITCH) {
|
|
1013
|
+
const fallbackChain = getFallbackChain(routingTier ?? 'MEDIUM', parseRoutingProfile(config.model) ?? 'auto');
|
|
1014
|
+
const nextModel = fallbackChain.find(m => m !== resolvedModel && (serverErrorsByModel.get(m) ?? 0) < SERVER_ERROR_STREAK_BEFORE_SWITCH);
|
|
1015
|
+
if (nextModel) {
|
|
1016
|
+
config.model = nextModel;
|
|
1017
|
+
config.onModelChange?.(nextModel, 'system');
|
|
1018
|
+
recoveryAttempts = 0;
|
|
1019
|
+
onEvent({
|
|
1020
|
+
kind: 'text_delta',
|
|
1021
|
+
text: `\n*${resolvedModel} keeps 5xx'ing (${streak} in a row) — switching to ${nextModel}*\n`,
|
|
1022
|
+
});
|
|
1023
|
+
continue;
|
|
1024
|
+
}
|
|
1025
|
+
// No alternative left in the fallback chain — fall through to
|
|
1026
|
+
// the normal retry path so we at least exhaust attempts before
|
|
1027
|
+
// surrender.
|
|
1028
|
+
}
|
|
1029
|
+
}
|
|
996
1030
|
recoveryAttempts++;
|
|
997
1031
|
const backoffMs = getBackoffDelay(recoveryAttempts);
|
|
998
1032
|
if (config.debug) {
|
|
999
1033
|
console.error(`[franklin] ${classified.label} error — retrying in ${(backoffMs / 1000).toFixed(1)}s (attempt ${recoveryAttempts}/${effectiveMaxRetries}): ${errMsg.slice(0, 100)}`);
|
|
1000
1034
|
}
|
|
1035
|
+
// Surface the actual error + model so the user can see which model
|
|
1036
|
+
// is failing and what the upstream said. Old "Retrying after Server
|
|
1037
|
+
// error" was uninformative — users couldn't tell whether to wait,
|
|
1038
|
+
// /retry, or /model-switch.
|
|
1039
|
+
const errSnippet = errMsg.replace(/\s+/g, ' ').slice(0, 100);
|
|
1001
1040
|
onEvent({
|
|
1002
1041
|
kind: 'text_delta',
|
|
1003
|
-
text: `\n*Retrying
|
|
1042
|
+
text: `\n*Retrying ${recoveryAttempts}/${effectiveMaxRetries} on ${resolvedModel} — ${classified.label}: ${errSnippet}*\n`,
|
|
1004
1043
|
});
|
|
1005
1044
|
await new Promise(r => setTimeout(r, backoffMs));
|
|
1006
1045
|
continue;
|
package/dist/agent/optimize.js
CHANGED
|
@@ -34,7 +34,11 @@ const MODEL_MAX_OUTPUT = {
|
|
|
34
34
|
'openai/gpt-5-mini': 16_384,
|
|
35
35
|
'google/gemini-2.5-pro': 65_536,
|
|
36
36
|
'google/gemini-2.5-flash': 65_536,
|
|
37
|
-
|
|
37
|
+
// DeepSeek V4 family — upstream max_output is 65K on V4 Flash + V4 Pro;
|
|
38
|
+
// gateway re-aliased deepseek-chat/-reasoner to V4 Flash modes 2026-05-03.
|
|
39
|
+
'deepseek/deepseek-chat': 65_536,
|
|
40
|
+
'deepseek/deepseek-reasoner': 65_536,
|
|
41
|
+
'deepseek/deepseek-v4-pro': 65_536,
|
|
38
42
|
// Kimi K2.6 supports 65K output per the BlockRun gateway model entry
|
|
39
43
|
// (moonshot/kimi-k2.6 max_output: 65536). Without this entry the default
|
|
40
44
|
// 16K cap left users with 4× headroom on the table for long-form coding
|
package/dist/agent/planner.d.ts
CHANGED
|
@@ -30,7 +30,7 @@ export declare function getPlanningPrompt(): string;
|
|
|
30
30
|
* These models are good at following structured instructions (the plan)
|
|
31
31
|
* but much cheaper than the planning model.
|
|
32
32
|
*/
|
|
33
|
-
export declare function getExecutorModel(
|
|
33
|
+
export declare function getExecutorModel(_profile: RoutingProfile): string;
|
|
34
34
|
/**
|
|
35
35
|
* Extract numbered steps from plan text.
|
|
36
36
|
* Handles formats like "1. Do X", "1) Do X", "Step 1: Do X".
|
package/dist/agent/planner.js
CHANGED
|
@@ -34,8 +34,10 @@ export function shouldPlan(profile, ultrathink, planDisabled, analyzerSaysNeedsP
|
|
|
34
34
|
return false;
|
|
35
35
|
if (ultrathink)
|
|
36
36
|
return false; // ultrathink already provides deep reasoning
|
|
37
|
-
// Only auto
|
|
38
|
-
|
|
37
|
+
// Only the 'auto' profile uses planning. 'free' is cost-constrained;
|
|
38
|
+
// legacy 'eco' / 'premium' both alias to 'auto' via parseRoutingProfile,
|
|
39
|
+
// so this check covers them implicitly.
|
|
40
|
+
if (profile !== 'auto')
|
|
39
41
|
return false;
|
|
40
42
|
// Final decision comes from the turn analyzer's boolean flag.
|
|
41
43
|
return analyzerSaysNeedsPlanning;
|
|
@@ -64,14 +66,10 @@ Rules:
|
|
|
64
66
|
* These models are good at following structured instructions (the plan)
|
|
65
67
|
* but much cheaper than the planning model.
|
|
66
68
|
*/
|
|
67
|
-
export function getExecutorModel(
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
case 'auto':
|
|
72
|
-
default:
|
|
73
|
-
return 'google/gemini-2.5-flash'; // Cheap, fast, good at instructions
|
|
74
|
-
}
|
|
69
|
+
export function getExecutorModel(_profile) {
|
|
70
|
+
// Auto is the only profile that runs planning (see shouldPlan above), so
|
|
71
|
+
// there's only one executor branch to pick. 'free' never reaches here.
|
|
72
|
+
return 'google/gemini-2.5-flash';
|
|
75
73
|
}
|
|
76
74
|
// ─── Plan Parsing ────────────────────────────────────────────────────────
|
|
77
75
|
/**
|
package/dist/agent/tokens.js
CHANGED
|
@@ -185,9 +185,11 @@ const MODEL_CONTEXT_WINDOWS = {
|
|
|
185
185
|
'google/gemini-2.5-flash': 1_000_000,
|
|
186
186
|
'google/gemini-2.5-flash-lite': 1_000_000,
|
|
187
187
|
'google/gemini-3.1-pro': 1_000_000,
|
|
188
|
-
// DeepSeek
|
|
189
|
-
|
|
190
|
-
'deepseek/deepseek-
|
|
188
|
+
// DeepSeek (V4 family — gateway aliased deepseek-chat / -reasoner to V4
|
|
189
|
+
// Flash on 2026-05-03; context bumped 128K → 1M for both, 65K out)
|
|
190
|
+
'deepseek/deepseek-chat': 1_000_000,
|
|
191
|
+
'deepseek/deepseek-reasoner': 1_000_000,
|
|
192
|
+
'deepseek/deepseek-v4-pro': 1_000_000,
|
|
191
193
|
// xAI
|
|
192
194
|
'xai/grok-3': 131_072,
|
|
193
195
|
'xai/grok-4-0709': 131_072,
|
package/dist/pricing.js
CHANGED
|
@@ -3,10 +3,10 @@
|
|
|
3
3
|
* Used by agent loop, proxy server, stats tracker, and router.
|
|
4
4
|
*/
|
|
5
5
|
export const MODEL_PRICING = {
|
|
6
|
-
// Routing profiles (blended averages)
|
|
6
|
+
// Routing profiles (blended averages). Auto + Free are the only profiles
|
|
7
|
+
// surfaced after the 2026-05-03 collapse; eco/premium were retired and
|
|
8
|
+
// their parser mapping promotes them to Auto upstream of cost estimation.
|
|
7
9
|
'blockrun/auto': { input: 0.8, output: 4.0 },
|
|
8
|
-
'blockrun/eco': { input: 0.2, output: 1.0 },
|
|
9
|
-
'blockrun/premium': { input: 3.0, output: 15.0 },
|
|
10
10
|
'blockrun/free': { input: 0, output: 0 },
|
|
11
11
|
// FREE — BlockRun gateway free tier (refreshed 2026-04-29 with V4 Flash + Omni launch)
|
|
12
12
|
'nvidia/deepseek-v4-flash': { input: 0, output: 0 },
|
|
@@ -67,9 +67,13 @@ export const MODEL_PRICING = {
|
|
|
67
67
|
'xai/grok-3-mini': { input: 0.3, output: 0.5 },
|
|
68
68
|
'xai/grok-2-vision': { input: 2.0, output: 10.0 },
|
|
69
69
|
'xai/grok-3': { input: 3.0, output: 15.0 },
|
|
70
|
-
// DeepSeek
|
|
71
|
-
|
|
72
|
-
'deepseek/deepseek-
|
|
70
|
+
// DeepSeek (gateway re-aliased these to V4 Flash on 2026-05-03; price
|
|
71
|
+
// dropped from $0.28/$0.42 to $0.20/$0.40, context bumped 128K→1M).
|
|
72
|
+
'deepseek/deepseek-chat': { input: 0.20, output: 0.40 },
|
|
73
|
+
'deepseek/deepseek-reasoner': { input: 0.20, output: 0.40 },
|
|
74
|
+
// V4 Pro (1.6T MoE / 49B active, 1M ctx, 65K out). 75% launch promo
|
|
75
|
+
// through 2026-05-31 — list is $2.00/$4.00, promo is $0.50/$1.00.
|
|
76
|
+
'deepseek/deepseek-v4-pro': { input: 0.50, output: 1.00 },
|
|
73
77
|
// Minimax
|
|
74
78
|
'minimax/minimax-m2.7': { input: 0.3, output: 1.2 },
|
|
75
79
|
'minimax/minimax-m2.5': { input: 0.3, output: 1.2 },
|
package/dist/proxy/fallback.d.ts
CHANGED
|
@@ -30,7 +30,7 @@ export declare function fetchWithFallback(url: string, init: RequestInit, origin
|
|
|
30
30
|
* Get the current model from fallback chain based on parsed request
|
|
31
31
|
*/
|
|
32
32
|
export declare function getCurrentModelFromChain(requestedModel: string | undefined, config?: FallbackConfig): string;
|
|
33
|
-
/** Routing profiles that must never be sent to the backend directly */
|
|
33
|
+
/** Routing profiles that must never be sent to the backend directly. */
|
|
34
34
|
export declare const ROUTING_PROFILES: Set<string>;
|
|
35
35
|
/**
|
|
36
36
|
* Build fallback chain starting from a specific model.
|
package/dist/proxy/fallback.js
CHANGED
|
@@ -119,9 +119,9 @@ export function getCurrentModelFromChain(requestedModel, config = DEFAULT_FALLBA
|
|
|
119
119
|
// Default to first model in chain
|
|
120
120
|
return config.chain[0];
|
|
121
121
|
}
|
|
122
|
-
/** Routing profiles that must never be sent to the backend directly */
|
|
122
|
+
/** Routing profiles that must never be sent to the backend directly. */
|
|
123
123
|
export const ROUTING_PROFILES = new Set([
|
|
124
|
-
'blockrun/auto', 'blockrun/eco', 'blockrun/premium', 'blockrun/free',
|
|
124
|
+
'blockrun/auto', 'blockrun/free',
|
|
125
125
|
]);
|
|
126
126
|
/**
|
|
127
127
|
* Build fallback chain starting from a specific model.
|
package/dist/proxy/server.js
CHANGED
|
@@ -111,11 +111,13 @@ function trackOutputTokens(model, tokens) {
|
|
|
111
111
|
}
|
|
112
112
|
// Model shortcuts for quick switching
|
|
113
113
|
const MODEL_SHORTCUTS = {
|
|
114
|
-
// Routing profiles
|
|
114
|
+
// Routing profiles — Auto-only since 2026-05-03 (Eco/Premium retired).
|
|
115
|
+
// `eco` / `premium` aliases retained for back-compat with proxy clients;
|
|
116
|
+
// they parse to Auto downstream.
|
|
115
117
|
auto: 'blockrun/auto',
|
|
116
118
|
smart: 'blockrun/auto',
|
|
117
|
-
eco: 'blockrun/eco',
|
|
118
|
-
premium: 'blockrun/premium',
|
|
119
|
+
eco: 'blockrun/auto',
|
|
120
|
+
premium: 'blockrun/auto',
|
|
119
121
|
// Anthropic
|
|
120
122
|
sonnet: 'anthropic/claude-sonnet-4.6',
|
|
121
123
|
claude: 'anthropic/claude-sonnet-4.6',
|
package/dist/router/index.d.ts
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
*/
|
|
12
12
|
import { type Category } from './categories.js';
|
|
13
13
|
export type Tier = 'SIMPLE' | 'MEDIUM' | 'COMPLEX' | 'REASONING';
|
|
14
|
-
export type RoutingProfile = 'auto' | 'eco' | 'premium' | 'free';
|
|
14
|
+
export type RoutingProfile = 'auto' | 'free';
|
|
15
15
|
export interface RoutingResult {
|
|
16
16
|
model: string;
|
|
17
17
|
tier: Tier;
|
package/dist/router/index.js
CHANGED
|
@@ -33,73 +33,42 @@ function loadLearnedWeights() {
|
|
|
33
33
|
return null;
|
|
34
34
|
}
|
|
35
35
|
// ─── Tier Model Configs ───
|
|
36
|
-
//
|
|
37
|
-
//
|
|
38
|
-
//
|
|
39
|
-
//
|
|
36
|
+
// Auto-routing strategy (post-DeepSeek-V4-Pro launch promo, 2026-05-03):
|
|
37
|
+
// V4 Pro at $0.50/$1.00 with 1M context is the new sweet spot for SIMPLE +
|
|
38
|
+
// MEDIUM agent work — Sonnet-quality reasoning at ~1/6 the price. Reserve
|
|
39
|
+
// Opus only for genuinely complex multi-file/multi-decision tasks where
|
|
40
|
+
// the model's wider context handling and tighter tool-use discipline still
|
|
41
|
+
// pay for themselves. Sonnet drops to fallback because V4 Pro covers most
|
|
42
|
+
// of what users were calling Sonnet for, at a fraction of the cost.
|
|
40
43
|
const AUTO_TIERS = {
|
|
41
44
|
SIMPLE: {
|
|
42
|
-
primary: '
|
|
43
|
-
fallback: ['moonshot/kimi-k2.6', 'deepseek/deepseek-chat'],
|
|
45
|
+
primary: 'deepseek/deepseek-v4-pro',
|
|
46
|
+
fallback: ['google/gemini-2.5-flash', 'moonshot/kimi-k2.6', 'deepseek/deepseek-chat'],
|
|
44
47
|
},
|
|
45
48
|
MEDIUM: {
|
|
46
|
-
primary: '
|
|
47
|
-
fallback: ['openai/gpt-5.5', 'google/gemini-3.1-pro'
|
|
49
|
+
primary: 'deepseek/deepseek-v4-pro',
|
|
50
|
+
fallback: ['anthropic/claude-sonnet-4.6', 'openai/gpt-5.5', 'google/gemini-3.1-pro'],
|
|
48
51
|
},
|
|
49
52
|
COMPLEX: {
|
|
50
|
-
|
|
51
|
-
|
|
53
|
+
// Hard tasks — multi-file refactors, ambiguous specs, dense reasoning
|
|
54
|
+
// chains — still go to Opus. V4 Pro is great but not a Sonnet/Opus
|
|
55
|
+
// replacement at the high end of difficulty per recent agent-bench runs.
|
|
56
|
+
primary: 'anthropic/claude-opus-4.7',
|
|
57
|
+
fallback: ['openai/gpt-5.5', 'anthropic/claude-sonnet-4.6', 'deepseek/deepseek-v4-pro'],
|
|
52
58
|
},
|
|
53
59
|
REASONING: {
|
|
54
60
|
// Opus 4.7: step-change improvement in agentic coding over 4.6 per
|
|
55
|
-
// Anthropic.
|
|
56
|
-
// swap is cost-neutral. 4.6 stays in the fallback chain in case of
|
|
57
|
-
// rollout delays on the gateway side.
|
|
61
|
+
// Anthropic. 4.6 stays in the fallback chain in case of rollout delays.
|
|
58
62
|
primary: 'anthropic/claude-opus-4.7',
|
|
59
63
|
fallback: [
|
|
60
64
|
'anthropic/claude-opus-4.6',
|
|
61
65
|
'openai/o3',
|
|
66
|
+
'deepseek/deepseek-v4-pro',
|
|
62
67
|
'xai/grok-4-1-fast-reasoning',
|
|
63
68
|
'deepseek/deepseek-reasoner',
|
|
64
69
|
],
|
|
65
70
|
},
|
|
66
71
|
};
|
|
67
|
-
const ECO_TIERS = {
|
|
68
|
-
SIMPLE: {
|
|
69
|
-
primary: 'nvidia/qwen3-coder-480b',
|
|
70
|
-
fallback: ['nvidia/llama-4-maverick'],
|
|
71
|
-
},
|
|
72
|
-
MEDIUM: {
|
|
73
|
-
primary: 'google/gemini-2.5-flash-lite',
|
|
74
|
-
fallback: ['nvidia/qwen3-coder-480b', 'nvidia/llama-4-maverick'],
|
|
75
|
-
},
|
|
76
|
-
COMPLEX: {
|
|
77
|
-
primary: 'google/gemini-2.5-flash-lite',
|
|
78
|
-
fallback: ['deepseek/deepseek-chat', 'nvidia/qwen3-coder-480b'],
|
|
79
|
-
},
|
|
80
|
-
REASONING: {
|
|
81
|
-
primary: 'xai/grok-4-1-fast-reasoning',
|
|
82
|
-
fallback: ['deepseek/deepseek-reasoner', 'nvidia/qwen3-coder-480b'],
|
|
83
|
-
},
|
|
84
|
-
};
|
|
85
|
-
const PREMIUM_TIERS = {
|
|
86
|
-
SIMPLE: {
|
|
87
|
-
primary: 'moonshot/kimi-k2.6',
|
|
88
|
-
fallback: ['anthropic/claude-haiku-4.5'],
|
|
89
|
-
},
|
|
90
|
-
MEDIUM: {
|
|
91
|
-
primary: 'openai/gpt-5.3-codex',
|
|
92
|
-
fallback: ['anthropic/claude-sonnet-4.6'],
|
|
93
|
-
},
|
|
94
|
-
COMPLEX: {
|
|
95
|
-
primary: 'anthropic/claude-opus-4.7',
|
|
96
|
-
fallback: ['anthropic/claude-opus-4.6', 'openai/gpt-5.5', 'anthropic/claude-sonnet-4.6'],
|
|
97
|
-
},
|
|
98
|
-
REASONING: {
|
|
99
|
-
primary: 'anthropic/claude-opus-4.7',
|
|
100
|
-
fallback: ['anthropic/claude-opus-4.6', 'anthropic/claude-sonnet-4.6', 'openai/o3'],
|
|
101
|
-
},
|
|
102
|
-
};
|
|
103
72
|
// ─── Keywords for Classification ───
|
|
104
73
|
const CODE_KEYWORDS = [
|
|
105
74
|
'function', 'class', 'import', 'def', 'SELECT', 'async', 'await',
|
|
@@ -285,18 +254,11 @@ function classicRouteRequest(prompt, profile) {
|
|
|
285
254
|
const tokenCount = Math.ceil(byteLen / 4);
|
|
286
255
|
// Classify the request
|
|
287
256
|
const { tier, confidence, signals } = classifyRequest(prompt, tokenCount);
|
|
288
|
-
//
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
break;
|
|
294
|
-
case 'premium':
|
|
295
|
-
tierConfigs = PREMIUM_TIERS;
|
|
296
|
-
break;
|
|
297
|
-
default:
|
|
298
|
-
tierConfigs = AUTO_TIERS;
|
|
299
|
-
}
|
|
257
|
+
// Auto is the only routing profile now (Eco/Premium were retired
|
|
258
|
+
// 2026-05-03 — see comment on RoutingProfile above). 'free' is handled
|
|
259
|
+
// earlier by the caller path; if it ever reaches here, fall through to
|
|
260
|
+
// AUTO_TIERS rather than crashing.
|
|
261
|
+
const tierConfigs = AUTO_TIERS;
|
|
300
262
|
const model = tierConfigs[tier].primary;
|
|
301
263
|
const savings = computeSavings(model);
|
|
302
264
|
const category = detectCategory(prompt, loadLearnedWeights()?.category_keywords).category;
|
|
@@ -409,16 +371,7 @@ export async function routeRequestAsync(prompt, profile = 'auto', classify = llm
|
|
|
409
371
|
}
|
|
410
372
|
// Build a RoutingResult from the LLM-picked tier using the same tier
|
|
411
373
|
// tables the keyword path uses. Keeps downstream code path-identical.
|
|
412
|
-
|
|
413
|
-
switch (profile) {
|
|
414
|
-
case 'eco':
|
|
415
|
-
tierConfigs = ECO_TIERS;
|
|
416
|
-
break;
|
|
417
|
-
case 'premium':
|
|
418
|
-
tierConfigs = PREMIUM_TIERS;
|
|
419
|
-
break;
|
|
420
|
-
default: tierConfigs = AUTO_TIERS;
|
|
421
|
-
}
|
|
374
|
+
const tierConfigs = AUTO_TIERS;
|
|
422
375
|
const model = tierConfigs[tier].primary;
|
|
423
376
|
const category = detectCategory(prompt, loadLearnedWeights()?.category_keywords).category;
|
|
424
377
|
return {
|
|
@@ -450,16 +403,7 @@ export function resolveTierToModel(tier, profile = 'auto') {
|
|
|
450
403
|
savings: 1.0,
|
|
451
404
|
};
|
|
452
405
|
}
|
|
453
|
-
|
|
454
|
-
switch (profile) {
|
|
455
|
-
case 'eco':
|
|
456
|
-
tierConfigs = ECO_TIERS;
|
|
457
|
-
break;
|
|
458
|
-
case 'premium':
|
|
459
|
-
tierConfigs = PREMIUM_TIERS;
|
|
460
|
-
break;
|
|
461
|
-
default: tierConfigs = AUTO_TIERS;
|
|
462
|
-
}
|
|
406
|
+
const tierConfigs = AUTO_TIERS;
|
|
463
407
|
const model = tierConfigs[tier].primary;
|
|
464
408
|
return {
|
|
465
409
|
model,
|
|
@@ -538,20 +482,9 @@ function computeSavings(model) {
|
|
|
538
482
|
* Get fallback models for a tier
|
|
539
483
|
*/
|
|
540
484
|
export function getFallbackChain(tier, profile = 'auto') {
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
tierConfigs = ECO_TIERS;
|
|
545
|
-
break;
|
|
546
|
-
case 'premium':
|
|
547
|
-
tierConfigs = PREMIUM_TIERS;
|
|
548
|
-
break;
|
|
549
|
-
case 'free':
|
|
550
|
-
return ['nvidia/qwen3-coder-480b'];
|
|
551
|
-
default:
|
|
552
|
-
tierConfigs = AUTO_TIERS;
|
|
553
|
-
}
|
|
554
|
-
const config = tierConfigs[tier];
|
|
485
|
+
if (profile === 'free')
|
|
486
|
+
return ['nvidia/qwen3-coder-480b'];
|
|
487
|
+
const config = AUTO_TIERS[tier];
|
|
555
488
|
return [config.primary, ...config.fallback];
|
|
556
489
|
}
|
|
557
490
|
/**
|
|
@@ -561,11 +494,14 @@ export function parseRoutingProfile(model) {
|
|
|
561
494
|
const lower = model.toLowerCase();
|
|
562
495
|
if (lower === 'blockrun/auto' || lower === 'auto')
|
|
563
496
|
return 'auto';
|
|
564
|
-
if (lower === 'blockrun/eco' || lower === 'eco')
|
|
565
|
-
return 'eco';
|
|
566
|
-
if (lower === 'blockrun/premium' || lower === 'premium')
|
|
567
|
-
return 'premium';
|
|
568
497
|
if (lower === 'blockrun/free' || lower === 'free')
|
|
569
498
|
return 'free';
|
|
499
|
+
// Back-compat: Eco / Premium routing profiles were retired 2026-05-03.
|
|
500
|
+
// Existing configs / sessions that still pass these values get silently
|
|
501
|
+
// promoted to Auto so nothing breaks; new code should use 'auto' directly.
|
|
502
|
+
if (lower === 'blockrun/eco' || lower === 'eco')
|
|
503
|
+
return 'auto';
|
|
504
|
+
if (lower === 'blockrun/premium' || lower === 'premium')
|
|
505
|
+
return 'auto';
|
|
570
506
|
return null;
|
|
571
507
|
}
|
package/dist/ui/model-picker.js
CHANGED
|
@@ -6,11 +6,15 @@ import readline from 'node:readline';
|
|
|
6
6
|
import chalk from 'chalk';
|
|
7
7
|
// ─── Model Shortcuts (same as proxy) ───────────────────────────────────────
|
|
8
8
|
export const MODEL_SHORTCUTS = {
|
|
9
|
-
// Routing profiles
|
|
9
|
+
// Routing profiles — Auto is the only profile surfaced in the picker.
|
|
10
|
+
// `eco` / `premium` were retired 2026-05-03 (V4 Pro launch made Auto cheap
|
|
11
|
+
// enough that separate profiles for "cheap" and "best" were redundant).
|
|
12
|
+
// The shortcuts still resolve through parseRoutingProfile() for back-compat
|
|
13
|
+
// with old configs/sessions, which silently promotes them to Auto.
|
|
10
14
|
auto: 'blockrun/auto',
|
|
11
15
|
smart: 'blockrun/auto',
|
|
12
|
-
eco: 'blockrun/eco',
|
|
13
|
-
premium: 'blockrun/premium',
|
|
16
|
+
eco: 'blockrun/auto',
|
|
17
|
+
premium: 'blockrun/auto',
|
|
14
18
|
// Anthropic
|
|
15
19
|
sonnet: 'anthropic/claude-sonnet-4.6',
|
|
16
20
|
claude: 'anthropic/claude-sonnet-4.6',
|
|
@@ -51,9 +55,23 @@ export const MODEL_SHORTCUTS = {
|
|
|
51
55
|
'grok-4': 'xai/grok-4-0709',
|
|
52
56
|
'grok-fast': 'xai/grok-4-1-fast-reasoning',
|
|
53
57
|
'grok-4.1': 'xai/grok-4-1-fast-reasoning',
|
|
54
|
-
// DeepSeek
|
|
55
|
-
|
|
56
|
-
|
|
58
|
+
// DeepSeek — paid SKUs route through deepseek/* (gateway aliases serve V4
|
|
59
|
+
// Flash modes upstream); free tier routes through nvidia/*.
|
|
60
|
+
deepseek: 'deepseek/deepseek-chat', // V4 Flash Chat (paid, $0.20/$0.40)
|
|
61
|
+
r1: 'deepseek/deepseek-reasoner', // V4 Flash Reasoner (paid)
|
|
62
|
+
// V4 Pro: paid flagship, 1.6T MoE / 49B active, 1M ctx, 75% launch promo.
|
|
63
|
+
'deepseek-v4-pro': 'deepseek/deepseek-v4-pro',
|
|
64
|
+
'dsv4-pro': 'deepseek/deepseek-v4-pro',
|
|
65
|
+
'v4-pro': 'deepseek/deepseek-v4-pro',
|
|
66
|
+
// V4 Flash: free on NVIDIA inference. Bare `deepseek-v4` resolves here
|
|
67
|
+
// since the paid V4 Flash SKU was dropped (overlapped with this free one).
|
|
68
|
+
'deepseek-v4': 'nvidia/deepseek-v4-flash',
|
|
69
|
+
'deepseek-v4-flash': 'nvidia/deepseek-v4-flash',
|
|
70
|
+
dsv4: 'nvidia/deepseek-v4-flash',
|
|
71
|
+
// V3.2 free fallback for users who specifically want the older Terminus
|
|
72
|
+
// checkpoint instead of the V4 Flash default.
|
|
73
|
+
'deepseek-v3.2': 'nvidia/deepseek-v3.2',
|
|
74
|
+
'deepseek-v3': 'nvidia/deepseek-v3.2',
|
|
57
75
|
// Free (agent-tested BlockRun gateway free tier — refreshed 2026-04)
|
|
58
76
|
free: 'nvidia/qwen3-coder-480b',
|
|
59
77
|
glm4: 'nvidia/qwen3-coder-480b',
|
|
@@ -112,9 +130,14 @@ export const PICKER_CATEGORIES = [
|
|
|
112
130
|
{
|
|
113
131
|
category: '🧠 Smart routing (auto-pick)',
|
|
114
132
|
models: [
|
|
133
|
+
// Auto is the only routing profile surfaced in the picker. Eco and
|
|
134
|
+
// Premium are kept as shortcut aliases (`eco`, `premium`) and resolve
|
|
135
|
+
// through the router for back-compat with older configs/sessions, but
|
|
136
|
+
// they're hidden from new users — Auto already covers the cheap end
|
|
137
|
+
// (V4 Pro at $0.50/$1.00 for SIMPLE/MEDIUM) and the quality end (Opus
|
|
138
|
+
// for COMPLEX), so a separate Eco/Premium picker entry just adds
|
|
139
|
+
// choice paralysis without distinct value.
|
|
115
140
|
{ id: 'blockrun/auto', shortcut: 'auto', label: 'Auto', price: 'routed' },
|
|
116
|
-
{ id: 'blockrun/eco', shortcut: 'eco', label: 'Eco', price: 'cheapest' },
|
|
117
|
-
{ id: 'blockrun/premium', shortcut: 'premium', label: 'Premium', price: 'best' },
|
|
118
141
|
],
|
|
119
142
|
},
|
|
120
143
|
{
|
|
@@ -139,7 +162,10 @@ export const PICKER_CATEGORIES = [
|
|
|
139
162
|
models: [
|
|
140
163
|
{ id: 'openai/o3', shortcut: 'o3', label: 'O3', price: '$2/$8' },
|
|
141
164
|
{ id: 'openai/gpt-5.3-codex', shortcut: 'codex', label: 'GPT-5.3 Codex', price: '$1.75/$14' },
|
|
142
|
-
|
|
165
|
+
// V4 Pro on launch promo (75% off through 2026-05-31). 1M context,
|
|
166
|
+
// 1.6T MoE → punches up to GPT-5.5/Opus on hard tasks at <1/10 the price.
|
|
167
|
+
{ id: 'deepseek/deepseek-v4-pro', shortcut: 'deepseek-v4-pro', label: 'DeepSeek V4 Pro', price: '$0.5/$1 (promo)', highlight: true },
|
|
168
|
+
{ id: 'deepseek/deepseek-reasoner', shortcut: 'r1', label: 'DeepSeek V4 Flash R.', price: '$0.2/$0.4' },
|
|
143
169
|
{ id: 'xai/grok-4-1-fast-reasoning', shortcut: 'grok-fast', label: 'Grok 4.1 Fast R.', price: '$0.2/$0.5' },
|
|
144
170
|
],
|
|
145
171
|
},
|
|
@@ -149,14 +175,22 @@ export const PICKER_CATEGORIES = [
|
|
|
149
175
|
{ id: 'anthropic/claude-haiku-4.5-20251001', shortcut: 'haiku', label: 'Claude Haiku 4.5', price: '$1/$5' },
|
|
150
176
|
{ id: 'openai/gpt-5-mini', shortcut: 'mini', label: 'GPT-5 Mini', price: '$0.25/$2' },
|
|
151
177
|
{ id: 'google/gemini-2.5-flash', shortcut: 'flash', label: 'Gemini 2.5 Flash', price: '$0.3/$2.5' },
|
|
152
|
-
|
|
178
|
+
// Re-aliased to V4 Flash Chat upstream — context 1M, price 30% lower.
|
|
179
|
+
{ id: 'deepseek/deepseek-chat', shortcut: 'deepseek', label: 'DeepSeek V4 Flash Chat', price: '$0.2/$0.4' },
|
|
153
180
|
{ id: 'moonshot/kimi-k2.6', shortcut: 'kimi', label: 'Kimi K2.6', price: '$0.95/$4' },
|
|
154
|
-
|
|
181
|
+
// Minimax M2.7 hidden to make room for V4 Pro in Reasoning + V4 Flash
|
|
182
|
+
// (free) without exceeding the picker's 24-entry cap. Shortcut `minimax`
|
|
183
|
+
// still resolves to it.
|
|
155
184
|
],
|
|
156
185
|
},
|
|
157
186
|
{
|
|
158
187
|
category: '🆓 Free (no USDC needed)',
|
|
159
188
|
models: [
|
|
189
|
+
// V4 Flash leads the section: newest gateway addition, general-purpose,
|
|
190
|
+
// fast — better default for most users than the coder-specialized Qwen.
|
|
191
|
+
// V3.2 hidden (shortcut `deepseek-v3` still works) since V4 Flash
|
|
192
|
+
// supersedes it; keeping the picker tight.
|
|
193
|
+
{ id: 'nvidia/deepseek-v4-flash', shortcut: 'deepseek-v4', label: 'DeepSeek V4 Flash', price: 'FREE', highlight: true },
|
|
160
194
|
{ id: 'nvidia/qwen3-coder-480b', shortcut: 'free', label: 'Qwen3 Coder 480B', price: 'FREE' },
|
|
161
195
|
{ id: 'nvidia/llama-4-maverick', shortcut: 'maverick', label: 'Llama 4 Maverick', price: 'FREE' },
|
|
162
196
|
],
|
package/package.json
CHANGED