@blockrun/franklin 3.15.5 → 3.15.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -107,10 +107,12 @@ export declare class ModelClient {
107
107
  * Handles x402 payment automatically on 402 responses.
108
108
  */
109
109
  /**
110
- * Resolve virtual routing profiles (blockrun/auto, blockrun/eco, etc.)
111
- * to concrete models. This is the final safety net — if the router in
110
+ * Resolve virtual routing profiles (blockrun/auto, blockrun/free) to
111
+ * concrete models. This is the final safety net — if the router in
112
112
  * loop.ts didn't resolve it (e.g. old global install without router),
113
- * we resolve it here before hitting the API.
113
+ * we resolve it here before hitting the API. Legacy blockrun/eco and
114
+ * blockrun/premium fall through the unknown-key path to the same
115
+ * default model.
114
116
  */
115
117
  private resolveVirtualModel;
116
118
  streamCompletion(request: ModelRequest, signal?: AbortSignal): AsyncGenerator<StreamChunk>;
package/dist/agent/llm.js CHANGED
@@ -260,10 +260,12 @@ export class ModelClient {
260
260
  * Handles x402 payment automatically on 402 responses.
261
261
  */
262
262
  /**
263
- * Resolve virtual routing profiles (blockrun/auto, blockrun/eco, etc.)
264
- * to concrete models. This is the final safety net — if the router in
263
+ * Resolve virtual routing profiles (blockrun/auto, blockrun/free) to
264
+ * concrete models. This is the final safety net — if the router in
265
265
  * loop.ts didn't resolve it (e.g. old global install without router),
266
- * we resolve it here before hitting the API.
266
+ * we resolve it here before hitting the API. Legacy blockrun/eco and
267
+ * blockrun/premium fall through the unknown-key path to the same
268
+ * default model.
267
269
  */
268
270
  resolveVirtualModel(model) {
269
271
  if (!model.startsWith('blockrun/'))
@@ -280,12 +282,13 @@ export class ModelClient {
280
282
  catch {
281
283
  // Router not available (e.g. old build) — use hardcoded fallback table
282
284
  }
283
- // Static fallback if router is unavailable. Default to FREE model so
284
- // users aren't silently charged when their intended model can't resolve.
285
+ // Static fallback when the router module isn't loadable. Defaults to a
286
+ // FREE model so users aren't silently charged. The unknown-key path also
287
+ // falls through to qwen, so legacy `blockrun/eco` / `blockrun/premium`
288
+ // strings (now retired routing profiles) end up at the same place
289
+ // without needing dedicated entries.
285
290
  const FALLBACKS = {
286
291
  'blockrun/auto': 'nvidia/qwen3-coder-480b',
287
- 'blockrun/eco': 'nvidia/qwen3-coder-480b',
288
- 'blockrun/premium': 'anthropic/claude-sonnet-4.6',
289
292
  'blockrun/free': 'nvidia/qwen3-coder-480b',
290
293
  };
291
294
  return FALLBACKS[model] || 'nvidia/qwen3-coder-480b';
@@ -22,7 +22,7 @@ import { appendAudit, extractLastUserPrompt } from '../stats/audit.js';
22
22
  import { estimateCost, OPUS_PRICING } from '../pricing.js';
23
23
  import { maybeMidSessionExtract } from '../learnings/extractor.js';
24
24
  import { extractMentions, buildEntityContext, loadEntities } from '../brain/store.js';
25
- import { routeRequestAsync, resolveTierToModel, parseRoutingProfile } from '../router/index.js';
25
+ import { routeRequestAsync, resolveTierToModel, parseRoutingProfile, getFallbackChain } from '../router/index.js';
26
26
  import { recordOutcome } from '../router/local-elo.js';
27
27
  import { shouldPlan, getPlanningPrompt, getExecutorModel, isExecutorStuck, toolCallSignature } from './planner.js';
28
28
  import { shouldVerify, runVerification } from './verification.js';
@@ -505,6 +505,11 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
505
505
  let recoveryAttempts = 0;
506
506
  let autoContinuationCount = 0;
507
507
  const MAX_RECOVERY_ATTEMPTS = 5;
508
+ // Track per-model server-error streak so we can break out of a stuck
509
+ // upstream and try the next model in the routing fallback chain instead
510
+ // of burning all MAX_RECOVERY_ATTEMPTS retries on the same failure.
511
+ const serverErrorsByModel = new Map();
512
+ const SERVER_ERROR_STREAK_BEFORE_SWITCH = 2;
508
513
  let compactFailures = 0;
509
514
  let maxTokensOverride;
510
515
  const turnIdleReference = lastSessionActivity;
@@ -993,14 +998,48 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
993
998
  }
994
999
  }
995
1000
  if (classified.isTransient && recoveryAttempts < effectiveMaxRetries) {
1001
+ // Server-error streak guard: if the same model 5xx's twice in a row
1002
+ // it's almost always an upstream incident, not a blip. Switch to
1003
+ // the next routing fallback instead of waiting out 5 backoffs on a
1004
+ // dead provider — same idea as the payment-failure auto-fallback
1005
+ // below, but for transient server errors. Skipped for non-server
1006
+ // transients (rate limits, network blips) where retry is the right
1007
+ // call. Also skipped when the user picked a concrete model — they
1008
+ // explicitly chose this one, so we shouldn't silently swap.
1009
+ if (classified.category === 'server' && parseRoutingProfile(config.model)) {
1010
+ const streak = (serverErrorsByModel.get(resolvedModel) ?? 0) + 1;
1011
+ serverErrorsByModel.set(resolvedModel, streak);
1012
+ if (streak >= SERVER_ERROR_STREAK_BEFORE_SWITCH) {
1013
+ const fallbackChain = getFallbackChain(routingTier ?? 'MEDIUM', parseRoutingProfile(config.model) ?? 'auto');
1014
+ const nextModel = fallbackChain.find(m => m !== resolvedModel && (serverErrorsByModel.get(m) ?? 0) < SERVER_ERROR_STREAK_BEFORE_SWITCH);
1015
+ if (nextModel) {
1016
+ config.model = nextModel;
1017
+ config.onModelChange?.(nextModel, 'system');
1018
+ recoveryAttempts = 0;
1019
+ onEvent({
1020
+ kind: 'text_delta',
1021
+ text: `\n*${resolvedModel} keeps 5xx'ing (${streak} in a row) — switching to ${nextModel}*\n`,
1022
+ });
1023
+ continue;
1024
+ }
1025
+ // No alternative left in the fallback chain — fall through to
1026
+ // the normal retry path so we at least exhaust attempts before
1027
+ // surrender.
1028
+ }
1029
+ }
996
1030
  recoveryAttempts++;
997
1031
  const backoffMs = getBackoffDelay(recoveryAttempts);
998
1032
  if (config.debug) {
999
1033
  console.error(`[franklin] ${classified.label} error — retrying in ${(backoffMs / 1000).toFixed(1)}s (attempt ${recoveryAttempts}/${effectiveMaxRetries}): ${errMsg.slice(0, 100)}`);
1000
1034
  }
1035
+ // Surface the actual error + model so the user can see which model
1036
+ // is failing and what the upstream said. Old "Retrying after Server
1037
+ // error" was uninformative — users couldn't tell whether to wait,
1038
+ // /retry, or /model-switch.
1039
+ const errSnippet = errMsg.replace(/\s+/g, ' ').slice(0, 100);
1001
1040
  onEvent({
1002
1041
  kind: 'text_delta',
1003
- text: `\n*Retrying (${recoveryAttempts}/${effectiveMaxRetries}) after ${classified.label} error...*\n`,
1042
+ text: `\n*Retrying ${recoveryAttempts}/${effectiveMaxRetries} on ${resolvedModel} — ${classified.label}: ${errSnippet}*\n`,
1004
1043
  });
1005
1044
  await new Promise(r => setTimeout(r, backoffMs));
1006
1045
  continue;
@@ -34,7 +34,11 @@ const MODEL_MAX_OUTPUT = {
34
34
  'openai/gpt-5-mini': 16_384,
35
35
  'google/gemini-2.5-pro': 65_536,
36
36
  'google/gemini-2.5-flash': 65_536,
37
- 'deepseek/deepseek-chat': 8_192,
37
+ // DeepSeek V4 family — upstream max_output is 65K on V4 Flash + V4 Pro;
38
+ // gateway re-aliased deepseek-chat/-reasoner to V4 Flash modes 2026-05-03.
39
+ 'deepseek/deepseek-chat': 65_536,
40
+ 'deepseek/deepseek-reasoner': 65_536,
41
+ 'deepseek/deepseek-v4-pro': 65_536,
38
42
  // Kimi K2.6 supports 65K output per the BlockRun gateway model entry
39
43
  // (moonshot/kimi-k2.6 max_output: 65536). Without this entry the default
40
44
  // 16K cap left users with 4× headroom on the table for long-form coding
@@ -30,7 +30,7 @@ export declare function getPlanningPrompt(): string;
30
30
  * These models are good at following structured instructions (the plan)
31
31
  * but much cheaper than the planning model.
32
32
  */
33
- export declare function getExecutorModel(profile: RoutingProfile): string;
33
+ export declare function getExecutorModel(_profile: RoutingProfile): string;
34
34
  /**
35
35
  * Extract numbered steps from plan text.
36
36
  * Handles formats like "1. Do X", "1) Do X", "Step 1: Do X".
@@ -34,8 +34,10 @@ export function shouldPlan(profile, ultrathink, planDisabled, analyzerSaysNeedsP
34
34
  return false;
35
35
  if (ultrathink)
36
36
  return false; // ultrathink already provides deep reasoning
37
- // Only auto / premium profiles — eco / free are cost-constrained.
38
- if (profile !== 'auto' && profile !== 'premium')
37
+ // Only the 'auto' profile uses planning. 'free' is cost-constrained;
38
+ // legacy 'eco' / 'premium' both alias to 'auto' via parseRoutingProfile,
39
+ // so this check covers them implicitly.
40
+ if (profile !== 'auto')
39
41
  return false;
40
42
  // Final decision comes from the turn analyzer's boolean flag.
41
43
  return analyzerSaysNeedsPlanning;
@@ -64,14 +66,10 @@ Rules:
64
66
  * These models are good at following structured instructions (the plan)
65
67
  * but much cheaper than the planning model.
66
68
  */
67
- export function getExecutorModel(profile) {
68
- switch (profile) {
69
- case 'premium':
70
- return 'moonshot/kimi-k2.6'; // Medium-tier, reliable execution (256K ctx, vision + reasoning)
71
- case 'auto':
72
- default:
73
- return 'google/gemini-2.5-flash'; // Cheap, fast, good at instructions
74
- }
69
+ export function getExecutorModel(_profile) {
70
+ // Auto is the only profile that runs planning (see shouldPlan above), so
71
+ // there's only one executor branch to pick. 'free' never reaches here.
72
+ return 'google/gemini-2.5-flash';
75
73
  }
76
74
  // ─── Plan Parsing ────────────────────────────────────────────────────────
77
75
  /**
@@ -185,9 +185,11 @@ const MODEL_CONTEXT_WINDOWS = {
185
185
  'google/gemini-2.5-flash': 1_000_000,
186
186
  'google/gemini-2.5-flash-lite': 1_000_000,
187
187
  'google/gemini-3.1-pro': 1_000_000,
188
- // DeepSeek
189
- 'deepseek/deepseek-chat': 64_000,
190
- 'deepseek/deepseek-reasoner': 64_000,
188
+ // DeepSeek (V4 family — gateway aliased deepseek-chat / -reasoner to V4
189
+ // Flash on 2026-05-03; context bumped 128K → 1M for both, 65K out)
190
+ 'deepseek/deepseek-chat': 1_000_000,
191
+ 'deepseek/deepseek-reasoner': 1_000_000,
192
+ 'deepseek/deepseek-v4-pro': 1_000_000,
191
193
  // xAI
192
194
  'xai/grok-3': 131_072,
193
195
  'xai/grok-4-0709': 131_072,
package/dist/pricing.js CHANGED
@@ -3,10 +3,10 @@
3
3
  * Used by agent loop, proxy server, stats tracker, and router.
4
4
  */
5
5
  export const MODEL_PRICING = {
6
- // Routing profiles (blended averages)
6
+ // Routing profiles (blended averages). Auto + Free are the only profiles
7
+ // surfaced after the 2026-05-03 collapse; eco/premium were retired and
8
+ // their parser mapping promotes them to Auto upstream of cost estimation.
7
9
  'blockrun/auto': { input: 0.8, output: 4.0 },
8
- 'blockrun/eco': { input: 0.2, output: 1.0 },
9
- 'blockrun/premium': { input: 3.0, output: 15.0 },
10
10
  'blockrun/free': { input: 0, output: 0 },
11
11
  // FREE — BlockRun gateway free tier (refreshed 2026-04-29 with V4 Flash + Omni launch)
12
12
  'nvidia/deepseek-v4-flash': { input: 0, output: 0 },
@@ -67,9 +67,13 @@ export const MODEL_PRICING = {
67
67
  'xai/grok-3-mini': { input: 0.3, output: 0.5 },
68
68
  'xai/grok-2-vision': { input: 2.0, output: 10.0 },
69
69
  'xai/grok-3': { input: 3.0, output: 15.0 },
70
- // DeepSeek
71
- 'deepseek/deepseek-chat': { input: 0.28, output: 0.42 },
72
- 'deepseek/deepseek-reasoner': { input: 0.28, output: 0.42 },
70
+ // DeepSeek (gateway re-aliased these to V4 Flash on 2026-05-03; price
71
+ // dropped from $0.28/$0.42 to $0.20/$0.40, context bumped 128K→1M).
72
+ 'deepseek/deepseek-chat': { input: 0.20, output: 0.40 },
73
+ 'deepseek/deepseek-reasoner': { input: 0.20, output: 0.40 },
74
+ // V4 Pro (1.6T MoE / 49B active, 1M ctx, 65K out). 75% launch promo
75
+ // through 2026-05-31 — list is $2.00/$4.00, promo is $0.50/$1.00.
76
+ 'deepseek/deepseek-v4-pro': { input: 0.50, output: 1.00 },
73
77
  // Minimax
74
78
  'minimax/minimax-m2.7': { input: 0.3, output: 1.2 },
75
79
  'minimax/minimax-m2.5': { input: 0.3, output: 1.2 },
@@ -30,7 +30,7 @@ export declare function fetchWithFallback(url: string, init: RequestInit, origin
30
30
  * Get the current model from fallback chain based on parsed request
31
31
  */
32
32
  export declare function getCurrentModelFromChain(requestedModel: string | undefined, config?: FallbackConfig): string;
33
- /** Routing profiles that must never be sent to the backend directly */
33
+ /** Routing profiles that must never be sent to the backend directly. */
34
34
  export declare const ROUTING_PROFILES: Set<string>;
35
35
  /**
36
36
  * Build fallback chain starting from a specific model.
@@ -119,9 +119,9 @@ export function getCurrentModelFromChain(requestedModel, config = DEFAULT_FALLBA
119
119
  // Default to first model in chain
120
120
  return config.chain[0];
121
121
  }
122
- /** Routing profiles that must never be sent to the backend directly */
122
+ /** Routing profiles that must never be sent to the backend directly. */
123
123
  export const ROUTING_PROFILES = new Set([
124
- 'blockrun/auto', 'blockrun/eco', 'blockrun/premium', 'blockrun/free',
124
+ 'blockrun/auto', 'blockrun/free',
125
125
  ]);
126
126
  /**
127
127
  * Build fallback chain starting from a specific model.
@@ -111,11 +111,13 @@ function trackOutputTokens(model, tokens) {
111
111
  }
112
112
  // Model shortcuts for quick switching
113
113
  const MODEL_SHORTCUTS = {
114
- // Routing profiles
114
+ // Routing profiles — Auto-only since 2026-05-03 (Eco/Premium retired).
115
+ // `eco` / `premium` aliases retained for back-compat with proxy clients;
116
+ // they parse to Auto downstream.
115
117
  auto: 'blockrun/auto',
116
118
  smart: 'blockrun/auto',
117
- eco: 'blockrun/eco',
118
- premium: 'blockrun/premium',
119
+ eco: 'blockrun/auto',
120
+ premium: 'blockrun/auto',
119
121
  // Anthropic
120
122
  sonnet: 'anthropic/claude-sonnet-4.6',
121
123
  claude: 'anthropic/claude-sonnet-4.6',
@@ -11,7 +11,7 @@
11
11
  */
12
12
  import { type Category } from './categories.js';
13
13
  export type Tier = 'SIMPLE' | 'MEDIUM' | 'COMPLEX' | 'REASONING';
14
- export type RoutingProfile = 'auto' | 'eco' | 'premium' | 'free';
14
+ export type RoutingProfile = 'auto' | 'free';
15
15
  export interface RoutingResult {
16
16
  model: string;
17
17
  tier: Tier;
@@ -33,73 +33,42 @@ function loadLearnedWeights() {
33
33
  return null;
34
34
  }
35
35
  // ─── Tier Model Configs ───
36
- // Agent-first defaults. Sonnet-tier models are the current sweet spot for
37
- // multi-step tool-use agent work; cheap models keep derailing on simple agent
38
- // loops. Each tier's fallback ends with a cheaper option so payment/quota
39
- // failures don't strand users on equally expensive alternatives.
36
+ // Auto-routing strategy (post-DeepSeek-V4-Pro launch promo, 2026-05-03):
37
+ // V4 Pro at $0.50/$1.00 with 1M context is the new sweet spot for SIMPLE +
38
+ // MEDIUM agent work — Sonnet-quality reasoning at ~1/6 the price. Reserve
39
+ // Opus only for genuinely complex multi-file/multi-decision tasks where
40
+ // the model's wider context handling and tighter tool-use discipline still
41
+ // pay for themselves. Sonnet drops to fallback because V4 Pro covers most
42
+ // of what users were calling Sonnet for, at a fraction of the cost.
40
43
  const AUTO_TIERS = {
41
44
  SIMPLE: {
42
- primary: 'google/gemini-2.5-flash',
43
- fallback: ['moonshot/kimi-k2.6', 'deepseek/deepseek-chat'],
45
+ primary: 'deepseek/deepseek-v4-pro',
46
+ fallback: ['google/gemini-2.5-flash', 'moonshot/kimi-k2.6', 'deepseek/deepseek-chat'],
44
47
  },
45
48
  MEDIUM: {
46
- primary: 'anthropic/claude-sonnet-4.6',
47
- fallback: ['openai/gpt-5.5', 'google/gemini-3.1-pro', 'moonshot/kimi-k2.6'],
49
+ primary: 'deepseek/deepseek-v4-pro',
50
+ fallback: ['anthropic/claude-sonnet-4.6', 'openai/gpt-5.5', 'google/gemini-3.1-pro'],
48
51
  },
49
52
  COMPLEX: {
50
- primary: 'anthropic/claude-sonnet-4.6',
51
- fallback: ['openai/gpt-5.5', 'anthropic/claude-opus-4.7', 'moonshot/kimi-k2.6'],
53
+ // Hard tasks — multi-file refactors, ambiguous specs, dense reasoning
54
+ // chains — still go to Opus. V4 Pro is great but not a Sonnet/Opus
55
+ // replacement at the high end of difficulty per recent agent-bench runs.
56
+ primary: 'anthropic/claude-opus-4.7',
57
+ fallback: ['openai/gpt-5.5', 'anthropic/claude-sonnet-4.6', 'deepseek/deepseek-v4-pro'],
52
58
  },
53
59
  REASONING: {
54
60
  // Opus 4.7: step-change improvement in agentic coding over 4.6 per
55
- // Anthropic. Same price, same 200k ctx in Franklin's baseline, so
56
- // swap is cost-neutral. 4.6 stays in the fallback chain in case of
57
- // rollout delays on the gateway side.
61
+ // Anthropic. 4.6 stays in the fallback chain in case of rollout delays.
58
62
  primary: 'anthropic/claude-opus-4.7',
59
63
  fallback: [
60
64
  'anthropic/claude-opus-4.6',
61
65
  'openai/o3',
66
+ 'deepseek/deepseek-v4-pro',
62
67
  'xai/grok-4-1-fast-reasoning',
63
68
  'deepseek/deepseek-reasoner',
64
69
  ],
65
70
  },
66
71
  };
67
- const ECO_TIERS = {
68
- SIMPLE: {
69
- primary: 'nvidia/qwen3-coder-480b',
70
- fallback: ['nvidia/llama-4-maverick'],
71
- },
72
- MEDIUM: {
73
- primary: 'google/gemini-2.5-flash-lite',
74
- fallback: ['nvidia/qwen3-coder-480b', 'nvidia/llama-4-maverick'],
75
- },
76
- COMPLEX: {
77
- primary: 'google/gemini-2.5-flash-lite',
78
- fallback: ['deepseek/deepseek-chat', 'nvidia/qwen3-coder-480b'],
79
- },
80
- REASONING: {
81
- primary: 'xai/grok-4-1-fast-reasoning',
82
- fallback: ['deepseek/deepseek-reasoner', 'nvidia/qwen3-coder-480b'],
83
- },
84
- };
85
- const PREMIUM_TIERS = {
86
- SIMPLE: {
87
- primary: 'moonshot/kimi-k2.6',
88
- fallback: ['anthropic/claude-haiku-4.5'],
89
- },
90
- MEDIUM: {
91
- primary: 'openai/gpt-5.3-codex',
92
- fallback: ['anthropic/claude-sonnet-4.6'],
93
- },
94
- COMPLEX: {
95
- primary: 'anthropic/claude-opus-4.7',
96
- fallback: ['anthropic/claude-opus-4.6', 'openai/gpt-5.5', 'anthropic/claude-sonnet-4.6'],
97
- },
98
- REASONING: {
99
- primary: 'anthropic/claude-opus-4.7',
100
- fallback: ['anthropic/claude-opus-4.6', 'anthropic/claude-sonnet-4.6', 'openai/o3'],
101
- },
102
- };
103
72
  // ─── Keywords for Classification ───
104
73
  const CODE_KEYWORDS = [
105
74
  'function', 'class', 'import', 'def', 'SELECT', 'async', 'await',
@@ -285,18 +254,11 @@ function classicRouteRequest(prompt, profile) {
285
254
  const tokenCount = Math.ceil(byteLen / 4);
286
255
  // Classify the request
287
256
  const { tier, confidence, signals } = classifyRequest(prompt, tokenCount);
288
- // Select tier config based on profile
289
- let tierConfigs;
290
- switch (profile) {
291
- case 'eco':
292
- tierConfigs = ECO_TIERS;
293
- break;
294
- case 'premium':
295
- tierConfigs = PREMIUM_TIERS;
296
- break;
297
- default:
298
- tierConfigs = AUTO_TIERS;
299
- }
257
+ // Auto is the only routing profile now (Eco/Premium were retired
258
+ // 2026-05-03 — see comment on RoutingProfile above). 'free' is handled
259
+ // earlier by the caller path; if it ever reaches here, fall through to
260
+ // AUTO_TIERS rather than crashing.
261
+ const tierConfigs = AUTO_TIERS;
300
262
  const model = tierConfigs[tier].primary;
301
263
  const savings = computeSavings(model);
302
264
  const category = detectCategory(prompt, loadLearnedWeights()?.category_keywords).category;
@@ -409,16 +371,7 @@ export async function routeRequestAsync(prompt, profile = 'auto', classify = llm
409
371
  }
410
372
  // Build a RoutingResult from the LLM-picked tier using the same tier
411
373
  // tables the keyword path uses. Keeps downstream code path-identical.
412
- let tierConfigs;
413
- switch (profile) {
414
- case 'eco':
415
- tierConfigs = ECO_TIERS;
416
- break;
417
- case 'premium':
418
- tierConfigs = PREMIUM_TIERS;
419
- break;
420
- default: tierConfigs = AUTO_TIERS;
421
- }
374
+ const tierConfigs = AUTO_TIERS;
422
375
  const model = tierConfigs[tier].primary;
423
376
  const category = detectCategory(prompt, loadLearnedWeights()?.category_keywords).category;
424
377
  return {
@@ -450,16 +403,7 @@ export function resolveTierToModel(tier, profile = 'auto') {
450
403
  savings: 1.0,
451
404
  };
452
405
  }
453
- let tierConfigs;
454
- switch (profile) {
455
- case 'eco':
456
- tierConfigs = ECO_TIERS;
457
- break;
458
- case 'premium':
459
- tierConfigs = PREMIUM_TIERS;
460
- break;
461
- default: tierConfigs = AUTO_TIERS;
462
- }
406
+ const tierConfigs = AUTO_TIERS;
463
407
  const model = tierConfigs[tier].primary;
464
408
  return {
465
409
  model,
@@ -538,20 +482,9 @@ function computeSavings(model) {
538
482
  * Get fallback models for a tier
539
483
  */
540
484
  export function getFallbackChain(tier, profile = 'auto') {
541
- let tierConfigs;
542
- switch (profile) {
543
- case 'eco':
544
- tierConfigs = ECO_TIERS;
545
- break;
546
- case 'premium':
547
- tierConfigs = PREMIUM_TIERS;
548
- break;
549
- case 'free':
550
- return ['nvidia/qwen3-coder-480b'];
551
- default:
552
- tierConfigs = AUTO_TIERS;
553
- }
554
- const config = tierConfigs[tier];
485
+ if (profile === 'free')
486
+ return ['nvidia/qwen3-coder-480b'];
487
+ const config = AUTO_TIERS[tier];
555
488
  return [config.primary, ...config.fallback];
556
489
  }
557
490
  /**
@@ -561,11 +494,14 @@ export function parseRoutingProfile(model) {
561
494
  const lower = model.toLowerCase();
562
495
  if (lower === 'blockrun/auto' || lower === 'auto')
563
496
  return 'auto';
564
- if (lower === 'blockrun/eco' || lower === 'eco')
565
- return 'eco';
566
- if (lower === 'blockrun/premium' || lower === 'premium')
567
- return 'premium';
568
497
  if (lower === 'blockrun/free' || lower === 'free')
569
498
  return 'free';
499
+ // Back-compat: Eco / Premium routing profiles were retired 2026-05-03.
500
+ // Existing configs / sessions that still pass these values get silently
501
+ // promoted to Auto so nothing breaks; new code should use 'auto' directly.
502
+ if (lower === 'blockrun/eco' || lower === 'eco')
503
+ return 'auto';
504
+ if (lower === 'blockrun/premium' || lower === 'premium')
505
+ return 'auto';
570
506
  return null;
571
507
  }
@@ -6,11 +6,15 @@ import readline from 'node:readline';
6
6
  import chalk from 'chalk';
7
7
  // ─── Model Shortcuts (same as proxy) ───────────────────────────────────────
8
8
  export const MODEL_SHORTCUTS = {
9
- // Routing profiles
9
+ // Routing profiles — Auto is the only profile surfaced in the picker.
10
+ // `eco` / `premium` were retired 2026-05-03 (V4 Pro launch made Auto cheap
11
+ // enough that separate profiles for "cheap" and "best" were redundant).
12
+ // The shortcuts still resolve through parseRoutingProfile() for back-compat
13
+ // with old configs/sessions, which silently promotes them to Auto.
10
14
  auto: 'blockrun/auto',
11
15
  smart: 'blockrun/auto',
12
- eco: 'blockrun/eco',
13
- premium: 'blockrun/premium',
16
+ eco: 'blockrun/auto',
17
+ premium: 'blockrun/auto',
14
18
  // Anthropic
15
19
  sonnet: 'anthropic/claude-sonnet-4.6',
16
20
  claude: 'anthropic/claude-sonnet-4.6',
@@ -51,9 +55,23 @@ export const MODEL_SHORTCUTS = {
51
55
  'grok-4': 'xai/grok-4-0709',
52
56
  'grok-fast': 'xai/grok-4-1-fast-reasoning',
53
57
  'grok-4.1': 'xai/grok-4-1-fast-reasoning',
54
- // DeepSeek
55
- deepseek: 'deepseek/deepseek-chat',
56
- r1: 'deepseek/deepseek-reasoner',
58
+ // DeepSeek — paid SKUs route through deepseek/* (gateway aliases serve V4
59
+ // Flash modes upstream); free tier routes through nvidia/*.
60
+ deepseek: 'deepseek/deepseek-chat', // V4 Flash Chat (paid, $0.20/$0.40)
61
+ r1: 'deepseek/deepseek-reasoner', // V4 Flash Reasoner (paid)
62
+ // V4 Pro: paid flagship, 1.6T MoE / 49B active, 1M ctx, 75% launch promo.
63
+ 'deepseek-v4-pro': 'deepseek/deepseek-v4-pro',
64
+ 'dsv4-pro': 'deepseek/deepseek-v4-pro',
65
+ 'v4-pro': 'deepseek/deepseek-v4-pro',
66
+ // V4 Flash: free on NVIDIA inference. Bare `deepseek-v4` resolves here
67
+ // since the paid V4 Flash SKU was dropped (overlapped with this free one).
68
+ 'deepseek-v4': 'nvidia/deepseek-v4-flash',
69
+ 'deepseek-v4-flash': 'nvidia/deepseek-v4-flash',
70
+ dsv4: 'nvidia/deepseek-v4-flash',
71
+ // V3.2 free fallback for users who specifically want the older Terminus
72
+ // checkpoint instead of the V4 Flash default.
73
+ 'deepseek-v3.2': 'nvidia/deepseek-v3.2',
74
+ 'deepseek-v3': 'nvidia/deepseek-v3.2',
57
75
  // Free (agent-tested BlockRun gateway free tier — refreshed 2026-04)
58
76
  free: 'nvidia/qwen3-coder-480b',
59
77
  glm4: 'nvidia/qwen3-coder-480b',
@@ -112,9 +130,14 @@ export const PICKER_CATEGORIES = [
112
130
  {
113
131
  category: '🧠 Smart routing (auto-pick)',
114
132
  models: [
133
+ // Auto is the only routing profile surfaced in the picker. Eco and
134
+ // Premium are kept as shortcut aliases (`eco`, `premium`) and resolve
135
+ // through the router for back-compat with older configs/sessions, but
136
+ // they're hidden from new users — Auto already covers the cheap end
137
+ // (V4 Pro at $0.50/$1.00 for SIMPLE/MEDIUM) and the quality end (Opus
138
+ // for COMPLEX), so a separate Eco/Premium picker entry just adds
139
+ // choice paralysis without distinct value.
115
140
  { id: 'blockrun/auto', shortcut: 'auto', label: 'Auto', price: 'routed' },
116
- { id: 'blockrun/eco', shortcut: 'eco', label: 'Eco', price: 'cheapest' },
117
- { id: 'blockrun/premium', shortcut: 'premium', label: 'Premium', price: 'best' },
118
141
  ],
119
142
  },
120
143
  {
@@ -139,7 +162,10 @@ export const PICKER_CATEGORIES = [
139
162
  models: [
140
163
  { id: 'openai/o3', shortcut: 'o3', label: 'O3', price: '$2/$8' },
141
164
  { id: 'openai/gpt-5.3-codex', shortcut: 'codex', label: 'GPT-5.3 Codex', price: '$1.75/$14' },
142
- { id: 'deepseek/deepseek-reasoner', shortcut: 'r1', label: 'DeepSeek R1', price: '$0.28/$0.42' },
165
+ // V4 Pro on launch promo (75% off through 2026-05-31). 1M context,
166
+ // 1.6T MoE → punches up to GPT-5.5/Opus on hard tasks at <1/10 the price.
167
+ { id: 'deepseek/deepseek-v4-pro', shortcut: 'deepseek-v4-pro', label: 'DeepSeek V4 Pro', price: '$0.5/$1 (promo)', highlight: true },
168
+ { id: 'deepseek/deepseek-reasoner', shortcut: 'r1', label: 'DeepSeek V4 Flash R.', price: '$0.2/$0.4' },
143
169
  { id: 'xai/grok-4-1-fast-reasoning', shortcut: 'grok-fast', label: 'Grok 4.1 Fast R.', price: '$0.2/$0.5' },
144
170
  ],
145
171
  },
@@ -149,14 +175,22 @@ export const PICKER_CATEGORIES = [
149
175
  { id: 'anthropic/claude-haiku-4.5-20251001', shortcut: 'haiku', label: 'Claude Haiku 4.5', price: '$1/$5' },
150
176
  { id: 'openai/gpt-5-mini', shortcut: 'mini', label: 'GPT-5 Mini', price: '$0.25/$2' },
151
177
  { id: 'google/gemini-2.5-flash', shortcut: 'flash', label: 'Gemini 2.5 Flash', price: '$0.3/$2.5' },
152
- { id: 'deepseek/deepseek-chat', shortcut: 'deepseek', label: 'DeepSeek V3', price: '$0.28/$0.42' },
178
+ // Re-aliased to V4 Flash Chat upstream — context 1M, price 30% lower.
179
+ { id: 'deepseek/deepseek-chat', shortcut: 'deepseek', label: 'DeepSeek V4 Flash Chat', price: '$0.2/$0.4' },
153
180
  { id: 'moonshot/kimi-k2.6', shortcut: 'kimi', label: 'Kimi K2.6', price: '$0.95/$4' },
154
- { id: 'minimax/minimax-m2.7', shortcut: 'minimax', label: 'Minimax M2.7', price: '$0.3/$1.2' },
181
+ // Minimax M2.7 hidden to make room for V4 Pro in Reasoning + V4 Flash
182
+ // (free) without exceeding the picker's 24-entry cap. Shortcut `minimax`
183
+ // still resolves to it.
155
184
  ],
156
185
  },
157
186
  {
158
187
  category: '🆓 Free (no USDC needed)',
159
188
  models: [
189
+ // V4 Flash leads the section: newest gateway addition, general-purpose,
190
+ // fast — better default for most users than the coder-specialized Qwen.
191
+ // V3.2 hidden (shortcut `deepseek-v3` still works) since V4 Flash
192
+ // supersedes it; keeping the picker tight.
193
+ { id: 'nvidia/deepseek-v4-flash', shortcut: 'deepseek-v4', label: 'DeepSeek V4 Flash', price: 'FREE', highlight: true },
160
194
  { id: 'nvidia/qwen3-coder-480b', shortcut: 'free', label: 'Qwen3 Coder 480B', price: 'FREE' },
161
195
  { id: 'nvidia/llama-4-maverick', shortcut: 'maverick', label: 'Llama 4 Maverick', price: 'FREE' },
162
196
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/franklin",
3
- "version": "3.15.5",
3
+ "version": "3.15.7",
4
4
  "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
5
5
  "type": "module",
6
6
  "exports": {