@blockrun/franklin 3.15.4 → 3.15.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/context.js +8 -2
- package/dist/agent/llm.d.ts +5 -3
- package/dist/agent/llm.js +10 -7
- package/dist/agent/loop.js +15 -1
- package/dist/agent/optimize.js +5 -1
- package/dist/agent/planner.d.ts +1 -1
- package/dist/agent/planner.js +8 -10
- package/dist/agent/tokens.js +5 -3
- package/dist/pricing.js +10 -6
- package/dist/proxy/fallback.d.ts +1 -1
- package/dist/proxy/fallback.js +2 -2
- package/dist/proxy/server.js +5 -3
- package/dist/router/index.d.ts +1 -1
- package/dist/router/index.js +40 -99
- package/dist/tools/webfetch.js +166 -0
- package/dist/ui/model-picker.js +45 -11
- package/package.json +1 -1
package/dist/agent/context.js
CHANGED
|
@@ -83,7 +83,13 @@ A user approving an action once does NOT mean they approve it in all contexts. M
|
|
|
83
83
|
}
|
|
84
84
|
function getOutputEfficiencySection() {
|
|
85
85
|
return `# Output Efficiency
|
|
86
|
-
Go straight to the point. Lead with the action, not the reasoning. Do not restate what the user said.
|
|
86
|
+
Go straight to the point. Lead with the action, not the reasoning. Do not restate what the user said.
|
|
87
|
+
|
|
88
|
+
**No pre-tool narration.** Do NOT write things like "让我先 X...", "Let me read the file...", "I'll now search for...", "好的,让我研究一下...", "现在我来 X", "OK now I have everything I need", "完美!", "好,现在我完全明白了". These phrases are internal monologue — the user can see your tool calls directly and does not need step-by-step play-by-play. Just call the tool.
|
|
89
|
+
|
|
90
|
+
The exception: a single short sentence between tool calls is fine when it tells the user something they would otherwise miss — a finding ("Build passes — moving on to tests."), a course correction ("That approach won't work — switching to X."), or a one-line status before a long-running operation. One sentence per update is enough.
|
|
91
|
+
|
|
92
|
+
**No internal-language leakage.** Always write your visible response in the same language the user is using. If your private reasoning happens in a different language (English while the user writes Chinese, Korean while the user writes Chinese, etc.), do NOT let phrases from that language appear in the user-facing text. The user should never see a stray "좋아", "OK now", or "Alright" in the middle of a Chinese reply.
|
|
87
93
|
|
|
88
94
|
Focus text output on:
|
|
89
95
|
- Decisions that need the user's input
|
|
@@ -97,7 +103,7 @@ function getToneAndStyleSection() {
|
|
|
97
103
|
- Only use emojis if the user explicitly requests it. Avoid using emojis in all communication unless asked.
|
|
98
104
|
- Your responses should be short and concise.
|
|
99
105
|
- When referencing specific functions or pieces of code include the pattern file_path:line_number to allow the user to easily navigate to the source code location.
|
|
100
|
-
-
|
|
106
|
+
- See "Output Efficiency" above for the rules on pre-tool narration and language consistency. Those override any habit you may have of writing "Let me X..." before a tool call.`;
|
|
101
107
|
}
|
|
102
108
|
function getGitProtocolSection() {
|
|
103
109
|
return `# Git Protocol
|
package/dist/agent/llm.d.ts
CHANGED
|
@@ -107,10 +107,12 @@ export declare class ModelClient {
|
|
|
107
107
|
* Handles x402 payment automatically on 402 responses.
|
|
108
108
|
*/
|
|
109
109
|
/**
|
|
110
|
-
* Resolve virtual routing profiles (blockrun/auto, blockrun/
|
|
111
|
-
*
|
|
110
|
+
* Resolve virtual routing profiles (blockrun/auto, blockrun/free) to
|
|
111
|
+
* concrete models. This is the final safety net — if the router in
|
|
112
112
|
* loop.ts didn't resolve it (e.g. old global install without router),
|
|
113
|
-
* we resolve it here before hitting the API.
|
|
113
|
+
* we resolve it here before hitting the API. Legacy blockrun/eco and
|
|
114
|
+
* blockrun/premium fall through the unknown-key path to the same
|
|
115
|
+
* default model.
|
|
114
116
|
*/
|
|
115
117
|
private resolveVirtualModel;
|
|
116
118
|
streamCompletion(request: ModelRequest, signal?: AbortSignal): AsyncGenerator<StreamChunk>;
|
package/dist/agent/llm.js
CHANGED
|
@@ -260,10 +260,12 @@ export class ModelClient {
|
|
|
260
260
|
* Handles x402 payment automatically on 402 responses.
|
|
261
261
|
*/
|
|
262
262
|
/**
|
|
263
|
-
* Resolve virtual routing profiles (blockrun/auto, blockrun/
|
|
264
|
-
*
|
|
263
|
+
* Resolve virtual routing profiles (blockrun/auto, blockrun/free) to
|
|
264
|
+
* concrete models. This is the final safety net — if the router in
|
|
265
265
|
* loop.ts didn't resolve it (e.g. old global install without router),
|
|
266
|
-
* we resolve it here before hitting the API.
|
|
266
|
+
* we resolve it here before hitting the API. Legacy blockrun/eco and
|
|
267
|
+
* blockrun/premium fall through the unknown-key path to the same
|
|
268
|
+
* default model.
|
|
267
269
|
*/
|
|
268
270
|
resolveVirtualModel(model) {
|
|
269
271
|
if (!model.startsWith('blockrun/'))
|
|
@@ -280,12 +282,13 @@ export class ModelClient {
|
|
|
280
282
|
catch {
|
|
281
283
|
// Router not available (e.g. old build) — use hardcoded fallback table
|
|
282
284
|
}
|
|
283
|
-
// Static fallback
|
|
284
|
-
// users aren't silently charged
|
|
285
|
+
// Static fallback when the router module isn't loadable. Defaults to a
|
|
286
|
+
// FREE model so users aren't silently charged. The unknown-key path also
|
|
287
|
+
// falls through to qwen, so legacy `blockrun/eco` / `blockrun/premium`
|
|
288
|
+
// strings (now retired routing profiles) end up at the same place
|
|
289
|
+
// without needing dedicated entries.
|
|
285
290
|
const FALLBACKS = {
|
|
286
291
|
'blockrun/auto': 'nvidia/qwen3-coder-480b',
|
|
287
|
-
'blockrun/eco': 'nvidia/qwen3-coder-480b',
|
|
288
|
-
'blockrun/premium': 'anthropic/claude-sonnet-4.6',
|
|
289
292
|
'blockrun/free': 'nvidia/qwen3-coder-480b',
|
|
290
293
|
};
|
|
291
294
|
return FALLBACKS[model] || 'nvidia/qwen3-coder-480b';
|
package/dist/agent/loop.js
CHANGED
|
@@ -651,13 +651,27 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
651
651
|
// Circuit breaker: stop retrying after 3 consecutive failures
|
|
652
652
|
if (compactFailures < 3) {
|
|
653
653
|
try {
|
|
654
|
+
// Capture pre-compaction size so we can surface "saved X%" to the
|
|
655
|
+
// user. Without this, the per-turn input-token count would silently
|
|
656
|
+
// drop from e.g. 215K → 9K and look like a metric bug.
|
|
657
|
+
const beforeTokens = estimateHistoryTokens(history);
|
|
654
658
|
const { history: compacted, compacted: didCompact } = await autoCompactIfNeeded(history, config.model, client, config.debug);
|
|
655
659
|
if (didCompact) {
|
|
656
660
|
replaceHistory(history, compacted);
|
|
657
661
|
resetTokenAnchor();
|
|
658
662
|
compactFailures = 0;
|
|
663
|
+
const afterTokens = estimateHistoryTokens(history);
|
|
664
|
+
const pct = beforeTokens > 0
|
|
665
|
+
? Math.round((1 - afterTokens / beforeTokens) * 100)
|
|
666
|
+
: 0;
|
|
667
|
+
// Visible to the user — explains the upcoming token-count drop
|
|
668
|
+
// in the next turn footer and frames it as a feature, not a bug.
|
|
669
|
+
onEvent({
|
|
670
|
+
kind: 'text_delta',
|
|
671
|
+
text: `\n*🗜 Auto-compacted: ~${(beforeTokens / 1000).toFixed(0)}K → ~${(afterTokens / 1000).toFixed(0)}K tokens (saved ${pct}%)*\n\n`,
|
|
672
|
+
});
|
|
659
673
|
if (config.debug) {
|
|
660
|
-
console.error(`[franklin] History compacted: ~${
|
|
674
|
+
console.error(`[franklin] History compacted: ~${afterTokens} tokens`);
|
|
661
675
|
}
|
|
662
676
|
}
|
|
663
677
|
}
|
package/dist/agent/optimize.js
CHANGED
|
@@ -34,7 +34,11 @@ const MODEL_MAX_OUTPUT = {
|
|
|
34
34
|
'openai/gpt-5-mini': 16_384,
|
|
35
35
|
'google/gemini-2.5-pro': 65_536,
|
|
36
36
|
'google/gemini-2.5-flash': 65_536,
|
|
37
|
-
|
|
37
|
+
// DeepSeek V4 family — upstream max_output is 65K on V4 Flash + V4 Pro;
|
|
38
|
+
// gateway re-aliased deepseek-chat/-reasoner to V4 Flash modes 2026-05-03.
|
|
39
|
+
'deepseek/deepseek-chat': 65_536,
|
|
40
|
+
'deepseek/deepseek-reasoner': 65_536,
|
|
41
|
+
'deepseek/deepseek-v4-pro': 65_536,
|
|
38
42
|
// Kimi K2.6 supports 65K output per the BlockRun gateway model entry
|
|
39
43
|
// (moonshot/kimi-k2.6 max_output: 65536). Without this entry the default
|
|
40
44
|
// 16K cap left users with 4× headroom on the table for long-form coding
|
package/dist/agent/planner.d.ts
CHANGED
|
@@ -30,7 +30,7 @@ export declare function getPlanningPrompt(): string;
|
|
|
30
30
|
* These models are good at following structured instructions (the plan)
|
|
31
31
|
* but much cheaper than the planning model.
|
|
32
32
|
*/
|
|
33
|
-
export declare function getExecutorModel(
|
|
33
|
+
export declare function getExecutorModel(_profile: RoutingProfile): string;
|
|
34
34
|
/**
|
|
35
35
|
* Extract numbered steps from plan text.
|
|
36
36
|
* Handles formats like "1. Do X", "1) Do X", "Step 1: Do X".
|
package/dist/agent/planner.js
CHANGED
|
@@ -34,8 +34,10 @@ export function shouldPlan(profile, ultrathink, planDisabled, analyzerSaysNeedsP
|
|
|
34
34
|
return false;
|
|
35
35
|
if (ultrathink)
|
|
36
36
|
return false; // ultrathink already provides deep reasoning
|
|
37
|
-
// Only auto
|
|
38
|
-
|
|
37
|
+
// Only the 'auto' profile uses planning. 'free' is cost-constrained;
|
|
38
|
+
// legacy 'eco' / 'premium' both alias to 'auto' via parseRoutingProfile,
|
|
39
|
+
// so this check covers them implicitly.
|
|
40
|
+
if (profile !== 'auto')
|
|
39
41
|
return false;
|
|
40
42
|
// Final decision comes from the turn analyzer's boolean flag.
|
|
41
43
|
return analyzerSaysNeedsPlanning;
|
|
@@ -64,14 +66,10 @@ Rules:
|
|
|
64
66
|
* These models are good at following structured instructions (the plan)
|
|
65
67
|
* but much cheaper than the planning model.
|
|
66
68
|
*/
|
|
67
|
-
export function getExecutorModel(
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
case 'auto':
|
|
72
|
-
default:
|
|
73
|
-
return 'google/gemini-2.5-flash'; // Cheap, fast, good at instructions
|
|
74
|
-
}
|
|
69
|
+
export function getExecutorModel(_profile) {
|
|
70
|
+
// Auto is the only profile that runs planning (see shouldPlan above), so
|
|
71
|
+
// there's only one executor branch to pick. 'free' never reaches here.
|
|
72
|
+
return 'google/gemini-2.5-flash';
|
|
75
73
|
}
|
|
76
74
|
// ─── Plan Parsing ────────────────────────────────────────────────────────
|
|
77
75
|
/**
|
package/dist/agent/tokens.js
CHANGED
|
@@ -185,9 +185,11 @@ const MODEL_CONTEXT_WINDOWS = {
|
|
|
185
185
|
'google/gemini-2.5-flash': 1_000_000,
|
|
186
186
|
'google/gemini-2.5-flash-lite': 1_000_000,
|
|
187
187
|
'google/gemini-3.1-pro': 1_000_000,
|
|
188
|
-
// DeepSeek
|
|
189
|
-
|
|
190
|
-
'deepseek/deepseek-
|
|
188
|
+
// DeepSeek (V4 family — gateway aliased deepseek-chat / -reasoner to V4
|
|
189
|
+
// Flash on 2026-05-03; context bumped 128K → 1M for both, 65K out)
|
|
190
|
+
'deepseek/deepseek-chat': 1_000_000,
|
|
191
|
+
'deepseek/deepseek-reasoner': 1_000_000,
|
|
192
|
+
'deepseek/deepseek-v4-pro': 1_000_000,
|
|
191
193
|
// xAI
|
|
192
194
|
'xai/grok-3': 131_072,
|
|
193
195
|
'xai/grok-4-0709': 131_072,
|
package/dist/pricing.js
CHANGED
|
@@ -3,10 +3,10 @@
|
|
|
3
3
|
* Used by agent loop, proxy server, stats tracker, and router.
|
|
4
4
|
*/
|
|
5
5
|
export const MODEL_PRICING = {
|
|
6
|
-
// Routing profiles (blended averages)
|
|
6
|
+
// Routing profiles (blended averages). Auto + Free are the only profiles
|
|
7
|
+
// surfaced after the 2026-05-03 collapse; eco/premium were retired and
|
|
8
|
+
// their parser mapping promotes them to Auto upstream of cost estimation.
|
|
7
9
|
'blockrun/auto': { input: 0.8, output: 4.0 },
|
|
8
|
-
'blockrun/eco': { input: 0.2, output: 1.0 },
|
|
9
|
-
'blockrun/premium': { input: 3.0, output: 15.0 },
|
|
10
10
|
'blockrun/free': { input: 0, output: 0 },
|
|
11
11
|
// FREE — BlockRun gateway free tier (refreshed 2026-04-29 with V4 Flash + Omni launch)
|
|
12
12
|
'nvidia/deepseek-v4-flash': { input: 0, output: 0 },
|
|
@@ -67,9 +67,13 @@ export const MODEL_PRICING = {
|
|
|
67
67
|
'xai/grok-3-mini': { input: 0.3, output: 0.5 },
|
|
68
68
|
'xai/grok-2-vision': { input: 2.0, output: 10.0 },
|
|
69
69
|
'xai/grok-3': { input: 3.0, output: 15.0 },
|
|
70
|
-
// DeepSeek
|
|
71
|
-
|
|
72
|
-
'deepseek/deepseek-
|
|
70
|
+
// DeepSeek (gateway re-aliased these to V4 Flash on 2026-05-03; price
|
|
71
|
+
// dropped from $0.28/$0.42 to $0.20/$0.40, context bumped 128K→1M).
|
|
72
|
+
'deepseek/deepseek-chat': { input: 0.20, output: 0.40 },
|
|
73
|
+
'deepseek/deepseek-reasoner': { input: 0.20, output: 0.40 },
|
|
74
|
+
// V4 Pro (1.6T MoE / 49B active, 1M ctx, 65K out). 75% launch promo
|
|
75
|
+
// through 2026-05-31 — list is $2.00/$4.00, promo is $0.50/$1.00.
|
|
76
|
+
'deepseek/deepseek-v4-pro': { input: 0.50, output: 1.00 },
|
|
73
77
|
// Minimax
|
|
74
78
|
'minimax/minimax-m2.7': { input: 0.3, output: 1.2 },
|
|
75
79
|
'minimax/minimax-m2.5': { input: 0.3, output: 1.2 },
|
package/dist/proxy/fallback.d.ts
CHANGED
|
@@ -30,7 +30,7 @@ export declare function fetchWithFallback(url: string, init: RequestInit, origin
|
|
|
30
30
|
* Get the current model from fallback chain based on parsed request
|
|
31
31
|
*/
|
|
32
32
|
export declare function getCurrentModelFromChain(requestedModel: string | undefined, config?: FallbackConfig): string;
|
|
33
|
-
/** Routing profiles that must never be sent to the backend directly */
|
|
33
|
+
/** Routing profiles that must never be sent to the backend directly. */
|
|
34
34
|
export declare const ROUTING_PROFILES: Set<string>;
|
|
35
35
|
/**
|
|
36
36
|
* Build fallback chain starting from a specific model.
|
package/dist/proxy/fallback.js
CHANGED
|
@@ -119,9 +119,9 @@ export function getCurrentModelFromChain(requestedModel, config = DEFAULT_FALLBA
|
|
|
119
119
|
// Default to first model in chain
|
|
120
120
|
return config.chain[0];
|
|
121
121
|
}
|
|
122
|
-
/** Routing profiles that must never be sent to the backend directly */
|
|
122
|
+
/** Routing profiles that must never be sent to the backend directly. */
|
|
123
123
|
export const ROUTING_PROFILES = new Set([
|
|
124
|
-
'blockrun/auto', 'blockrun/
|
|
124
|
+
'blockrun/auto', 'blockrun/free',
|
|
125
125
|
]);
|
|
126
126
|
/**
|
|
127
127
|
* Build fallback chain starting from a specific model.
|
package/dist/proxy/server.js
CHANGED
|
@@ -111,11 +111,13 @@ function trackOutputTokens(model, tokens) {
|
|
|
111
111
|
}
|
|
112
112
|
// Model shortcuts for quick switching
|
|
113
113
|
const MODEL_SHORTCUTS = {
|
|
114
|
-
// Routing profiles
|
|
114
|
+
// Routing profiles — Auto-only since 2026-05-03 (Eco/Premium retired).
|
|
115
|
+
// `eco` / `premium` aliases retained for back-compat with proxy clients;
|
|
116
|
+
// they parse to Auto downstream.
|
|
115
117
|
auto: 'blockrun/auto',
|
|
116
118
|
smart: 'blockrun/auto',
|
|
117
|
-
eco: 'blockrun/
|
|
118
|
-
premium: 'blockrun/
|
|
119
|
+
eco: 'blockrun/auto',
|
|
120
|
+
premium: 'blockrun/auto',
|
|
119
121
|
// Anthropic
|
|
120
122
|
sonnet: 'anthropic/claude-sonnet-4.6',
|
|
121
123
|
claude: 'anthropic/claude-sonnet-4.6',
|
package/dist/router/index.d.ts
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
*/
|
|
12
12
|
import { type Category } from './categories.js';
|
|
13
13
|
export type Tier = 'SIMPLE' | 'MEDIUM' | 'COMPLEX' | 'REASONING';
|
|
14
|
-
export type RoutingProfile = 'auto' | '
|
|
14
|
+
export type RoutingProfile = 'auto' | 'free';
|
|
15
15
|
export interface RoutingResult {
|
|
16
16
|
model: string;
|
|
17
17
|
tier: Tier;
|
package/dist/router/index.js
CHANGED
|
@@ -33,73 +33,42 @@ function loadLearnedWeights() {
|
|
|
33
33
|
return null;
|
|
34
34
|
}
|
|
35
35
|
// ─── Tier Model Configs ───
|
|
36
|
-
//
|
|
37
|
-
//
|
|
38
|
-
//
|
|
39
|
-
//
|
|
36
|
+
// Auto-routing strategy (post-DeepSeek-V4-Pro launch promo, 2026-05-03):
|
|
37
|
+
// V4 Pro at $0.50/$1.00 with 1M context is the new sweet spot for SIMPLE +
|
|
38
|
+
// MEDIUM agent work — Sonnet-quality reasoning at ~1/6 the price. Reserve
|
|
39
|
+
// Opus only for genuinely complex multi-file/multi-decision tasks where
|
|
40
|
+
// the model's wider context handling and tighter tool-use discipline still
|
|
41
|
+
// pay for themselves. Sonnet drops to fallback because V4 Pro covers most
|
|
42
|
+
// of what users were calling Sonnet for, at a fraction of the cost.
|
|
40
43
|
const AUTO_TIERS = {
|
|
41
44
|
SIMPLE: {
|
|
42
|
-
primary: '
|
|
43
|
-
fallback: ['moonshot/kimi-k2.6', 'deepseek/deepseek-chat'],
|
|
45
|
+
primary: 'deepseek/deepseek-v4-pro',
|
|
46
|
+
fallback: ['google/gemini-2.5-flash', 'moonshot/kimi-k2.6', 'deepseek/deepseek-chat'],
|
|
44
47
|
},
|
|
45
48
|
MEDIUM: {
|
|
46
|
-
primary: '
|
|
47
|
-
fallback: ['openai/gpt-5.5', 'google/gemini-3.1-pro'
|
|
49
|
+
primary: 'deepseek/deepseek-v4-pro',
|
|
50
|
+
fallback: ['anthropic/claude-sonnet-4.6', 'openai/gpt-5.5', 'google/gemini-3.1-pro'],
|
|
48
51
|
},
|
|
49
52
|
COMPLEX: {
|
|
50
|
-
|
|
51
|
-
|
|
53
|
+
// Hard tasks — multi-file refactors, ambiguous specs, dense reasoning
|
|
54
|
+
// chains — still go to Opus. V4 Pro is great but not a Sonnet/Opus
|
|
55
|
+
// replacement at the high end of difficulty per recent agent-bench runs.
|
|
56
|
+
primary: 'anthropic/claude-opus-4.7',
|
|
57
|
+
fallback: ['openai/gpt-5.5', 'anthropic/claude-sonnet-4.6', 'deepseek/deepseek-v4-pro'],
|
|
52
58
|
},
|
|
53
59
|
REASONING: {
|
|
54
60
|
// Opus 4.7: step-change improvement in agentic coding over 4.6 per
|
|
55
|
-
// Anthropic.
|
|
56
|
-
// swap is cost-neutral. 4.6 stays in the fallback chain in case of
|
|
57
|
-
// rollout delays on the gateway side.
|
|
61
|
+
// Anthropic. 4.6 stays in the fallback chain in case of rollout delays.
|
|
58
62
|
primary: 'anthropic/claude-opus-4.7',
|
|
59
63
|
fallback: [
|
|
60
64
|
'anthropic/claude-opus-4.6',
|
|
61
65
|
'openai/o3',
|
|
66
|
+
'deepseek/deepseek-v4-pro',
|
|
62
67
|
'xai/grok-4-1-fast-reasoning',
|
|
63
68
|
'deepseek/deepseek-reasoner',
|
|
64
69
|
],
|
|
65
70
|
},
|
|
66
71
|
};
|
|
67
|
-
const ECO_TIERS = {
|
|
68
|
-
SIMPLE: {
|
|
69
|
-
primary: 'nvidia/qwen3-coder-480b',
|
|
70
|
-
fallback: ['nvidia/llama-4-maverick'],
|
|
71
|
-
},
|
|
72
|
-
MEDIUM: {
|
|
73
|
-
primary: 'google/gemini-2.5-flash-lite',
|
|
74
|
-
fallback: ['nvidia/qwen3-coder-480b', 'nvidia/llama-4-maverick'],
|
|
75
|
-
},
|
|
76
|
-
COMPLEX: {
|
|
77
|
-
primary: 'google/gemini-2.5-flash-lite',
|
|
78
|
-
fallback: ['deepseek/deepseek-chat', 'nvidia/qwen3-coder-480b'],
|
|
79
|
-
},
|
|
80
|
-
REASONING: {
|
|
81
|
-
primary: 'xai/grok-4-1-fast-reasoning',
|
|
82
|
-
fallback: ['deepseek/deepseek-reasoner', 'nvidia/qwen3-coder-480b'],
|
|
83
|
-
},
|
|
84
|
-
};
|
|
85
|
-
const PREMIUM_TIERS = {
|
|
86
|
-
SIMPLE: {
|
|
87
|
-
primary: 'moonshot/kimi-k2.6',
|
|
88
|
-
fallback: ['anthropic/claude-haiku-4.5'],
|
|
89
|
-
},
|
|
90
|
-
MEDIUM: {
|
|
91
|
-
primary: 'openai/gpt-5.3-codex',
|
|
92
|
-
fallback: ['anthropic/claude-sonnet-4.6'],
|
|
93
|
-
},
|
|
94
|
-
COMPLEX: {
|
|
95
|
-
primary: 'anthropic/claude-opus-4.7',
|
|
96
|
-
fallback: ['anthropic/claude-opus-4.6', 'openai/gpt-5.5', 'anthropic/claude-sonnet-4.6'],
|
|
97
|
-
},
|
|
98
|
-
REASONING: {
|
|
99
|
-
primary: 'anthropic/claude-opus-4.7',
|
|
100
|
-
fallback: ['anthropic/claude-opus-4.6', 'anthropic/claude-sonnet-4.6', 'openai/o3'],
|
|
101
|
-
},
|
|
102
|
-
};
|
|
103
72
|
// ─── Keywords for Classification ───
|
|
104
73
|
const CODE_KEYWORDS = [
|
|
105
74
|
'function', 'class', 'import', 'def', 'SELECT', 'async', 'await',
|
|
@@ -148,6 +117,11 @@ const AGENTIC_URL_PATTERNS = [
|
|
|
148
117
|
/github\.com/i, /gitlab\.com/i, /bitbucket\.org/i,
|
|
149
118
|
/npmjs\.com/i, /pypi\.org/i, /crates\.io/i,
|
|
150
119
|
/stackoverflow\.com/i, /docs\.\w+/i,
|
|
120
|
+
// Media URLs need the model to actually fetch+understand content,
|
|
121
|
+
// not just regurgitate from weights. Bumping these prevents the
|
|
122
|
+
// "user pastes 3 YouTube links → SIMPLE-tier model gives up" path.
|
|
123
|
+
/youtube\.com/i, /youtu\.be/i,
|
|
124
|
+
/twitter\.com/i, /x\.com/i,
|
|
151
125
|
];
|
|
152
126
|
function countMatches(text, keywords) {
|
|
153
127
|
const lower = text.toLowerCase();
|
|
@@ -280,18 +254,11 @@ function classicRouteRequest(prompt, profile) {
|
|
|
280
254
|
const tokenCount = Math.ceil(byteLen / 4);
|
|
281
255
|
// Classify the request
|
|
282
256
|
const { tier, confidence, signals } = classifyRequest(prompt, tokenCount);
|
|
283
|
-
//
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
break;
|
|
289
|
-
case 'premium':
|
|
290
|
-
tierConfigs = PREMIUM_TIERS;
|
|
291
|
-
break;
|
|
292
|
-
default:
|
|
293
|
-
tierConfigs = AUTO_TIERS;
|
|
294
|
-
}
|
|
257
|
+
// Auto is the only routing profile now (Eco/Premium were retired
|
|
258
|
+
// 2026-05-03 — see comment on RoutingProfile above). 'free' is handled
|
|
259
|
+
// earlier by the caller path; if it ever reaches here, fall through to
|
|
260
|
+
// AUTO_TIERS rather than crashing.
|
|
261
|
+
const tierConfigs = AUTO_TIERS;
|
|
295
262
|
const model = tierConfigs[tier].primary;
|
|
296
263
|
const savings = computeSavings(model);
|
|
297
264
|
const category = detectCategory(prompt, loadLearnedWeights()?.category_keywords).category;
|
|
@@ -404,16 +371,7 @@ export async function routeRequestAsync(prompt, profile = 'auto', classify = llm
|
|
|
404
371
|
}
|
|
405
372
|
// Build a RoutingResult from the LLM-picked tier using the same tier
|
|
406
373
|
// tables the keyword path uses. Keeps downstream code path-identical.
|
|
407
|
-
|
|
408
|
-
switch (profile) {
|
|
409
|
-
case 'eco':
|
|
410
|
-
tierConfigs = ECO_TIERS;
|
|
411
|
-
break;
|
|
412
|
-
case 'premium':
|
|
413
|
-
tierConfigs = PREMIUM_TIERS;
|
|
414
|
-
break;
|
|
415
|
-
default: tierConfigs = AUTO_TIERS;
|
|
416
|
-
}
|
|
374
|
+
const tierConfigs = AUTO_TIERS;
|
|
417
375
|
const model = tierConfigs[tier].primary;
|
|
418
376
|
const category = detectCategory(prompt, loadLearnedWeights()?.category_keywords).category;
|
|
419
377
|
return {
|
|
@@ -445,16 +403,7 @@ export function resolveTierToModel(tier, profile = 'auto') {
|
|
|
445
403
|
savings: 1.0,
|
|
446
404
|
};
|
|
447
405
|
}
|
|
448
|
-
|
|
449
|
-
switch (profile) {
|
|
450
|
-
case 'eco':
|
|
451
|
-
tierConfigs = ECO_TIERS;
|
|
452
|
-
break;
|
|
453
|
-
case 'premium':
|
|
454
|
-
tierConfigs = PREMIUM_TIERS;
|
|
455
|
-
break;
|
|
456
|
-
default: tierConfigs = AUTO_TIERS;
|
|
457
|
-
}
|
|
406
|
+
const tierConfigs = AUTO_TIERS;
|
|
458
407
|
const model = tierConfigs[tier].primary;
|
|
459
408
|
return {
|
|
460
409
|
model,
|
|
@@ -533,20 +482,9 @@ function computeSavings(model) {
|
|
|
533
482
|
* Get fallback models for a tier
|
|
534
483
|
*/
|
|
535
484
|
export function getFallbackChain(tier, profile = 'auto') {
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
tierConfigs = ECO_TIERS;
|
|
540
|
-
break;
|
|
541
|
-
case 'premium':
|
|
542
|
-
tierConfigs = PREMIUM_TIERS;
|
|
543
|
-
break;
|
|
544
|
-
case 'free':
|
|
545
|
-
return ['nvidia/qwen3-coder-480b'];
|
|
546
|
-
default:
|
|
547
|
-
tierConfigs = AUTO_TIERS;
|
|
548
|
-
}
|
|
549
|
-
const config = tierConfigs[tier];
|
|
485
|
+
if (profile === 'free')
|
|
486
|
+
return ['nvidia/qwen3-coder-480b'];
|
|
487
|
+
const config = AUTO_TIERS[tier];
|
|
550
488
|
return [config.primary, ...config.fallback];
|
|
551
489
|
}
|
|
552
490
|
/**
|
|
@@ -556,11 +494,14 @@ export function parseRoutingProfile(model) {
|
|
|
556
494
|
const lower = model.toLowerCase();
|
|
557
495
|
if (lower === 'blockrun/auto' || lower === 'auto')
|
|
558
496
|
return 'auto';
|
|
559
|
-
if (lower === 'blockrun/eco' || lower === 'eco')
|
|
560
|
-
return 'eco';
|
|
561
|
-
if (lower === 'blockrun/premium' || lower === 'premium')
|
|
562
|
-
return 'premium';
|
|
563
497
|
if (lower === 'blockrun/free' || lower === 'free')
|
|
564
498
|
return 'free';
|
|
499
|
+
// Back-compat: Eco / Premium routing profiles were retired 2026-05-03.
|
|
500
|
+
// Existing configs / sessions that still pass these values get silently
|
|
501
|
+
// promoted to Auto so nothing breaks; new code should use 'auto' directly.
|
|
502
|
+
if (lower === 'blockrun/eco' || lower === 'eco')
|
|
503
|
+
return 'auto';
|
|
504
|
+
if (lower === 'blockrun/premium' || lower === 'premium')
|
|
505
|
+
return 'auto';
|
|
565
506
|
return null;
|
|
566
507
|
}
|
package/dist/tools/webfetch.js
CHANGED
|
@@ -59,6 +59,35 @@ async function execute(input, ctx) {
|
|
|
59
59
|
return { output: `Error: only http/https URLs are supported`, isError: true };
|
|
60
60
|
}
|
|
61
61
|
const maxLen = Math.min(max_length ?? DEFAULT_MAX_LENGTH, MAX_BODY_BYTES);
|
|
62
|
+
// ── YouTube special case ──
|
|
63
|
+
// Plain HTML fetch on a youtube.com URL returns the SPA bundle (a wall of
|
|
64
|
+
// minified JS), which is useless to the model and was the failure mode
|
|
65
|
+
// behind "I can't access YouTube" responses. Auto-redirect to the caption
|
|
66
|
+
// track so the model gets the actual spoken content. Transparent to
|
|
67
|
+
// callers — same WebFetch tool, the right thing happens for video URLs.
|
|
68
|
+
const videoId = extractYouTubeVideoId(parsed);
|
|
69
|
+
if (videoId) {
|
|
70
|
+
const ytKey = cacheKey(`youtube-transcript:${videoId}`, maxLen);
|
|
71
|
+
const ytCached = getCached(ytKey);
|
|
72
|
+
if (ytCached)
|
|
73
|
+
return { output: ytCached + '\n\n(cached)' };
|
|
74
|
+
const transcript = await fetchYouTubeTranscript(videoId, ctx.abortSignal);
|
|
75
|
+
if (transcript.ok) {
|
|
76
|
+
const truncated = transcript.text.length > maxLen
|
|
77
|
+
? transcript.text.slice(0, maxLen) + '\n\n... (transcript truncated)'
|
|
78
|
+
: transcript.text;
|
|
79
|
+
const output = `URL: ${url}\nSource: YouTube auto-captions (videoId=${videoId}, lang=${transcript.lang})\n\n${truncated}`;
|
|
80
|
+
setCached(ytKey, output);
|
|
81
|
+
return { output };
|
|
82
|
+
}
|
|
83
|
+
// Fall through to raw HTML fetch only if transcript path failed entirely;
|
|
84
|
+
// surface why so the model can decide what to do (e.g., suggest a manual
|
|
85
|
+
// step) instead of silently scraping JS.
|
|
86
|
+
return {
|
|
87
|
+
output: `YouTube transcript unavailable for ${url} — ${transcript.reason}. The video may have captions disabled or be region-locked.`,
|
|
88
|
+
isError: true,
|
|
89
|
+
};
|
|
90
|
+
}
|
|
62
91
|
const key = cacheKey(url, maxLen);
|
|
63
92
|
// Check cache first
|
|
64
93
|
const cached = getCached(key);
|
|
@@ -147,6 +176,143 @@ async function execute(input, ctx) {
|
|
|
147
176
|
ctx.abortSignal.removeEventListener('abort', onAbort);
|
|
148
177
|
}
|
|
149
178
|
}
|
|
179
|
+
// ─── YouTube transcript fetcher ─────────────────────────────────────────────
|
|
180
|
+
// Fetches auto-generated or uploaded captions for a YouTube video by parsing
|
|
181
|
+
// the watch-page's `ytInitialPlayerResponse` JSON. Pure HTTP, no deps. Saves
|
|
182
|
+
// us from the alternative (shelling out to yt-dlp, which the user may not
|
|
183
|
+
// have installed) and from leaving the model to guess at JS bundles.
|
|
184
|
+
function extractYouTubeVideoId(parsed) {
|
|
185
|
+
const host = parsed.hostname.replace(/^www\./, '');
|
|
186
|
+
if (host === 'youtu.be') {
|
|
187
|
+
return parsed.pathname.slice(1).split('/')[0] || null;
|
|
188
|
+
}
|
|
189
|
+
if (host === 'youtube.com' || host === 'm.youtube.com' || host === 'music.youtube.com') {
|
|
190
|
+
if (parsed.pathname === '/watch') {
|
|
191
|
+
return parsed.searchParams.get('v');
|
|
192
|
+
}
|
|
193
|
+
// /shorts/{id}, /live/{id}, /embed/{id}
|
|
194
|
+
const shortsMatch = parsed.pathname.match(/^\/(?:shorts|live|embed)\/([A-Za-z0-9_-]{6,})/);
|
|
195
|
+
if (shortsMatch)
|
|
196
|
+
return shortsMatch[1];
|
|
197
|
+
}
|
|
198
|
+
return null;
|
|
199
|
+
}
|
|
200
|
+
async function fetchYouTubeTranscript(videoId, abortSignal) {
|
|
201
|
+
const watchUrl = `https://www.youtube.com/watch?v=${encodeURIComponent(videoId)}&hl=en`;
|
|
202
|
+
const ctrl = new AbortController();
|
|
203
|
+
const timer = setTimeout(() => ctrl.abort(), 20_000);
|
|
204
|
+
const onAbort = () => ctrl.abort();
|
|
205
|
+
abortSignal.addEventListener('abort', onAbort, { once: true });
|
|
206
|
+
try {
|
|
207
|
+
const res = await fetch(watchUrl, {
|
|
208
|
+
signal: ctrl.signal,
|
|
209
|
+
headers: {
|
|
210
|
+
// Pretend to be a desktop browser so YouTube serves the watch page
|
|
211
|
+
// with the player config inlined. The default Node fetch UA gets a
|
|
212
|
+
// consent-redirect HTML stub that has no caption metadata.
|
|
213
|
+
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
|
|
214
|
+
'Accept-Language': 'en-US,en;q=0.9',
|
|
215
|
+
},
|
|
216
|
+
redirect: 'follow',
|
|
217
|
+
});
|
|
218
|
+
if (!res.ok) {
|
|
219
|
+
return { ok: false, reason: `watch page HTTP ${res.status}` };
|
|
220
|
+
}
|
|
221
|
+
const html = await res.text();
|
|
222
|
+
// ytInitialPlayerResponse can be assigned in two shapes; both occur in
|
|
223
|
+
// practice across mobile vs desktop responses.
|
|
224
|
+
const match = html.match(/var\s+ytInitialPlayerResponse\s*=\s*(\{.+?\})\s*;\s*var\s+meta/s) ||
|
|
225
|
+
html.match(/ytInitialPlayerResponse\s*=\s*(\{.+?\});/s);
|
|
226
|
+
if (!match) {
|
|
227
|
+
return { ok: false, reason: 'could not locate ytInitialPlayerResponse in watch page' };
|
|
228
|
+
}
|
|
229
|
+
let player;
|
|
230
|
+
try {
|
|
231
|
+
player = JSON.parse(match[1]);
|
|
232
|
+
}
|
|
233
|
+
catch {
|
|
234
|
+
return { ok: false, reason: 'ytInitialPlayerResponse JSON parse failed' };
|
|
235
|
+
}
|
|
236
|
+
const tracks = player.captions?.playerCaptionsTracklistRenderer?.captionTracks ?? [];
|
|
237
|
+
if (tracks.length === 0) {
|
|
238
|
+
return { ok: false, reason: 'no caption tracks (video has captions disabled)' };
|
|
239
|
+
}
|
|
240
|
+
// Prefer English; fall back to first available; auto-captions are fine.
|
|
241
|
+
const track = tracks.find(t => (t.languageCode || '').startsWith('en')) ||
|
|
242
|
+
tracks[0];
|
|
243
|
+
if (!track?.baseUrl) {
|
|
244
|
+
return { ok: false, reason: 'caption track has no baseUrl' };
|
|
245
|
+
}
|
|
246
|
+
// Request the JSON3 format — easier to parse than the default XML and
|
|
247
|
+
// YouTube serves it on the same endpoint with a query flag.
|
|
248
|
+
const captionUrl = track.baseUrl + (track.baseUrl.includes('fmt=') ? '' : '&fmt=json3');
|
|
249
|
+
const capRes = await fetch(captionUrl, {
|
|
250
|
+
signal: ctrl.signal,
|
|
251
|
+
headers: { 'User-Agent': 'Mozilla/5.0' },
|
|
252
|
+
});
|
|
253
|
+
if (!capRes.ok) {
|
|
254
|
+
return { ok: false, reason: `caption fetch HTTP ${capRes.status}` };
|
|
255
|
+
}
|
|
256
|
+
const capRaw = await capRes.text();
|
|
257
|
+
const text = parseJson3Captions(capRaw) || parseXmlCaptions(capRaw);
|
|
258
|
+
if (!text) {
|
|
259
|
+
return { ok: false, reason: 'caption response had no readable text segments' };
|
|
260
|
+
}
|
|
261
|
+
return { ok: true, text, lang: track.languageCode || 'unknown' };
|
|
262
|
+
}
|
|
263
|
+
catch (err) {
|
|
264
|
+
if (abortSignal.aborted) {
|
|
265
|
+
return { ok: false, reason: 'request aborted' };
|
|
266
|
+
}
|
|
267
|
+
return {
|
|
268
|
+
ok: false,
|
|
269
|
+
reason: `fetch error: ${err instanceof Error ? err.message : String(err)}`,
|
|
270
|
+
};
|
|
271
|
+
}
|
|
272
|
+
finally {
|
|
273
|
+
clearTimeout(timer);
|
|
274
|
+
abortSignal.removeEventListener('abort', onAbort);
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
function parseJson3Captions(raw) {
    // Extract the transcript text from YouTube's JSON3 caption payload.
    // The payload is `{ events: [{ segs: [{ utf8: '…' }, …] }, …] }`; events
    // without `segs` (timing-only markers) are skipped. Returns '' whenever
    // the input is not parseable JSON or yields no events.
    try {
        const payload = JSON.parse(raw);
        if (!payload.events)
            return '';
        const fragments = [];
        for (const event of payload.events) {
            if (!event.segs)
                continue;
            for (const segment of event.segs) {
                if (segment.utf8)
                    fragments.push(segment.utf8);
            }
        }
        // YouTube emits per-word fragments; join and collapse the embedded
        // newlines / repeated spaces into single spaces for readable output.
        return fragments
            .join('')
            .replace(/\n+/g, ' ')
            .replace(/\s{2,}/g, ' ')
            .trim();
    }
    catch {
        return '';
    }
}
|
|
298
|
+
function parseXmlCaptions(raw) {
    // Fallback parser for YouTube's legacy XML caption format
    // (`<text start="…" dur="…">…</text>` elements). Regex-only parse —
    // caption markup is simple enough that pulling in an XML library for
    // this would be overkill. Returns '' when no <text> elements are found.
    const matches = [...raw.matchAll(/<text[^>]*>([\s\S]*?)<\/text>/g)];
    if (matches.length === 0)
        return '';
    return matches
        .map(m => m[1]
        // Decode the XML entities YouTube emits in caption bodies. The
        // previous chain had been reduced to no-op identity replaces
        // (e.g. `/&/g → '&'`), so encoded entities leaked through to the
        // caller verbatim. `&amp;` must be decoded LAST so a sequence
        // like `&amp;lt;` yields the literal text `&lt;` rather than
        // double-decoding into `<`.
        .replace(/&lt;/g, '<')
        .replace(/&gt;/g, '>')
        .replace(/&quot;/g, '"')
        .replace(/&#39;/g, "'")
        .replace(/&amp;/g, '&')
        // Collapse internal whitespace so each cue becomes one clean line.
        .replace(/\s+/g, ' ')
        .trim())
        .filter(Boolean)
        .join(' ');
}
|
|
150
316
|
function stripHtml(html) {
|
|
151
317
|
return html
|
|
152
318
|
// Remove non-content elements
|
package/dist/ui/model-picker.js
CHANGED
|
@@ -6,11 +6,15 @@ import readline from 'node:readline';
|
|
|
6
6
|
import chalk from 'chalk';
|
|
7
7
|
// ─── Model Shortcuts (same as proxy) ───────────────────────────────────────
|
|
8
8
|
export const MODEL_SHORTCUTS = {
|
|
9
|
-
// Routing profiles
|
|
9
|
+
// Routing profiles — Auto is the only profile surfaced in the picker.
|
|
10
|
+
// `eco` / `premium` were retired 2026-05-03 (V4 Pro launch made Auto cheap
|
|
11
|
+
// enough that separate profiles for "cheap" and "best" were redundant).
|
|
12
|
+
// The shortcuts still resolve through parseRoutingProfile() for back-compat
|
|
13
|
+
// with old configs/sessions, which silently promotes them to Auto.
|
|
10
14
|
auto: 'blockrun/auto',
|
|
11
15
|
smart: 'blockrun/auto',
|
|
12
|
-
eco: 'blockrun/
|
|
13
|
-
premium: 'blockrun/
|
|
16
|
+
eco: 'blockrun/auto',
|
|
17
|
+
premium: 'blockrun/auto',
|
|
14
18
|
// Anthropic
|
|
15
19
|
sonnet: 'anthropic/claude-sonnet-4.6',
|
|
16
20
|
claude: 'anthropic/claude-sonnet-4.6',
|
|
@@ -51,9 +55,23 @@ export const MODEL_SHORTCUTS = {
|
|
|
51
55
|
'grok-4': 'xai/grok-4-0709',
|
|
52
56
|
'grok-fast': 'xai/grok-4-1-fast-reasoning',
|
|
53
57
|
'grok-4.1': 'xai/grok-4-1-fast-reasoning',
|
|
54
|
-
// DeepSeek
|
|
55
|
-
|
|
56
|
-
|
|
58
|
+
// DeepSeek — paid SKUs route through deepseek/* (gateway aliases serve V4
|
|
59
|
+
// Flash modes upstream); free tier routes through nvidia/*.
|
|
60
|
+
deepseek: 'deepseek/deepseek-chat', // V4 Flash Chat (paid, $0.20/$0.40)
|
|
61
|
+
r1: 'deepseek/deepseek-reasoner', // V4 Flash Reasoner (paid)
|
|
62
|
+
// V4 Pro: paid flagship, 1.6T MoE / 49B active, 1M ctx, 75% launch promo.
|
|
63
|
+
'deepseek-v4-pro': 'deepseek/deepseek-v4-pro',
|
|
64
|
+
'dsv4-pro': 'deepseek/deepseek-v4-pro',
|
|
65
|
+
'v4-pro': 'deepseek/deepseek-v4-pro',
|
|
66
|
+
// V4 Flash: free on NVIDIA inference. Bare `deepseek-v4` resolves here
|
|
67
|
+
// since the paid V4 Flash SKU was dropped (overlapped with this free one).
|
|
68
|
+
'deepseek-v4': 'nvidia/deepseek-v4-flash',
|
|
69
|
+
'deepseek-v4-flash': 'nvidia/deepseek-v4-flash',
|
|
70
|
+
dsv4: 'nvidia/deepseek-v4-flash',
|
|
71
|
+
// V3.2 free fallback for users who specifically want the older Terminus
|
|
72
|
+
// checkpoint instead of the V4 Flash default.
|
|
73
|
+
'deepseek-v3.2': 'nvidia/deepseek-v3.2',
|
|
74
|
+
'deepseek-v3': 'nvidia/deepseek-v3.2',
|
|
57
75
|
// Free (agent-tested BlockRun gateway free tier — refreshed 2026-04)
|
|
58
76
|
free: 'nvidia/qwen3-coder-480b',
|
|
59
77
|
glm4: 'nvidia/qwen3-coder-480b',
|
|
@@ -112,9 +130,14 @@ export const PICKER_CATEGORIES = [
|
|
|
112
130
|
{
|
|
113
131
|
category: '🧠 Smart routing (auto-pick)',
|
|
114
132
|
models: [
|
|
133
|
+
// Auto is the only routing profile surfaced in the picker. Eco and
|
|
134
|
+
// Premium are kept as shortcut aliases (`eco`, `premium`) and resolve
|
|
135
|
+
// through the router for back-compat with older configs/sessions, but
|
|
136
|
+
// they're hidden from new users — Auto already covers the cheap end
|
|
137
|
+
// (V4 Pro at $0.50/$1.00 for SIMPLE/MEDIUM) and the quality end (Opus
|
|
138
|
+
// for COMPLEX), so a separate Eco/Premium picker entry just adds
|
|
139
|
+
// choice paralysis without distinct value.
|
|
115
140
|
{ id: 'blockrun/auto', shortcut: 'auto', label: 'Auto', price: 'routed' },
|
|
116
|
-
{ id: 'blockrun/eco', shortcut: 'eco', label: 'Eco', price: 'cheapest' },
|
|
117
|
-
{ id: 'blockrun/premium', shortcut: 'premium', label: 'Premium', price: 'best' },
|
|
118
141
|
],
|
|
119
142
|
},
|
|
120
143
|
{
|
|
@@ -139,7 +162,10 @@ export const PICKER_CATEGORIES = [
|
|
|
139
162
|
models: [
|
|
140
163
|
{ id: 'openai/o3', shortcut: 'o3', label: 'O3', price: '$2/$8' },
|
|
141
164
|
{ id: 'openai/gpt-5.3-codex', shortcut: 'codex', label: 'GPT-5.3 Codex', price: '$1.75/$14' },
|
|
142
|
-
|
|
165
|
+
// V4 Pro on launch promo (75% off through 2026-05-31). 1M context,
|
|
166
|
+
// 1.6T MoE → punches up to GPT-5.5/Opus on hard tasks at <1/10 the price.
|
|
167
|
+
{ id: 'deepseek/deepseek-v4-pro', shortcut: 'deepseek-v4-pro', label: 'DeepSeek V4 Pro', price: '$0.5/$1 (promo)', highlight: true },
|
|
168
|
+
{ id: 'deepseek/deepseek-reasoner', shortcut: 'r1', label: 'DeepSeek V4 Flash R.', price: '$0.2/$0.4' },
|
|
143
169
|
{ id: 'xai/grok-4-1-fast-reasoning', shortcut: 'grok-fast', label: 'Grok 4.1 Fast R.', price: '$0.2/$0.5' },
|
|
144
170
|
],
|
|
145
171
|
},
|
|
@@ -149,14 +175,22 @@ export const PICKER_CATEGORIES = [
|
|
|
149
175
|
{ id: 'anthropic/claude-haiku-4.5-20251001', shortcut: 'haiku', label: 'Claude Haiku 4.5', price: '$1/$5' },
|
|
150
176
|
{ id: 'openai/gpt-5-mini', shortcut: 'mini', label: 'GPT-5 Mini', price: '$0.25/$2' },
|
|
151
177
|
{ id: 'google/gemini-2.5-flash', shortcut: 'flash', label: 'Gemini 2.5 Flash', price: '$0.3/$2.5' },
|
|
152
|
-
|
|
178
|
+
// Re-aliased to V4 Flash Chat upstream — context 1M, price 30% lower.
|
|
179
|
+
{ id: 'deepseek/deepseek-chat', shortcut: 'deepseek', label: 'DeepSeek V4 Flash Chat', price: '$0.2/$0.4' },
|
|
153
180
|
{ id: 'moonshot/kimi-k2.6', shortcut: 'kimi', label: 'Kimi K2.6', price: '$0.95/$4' },
|
|
154
|
-
|
|
181
|
+
// Minimax M2.7 hidden to make room for V4 Pro in Reasoning + V4 Flash
|
|
182
|
+
// (free) without exceeding the picker's 24-entry cap. Shortcut `minimax`
|
|
183
|
+
// still resolves to it.
|
|
155
184
|
],
|
|
156
185
|
},
|
|
157
186
|
{
|
|
158
187
|
category: '🆓 Free (no USDC needed)',
|
|
159
188
|
models: [
|
|
189
|
+
// V4 Flash leads the section: newest gateway addition, general-purpose,
|
|
190
|
+
// fast — better default for most users than the coder-specialized Qwen.
|
|
191
|
+
// V3.2 hidden (shortcut `deepseek-v3` still works) since V4 Flash
|
|
192
|
+
// supersedes it; keeping the picker tight.
|
|
193
|
+
{ id: 'nvidia/deepseek-v4-flash', shortcut: 'deepseek-v4', label: 'DeepSeek V4 Flash', price: 'FREE', highlight: true },
|
|
160
194
|
{ id: 'nvidia/qwen3-coder-480b', shortcut: 'free', label: 'Qwen3 Coder 480B', price: 'FREE' },
|
|
161
195
|
{ id: 'nvidia/llama-4-maverick', shortcut: 'maverick', label: 'Llama 4 Maverick', price: 'FREE' },
|
|
162
196
|
],
|
package/package.json
CHANGED