@blockrun/cc 0.9.2 → 0.9.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +52 -18
- package/dist/proxy/fallback.js +1 -1
- package/dist/proxy/server.js +101 -29
- package/dist/proxy/sse-translator.d.ts +29 -0
- package/dist/proxy/sse-translator.js +296 -0
- package/dist/router/index.js +45 -19
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -10,7 +10,7 @@ You're paying $200/month and still can't work.<br><br>
|
|
|
10
10
|
|
|
11
11
|
<img src="https://img.shields.io/badge/๐_No_Rate_Limits-black?style=for-the-badge" alt="No rate limits">
|
|
12
12
|
<img src="https://img.shields.io/badge/๐_No_Account_Needed-blue?style=for-the-badge" alt="No account">
|
|
13
|
-
<img src="https://img.shields.io/badge/๐ค
|
|
13
|
+
<img src="https://img.shields.io/badge/๐ค_50+_Models-yellow?style=for-the-badge" alt="50+ models">
|
|
14
14
|
<img src="https://img.shields.io/badge/๐ฐ_Pay_Per_Use-purple?style=for-the-badge" alt="Pay per use">
|
|
15
15
|
<img src="https://img.shields.io/badge/โ_Base_+_Solana-green?style=for-the-badge" alt="Base + Solana">
|
|
16
16
|
|
|
@@ -28,7 +28,7 @@ You're paying $200/month and still can't work.<br><br>
|
|
|
28
28
|
|
|
29
29
|
</div>
|
|
30
30
|
|
|
31
|
-
> **brcc** is a local proxy that lets you run Claude Code with any LLM model — GPT-5, Claude, Gemini, DeepSeek, Grok, and
|
|
31
|
+
> **brcc** is a local proxy that lets you run Claude Code with any LLM model — GPT-5, Claude, Gemini, DeepSeek, Grok, and 50+ more — without rate limits, without an Anthropic account, and without phone verification. You pay per request with USDC via the [x402](https://x402.org) protocol. Your wallet is your identity. Your private key never leaves your machine.
|
|
32
32
|
|
|
33
33
|
---
|
|
34
34
|
|
|
@@ -62,7 +62,7 @@ brcc eliminates all of these problems:
|
|
|
62
62
|
|--|---------------------|------------|----------|
|
|
63
63
|
| **Rate limits** | Constantly hit | Per-model limits | **None** |
|
|
64
64
|
| **Account required** | Yes + phone | Yes + email | **No** |
|
|
65
|
-
| **Models** | Claude only | 200+ (manual select) | **
|
|
65
|
+
| **Models** | Claude only | 200+ (manual select) | **50+ (auto or manual)** |
|
|
66
66
|
| **Payment** | Credit card, subscription | Credit card, pre-pay | **USDC per-request** |
|
|
67
67
|
| **Auth** | OAuth + API key conflicts | API key | **Wallet signature** |
|
|
68
68
|
| **Pricing** | Opaque | Transparent | **Transparent** |
|
|
@@ -101,7 +101,7 @@ brcc setup solana # Solana chain
|
|
|
101
101
|
brcc start
|
|
102
102
|
```
|
|
103
103
|
|
|
104
|
-
That's it. Claude Code opens with access to
|
|
104
|
+
That's it. Claude Code opens with access to 50+ models, no rate limits.
|
|
105
105
|
|
|
106
106
|
---
|
|
107
107
|
|
|
@@ -144,7 +144,21 @@ use eco # Switch to cheapest
|
|
|
144
144
|
use premium # Switch to best quality
|
|
145
145
|
use free # Switch to free models
|
|
146
146
|
use sonnet # Direct Claude Sonnet
|
|
147
|
-
use
|
|
147
|
+
use gpt # GPT-5.4
|
|
148
|
+
use codex # GPT-5.3 Codex
|
|
149
|
+
use o3 # OpenAI o3
|
|
150
|
+
use gemini # Gemini 2.5 Pro
|
|
151
|
+
use flash # Gemini 2.5 Flash
|
|
152
|
+
use grok # Grok 3
|
|
153
|
+
use grok-4 # Grok 4
|
|
154
|
+
use deepseek # DeepSeek Chat
|
|
155
|
+
use r1 # DeepSeek Reasoner
|
|
156
|
+
use minimax # Minimax M2.7
|
|
157
|
+
use kimi # Kimi K2.5
|
|
158
|
+
use free # Nemotron Ultra 253B (free)
|
|
159
|
+
use nemotron # Nemotron Ultra 253B (free)
|
|
160
|
+
use devstral # Devstral 2 123B (free)
|
|
161
|
+
use qwen-coder # Qwen3 Coder 480B (free)
|
|
148
162
|
```
|
|
149
163
|
|
|
150
164
|
### Inside Claude Code
|
|
@@ -165,18 +179,34 @@ $ brcc models
|
|
|
165
179
|
|
|
166
180
|
Free Models (no USDC needed)
|
|
167
181
|
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
182
|
+
nvidia/nemotron-ultra-253b
|
|
168
183
|
nvidia/gpt-oss-120b
|
|
169
|
-
nvidia/
|
|
184
|
+
nvidia/deepseek-v3.2
|
|
185
|
+
nvidia/mistral-large-3-675b
|
|
186
|
+
nvidia/qwen3-coder-480b
|
|
187
|
+
nvidia/devstral-2-123b
|
|
188
|
+
nvidia/llama-4-maverick
|
|
189
|
+
nvidia/glm-4.7
|
|
190
|
+
... (11 free models)
|
|
170
191
|
|
|
171
192
|
Paid Models
|
|
172
193
|
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
173
194
|
Model Input Output
|
|
195
|
+
openai/gpt-5-nano $0.05/M $0.40/M
|
|
196
|
+
xai/grok-4-1-fast-reasoning $0.20/M $0.50/M
|
|
174
197
|
deepseek/deepseek-chat $0.28/M $0.42/M
|
|
198
|
+
minimax/minimax-m2.7 $0.30/M $1.20/M
|
|
199
|
+
google/gemini-2.5-flash $0.30/M $2.50/M
|
|
200
|
+
moonshot/kimi-k2.5 $0.60/M $3.00/M
|
|
175
201
|
anthropic/claude-haiku-4.5 $1.00/M $5.00/M
|
|
202
|
+
openai/gpt-5.3-codex $1.75/M $14.00/M
|
|
203
|
+
google/gemini-2.5-pro $1.25/M $10.00/M
|
|
204
|
+
google/gemini-3.1-pro $2.00/M $12.00/M
|
|
176
205
|
openai/gpt-5.4 $2.50/M $15.00/M
|
|
177
206
|
anthropic/claude-sonnet-4.6 $3.00/M $15.00/M
|
|
178
207
|
anthropic/claude-opus-4.6 $5.00/M $25.00/M
|
|
179
|
-
|
|
208
|
+
openai/gpt-5.4-pro $30.00/M $180.00/M
|
|
209
|
+
... (50+ models total)
|
|
180
210
|
```
|
|
181
211
|
|
|
182
212
|
---
|
|
@@ -185,12 +215,16 @@ Paid Models
|
|
|
185
215
|
|
|
186
216
|
| Model | ~Requests per $5 | Best For |
|
|
187
217
|
|-------|-------------------|----------|
|
|
218
|
+
| Nemotron Ultra 253B | **Unlimited** | Free tier |
|
|
219
|
+
| Grok 4.1 Fast | ~3,500 | Fast reasoning (budget) |
|
|
188
220
|
| DeepSeek V3 | ~5,000 | Budget coding |
|
|
221
|
+
| Gemini 2.5 Flash | ~1,200 | Balanced speed/cost |
|
|
222
|
+
| Kimi K2.5 | ~800 | Mid-range coding |
|
|
189
223
|
| Claude Haiku 4.5 | ~500 | Fast tasks |
|
|
224
|
+
| GPT-5.3 Codex | ~180 | Code generation |
|
|
190
225
|
| Claude Sonnet 4.6 | ~100 | General coding |
|
|
191
226
|
| GPT-5.4 | ~80 | Reasoning |
|
|
192
227
|
| Claude Opus 4.6 | ~50 | Most capable |
|
|
193
|
-
| GPT-OSS 120B | **Unlimited** | Free tier |
|
|
194
228
|
|
|
195
229
|
---
|
|
196
230
|
|
|
@@ -220,7 +254,7 @@ Your wallet is saved to `~/.blockrun/` and shared with all BlockRun tools.
|
|
|
220
254
|
|
|
221
255
|
```bash
|
|
222
256
|
brcc start # Default model
|
|
223
|
-
brcc start --model nvidia/
|
|
257
|
+
brcc start --model nvidia/nemotron-ultra-253b # Free model
|
|
224
258
|
brcc start --model openai/gpt-5.4 # Specific model
|
|
225
259
|
brcc start --no-launch # Proxy only mode
|
|
226
260
|
brcc start --no-fallback # Disable auto-fallback
|
|
@@ -254,7 +288,7 @@ $ brcc stats
|
|
|
254
288
|
deepseek/deepseek-chat
|
|
255
289
|
620 req · $0.8901 (19.5%) · 180ms avg
|
|
256
290
|
↳ 12 fallback recoveries
|
|
257
|
-
nvidia/
|
|
291
|
+
nvidia/nemotron-ultra-253b
|
|
258
292
|
164 req · $0.0000 (0%) · 320ms avg
|
|
259
293
|
|
|
260
294
|
💰 Savings vs Claude Opus
|
|
@@ -273,7 +307,7 @@ $ brcc stats --json # Output as JSON (for scripts)
|
|
|
273
307
|
### `brcc config`
|
|
274
308
|
|
|
275
309
|
```bash
|
|
276
|
-
brcc config set default-model nvidia/
|
|
310
|
+
brcc config set default-model nvidia/nemotron-ultra-253b
|
|
277
311
|
brcc config set sonnet-model openai/gpt-5.4
|
|
278
312
|
brcc config set opus-model anthropic/claude-opus-4.6
|
|
279
313
|
brcc config set haiku-model deepseek/deepseek-chat
|
|
@@ -288,15 +322,15 @@ When a model returns an error (429 rate limit, 500+ server error), brcc automati
|
|
|
288
322
|
|
|
289
323
|
**Default fallback chain:**
|
|
290
324
|
```
|
|
291
|
-
|
|
325
|
+
your-selected-model
|
|
292
326
|
↓ (if 429/500/502/503/504)
|
|
293
|
-
|
|
327
|
+
blockrun/auto (smart routing)
|
|
294
328
|
↓
|
|
295
|
-
|
|
329
|
+
blockrun/eco (cheapest capable)
|
|
296
330
|
↓
|
|
297
|
-
|
|
331
|
+
deepseek/deepseek-chat
|
|
298
332
|
↓
|
|
299
|
-
nvidia/
|
|
333
|
+
nvidia/nemotron-ultra-253b (free, always available)
|
|
300
334
|
```
|
|
301
335
|
|
|
302
336
|
**How it looks:**
|
|
@@ -336,7 +370,7 @@ brcc start --no-fallback
|
|
|
336
370
|
│ BlockRun API (blockrun.ai or sol.blockrun.ai) │
|
|
337
371
|
│ │
|
|
338
372
|
│ Routes to: GPT-5 · Claude · Gemini · DeepSeek · Grok · │
|
|
339
|
-
│ NVIDIA · MiniMax · Moonshot ·
|
|
373
|
+
│ NVIDIA · MiniMax · Moonshot · 50+ models │
|
|
340
374
|
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
341
375
|
```
|
|
342
376
|
|
|
@@ -368,7 +402,7 @@ Typical cost: **$0.001–$0.05 per interaction**. $5 lasts most developers a wee
|
|
|
368
402
|
No. brcc connects Claude Code to BlockRun instead of Anthropic.
|
|
369
403
|
|
|
370
404
|
**Can I use non-Claude models?**
|
|
371
|
-
Yes. GPT-5, Gemini, DeepSeek, Grok, and
|
|
405
|
+
Yes. GPT-5, Gemini, DeepSeek, Grok, and 50+ others work through Claude Code via brcc.
|
|
372
406
|
|
|
373
407
|
---
|
|
374
408
|
|
package/dist/proxy/fallback.js
CHANGED
|
@@ -7,7 +7,7 @@ export const DEFAULT_FALLBACK_CONFIG = {
|
|
|
7
7
|
'blockrun/auto', // Smart routing (default)
|
|
8
8
|
'blockrun/eco', // Cheapest capable model
|
|
9
9
|
'deepseek/deepseek-chat', // Direct fallback
|
|
10
|
-
'nvidia/
|
|
10
|
+
'nvidia/nemotron-ultra-253b', // Free model as ultimate fallback
|
|
11
11
|
],
|
|
12
12
|
retryOn: [429, 500, 502, 503, 504, 529],
|
|
13
13
|
maxRetries: 5,
|
package/dist/proxy/server.js
CHANGED
|
@@ -40,24 +40,56 @@ const DEFAULT_MAX_TOKENS = 4096;
|
|
|
40
40
|
let lastOutputTokens = 0;
|
|
41
41
|
// Model shortcuts for quick switching
|
|
42
42
|
const MODEL_SHORTCUTS = {
|
|
43
|
+
// Routing profiles
|
|
43
44
|
auto: 'blockrun/auto',
|
|
44
45
|
smart: 'blockrun/auto',
|
|
45
46
|
eco: 'blockrun/eco',
|
|
46
47
|
premium: 'blockrun/premium',
|
|
47
|
-
|
|
48
|
-
gpt5: 'openai/gpt-5.4',
|
|
49
|
-
'gpt-5': 'openai/gpt-5.4',
|
|
50
|
-
'gpt-5.4': 'openai/gpt-5.4',
|
|
48
|
+
// Anthropic
|
|
51
49
|
sonnet: 'anthropic/claude-sonnet-4.6',
|
|
52
50
|
claude: 'anthropic/claude-sonnet-4.6',
|
|
53
51
|
opus: 'anthropic/claude-opus-4.6',
|
|
54
52
|
haiku: 'anthropic/claude-haiku-4.5',
|
|
55
|
-
|
|
53
|
+
// OpenAI
|
|
54
|
+
gpt: 'openai/gpt-5.4',
|
|
55
|
+
gpt5: 'openai/gpt-5.4',
|
|
56
|
+
'gpt-5': 'openai/gpt-5.4',
|
|
57
|
+
'gpt-5.4': 'openai/gpt-5.4',
|
|
58
|
+
'gpt-5.4-pro': 'openai/gpt-5.4-pro',
|
|
59
|
+
'gpt-5.3': 'openai/gpt-5.3',
|
|
60
|
+
'gpt-5.2': 'openai/gpt-5.2',
|
|
61
|
+
'gpt-5.2-pro': 'openai/gpt-5.2-pro',
|
|
62
|
+
'gpt-4.1': 'openai/gpt-4.1',
|
|
63
|
+
codex: 'openai/gpt-5.3-codex',
|
|
64
|
+
nano: 'openai/gpt-5-nano',
|
|
65
|
+
mini: 'openai/gpt-5-mini',
|
|
66
|
+
o3: 'openai/o3',
|
|
67
|
+
o4: 'openai/o4-mini',
|
|
68
|
+
'o4-mini': 'openai/o4-mini',
|
|
69
|
+
o1: 'openai/o1',
|
|
70
|
+
// Google
|
|
56
71
|
gemini: 'google/gemini-2.5-pro',
|
|
72
|
+
flash: 'google/gemini-2.5-flash',
|
|
73
|
+
'gemini-3': 'google/gemini-3.1-pro',
|
|
74
|
+
// xAI
|
|
57
75
|
grok: 'xai/grok-3',
|
|
58
|
-
|
|
59
|
-
|
|
76
|
+
'grok-4': 'xai/grok-4-0709',
|
|
77
|
+
'grok-fast': 'xai/grok-4-1-fast-reasoning',
|
|
78
|
+
// DeepSeek
|
|
79
|
+
deepseek: 'deepseek/deepseek-chat',
|
|
80
|
+
r1: 'deepseek/deepseek-reasoner',
|
|
81
|
+
// Free models
|
|
82
|
+
free: 'nvidia/nemotron-ultra-253b',
|
|
83
|
+
nemotron: 'nvidia/nemotron-ultra-253b',
|
|
84
|
+
'deepseek-free': 'nvidia/deepseek-v3.2',
|
|
85
|
+
devstral: 'nvidia/devstral-2-123b',
|
|
86
|
+
'qwen-coder': 'nvidia/qwen3-coder-480b',
|
|
87
|
+
maverick: 'nvidia/llama-4-maverick',
|
|
88
|
+
// Minimax
|
|
89
|
+
minimax: 'minimax/minimax-m2.7',
|
|
90
|
+
// Others
|
|
60
91
|
glm: 'zai/glm-5',
|
|
92
|
+
kimi: 'moonshot/kimi-k2.5',
|
|
61
93
|
};
|
|
62
94
|
// Model pricing (per 1M tokens) - used for stats
|
|
63
95
|
const MODEL_PRICING = {
|
|
@@ -66,27 +98,68 @@ const MODEL_PRICING = {
|
|
|
66
98
|
'blockrun/eco': { input: 0.2, output: 1.0 },
|
|
67
99
|
'blockrun/premium': { input: 3.0, output: 15.0 },
|
|
68
100
|
'blockrun/free': { input: 0, output: 0 },
|
|
69
|
-
//
|
|
101
|
+
// FREE - NVIDIA models
|
|
102
|
+
'nvidia/gpt-oss-120b': { input: 0, output: 0 },
|
|
103
|
+
'nvidia/gpt-oss-20b': { input: 0, output: 0 },
|
|
104
|
+
'nvidia/nemotron-ultra-253b': { input: 0, output: 0 },
|
|
105
|
+
'nvidia/nemotron-3-super-120b': { input: 0, output: 0 },
|
|
106
|
+
'nvidia/nemotron-super-49b': { input: 0, output: 0 },
|
|
107
|
+
'nvidia/deepseek-v3.2': { input: 0, output: 0 },
|
|
108
|
+
'nvidia/mistral-large-3-675b': { input: 0, output: 0 },
|
|
109
|
+
'nvidia/qwen3-coder-480b': { input: 0, output: 0 },
|
|
110
|
+
'nvidia/devstral-2-123b': { input: 0, output: 0 },
|
|
111
|
+
'nvidia/glm-4.7': { input: 0, output: 0 },
|
|
112
|
+
'nvidia/llama-4-maverick': { input: 0, output: 0 },
|
|
113
|
+
// Anthropic
|
|
70
114
|
'anthropic/claude-sonnet-4.6': { input: 3.0, output: 15.0 },
|
|
71
115
|
'anthropic/claude-opus-4.6': { input: 5.0, output: 25.0 },
|
|
72
116
|
'anthropic/claude-haiku-4.5': { input: 1.0, output: 5.0 },
|
|
73
|
-
|
|
117
|
+
// OpenAI
|
|
118
|
+
'openai/gpt-5-nano': { input: 0.05, output: 0.4 },
|
|
119
|
+
'openai/gpt-4.1-nano': { input: 0.1, output: 0.4 },
|
|
120
|
+
'openai/gpt-4o-mini': { input: 0.15, output: 0.6 },
|
|
74
121
|
'openai/gpt-5-mini': { input: 0.25, output: 2.0 },
|
|
75
|
-
'
|
|
122
|
+
'openai/gpt-4.1-mini': { input: 0.4, output: 1.6 },
|
|
123
|
+
'openai/gpt-5.2': { input: 1.75, output: 14.0 },
|
|
124
|
+
'openai/gpt-5.3': { input: 1.75, output: 14.0 },
|
|
125
|
+
'openai/gpt-5.3-codex': { input: 1.75, output: 14.0 },
|
|
126
|
+
'openai/gpt-4.1': { input: 2.0, output: 8.0 },
|
|
127
|
+
'openai/o3': { input: 2.0, output: 8.0 },
|
|
128
|
+
'openai/gpt-4o': { input: 2.5, output: 10.0 },
|
|
129
|
+
'openai/gpt-5.4': { input: 2.5, output: 15.0 },
|
|
130
|
+
'openai/o1-mini': { input: 1.1, output: 4.4 },
|
|
131
|
+
'openai/o3-mini': { input: 1.1, output: 4.4 },
|
|
132
|
+
'openai/o4-mini': { input: 1.1, output: 4.4 },
|
|
133
|
+
'openai/o1': { input: 15.0, output: 60.0 },
|
|
134
|
+
'openai/gpt-5.2-pro': { input: 21.0, output: 168.0 },
|
|
135
|
+
'openai/gpt-5.4-pro': { input: 30.0, output: 180.0 },
|
|
136
|
+
// Google
|
|
137
|
+
'google/gemini-2.5-flash-lite': { input: 0.1, output: 0.4 },
|
|
76
138
|
'google/gemini-2.5-flash': { input: 0.3, output: 2.5 },
|
|
77
|
-
'
|
|
78
|
-
'
|
|
79
|
-
'
|
|
139
|
+
'google/gemini-3-flash-preview': { input: 0.5, output: 3.0 },
|
|
140
|
+
'google/gemini-2.5-pro': { input: 1.25, output: 10.0 },
|
|
141
|
+
'google/gemini-3-pro-preview': { input: 2.0, output: 12.0 },
|
|
142
|
+
'google/gemini-3.1-pro': { input: 2.0, output: 12.0 },
|
|
143
|
+
// xAI
|
|
80
144
|
'xai/grok-4-fast': { input: 0.2, output: 0.5 },
|
|
145
|
+
'xai/grok-4-fast-reasoning': { input: 0.2, output: 0.5 },
|
|
146
|
+
'xai/grok-4-1-fast': { input: 0.2, output: 0.5 },
|
|
81
147
|
'xai/grok-4-1-fast-reasoning': { input: 0.2, output: 0.5 },
|
|
82
|
-
'
|
|
83
|
-
'
|
|
148
|
+
'xai/grok-4-0709': { input: 0.2, output: 1.5 },
|
|
149
|
+
'xai/grok-3-mini': { input: 0.3, output: 0.5 },
|
|
150
|
+
'xai/grok-2-vision': { input: 2.0, output: 10.0 },
|
|
151
|
+
'xai/grok-3': { input: 3.0, output: 15.0 },
|
|
152
|
+
// DeepSeek
|
|
153
|
+
'deepseek/deepseek-chat': { input: 0.28, output: 0.42 },
|
|
154
|
+
'deepseek/deepseek-reasoner': { input: 0.28, output: 0.42 },
|
|
155
|
+
// Minimax
|
|
156
|
+
'minimax/minimax-m2.7': { input: 0.3, output: 1.2 },
|
|
157
|
+
'minimax/minimax-m2.5': { input: 0.3, output: 1.2 },
|
|
158
|
+
// Others
|
|
84
159
|
'moonshot/kimi-k2.5': { input: 0.6, output: 3.0 },
|
|
85
|
-
'
|
|
86
|
-
'
|
|
87
|
-
'
|
|
88
|
-
'google/gemini-2.5-flash-lite': { input: 0.08, output: 0.3 },
|
|
89
|
-
'google/gemini-3.1-pro': { input: 1.25, output: 10.0 },
|
|
160
|
+
'nvidia/kimi-k2.5': { input: 0.55, output: 2.5 },
|
|
161
|
+
'zai/glm-5': { input: 1.0, output: 3.2 },
|
|
162
|
+
'zai/glm-5-turbo': { input: 1.2, output: 4.0 },
|
|
90
163
|
};
|
|
91
164
|
function estimateCost(model, inputTokens, outputTokens) {
|
|
92
165
|
const pricing = MODEL_PRICING[model] || { input: 2.0, output: 10.0 };
|
|
@@ -191,19 +264,18 @@ export function createProxy(options) {
|
|
|
191
264
|
res.end(JSON.stringify(fakeResponse));
|
|
192
265
|
return;
|
|
193
266
|
}
|
|
194
|
-
//
|
|
195
|
-
//
|
|
196
|
-
//
|
|
197
|
-
//
|
|
198
|
-
|
|
199
|
-
|
|
267
|
+
// Model override logic:
|
|
268
|
+
// - Claude Code sends native Anthropic IDs (e.g. "claude-sonnet-4-6-20250514")
|
|
269
|
+
// which don't contain "/" — these MUST be replaced with currentModel.
|
|
270
|
+
// - BlockRun model IDs always contain "/" (e.g. "blockrun/auto", "nvidia/nemotron-ultra-253b")
|
|
271
|
+
// — these should be passed through as-is.
|
|
272
|
+
// - If --model CLI flag is set, always override regardless.
|
|
273
|
+
if (options.modelOverride) {
|
|
200
274
|
parsed.model = currentModel;
|
|
201
275
|
}
|
|
202
|
-
else if (!parsed.model) {
|
|
203
|
-
// No model in request: use default
|
|
276
|
+
else if (!parsed.model || !parsed.model.includes('/')) {
|
|
204
277
|
parsed.model = currentModel || DEFAULT_MODEL;
|
|
205
278
|
}
|
|
206
|
-
// Otherwise: use the model from the request as-is
|
|
207
279
|
requestModel = parsed.model || DEFAULT_MODEL;
|
|
208
280
|
// Smart routing: if model is a routing profile, classify and route
|
|
209
281
|
const routingProfile = parseRoutingProfile(requestModel);
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SSE Event Translator: OpenAI → Anthropic Messages API format
|
|
3
|
+
*
|
|
4
|
+
* Handles three critical gaps in the streaming pipeline:
|
|
5
|
+
* 1. Tool calls: choice.delta.tool_calls → content_block_start/content_block_delta (tool_use)
|
|
6
|
+
* 2. Reasoning: reasoning_content → content_block_start/content_block_delta (thinking)
|
|
7
|
+
* 3. Ensures proper content_block_stop and message_stop events
|
|
8
|
+
*/
|
|
9
|
+
export declare class SSETranslator {
|
|
10
|
+
private state;
|
|
11
|
+
private buffer;
|
|
12
|
+
constructor(model?: string);
|
|
13
|
+
/**
|
|
14
|
+
* Detect whether an SSE chunk is in OpenAI format.
|
|
15
|
+
* Returns true if it contains OpenAI-style `choices[].delta` structure.
|
|
16
|
+
*/
|
|
17
|
+
static isOpenAIFormat(chunk: string): boolean;
|
|
18
|
+
/**
|
|
19
|
+
* Process a raw SSE text chunk and return translated Anthropic-format SSE events.
|
|
20
|
+
* Returns null if no translation needed (already Anthropic format or not parseable).
|
|
21
|
+
*/
|
|
22
|
+
processChunk(rawChunk: string): string | null;
|
|
23
|
+
private parseSSEEvents;
|
|
24
|
+
private formatSSE;
|
|
25
|
+
private closeThinkingBlock;
|
|
26
|
+
private closeTextBlock;
|
|
27
|
+
private closeToolCalls;
|
|
28
|
+
private closeActiveBlocks;
|
|
29
|
+
}
|
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SSE Event Translator: OpenAI → Anthropic Messages API format
|
|
3
|
+
*
|
|
4
|
+
* Handles three critical gaps in the streaming pipeline:
|
|
5
|
+
* 1. Tool calls: choice.delta.tool_calls → content_block_start/content_block_delta (tool_use)
|
|
6
|
+
* 2. Reasoning: reasoning_content → content_block_start/content_block_delta (thinking)
|
|
7
|
+
* 3. Ensures proper content_block_stop and message_stop events
|
|
8
|
+
*/
|
|
9
|
+
// โโโ SSE Translator โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
10
|
+
export class SSETranslator {
|
|
11
|
+
state;
|
|
12
|
+
buffer = '';
|
|
13
|
+
constructor(model = 'unknown') {
|
|
14
|
+
this.state = {
|
|
15
|
+
messageId: `msg_brcc_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
|
|
16
|
+
model,
|
|
17
|
+
blockIndex: 0,
|
|
18
|
+
activeToolCalls: new Map(),
|
|
19
|
+
thinkingBlockActive: false,
|
|
20
|
+
textBlockActive: false,
|
|
21
|
+
messageStarted: false,
|
|
22
|
+
inputTokens: 0,
|
|
23
|
+
outputTokens: 0,
|
|
24
|
+
};
|
|
25
|
+
}
|
|
26
|
+
/**
|
|
27
|
+
* Detect whether an SSE chunk is in OpenAI format.
|
|
28
|
+
* Returns true if it contains OpenAI-style `choices[].delta` structure.
|
|
29
|
+
*/
|
|
30
|
+
static isOpenAIFormat(chunk) {
|
|
31
|
+
// Look for OpenAI-specific patterns in the SSE data
|
|
32
|
+
return (chunk.includes('"choices"') &&
|
|
33
|
+
chunk.includes('"delta"') &&
|
|
34
|
+
!chunk.includes('"content_block_'));
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* Process a raw SSE text chunk and return translated Anthropic-format SSE events.
|
|
38
|
+
* Returns null if no translation needed (already Anthropic format or not parseable).
|
|
39
|
+
*/
|
|
40
|
+
processChunk(rawChunk) {
|
|
41
|
+
this.buffer += rawChunk;
|
|
42
|
+
const events = this.parseSSEEvents();
|
|
43
|
+
if (events.length === 0)
|
|
44
|
+
return null;
|
|
45
|
+
const translated = [];
|
|
46
|
+
for (const event of events) {
|
|
47
|
+
if (event.data === '[DONE]') {
|
|
48
|
+
// Close any active blocks, then emit message_stop
|
|
49
|
+
translated.push(...this.closeActiveBlocks());
|
|
50
|
+
translated.push(this.formatSSE('message_delta', {
|
|
51
|
+
type: 'message_delta',
|
|
52
|
+
delta: { stop_reason: 'end_turn', stop_sequence: null },
|
|
53
|
+
usage: { output_tokens: this.state.outputTokens },
|
|
54
|
+
}));
|
|
55
|
+
translated.push(this.formatSSE('message_stop', { type: 'message_stop' }));
|
|
56
|
+
continue;
|
|
57
|
+
}
|
|
58
|
+
let parsed;
|
|
59
|
+
try {
|
|
60
|
+
parsed = JSON.parse(event.data);
|
|
61
|
+
}
|
|
62
|
+
catch {
|
|
63
|
+
continue;
|
|
64
|
+
}
|
|
65
|
+
// Skip if this doesn't look like OpenAI format
|
|
66
|
+
if (!parsed.choices || parsed.choices.length === 0) {
|
|
67
|
+
// Could be a usage-only event
|
|
68
|
+
if (parsed.usage) {
|
|
69
|
+
this.state.inputTokens = parsed.usage.prompt_tokens || 0;
|
|
70
|
+
this.state.outputTokens = parsed.usage.completion_tokens || 0;
|
|
71
|
+
}
|
|
72
|
+
continue;
|
|
73
|
+
}
|
|
74
|
+
// Emit message_start on first chunk
|
|
75
|
+
if (!this.state.messageStarted) {
|
|
76
|
+
this.state.messageStarted = true;
|
|
77
|
+
if (parsed.model)
|
|
78
|
+
this.state.model = parsed.model;
|
|
79
|
+
translated.push(this.formatSSE('message_start', {
|
|
80
|
+
type: 'message_start',
|
|
81
|
+
message: {
|
|
82
|
+
id: this.state.messageId,
|
|
83
|
+
type: 'message',
|
|
84
|
+
role: 'assistant',
|
|
85
|
+
model: this.state.model,
|
|
86
|
+
content: [],
|
|
87
|
+
stop_reason: null,
|
|
88
|
+
stop_sequence: null,
|
|
89
|
+
usage: { input_tokens: this.state.inputTokens, output_tokens: 0 },
|
|
90
|
+
},
|
|
91
|
+
}));
|
|
92
|
+
translated.push(this.formatSSE('ping', { type: 'ping' }));
|
|
93
|
+
}
|
|
94
|
+
const choice = parsed.choices[0];
|
|
95
|
+
const delta = choice.delta;
|
|
96
|
+
// ── Reasoning content → thinking block ──
|
|
97
|
+
if (delta.reasoning_content) {
|
|
98
|
+
if (!this.state.thinkingBlockActive) {
|
|
99
|
+
// Close text block if active
|
|
100
|
+
if (this.state.textBlockActive) {
|
|
101
|
+
translated.push(...this.closeTextBlock());
|
|
102
|
+
}
|
|
103
|
+
this.state.thinkingBlockActive = true;
|
|
104
|
+
translated.push(this.formatSSE('content_block_start', {
|
|
105
|
+
type: 'content_block_start',
|
|
106
|
+
index: this.state.blockIndex,
|
|
107
|
+
content_block: { type: 'thinking', thinking: '' },
|
|
108
|
+
}));
|
|
109
|
+
}
|
|
110
|
+
translated.push(this.formatSSE('content_block_delta', {
|
|
111
|
+
type: 'content_block_delta',
|
|
112
|
+
index: this.state.blockIndex,
|
|
113
|
+
delta: { type: 'thinking_delta', thinking: delta.reasoning_content },
|
|
114
|
+
}));
|
|
115
|
+
this.state.outputTokens++;
|
|
116
|
+
}
|
|
117
|
+
// ── Text content → text block ──
|
|
118
|
+
if (delta.content) {
|
|
119
|
+
// Close thinking block if transitioning
|
|
120
|
+
if (this.state.thinkingBlockActive) {
|
|
121
|
+
translated.push(...this.closeThinkingBlock());
|
|
122
|
+
}
|
|
123
|
+
if (!this.state.textBlockActive) {
|
|
124
|
+
// Close any active tool calls first
|
|
125
|
+
translated.push(...this.closeToolCalls());
|
|
126
|
+
this.state.textBlockActive = true;
|
|
127
|
+
translated.push(this.formatSSE('content_block_start', {
|
|
128
|
+
type: 'content_block_start',
|
|
129
|
+
index: this.state.blockIndex,
|
|
130
|
+
content_block: { type: 'text', text: '' },
|
|
131
|
+
}));
|
|
132
|
+
}
|
|
133
|
+
translated.push(this.formatSSE('content_block_delta', {
|
|
134
|
+
type: 'content_block_delta',
|
|
135
|
+
index: this.state.blockIndex,
|
|
136
|
+
delta: { type: 'text_delta', text: delta.content },
|
|
137
|
+
}));
|
|
138
|
+
this.state.outputTokens++;
|
|
139
|
+
}
|
|
140
|
+
// ── Tool calls → tool_use blocks ──
|
|
141
|
+
if (delta.tool_calls && delta.tool_calls.length > 0) {
|
|
142
|
+
// Close thinking/text blocks first
|
|
143
|
+
if (this.state.thinkingBlockActive) {
|
|
144
|
+
translated.push(...this.closeThinkingBlock());
|
|
145
|
+
}
|
|
146
|
+
if (this.state.textBlockActive) {
|
|
147
|
+
translated.push(...this.closeTextBlock());
|
|
148
|
+
}
|
|
149
|
+
for (const tc of delta.tool_calls) {
|
|
150
|
+
const tcIndex = tc.index;
|
|
151
|
+
if (tc.id && tc.function?.name) {
|
|
152
|
+
// New tool call start
|
|
153
|
+
// Close previous tool call at same index if exists
|
|
154
|
+
if (this.state.activeToolCalls.has(tcIndex)) {
|
|
155
|
+
translated.push(this.formatSSE('content_block_stop', {
|
|
156
|
+
type: 'content_block_stop',
|
|
157
|
+
index: this.state.blockIndex,
|
|
158
|
+
}));
|
|
159
|
+
this.state.blockIndex++;
|
|
160
|
+
}
|
|
161
|
+
const toolId = tc.id;
|
|
162
|
+
const toolName = tc.function.name;
|
|
163
|
+
this.state.activeToolCalls.set(tcIndex, { id: toolId, name: toolName });
|
|
164
|
+
translated.push(this.formatSSE('content_block_start', {
|
|
165
|
+
type: 'content_block_start',
|
|
166
|
+
index: this.state.blockIndex,
|
|
167
|
+
content_block: {
|
|
168
|
+
type: 'tool_use',
|
|
169
|
+
id: toolId,
|
|
170
|
+
name: toolName,
|
|
171
|
+
input: {},
|
|
172
|
+
},
|
|
173
|
+
}));
|
|
174
|
+
// If there are initial arguments, send them
|
|
175
|
+
if (tc.function.arguments) {
|
|
176
|
+
translated.push(this.formatSSE('content_block_delta', {
|
|
177
|
+
type: 'content_block_delta',
|
|
178
|
+
index: this.state.blockIndex,
|
|
179
|
+
delta: {
|
|
180
|
+
type: 'input_json_delta',
|
|
181
|
+
partial_json: tc.function.arguments,
|
|
182
|
+
},
|
|
183
|
+
}));
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
else if (tc.function?.arguments) {
|
|
187
|
+
// Continuation of existing tool call - stream arguments
|
|
188
|
+
translated.push(this.formatSSE('content_block_delta', {
|
|
189
|
+
type: 'content_block_delta',
|
|
190
|
+
index: this.state.blockIndex,
|
|
191
|
+
delta: {
|
|
192
|
+
type: 'input_json_delta',
|
|
193
|
+
partial_json: tc.function.arguments,
|
|
194
|
+
},
|
|
195
|
+
}));
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
this.state.outputTokens++;
|
|
199
|
+
}
|
|
200
|
+
// ── Handle finish_reason ──
|
|
201
|
+
if (choice.finish_reason) {
|
|
202
|
+
translated.push(...this.closeActiveBlocks());
|
|
203
|
+
const stopReason = choice.finish_reason === 'tool_calls'
|
|
204
|
+
? 'tool_use'
|
|
205
|
+
: choice.finish_reason === 'stop'
|
|
206
|
+
? 'end_turn'
|
|
207
|
+
: choice.finish_reason;
|
|
208
|
+
translated.push(this.formatSSE('message_delta', {
|
|
209
|
+
type: 'message_delta',
|
|
210
|
+
delta: { stop_reason: stopReason, stop_sequence: null },
|
|
211
|
+
usage: { output_tokens: this.state.outputTokens },
|
|
212
|
+
}));
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
return translated.length > 0 ? translated.join('') : null;
|
|
216
|
+
}
|
|
217
|
+
// โโ Helpers โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
218
|
+
parseSSEEvents() {
|
|
219
|
+
const events = [];
|
|
220
|
+
const lines = this.buffer.split('\n');
|
|
221
|
+
let currentEvent;
|
|
222
|
+
let dataLines = [];
|
|
223
|
+
let consumed = 0;
|
|
224
|
+
for (let i = 0; i < lines.length; i++) {
|
|
225
|
+
const line = lines[i];
|
|
226
|
+
if (line.startsWith('event: ')) {
|
|
227
|
+
currentEvent = line.slice(7).trim();
|
|
228
|
+
}
|
|
229
|
+
else if (line.startsWith('data: ')) {
|
|
230
|
+
dataLines.push(line.slice(6));
|
|
231
|
+
}
|
|
232
|
+
else if (line === '' && dataLines.length > 0) {
|
|
233
|
+
// End of event
|
|
234
|
+
events.push({ event: currentEvent, data: dataLines.join('\n') });
|
|
235
|
+
currentEvent = undefined;
|
|
236
|
+
dataLines = [];
|
|
237
|
+
consumed = lines.slice(0, i + 1).join('\n').length + 1;
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
// Keep unconsumed text in buffer
|
|
241
|
+
if (consumed > 0) {
|
|
242
|
+
this.buffer = this.buffer.slice(consumed);
|
|
243
|
+
}
|
|
244
|
+
return events;
|
|
245
|
+
}
|
|
246
|
+
formatSSE(event, data) {
|
|
247
|
+
return `event: ${event}\ndata: ${JSON.stringify(data)}\n\n`;
|
|
248
|
+
}
|
|
249
|
+
closeThinkingBlock() {
|
|
250
|
+
if (!this.state.thinkingBlockActive)
|
|
251
|
+
return [];
|
|
252
|
+
this.state.thinkingBlockActive = false;
|
|
253
|
+
const events = [
|
|
254
|
+
this.formatSSE('content_block_stop', {
|
|
255
|
+
type: 'content_block_stop',
|
|
256
|
+
index: this.state.blockIndex,
|
|
257
|
+
}),
|
|
258
|
+
];
|
|
259
|
+
this.state.blockIndex++;
|
|
260
|
+
return events;
|
|
261
|
+
}
|
|
262
|
+
closeTextBlock() {
|
|
263
|
+
if (!this.state.textBlockActive)
|
|
264
|
+
return [];
|
|
265
|
+
this.state.textBlockActive = false;
|
|
266
|
+
const events = [
|
|
267
|
+
this.formatSSE('content_block_stop', {
|
|
268
|
+
type: 'content_block_stop',
|
|
269
|
+
index: this.state.blockIndex,
|
|
270
|
+
}),
|
|
271
|
+
];
|
|
272
|
+
this.state.blockIndex++;
|
|
273
|
+
return events;
|
|
274
|
+
}
|
|
275
|
+
closeToolCalls() {
|
|
276
|
+
if (this.state.activeToolCalls.size === 0)
|
|
277
|
+
return [];
|
|
278
|
+
const events = [];
|
|
279
|
+
for (const [_index] of this.state.activeToolCalls) {
|
|
280
|
+
events.push(this.formatSSE('content_block_stop', {
|
|
281
|
+
type: 'content_block_stop',
|
|
282
|
+
index: this.state.blockIndex,
|
|
283
|
+
}));
|
|
284
|
+
this.state.blockIndex++;
|
|
285
|
+
}
|
|
286
|
+
this.state.activeToolCalls.clear();
|
|
287
|
+
return events;
|
|
288
|
+
}
|
|
289
|
+
closeActiveBlocks() {
|
|
290
|
+
const events = [];
|
|
291
|
+
events.push(...this.closeThinkingBlock());
|
|
292
|
+
events.push(...this.closeTextBlock());
|
|
293
|
+
events.push(...this.closeToolCalls());
|
|
294
|
+
return events;
|
|
295
|
+
}
|
|
296
|
+
}
|
package/dist/router/index.js
CHANGED
|
@@ -6,11 +6,11 @@
|
|
|
6
6
|
const AUTO_TIERS = {
|
|
7
7
|
SIMPLE: {
|
|
8
8
|
primary: 'google/gemini-2.5-flash',
|
|
9
|
-
fallback: ['deepseek/deepseek-chat', 'nvidia/
|
|
9
|
+
fallback: ['deepseek/deepseek-chat', 'nvidia/nemotron-ultra-253b'],
|
|
10
10
|
},
|
|
11
11
|
MEDIUM: {
|
|
12
12
|
primary: 'moonshot/kimi-k2.5',
|
|
13
|
-
fallback: ['google/gemini-2.5-flash', '
|
|
13
|
+
fallback: ['google/gemini-2.5-flash', 'minimax/minimax-m2.7'],
|
|
14
14
|
},
|
|
15
15
|
COMPLEX: {
|
|
16
16
|
primary: 'google/gemini-3.1-pro',
|
|
@@ -23,20 +23,20 @@ const AUTO_TIERS = {
|
|
|
23
23
|
};
|
|
24
24
|
const ECO_TIERS = {
|
|
25
25
|
SIMPLE: {
|
|
26
|
-
primary: 'nvidia/
|
|
27
|
-
fallback: ['
|
|
26
|
+
primary: 'nvidia/nemotron-ultra-253b',
|
|
27
|
+
fallback: ['nvidia/gpt-oss-120b', 'nvidia/deepseek-v3.2'],
|
|
28
28
|
},
|
|
29
29
|
MEDIUM: {
|
|
30
30
|
primary: 'google/gemini-2.5-flash-lite',
|
|
31
|
-
fallback: ['nvidia/
|
|
31
|
+
fallback: ['nvidia/nemotron-ultra-253b', 'nvidia/qwen3-coder-480b'],
|
|
32
32
|
},
|
|
33
33
|
COMPLEX: {
|
|
34
34
|
primary: 'google/gemini-2.5-flash-lite',
|
|
35
|
-
fallback: ['deepseek/deepseek-chat'],
|
|
35
|
+
fallback: ['deepseek/deepseek-chat', 'nvidia/mistral-large-3-675b'],
|
|
36
36
|
},
|
|
37
37
|
REASONING: {
|
|
38
38
|
primary: 'xai/grok-4-1-fast-reasoning',
|
|
39
|
-
fallback: ['deepseek/deepseek-reasoner'],
|
|
39
|
+
fallback: ['deepseek/deepseek-reasoner', 'nvidia/nemotron-ultra-253b'],
|
|
40
40
|
},
|
|
41
41
|
};
|
|
42
42
|
const PREMIUM_TIERS = {
|
|
@@ -189,7 +189,7 @@ export function routeRequest(prompt, profile = 'auto') {
|
|
|
189
189
|
// Free profile - always use free model
|
|
190
190
|
if (profile === 'free') {
|
|
191
191
|
return {
|
|
192
|
-
model: 'nvidia/
|
|
192
|
+
model: 'nvidia/nemotron-ultra-253b',
|
|
193
193
|
tier: 'SIMPLE',
|
|
194
194
|
confidence: 1.0,
|
|
195
195
|
signals: ['free-profile'],
|
|
@@ -217,22 +217,48 @@ export function routeRequest(prompt, profile = 'auto') {
|
|
|
217
217
|
// Baseline: Claude Opus at $5/$25 per 1M tokens
|
|
218
218
|
const OPUS_COST_PER_1K = 0.015; // rough average
|
|
219
219
|
const modelCosts = {
|
|
220
|
+
// FREE
|
|
220
221
|
'nvidia/gpt-oss-120b': 0,
|
|
221
|
-
'
|
|
222
|
+
'nvidia/gpt-oss-20b': 0,
|
|
223
|
+
'nvidia/nemotron-ultra-253b': 0,
|
|
224
|
+
'nvidia/nemotron-3-super-120b': 0,
|
|
225
|
+
'nvidia/nemotron-super-49b': 0,
|
|
226
|
+
'nvidia/deepseek-v3.2': 0,
|
|
227
|
+
'nvidia/mistral-large-3-675b': 0,
|
|
228
|
+
'nvidia/qwen3-coder-480b': 0,
|
|
229
|
+
'nvidia/devstral-2-123b': 0,
|
|
230
|
+
'nvidia/glm-4.7': 0,
|
|
231
|
+
'nvidia/llama-4-maverick': 0,
|
|
232
|
+
// Budget
|
|
233
|
+
'openai/gpt-5-nano': 0.0002,
|
|
234
|
+
'openai/gpt-4.1-nano': 0.0003,
|
|
222
235
|
'google/gemini-2.5-flash-lite': 0.0003,
|
|
236
|
+
'xai/grok-4-fast': 0.0004,
|
|
237
|
+
'xai/grok-4-1-fast': 0.0004,
|
|
238
|
+
'xai/grok-4-1-fast-reasoning': 0.0004,
|
|
223
239
|
'deepseek/deepseek-chat': 0.0004,
|
|
224
|
-
'deepseek/deepseek-reasoner': 0.
|
|
225
|
-
'
|
|
226
|
-
'
|
|
227
|
-
'google/gemini-
|
|
240
|
+
'deepseek/deepseek-reasoner': 0.0004,
|
|
241
|
+
'minimax/minimax-m2.7': 0.0008,
|
|
242
|
+
'minimax/minimax-m2.5': 0.0008,
|
|
243
|
+
'google/gemini-2.5-flash': 0.0014,
|
|
244
|
+
'openai/gpt-5-mini': 0.0011,
|
|
245
|
+
'moonshot/kimi-k2.5': 0.0018,
|
|
246
|
+
// Mid-range
|
|
228
247
|
'anthropic/claude-haiku-4.5': 0.003,
|
|
248
|
+
'zai/glm-5': 0.0021,
|
|
249
|
+
'openai/o4-mini': 0.0028,
|
|
250
|
+
'google/gemini-2.5-pro': 0.0056,
|
|
251
|
+
'openai/gpt-5.3-codex': 0.0079,
|
|
252
|
+
'openai/gpt-5.2': 0.0079,
|
|
253
|
+
'openai/gpt-5.3': 0.0079,
|
|
254
|
+
'openai/gpt-4.1': 0.005,
|
|
255
|
+
'openai/o3': 0.005,
|
|
256
|
+
'google/gemini-3.1-pro': 0.007,
|
|
257
|
+
'openai/gpt-5.4': 0.0088,
|
|
258
|
+
// Premium
|
|
229
259
|
'anthropic/claude-sonnet-4.6': 0.009,
|
|
260
|
+
'xai/grok-3': 0.009,
|
|
230
261
|
'anthropic/claude-opus-4.6': 0.015,
|
|
231
|
-
'openai/gpt-5.3-codex': 0.008,
|
|
232
|
-
'openai/gpt-5.4': 0.009,
|
|
233
|
-
'openai/o3': 0.012,
|
|
234
|
-
'openai/o4-mini': 0.006,
|
|
235
|
-
'xai/grok-4-1-fast-reasoning': 0.0004,
|
|
236
262
|
};
|
|
237
263
|
const modelCost = modelCosts[model] ?? 0.005;
|
|
238
264
|
const savings = Math.max(0, (OPUS_COST_PER_1K - modelCost) / OPUS_COST_PER_1K);
|
|
@@ -257,7 +283,7 @@ export function getFallbackChain(tier, profile = 'auto') {
|
|
|
257
283
|
tierConfigs = PREMIUM_TIERS;
|
|
258
284
|
break;
|
|
259
285
|
case 'free':
|
|
260
|
-
return ['nvidia/
|
|
286
|
+
return ['nvidia/nemotron-ultra-253b'];
|
|
261
287
|
default:
|
|
262
288
|
tierConfigs = AUTO_TIERS;
|
|
263
289
|
}
|