@blockrun/cc 0.9.2 → 0.9.4

package/README.md CHANGED
@@ -10,7 +10,7 @@ You're paying $200/month and still can't work.<br><br>

  <img src="https://img.shields.io/badge/🚀_No_Rate_Limits-black?style=for-the-badge" alt="No rate limits">&nbsp;
  <img src="https://img.shields.io/badge/🔑_No_Account_Needed-blue?style=for-the-badge" alt="No account">&nbsp;
- <img src="https://img.shields.io/badge/🤖_40+_Models-yellow?style=for-the-badge" alt="40+ models">&nbsp;
+ <img src="https://img.shields.io/badge/🤖_50+_Models-yellow?style=for-the-badge" alt="50+ models">&nbsp;
  <img src="https://img.shields.io/badge/💰_Pay_Per_Use-purple?style=for-the-badge" alt="Pay per use">&nbsp;
  <img src="https://img.shields.io/badge/⛓_Base_+_Solana-green?style=for-the-badge" alt="Base + Solana">

@@ -28,7 +28,7 @@ You're paying $200/month and still can't work.<br><br>

  </div>

- > **brcc** is a local proxy that lets you run Claude Code with any LLM model — GPT-5, Claude, Gemini, DeepSeek, Grok, and 40+ more — without rate limits, without an Anthropic account, and without phone verification. You pay per request with USDC via the [x402](https://x402.org) protocol. Your wallet is your identity. Your private key never leaves your machine.
+ > **brcc** is a local proxy that lets you run Claude Code with any LLM model — GPT-5, Claude, Gemini, DeepSeek, Grok, and 50+ more — without rate limits, without an Anthropic account, and without phone verification. You pay per request with USDC via the [x402](https://x402.org) protocol. Your wallet is your identity. Your private key never leaves your machine.

  ---

@@ -62,7 +62,7 @@ brcc eliminates all of these problems:
  |--|---------------------|------------|----------|
  | **Rate limits** | Constantly hit | Per-model limits | **None** |
  | **Account required** | Yes + phone | Yes + email | **No** |
- | **Models** | Claude only | 200+ (manual select) | **40+ (auto or manual)** |
+ | **Models** | Claude only | 200+ (manual select) | **50+ (auto or manual)** |
  | **Payment** | Credit card, subscription | Credit card, pre-pay | **USDC per-request** |
  | **Auth** | OAuth + API key conflicts | API key | **Wallet signature** |
  | **Pricing** | Opaque | Transparent | **Transparent** |
@@ -101,7 +101,7 @@ brcc setup solana # Solana chain
  brcc start
  ```

- That's it. Claude Code opens with access to 40+ models, no rate limits.
+ That's it. Claude Code opens with access to 50+ models, no rate limits.

  ---

@@ -144,7 +144,21 @@ use eco # Switch to cheapest
  use premium # Switch to best quality
  use free # Switch to free models
  use sonnet # Direct Claude Sonnet
- use deepseek # Direct DeepSeek
+ use gpt # GPT-5.4
+ use codex # GPT-5.3 Codex
+ use o3 # OpenAI o3
+ use gemini # Gemini 2.5 Pro
+ use flash # Gemini 2.5 Flash
+ use grok # Grok 3
+ use grok-4 # Grok 4
+ use deepseek # DeepSeek Chat
+ use r1 # DeepSeek Reasoner
+ use minimax # Minimax M2.7
+ use kimi # Kimi K2.5
+ use free # Nemotron Ultra 253B (free)
+ use nemotron # Nemotron Ultra 253B (free)
+ use devstral # Devstral 2 123B (free)
+ use qwen-coder # Qwen3 Coder 480B (free)
  ```

  ### Inside Claude Code
@@ -165,18 +179,34 @@ $ brcc models

  Free Models (no USDC needed)
  ────────────────────────────────────────────────────────────────────────
+ nvidia/nemotron-ultra-253b
  nvidia/gpt-oss-120b
- nvidia/gpt-oss-20b
+ nvidia/deepseek-v3.2
+ nvidia/mistral-large-3-675b
+ nvidia/qwen3-coder-480b
+ nvidia/devstral-2-123b
+ nvidia/llama-4-maverick
+ nvidia/glm-4.7
+ ... (11 free models)

  Paid Models
  ────────────────────────────────────────────────────────────────────────
  Model                              Input      Output
+ openai/gpt-5-nano                  $0.05/M    $0.40/M
+ xai/grok-4-1-fast-reasoning        $0.20/M    $0.50/M
  deepseek/deepseek-chat             $0.28/M    $0.42/M
+ minimax/minimax-m2.7               $0.30/M    $1.20/M
+ google/gemini-2.5-flash            $0.30/M    $2.50/M
+ moonshot/kimi-k2.5                 $0.60/M    $3.00/M
  anthropic/claude-haiku-4.5         $1.00/M    $5.00/M
+ google/gemini-2.5-pro              $1.25/M    $10.00/M
+ openai/gpt-5.3-codex               $1.75/M    $14.00/M
+ google/gemini-3.1-pro              $2.00/M    $12.00/M
  openai/gpt-5.4                     $2.50/M    $15.00/M
  anthropic/claude-sonnet-4.6        $3.00/M    $15.00/M
  anthropic/claude-opus-4.6          $5.00/M    $25.00/M
- ... (31 models total)
+ openai/gpt-5.4-pro                 $30.00/M   $180.00/M
+ ... (50+ models total)
  ```

  ---
@@ -185,12 +215,16 @@ Paid Models

  | Model | ~Requests per $5 | Best For |
  |-------|-------------------|----------|
+ | Nemotron Ultra 253B | **Unlimited** | Free tier |
  | DeepSeek V3 | ~5,000 | Budget coding |
+ | Grok 4.1 Fast | ~3,500 | Fast reasoning (budget) |
+ | Gemini 2.5 Flash | ~1,200 | Balanced speed/cost |
+ | Kimi K2.5 | ~800 | Mid-range coding |
  | Claude Haiku 4.5 | ~500 | Fast tasks |
+ | GPT-5.3 Codex | ~180 | Code generation |
  | Claude Sonnet 4.6 | ~100 | General coding |
  | GPT-5.4 | ~80 | Reasoning |
  | Claude Opus 4.6 | ~50 | Most capable |
- | GPT-OSS 120B | **Unlimited** | Free tier |

  ---

@@ -220,7 +254,7 @@ Your wallet is saved to `~/.blockrun/` and shared with all BlockRun tools.

  ```bash
  brcc start # Default model
- brcc start --model nvidia/gpt-oss-120b # Free model
+ brcc start --model nvidia/nemotron-ultra-253b # Free model
  brcc start --model openai/gpt-5.4 # Specific model
  brcc start --no-launch # Proxy only mode
  brcc start --no-fallback # Disable auto-fallback
@@ -254,7 +288,7 @@ $ brcc stats
  deepseek/deepseek-chat
  620 req · $0.8901 (19.5%) · 180ms avg
  ↳ 12 fallback recoveries
- nvidia/gpt-oss-120b
+ nvidia/nemotron-ultra-253b
  164 req · $0.0000 (0%) · 320ms avg

  💰 Savings vs Claude Opus
@@ -273,7 +307,7 @@ $ brcc stats --json # Output as JSON (for scripts)
  ### `brcc config`

  ```bash
- brcc config set default-model nvidia/gpt-oss-120b
+ brcc config set default-model nvidia/nemotron-ultra-253b
  brcc config set sonnet-model openai/gpt-5.4
  brcc config set opus-model anthropic/claude-opus-4.6
  brcc config set haiku-model deepseek/deepseek-chat
@@ -288,15 +322,15 @@ When a model returns an error (429 rate limit, 500+ server error), brcc automati

  **Default fallback chain:**
  ```
- anthropic/claude-sonnet-4.6
+ your-selected-model
  ↓ (if 429/500/502/503/504)
- google/gemini-2.5-pro
+ blockrun/auto (smart routing)
  ↓
- deepseek/deepseek-chat
+ blockrun/eco (cheapest capable)
  ↓
- xai/grok-4-fast
+ deepseek/deepseek-chat
  ↓
- nvidia/gpt-oss-120b (free, always available)
+ nvidia/nemotron-ultra-253b (free, always available)
  ```

  **How it looks:**
@@ -336,7 +370,7 @@ brcc start --no-fallback
  │ BlockRun API (blockrun.ai or sol.blockrun.ai)            │
  │                                                          │
  │ Routes to: GPT-5 · Claude · Gemini · DeepSeek · Grok ·   │
- │ NVIDIA · MiniMax · Moonshot · 40+ models                 │
+ │ NVIDIA · MiniMax · Moonshot · 50+ models                 │
  └──────────────────────────────────────────────────────────┘
  ```

@@ -368,7 +402,7 @@ Typical cost: **$0.001–$0.05 per interaction**. $5 lasts most developers a wee

  No. brcc connects Claude Code to BlockRun instead of Anthropic.

  **Can I use non-Claude models?**
- Yes. GPT-5, Gemini, DeepSeek, Grok, and 30+ others work through Claude Code via brcc.
+ Yes. GPT-5, Gemini, DeepSeek, Grok, and 50+ others work through Claude Code via brcc.

  ---

@@ -7,7 +7,7 @@ export const DEFAULT_FALLBACK_CONFIG = {
          'blockrun/auto', // Smart routing (default)
          'blockrun/eco', // Cheapest capable model
          'deepseek/deepseek-chat', // Direct fallback
-         'nvidia/gpt-oss-120b', // Free model as ultimate fallback
+         'nvidia/nemotron-ultra-253b', // Free model as ultimate fallback
      ],
      retryOn: [429, 500, 502, 503, 504, 529],
      maxRetries: 5,
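
Read together with the README's fallback section, this config implies a simple walk down `chain` on the `retryOn` statuses. A minimal editorial sketch of that loop, not the package's actual implementation (`sendRequest` is a hypothetical upstream helper):

```js
// Editorial sketch: how a config shaped like DEFAULT_FALLBACK_CONFIG
// could drive retries. `sendRequest` stands in for the proxy's upstream call.
const config = {
  chain: [
    'blockrun/auto',
    'blockrun/eco',
    'deepseek/deepseek-chat',
    'nvidia/nemotron-ultra-253b',
  ],
  retryOn: [429, 500, 502, 503, 504, 529],
  maxRetries: 5,
};

async function sendWithFallback(body, sendRequest) {
  let attempts = 0;
  for (const model of config.chain) {
    if (attempts >= config.maxRetries) break;
    attempts++;
    const res = await sendRequest({ ...body, model });
    // Retry only the configured transient statuses; surface everything else.
    if (!config.retryOn.includes(res.status)) return res;
  }
  throw new Error('All fallback models exhausted');
}
```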
@@ -40,24 +40,56 @@ const DEFAULT_MAX_TOKENS = 4096;
  let lastOutputTokens = 0;
  // Model shortcuts for quick switching
  const MODEL_SHORTCUTS = {
+     // Routing profiles
      auto: 'blockrun/auto',
      smart: 'blockrun/auto',
      eco: 'blockrun/eco',
      premium: 'blockrun/premium',
-     gpt: 'openai/gpt-5.4',
-     gpt5: 'openai/gpt-5.4',
-     'gpt-5': 'openai/gpt-5.4',
-     'gpt-5.4': 'openai/gpt-5.4',
+     // Anthropic
      sonnet: 'anthropic/claude-sonnet-4.6',
      claude: 'anthropic/claude-sonnet-4.6',
      opus: 'anthropic/claude-opus-4.6',
      haiku: 'anthropic/claude-haiku-4.5',
-     deepseek: 'deepseek/deepseek-chat',
+     // OpenAI
+     gpt: 'openai/gpt-5.4',
+     gpt5: 'openai/gpt-5.4',
+     'gpt-5': 'openai/gpt-5.4',
+     'gpt-5.4': 'openai/gpt-5.4',
+     'gpt-5.4-pro': 'openai/gpt-5.4-pro',
+     'gpt-5.3': 'openai/gpt-5.3',
+     'gpt-5.2': 'openai/gpt-5.2',
+     'gpt-5.2-pro': 'openai/gpt-5.2-pro',
+     'gpt-4.1': 'openai/gpt-4.1',
+     codex: 'openai/gpt-5.3-codex',
+     nano: 'openai/gpt-5-nano',
+     mini: 'openai/gpt-5-mini',
+     o3: 'openai/o3',
+     o4: 'openai/o4-mini',
+     'o4-mini': 'openai/o4-mini',
+     o1: 'openai/o1',
+     // Google
      gemini: 'google/gemini-2.5-pro',
+     flash: 'google/gemini-2.5-flash',
+     'gemini-3': 'google/gemini-3.1-pro',
+     // xAI
      grok: 'xai/grok-3',
-     free: 'nvidia/gpt-oss-120b',
-     mini: 'openai/gpt-5-mini',
+     'grok-4': 'xai/grok-4-0709',
+     'grok-fast': 'xai/grok-4-1-fast-reasoning',
+     // DeepSeek
+     deepseek: 'deepseek/deepseek-chat',
+     r1: 'deepseek/deepseek-reasoner',
+     // Free models
+     free: 'nvidia/nemotron-ultra-253b',
+     nemotron: 'nvidia/nemotron-ultra-253b',
+     'deepseek-free': 'nvidia/deepseek-v3.2',
+     devstral: 'nvidia/devstral-2-123b',
+     'qwen-coder': 'nvidia/qwen3-coder-480b',
+     maverick: 'nvidia/llama-4-maverick',
+     // Minimax
+     minimax: 'minimax/minimax-m2.7',
+     // Others
      glm: 'zai/glm-5',
+     kimi: 'moonshot/kimi-k2.5',
  };
  // Model pricing (per 1M tokens) - used for stats
  const MODEL_PRICING = {
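
For reference, an in-session `use X` command presumably resolves against this table before the request reaches the proxy. A minimal editorial sketch of that lookup (the passthrough behavior for fully qualified IDs is an assumption):

```js
// Editorial sketch: resolving a `use X` name against MODEL_SHORTCUTS.
// Unknown names are assumed to pass through as full "provider/model" IDs.
function resolveModel(name, shortcuts) {
  const key = name.trim().toLowerCase();
  return shortcuts[key] ?? name;
}

resolveModel('free', MODEL_SHORTCUTS);      // 'nvidia/nemotron-ultra-253b'
resolveModel('codex', MODEL_SHORTCUTS);     // 'openai/gpt-5.3-codex'
resolveModel('zai/glm-5', MODEL_SHORTCUTS); // passthrough: 'zai/glm-5'
```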
@@ -66,27 +98,68 @@ const MODEL_PRICING = {
      'blockrun/eco': { input: 0.2, output: 1.0 },
      'blockrun/premium': { input: 3.0, output: 15.0 },
      'blockrun/free': { input: 0, output: 0 },
-     // Individual models
+     // FREE - NVIDIA models
+     'nvidia/gpt-oss-120b': { input: 0, output: 0 },
+     'nvidia/gpt-oss-20b': { input: 0, output: 0 },
+     'nvidia/nemotron-ultra-253b': { input: 0, output: 0 },
+     'nvidia/nemotron-3-super-120b': { input: 0, output: 0 },
+     'nvidia/nemotron-super-49b': { input: 0, output: 0 },
+     'nvidia/deepseek-v3.2': { input: 0, output: 0 },
+     'nvidia/mistral-large-3-675b': { input: 0, output: 0 },
+     'nvidia/qwen3-coder-480b': { input: 0, output: 0 },
+     'nvidia/devstral-2-123b': { input: 0, output: 0 },
+     'nvidia/glm-4.7': { input: 0, output: 0 },
+     'nvidia/llama-4-maverick': { input: 0, output: 0 },
+     // Anthropic
      'anthropic/claude-sonnet-4.6': { input: 3.0, output: 15.0 },
      'anthropic/claude-opus-4.6': { input: 5.0, output: 25.0 },
      'anthropic/claude-haiku-4.5': { input: 1.0, output: 5.0 },
-     'openai/gpt-5.4': { input: 2.5, output: 15.0 },
+     // OpenAI
+     'openai/gpt-5-nano': { input: 0.05, output: 0.4 },
+     'openai/gpt-4.1-nano': { input: 0.1, output: 0.4 },
+     'openai/gpt-4o-mini': { input: 0.15, output: 0.6 },
      'openai/gpt-5-mini': { input: 0.25, output: 2.0 },
-     'google/gemini-2.5-pro': { input: 1.25, output: 10.0 },
+     'openai/gpt-4.1-mini': { input: 0.4, output: 1.6 },
+     'openai/gpt-5.2': { input: 1.75, output: 14.0 },
+     'openai/gpt-5.3': { input: 1.75, output: 14.0 },
+     'openai/gpt-5.3-codex': { input: 1.75, output: 14.0 },
+     'openai/gpt-4.1': { input: 2.0, output: 8.0 },
+     'openai/o3': { input: 2.0, output: 8.0 },
+     'openai/gpt-4o': { input: 2.5, output: 10.0 },
+     'openai/gpt-5.4': { input: 2.5, output: 15.0 },
+     'openai/o1-mini': { input: 1.1, output: 4.4 },
+     'openai/o3-mini': { input: 1.1, output: 4.4 },
+     'openai/o4-mini': { input: 1.1, output: 4.4 },
+     'openai/o1': { input: 15.0, output: 60.0 },
+     'openai/gpt-5.2-pro': { input: 21.0, output: 168.0 },
+     'openai/gpt-5.4-pro': { input: 30.0, output: 180.0 },
+     // Google
+     'google/gemini-2.5-flash-lite': { input: 0.1, output: 0.4 },
      'google/gemini-2.5-flash': { input: 0.3, output: 2.5 },
-     'deepseek/deepseek-chat': { input: 0.28, output: 0.42 },
-     'deepseek/deepseek-reasoner': { input: 0.55, output: 2.19 },
-     'xai/grok-3': { input: 3.0, output: 15.0 },
+     'google/gemini-3-flash-preview': { input: 0.5, output: 3.0 },
+     'google/gemini-2.5-pro': { input: 1.25, output: 10.0 },
+     'google/gemini-3-pro-preview': { input: 2.0, output: 12.0 },
+     'google/gemini-3.1-pro': { input: 2.0, output: 12.0 },
+     // xAI
      'xai/grok-4-fast': { input: 0.2, output: 0.5 },
+     'xai/grok-4-fast-reasoning': { input: 0.2, output: 0.5 },
+     'xai/grok-4-1-fast': { input: 0.2, output: 0.5 },
      'xai/grok-4-1-fast-reasoning': { input: 0.2, output: 0.5 },
-     'nvidia/gpt-oss-120b': { input: 0, output: 0 },
-     'zai/glm-5': { input: 1.0, output: 3.2 },
+     'xai/grok-4-0709': { input: 0.2, output: 1.5 },
+     'xai/grok-3-mini': { input: 0.3, output: 0.5 },
+     'xai/grok-2-vision': { input: 2.0, output: 10.0 },
+     'xai/grok-3': { input: 3.0, output: 15.0 },
+     // DeepSeek
+     'deepseek/deepseek-chat': { input: 0.28, output: 0.42 },
+     'deepseek/deepseek-reasoner': { input: 0.28, output: 0.42 },
+     // Minimax
+     'minimax/minimax-m2.7': { input: 0.3, output: 1.2 },
+     'minimax/minimax-m2.5': { input: 0.3, output: 1.2 },
+     // Others
      'moonshot/kimi-k2.5': { input: 0.6, output: 3.0 },
-     'openai/gpt-5.3-codex': { input: 2.5, output: 10.0 },
-     'openai/o3': { input: 2.0, output: 8.0 },
-     'openai/o4-mini': { input: 1.1, output: 4.4 },
-     'google/gemini-2.5-flash-lite': { input: 0.08, output: 0.3 },
-     'google/gemini-3.1-pro': { input: 1.25, output: 10.0 },
+     'nvidia/kimi-k2.5': { input: 0.55, output: 2.5 },
+     'zai/glm-5': { input: 1.0, output: 3.2 },
+     'zai/glm-5-turbo': { input: 1.2, output: 4.0 },
  };
  function estimateCost(model, inputTokens, outputTokens) {
      const pricing = MODEL_PRICING[model] || { input: 2.0, output: 10.0 };
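
From the visible context, `estimateCost` presumably scales these per-1M-token rates by actual token counts. A worked editorial sketch of that arithmetic (the function body beyond the lines shown above is an assumption):

```js
// Editorial sketch of the cost arithmetic implied by MODEL_PRICING
// (rates are USD per 1M tokens); the body past the shown lines is assumed.
function estimateCost(model, inputTokens, outputTokens) {
  const pricing = MODEL_PRICING[model] || { input: 2.0, output: 10.0 };
  return (
    (inputTokens / 1_000_000) * pricing.input +
    (outputTokens / 1_000_000) * pricing.output
  );
}

// 10k input + 2k output tokens on deepseek/deepseek-chat:
// 0.01 * $0.28 + 0.002 * $0.42 = $0.0028 + $0.00084 ≈ $0.0036
estimateCost('deepseek/deepseek-chat', 10_000, 2_000);
```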
@@ -191,19 +264,18 @@ export function createProxy(options) {
          res.end(JSON.stringify(fakeResponse));
          return;
      }
-     // Apply model override only if:
-     // 1. User specified --model on CLI (options.modelOverride)
-     // 2. User switched model in-session (currentModel set by "use X" command)
-     // 3. Request has no model specified
-     if (options.modelOverride && currentModel) {
-         // CLI --model flag: always use this
+     // Model override logic:
+     // - Claude Code sends native Anthropic IDs (e.g. "claude-sonnet-4-6-20250514")
+     //   which don't contain "/" — these MUST be replaced with currentModel.
+     // - BlockRun model IDs always contain "/" (e.g. "blockrun/auto", "nvidia/nemotron-ultra-253b")
+     //   — these should be passed through as-is.
+     // - If --model CLI flag is set, always override regardless.
+     if (options.modelOverride) {
          parsed.model = currentModel;
      }
-     else if (!parsed.model) {
-         // No model in request: use default
+     else if (!parsed.model || !parsed.model.includes('/')) {
          parsed.model = currentModel || DEFAULT_MODEL;
      }
-     // Otherwise: use the model from the request as-is
      requestModel = parsed.model || DEFAULT_MODEL;
      // Smart routing: if model is a routing profile, classify and route
      const routingProfile = parseRoutingProfile(requestModel);
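
Condensed, the new logic is a single decision on the incoming model string. An editorial restatement of the branch above (`chooseModel` is illustrative, not a function in the package):

```js
// Editorial restatement of the override decision above.
// `opts.modelOverride` is the --model CLI flag; `currentModel` is the
// in-session selection ("use X"); IDs without "/" are native Anthropic IDs.
function chooseModel(requestedModel, opts, currentModel, DEFAULT_MODEL) {
  if (opts.modelOverride) return currentModel;          // CLI flag wins
  if (!requestedModel || !requestedModel.includes('/')) {
    return currentModel || DEFAULT_MODEL;               // replace native IDs
  }
  return requestedModel;                                // BlockRun ID: pass through
}

chooseModel('claude-sonnet-4-6-20250514', {}, 'blockrun/auto', 'blockrun/auto');
// → 'blockrun/auto' (native Anthropic ID replaced)
chooseModel('nvidia/nemotron-ultra-253b', {}, 'blockrun/auto', 'blockrun/auto');
// → 'nvidia/nemotron-ultra-253b' (passed through)
```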
@@ -0,0 +1,29 @@
+ /**
+  * SSE Event Translator: OpenAI → Anthropic Messages API format
+  *
+  * Handles three critical gaps in the streaming pipeline:
+  * 1. Tool calls: choice.delta.tool_calls → content_block_start/content_block_delta (tool_use)
+  * 2. Reasoning: reasoning_content → content_block_start/content_block_delta (thinking)
+  * 3. Ensures proper content_block_stop and message_stop events
+  */
+ export declare class SSETranslator {
+     private state;
+     private buffer;
+     constructor(model?: string);
+     /**
+      * Detect whether an SSE chunk is in OpenAI format.
+      * Returns true if it contains OpenAI-style `choices[].delta` structure.
+      */
+     static isOpenAIFormat(chunk: string): boolean;
+     /**
+      * Process a raw SSE text chunk and return translated Anthropic-format SSE events.
+      * Returns null if no translation needed (already Anthropic format or not parseable).
+      */
+     processChunk(rawChunk: string): string | null;
+     private parseSSEEvents;
+     private formatSSE;
+     private closeThinkingBlock;
+     private closeTextBlock;
+     private closeToolCalls;
+     private closeActiveBlocks;
+ }
@@ -0,0 +1,296 @@
+ /**
+  * SSE Event Translator: OpenAI → Anthropic Messages API format
+  *
+  * Handles three critical gaps in the streaming pipeline:
+  * 1. Tool calls: choice.delta.tool_calls → content_block_start/content_block_delta (tool_use)
+  * 2. Reasoning: reasoning_content → content_block_start/content_block_delta (thinking)
+  * 3. Ensures proper content_block_stop and message_stop events
+  */
+ // ─── SSE Translator ──────────────────────────────────────────────────────────
+ export class SSETranslator {
+     state;
+     buffer = '';
+     constructor(model = 'unknown') {
+         this.state = {
+             messageId: `msg_brcc_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
+             model,
+             blockIndex: 0,
+             activeToolCalls: new Map(),
+             thinkingBlockActive: false,
+             textBlockActive: false,
+             messageStarted: false,
+             inputTokens: 0,
+             outputTokens: 0,
+         };
+     }
+     /**
+      * Detect whether an SSE chunk is in OpenAI format.
+      * Returns true if it contains OpenAI-style `choices[].delta` structure.
+      */
+     static isOpenAIFormat(chunk) {
+         // Look for OpenAI-specific patterns in the SSE data
+         return (chunk.includes('"choices"') &&
+             chunk.includes('"delta"') &&
+             !chunk.includes('"content_block_'));
+     }
+     /**
+      * Process a raw SSE text chunk and return translated Anthropic-format SSE events.
+      * Returns null if no translation needed (already Anthropic format or not parseable).
+      */
+     processChunk(rawChunk) {
+         this.buffer += rawChunk;
+         const events = this.parseSSEEvents();
+         if (events.length === 0)
+             return null;
+         const translated = [];
+         for (const event of events) {
+             if (event.data === '[DONE]') {
+                 // Close any active blocks, then emit message_stop
+                 translated.push(...this.closeActiveBlocks());
+                 translated.push(this.formatSSE('message_delta', {
+                     type: 'message_delta',
+                     delta: { stop_reason: 'end_turn', stop_sequence: null },
+                     usage: { output_tokens: this.state.outputTokens },
+                 }));
+                 translated.push(this.formatSSE('message_stop', { type: 'message_stop' }));
+                 continue;
+             }
+             let parsed;
+             try {
+                 parsed = JSON.parse(event.data);
+             }
+             catch {
+                 continue;
+             }
+             // Skip if this doesn't look like OpenAI format
+             if (!parsed.choices || parsed.choices.length === 0) {
+                 // Could be a usage-only event
+                 if (parsed.usage) {
+                     this.state.inputTokens = parsed.usage.prompt_tokens || 0;
+                     this.state.outputTokens = parsed.usage.completion_tokens || 0;
+                 }
+                 continue;
+             }
+             // Emit message_start on first chunk
+             if (!this.state.messageStarted) {
+                 this.state.messageStarted = true;
+                 if (parsed.model)
+                     this.state.model = parsed.model;
+                 translated.push(this.formatSSE('message_start', {
+                     type: 'message_start',
+                     message: {
+                         id: this.state.messageId,
+                         type: 'message',
+                         role: 'assistant',
+                         model: this.state.model,
+                         content: [],
+                         stop_reason: null,
+                         stop_sequence: null,
+                         usage: { input_tokens: this.state.inputTokens, output_tokens: 0 },
+                     },
+                 }));
+                 translated.push(this.formatSSE('ping', { type: 'ping' }));
+             }
+             const choice = parsed.choices[0];
+             const delta = choice.delta;
+             // ── Reasoning content → thinking block ──
+             if (delta.reasoning_content) {
+                 if (!this.state.thinkingBlockActive) {
+                     // Close text block if active
+                     if (this.state.textBlockActive) {
+                         translated.push(...this.closeTextBlock());
+                     }
+                     this.state.thinkingBlockActive = true;
+                     translated.push(this.formatSSE('content_block_start', {
+                         type: 'content_block_start',
+                         index: this.state.blockIndex,
+                         content_block: { type: 'thinking', thinking: '' },
+                     }));
+                 }
+                 translated.push(this.formatSSE('content_block_delta', {
+                     type: 'content_block_delta',
+                     index: this.state.blockIndex,
+                     delta: { type: 'thinking_delta', thinking: delta.reasoning_content },
+                 }));
+                 this.state.outputTokens++;
+             }
+             // ── Text content → text block ──
+             if (delta.content) {
+                 // Close thinking block if transitioning
+                 if (this.state.thinkingBlockActive) {
+                     translated.push(...this.closeThinkingBlock());
+                 }
+                 if (!this.state.textBlockActive) {
+                     // Close any active tool calls first
+                     translated.push(...this.closeToolCalls());
+                     this.state.textBlockActive = true;
+                     translated.push(this.formatSSE('content_block_start', {
+                         type: 'content_block_start',
+                         index: this.state.blockIndex,
+                         content_block: { type: 'text', text: '' },
+                     }));
+                 }
+                 translated.push(this.formatSSE('content_block_delta', {
+                     type: 'content_block_delta',
+                     index: this.state.blockIndex,
+                     delta: { type: 'text_delta', text: delta.content },
+                 }));
+                 this.state.outputTokens++;
+             }
+             // ── Tool calls → tool_use blocks ──
+             if (delta.tool_calls && delta.tool_calls.length > 0) {
+                 // Close thinking/text blocks first
+                 if (this.state.thinkingBlockActive) {
+                     translated.push(...this.closeThinkingBlock());
+                 }
+                 if (this.state.textBlockActive) {
+                     translated.push(...this.closeTextBlock());
+                 }
+                 for (const tc of delta.tool_calls) {
+                     const tcIndex = tc.index;
+                     if (tc.id && tc.function?.name) {
+                         // New tool call start
+                         // Close previous tool call at same index if exists
+                         if (this.state.activeToolCalls.has(tcIndex)) {
+                             translated.push(this.formatSSE('content_block_stop', {
+                                 type: 'content_block_stop',
+                                 index: this.state.blockIndex,
+                             }));
+                             this.state.blockIndex++;
+                         }
+                         const toolId = tc.id;
+                         const toolName = tc.function.name;
+                         this.state.activeToolCalls.set(tcIndex, { id: toolId, name: toolName });
+                         translated.push(this.formatSSE('content_block_start', {
+                             type: 'content_block_start',
+                             index: this.state.blockIndex,
+                             content_block: {
+                                 type: 'tool_use',
+                                 id: toolId,
+                                 name: toolName,
+                                 input: {},
+                             },
+                         }));
+                         // If there are initial arguments, send them
+                         if (tc.function.arguments) {
+                             translated.push(this.formatSSE('content_block_delta', {
+                                 type: 'content_block_delta',
+                                 index: this.state.blockIndex,
+                                 delta: {
+                                     type: 'input_json_delta',
+                                     partial_json: tc.function.arguments,
+                                 },
+                             }));
+                         }
+                     }
+                     else if (tc.function?.arguments) {
+                         // Continuation of existing tool call - stream arguments
+                         translated.push(this.formatSSE('content_block_delta', {
+                             type: 'content_block_delta',
+                             index: this.state.blockIndex,
+                             delta: {
+                                 type: 'input_json_delta',
+                                 partial_json: tc.function.arguments,
+                             },
+                         }));
+                     }
+                 }
+                 this.state.outputTokens++;
+             }
+             // ── Handle finish_reason ──
+             if (choice.finish_reason) {
+                 translated.push(...this.closeActiveBlocks());
+                 const stopReason = choice.finish_reason === 'tool_calls'
+                     ? 'tool_use'
+                     : choice.finish_reason === 'stop'
+                         ? 'end_turn'
+                         : choice.finish_reason;
+                 translated.push(this.formatSSE('message_delta', {
+                     type: 'message_delta',
+                     delta: { stop_reason: stopReason, stop_sequence: null },
+                     usage: { output_tokens: this.state.outputTokens },
+                 }));
+             }
+         }
+         return translated.length > 0 ? translated.join('') : null;
+     }
+     // ── Helpers ──────────────────────────────────────────────────────────────
+     parseSSEEvents() {
+         const events = [];
+         const lines = this.buffer.split('\n');
+         let currentEvent;
+         let dataLines = [];
+         let consumed = 0;
+         for (let i = 0; i < lines.length; i++) {
+             const line = lines[i];
+             if (line.startsWith('event: ')) {
+                 currentEvent = line.slice(7).trim();
+             }
+             else if (line.startsWith('data: ')) {
+                 dataLines.push(line.slice(6));
+             }
+             else if (line === '' && dataLines.length > 0) {
+                 // End of event
+                 events.push({ event: currentEvent, data: dataLines.join('\n') });
+                 currentEvent = undefined;
+                 dataLines = [];
+                 consumed = lines.slice(0, i + 1).join('\n').length + 1;
+             }
+         }
+         // Keep unconsumed text in buffer
+         if (consumed > 0) {
+             this.buffer = this.buffer.slice(consumed);
+         }
+         return events;
+     }
+     formatSSE(event, data) {
+         return `event: ${event}\ndata: ${JSON.stringify(data)}\n\n`;
+     }
+     closeThinkingBlock() {
+         if (!this.state.thinkingBlockActive)
+             return [];
+         this.state.thinkingBlockActive = false;
+         const events = [
+             this.formatSSE('content_block_stop', {
+                 type: 'content_block_stop',
+                 index: this.state.blockIndex,
+             }),
+         ];
+         this.state.blockIndex++;
+         return events;
+     }
+     closeTextBlock() {
+         if (!this.state.textBlockActive)
+             return [];
+         this.state.textBlockActive = false;
+         const events = [
+             this.formatSSE('content_block_stop', {
+                 type: 'content_block_stop',
+                 index: this.state.blockIndex,
+             }),
+         ];
+         this.state.blockIndex++;
+         return events;
+     }
+     closeToolCalls() {
+         if (this.state.activeToolCalls.size === 0)
+             return [];
+         const events = [];
+         for (const [_index] of this.state.activeToolCalls) {
+             events.push(this.formatSSE('content_block_stop', {
+                 type: 'content_block_stop',
+                 index: this.state.blockIndex,
+             }));
+             this.state.blockIndex++;
+         }
+         this.state.activeToolCalls.clear();
+         return events;
+     }
+     closeActiveBlocks() {
+         const events = [];
+         events.push(...this.closeThinkingBlock());
+         events.push(...this.closeTextBlock());
+         events.push(...this.closeToolCalls());
+         return events;
+     }
+ }
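
To see the translator end to end, a minimal editorial usage sketch; the import path and the SSE payload are illustrative:

```js
// Editorial usage sketch; the payload below is illustrative.
import { SSETranslator } from './sse-translator.js'; // hypothetical path

const t = new SSETranslator('deepseek/deepseek-chat');
const openAIChunk =
  'data: {"choices":[{"delta":{"content":"Hello"},"index":0}],"model":"deepseek-chat"}\n\n';

if (SSETranslator.isOpenAIFormat(openAIChunk)) {
  const anthropicEvents = t.processChunk(openAIChunk);
  // → message_start, ping, content_block_start (text),
  //   then content_block_delta with {"type":"text_delta","text":"Hello"}
  console.log(anthropicEvents);
}
```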
@@ -6,11 +6,11 @@
  const AUTO_TIERS = {
      SIMPLE: {
          primary: 'google/gemini-2.5-flash',
-         fallback: ['deepseek/deepseek-chat', 'nvidia/gpt-oss-120b'],
+         fallback: ['deepseek/deepseek-chat', 'nvidia/nemotron-ultra-253b'],
      },
      MEDIUM: {
          primary: 'moonshot/kimi-k2.5',
-         fallback: ['google/gemini-2.5-flash', 'deepseek/deepseek-chat'],
+         fallback: ['google/gemini-2.5-flash', 'minimax/minimax-m2.7'],
      },
      COMPLEX: {
          primary: 'google/gemini-3.1-pro',
@@ -23,20 +23,20 @@ const AUTO_TIERS = {
  };
  const ECO_TIERS = {
      SIMPLE: {
-         primary: 'nvidia/gpt-oss-120b',
-         fallback: ['google/gemini-2.5-flash-lite'],
+         primary: 'nvidia/nemotron-ultra-253b',
+         fallback: ['nvidia/gpt-oss-120b', 'nvidia/deepseek-v3.2'],
      },
      MEDIUM: {
          primary: 'google/gemini-2.5-flash-lite',
-         fallback: ['nvidia/gpt-oss-120b'],
+         fallback: ['nvidia/nemotron-ultra-253b', 'nvidia/qwen3-coder-480b'],
      },
      COMPLEX: {
          primary: 'google/gemini-2.5-flash-lite',
-         fallback: ['deepseek/deepseek-chat'],
+         fallback: ['deepseek/deepseek-chat', 'nvidia/mistral-large-3-675b'],
      },
      REASONING: {
          primary: 'xai/grok-4-1-fast-reasoning',
-         fallback: ['deepseek/deepseek-reasoner'],
+         fallback: ['deepseek/deepseek-reasoner', 'nvidia/nemotron-ultra-253b'],
      },
  };
  const PREMIUM_TIERS = {
@@ -189,7 +189,7 @@ export function routeRequest(prompt, profile = 'auto') {
      // Free profile - always use free model
      if (profile === 'free') {
          return {
-             model: 'nvidia/gpt-oss-120b',
+             model: 'nvidia/nemotron-ultra-253b',
              tier: 'SIMPLE',
              confidence: 1.0,
              signals: ['free-profile'],
@@ -217,22 +217,48 @@ export function routeRequest(prompt, profile = 'auto') {
      // Baseline: Claude Opus at $5/$25 per 1M tokens
      const OPUS_COST_PER_1K = 0.015; // rough average
      const modelCosts = {
+         // FREE
          'nvidia/gpt-oss-120b': 0,
-         'google/gemini-2.5-flash': 0.001,
+         'nvidia/gpt-oss-20b': 0,
+         'nvidia/nemotron-ultra-253b': 0,
+         'nvidia/nemotron-3-super-120b': 0,
+         'nvidia/nemotron-super-49b': 0,
+         'nvidia/deepseek-v3.2': 0,
+         'nvidia/mistral-large-3-675b': 0,
+         'nvidia/qwen3-coder-480b': 0,
+         'nvidia/devstral-2-123b': 0,
+         'nvidia/glm-4.7': 0,
+         'nvidia/llama-4-maverick': 0,
+         // Budget
+         'openai/gpt-5-nano': 0.0002,
+         'openai/gpt-4.1-nano': 0.0003,
          'google/gemini-2.5-flash-lite': 0.0003,
+         'xai/grok-4-fast': 0.0004,
+         'xai/grok-4-1-fast': 0.0004,
+         'xai/grok-4-1-fast-reasoning': 0.0004,
          'deepseek/deepseek-chat': 0.0004,
-         'deepseek/deepseek-reasoner': 0.003,
-         'moonshot/kimi-k2.5': 0.002,
-         'google/gemini-2.5-pro': 0.006,
-         'google/gemini-3.1-pro': 0.007,
+         'deepseek/deepseek-reasoner': 0.0004,
+         'minimax/minimax-m2.7': 0.0008,
+         'minimax/minimax-m2.5': 0.0008,
+         'google/gemini-2.5-flash': 0.0014,
+         'openai/gpt-5-mini': 0.0011,
+         'moonshot/kimi-k2.5': 0.0018,
+         // Mid-range
          'anthropic/claude-haiku-4.5': 0.003,
+         'zai/glm-5': 0.0021,
+         'openai/o4-mini': 0.0028,
+         'google/gemini-2.5-pro': 0.0056,
+         'openai/gpt-5.3-codex': 0.0079,
+         'openai/gpt-5.2': 0.0079,
+         'openai/gpt-5.3': 0.0079,
+         'openai/gpt-4.1': 0.005,
+         'openai/o3': 0.005,
+         'google/gemini-3.1-pro': 0.007,
+         'openai/gpt-5.4': 0.0088,
+         // Premium
          'anthropic/claude-sonnet-4.6': 0.009,
+         'xai/grok-3': 0.009,
          'anthropic/claude-opus-4.6': 0.015,
-         'openai/gpt-5.3-codex': 0.008,
-         'openai/gpt-5.4': 0.009,
-         'openai/o3': 0.012,
-         'openai/o4-mini': 0.006,
-         'xai/grok-4-1-fast-reasoning': 0.0004,
      };
      const modelCost = modelCosts[model] ?? 0.005;
      const savings = Math.max(0, (OPUS_COST_PER_1K - modelCost) / OPUS_COST_PER_1K);
@@ -257,7 +283,7 @@ export function getFallbackChain(tier, profile = 'auto') {
              tierConfigs = PREMIUM_TIERS;
              break;
          case 'free':
-             return ['nvidia/gpt-oss-120b'];
+             return ['nvidia/nemotron-ultra-253b'];
          default:
              tierConfigs = AUTO_TIERS;
      }
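
As a sanity check on the savings math used above, a worked editorial example against the Opus baseline:

```js
// Editorial worked example of the savings formula shown in routeRequest.
const OPUS_COST_PER_1K = 0.015;  // rough blended Opus cost per 1K tokens
const modelCost = 0.0004;        // deepseek/deepseek-chat from modelCosts
const savings = Math.max(0, (OPUS_COST_PER_1K - modelCost) / OPUS_COST_PER_1K);
console.log(savings.toFixed(3)); // ≈ 0.973, i.e. ~97% cheaper than Opus
```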
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
      "name": "@blockrun/cc",
-     "version": "0.9.2",
+     "version": "0.9.4",
      "description": "Run Claude Code with any model — no rate limits, no account locks, no phone verification. Pay per use with USDC.",
      "type": "module",
      "bin": {