@blockrun/cc 0.9.2 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,7 @@ export const DEFAULT_FALLBACK_CONFIG = {
7
7
  'blockrun/auto', // Smart routing (default)
8
8
  'blockrun/eco', // Cheapest capable model
9
9
  'deepseek/deepseek-chat', // Direct fallback
10
- 'nvidia/gpt-oss-120b', // Free model as ultimate fallback
10
+ 'nvidia/nemotron-ultra-253b', // Free model as ultimate fallback
11
11
  ],
12
12
  retryOn: [429, 500, 502, 503, 504, 529],
13
13
  maxRetries: 5,
@@ -40,24 +40,56 @@ const DEFAULT_MAX_TOKENS = 4096;
40
40
  let lastOutputTokens = 0;
41
41
  // Model shortcuts for quick switching
42
42
  const MODEL_SHORTCUTS = {
43
+ // Routing profiles
43
44
  auto: 'blockrun/auto',
44
45
  smart: 'blockrun/auto',
45
46
  eco: 'blockrun/eco',
46
47
  premium: 'blockrun/premium',
47
- gpt: 'openai/gpt-5.4',
48
- gpt5: 'openai/gpt-5.4',
49
- 'gpt-5': 'openai/gpt-5.4',
50
- 'gpt-5.4': 'openai/gpt-5.4',
48
+ // Anthropic
51
49
  sonnet: 'anthropic/claude-sonnet-4.6',
52
50
  claude: 'anthropic/claude-sonnet-4.6',
53
51
  opus: 'anthropic/claude-opus-4.6',
54
52
  haiku: 'anthropic/claude-haiku-4.5',
55
- deepseek: 'deepseek/deepseek-chat',
53
+ // OpenAI
54
+ gpt: 'openai/gpt-5.4',
55
+ gpt5: 'openai/gpt-5.4',
56
+ 'gpt-5': 'openai/gpt-5.4',
57
+ 'gpt-5.4': 'openai/gpt-5.4',
58
+ 'gpt-5.4-pro': 'openai/gpt-5.4-pro',
59
+ 'gpt-5.3': 'openai/gpt-5.3',
60
+ 'gpt-5.2': 'openai/gpt-5.2',
61
+ 'gpt-5.2-pro': 'openai/gpt-5.2-pro',
62
+ 'gpt-4.1': 'openai/gpt-4.1',
63
+ codex: 'openai/gpt-5.3-codex',
64
+ nano: 'openai/gpt-5-nano',
65
+ mini: 'openai/gpt-5-mini',
66
+ o3: 'openai/o3',
67
+ o4: 'openai/o4-mini',
68
+ 'o4-mini': 'openai/o4-mini',
69
+ o1: 'openai/o1',
70
+ // Google
56
71
  gemini: 'google/gemini-2.5-pro',
72
+ flash: 'google/gemini-2.5-flash',
73
+ 'gemini-3': 'google/gemini-3.1-pro',
74
+ // xAI
57
75
  grok: 'xai/grok-3',
58
- free: 'nvidia/gpt-oss-120b',
59
- mini: 'openai/gpt-5-mini',
76
+ 'grok-4': 'xai/grok-4-0709',
77
+ 'grok-fast': 'xai/grok-4-1-fast-reasoning',
78
+ // DeepSeek
79
+ deepseek: 'deepseek/deepseek-chat',
80
+ r1: 'deepseek/deepseek-reasoner',
81
+ // Free models
82
+ free: 'nvidia/nemotron-ultra-253b',
83
+ nemotron: 'nvidia/nemotron-ultra-253b',
84
+ 'deepseek-free': 'nvidia/deepseek-v3.2',
85
+ devstral: 'nvidia/devstral-2-123b',
86
+ 'qwen-coder': 'nvidia/qwen3-coder-480b',
87
+ maverick: 'nvidia/llama-4-maverick',
88
+ // Minimax
89
+ minimax: 'minimax/minimax-m2.7',
90
+ // Others
60
91
  glm: 'zai/glm-5',
92
+ kimi: 'moonshot/kimi-k2.5',
61
93
  };
62
94
  // Model pricing (per 1M tokens) - used for stats
63
95
  const MODEL_PRICING = {
@@ -66,27 +98,68 @@ const MODEL_PRICING = {
66
98
  'blockrun/eco': { input: 0.2, output: 1.0 },
67
99
  'blockrun/premium': { input: 3.0, output: 15.0 },
68
100
  'blockrun/free': { input: 0, output: 0 },
69
- // Individual models
101
+ // FREE - NVIDIA models
102
+ 'nvidia/gpt-oss-120b': { input: 0, output: 0 },
103
+ 'nvidia/gpt-oss-20b': { input: 0, output: 0 },
104
+ 'nvidia/nemotron-ultra-253b': { input: 0, output: 0 },
105
+ 'nvidia/nemotron-3-super-120b': { input: 0, output: 0 },
106
+ 'nvidia/nemotron-super-49b': { input: 0, output: 0 },
107
+ 'nvidia/deepseek-v3.2': { input: 0, output: 0 },
108
+ 'nvidia/mistral-large-3-675b': { input: 0, output: 0 },
109
+ 'nvidia/qwen3-coder-480b': { input: 0, output: 0 },
110
+ 'nvidia/devstral-2-123b': { input: 0, output: 0 },
111
+ 'nvidia/glm-4.7': { input: 0, output: 0 },
112
+ 'nvidia/llama-4-maverick': { input: 0, output: 0 },
113
+ // Anthropic
70
114
  'anthropic/claude-sonnet-4.6': { input: 3.0, output: 15.0 },
71
115
  'anthropic/claude-opus-4.6': { input: 5.0, output: 25.0 },
72
116
  'anthropic/claude-haiku-4.5': { input: 1.0, output: 5.0 },
73
- 'openai/gpt-5.4': { input: 2.5, output: 15.0 },
117
+ // OpenAI
118
+ 'openai/gpt-5-nano': { input: 0.05, output: 0.4 },
119
+ 'openai/gpt-4.1-nano': { input: 0.1, output: 0.4 },
120
+ 'openai/gpt-4o-mini': { input: 0.15, output: 0.6 },
74
121
  'openai/gpt-5-mini': { input: 0.25, output: 2.0 },
75
- 'google/gemini-2.5-pro': { input: 1.25, output: 10.0 },
122
+ 'openai/gpt-4.1-mini': { input: 0.4, output: 1.6 },
123
+ 'openai/gpt-5.2': { input: 1.75, output: 14.0 },
124
+ 'openai/gpt-5.3': { input: 1.75, output: 14.0 },
125
+ 'openai/gpt-5.3-codex': { input: 1.75, output: 14.0 },
126
+ 'openai/gpt-4.1': { input: 2.0, output: 8.0 },
127
+ 'openai/o3': { input: 2.0, output: 8.0 },
128
+ 'openai/gpt-4o': { input: 2.5, output: 10.0 },
129
+ 'openai/gpt-5.4': { input: 2.5, output: 15.0 },
130
+ 'openai/o1-mini': { input: 1.1, output: 4.4 },
131
+ 'openai/o3-mini': { input: 1.1, output: 4.4 },
132
+ 'openai/o4-mini': { input: 1.1, output: 4.4 },
133
+ 'openai/o1': { input: 15.0, output: 60.0 },
134
+ 'openai/gpt-5.2-pro': { input: 21.0, output: 168.0 },
135
+ 'openai/gpt-5.4-pro': { input: 30.0, output: 180.0 },
136
+ // Google
137
+ 'google/gemini-2.5-flash-lite': { input: 0.1, output: 0.4 },
76
138
  'google/gemini-2.5-flash': { input: 0.3, output: 2.5 },
77
- 'deepseek/deepseek-chat': { input: 0.28, output: 0.42 },
78
- 'deepseek/deepseek-reasoner': { input: 0.55, output: 2.19 },
79
- 'xai/grok-3': { input: 3.0, output: 15.0 },
139
+ 'google/gemini-3-flash-preview': { input: 0.5, output: 3.0 },
140
+ 'google/gemini-2.5-pro': { input: 1.25, output: 10.0 },
141
+ 'google/gemini-3-pro-preview': { input: 2.0, output: 12.0 },
142
+ 'google/gemini-3.1-pro': { input: 2.0, output: 12.0 },
143
+ // xAI
80
144
  'xai/grok-4-fast': { input: 0.2, output: 0.5 },
145
+ 'xai/grok-4-fast-reasoning': { input: 0.2, output: 0.5 },
146
+ 'xai/grok-4-1-fast': { input: 0.2, output: 0.5 },
81
147
  'xai/grok-4-1-fast-reasoning': { input: 0.2, output: 0.5 },
82
- 'nvidia/gpt-oss-120b': { input: 0, output: 0 },
83
- 'zai/glm-5': { input: 1.0, output: 3.2 },
148
+ 'xai/grok-4-0709': { input: 0.2, output: 1.5 },
149
+ 'xai/grok-3-mini': { input: 0.3, output: 0.5 },
150
+ 'xai/grok-2-vision': { input: 2.0, output: 10.0 },
151
+ 'xai/grok-3': { input: 3.0, output: 15.0 },
152
+ // DeepSeek
153
+ 'deepseek/deepseek-chat': { input: 0.28, output: 0.42 },
154
+ 'deepseek/deepseek-reasoner': { input: 0.28, output: 0.42 },
155
+ // Minimax
156
+ 'minimax/minimax-m2.7': { input: 0.3, output: 1.2 },
157
+ 'minimax/minimax-m2.5': { input: 0.3, output: 1.2 },
158
+ // Others
84
159
  'moonshot/kimi-k2.5': { input: 0.6, output: 3.0 },
85
- 'openai/gpt-5.3-codex': { input: 2.5, output: 10.0 },
86
- 'openai/o3': { input: 2.0, output: 8.0 },
87
- 'openai/o4-mini': { input: 1.1, output: 4.4 },
88
- 'google/gemini-2.5-flash-lite': { input: 0.08, output: 0.3 },
89
- 'google/gemini-3.1-pro': { input: 1.25, output: 10.0 },
160
+ 'nvidia/kimi-k2.5': { input: 0.55, output: 2.5 },
161
+ 'zai/glm-5': { input: 1.0, output: 3.2 },
162
+ 'zai/glm-5-turbo': { input: 1.2, output: 4.0 },
90
163
  };
91
164
  function estimateCost(model, inputTokens, outputTokens) {
92
165
  const pricing = MODEL_PRICING[model] || { input: 2.0, output: 10.0 };
@@ -0,0 +1,29 @@
1
+ /**
2
+ * SSE Event Translator: OpenAI → Anthropic Messages API format
3
+ *
4
+ * Handles three critical gaps in the streaming pipeline:
5
+ * 1. Tool calls: choice.delta.tool_calls → content_block_start/content_block_delta (tool_use)
6
+ * 2. Reasoning: reasoning_content → content_block_start/content_block_delta (thinking)
7
+ * 3. Ensures proper content_block_stop and message_stop events
8
+ */
9
+ export declare class SSETranslator {
10
+ private state; // mutable per-stream translation state: message id, model, next block index, open blocks, token counts
11
+ private buffer; // raw SSE text received so far that has not yet been consumed as complete events
12
+ constructor(model?: string); // model: name reported in message_start unless an upstream chunk carries its own `model`; defaults to 'unknown'
13
+ /**
14
+ * Detect whether an SSE chunk is in OpenAI format.
15
+ * Returns true when the text contains the `"choices"` and `"delta"` keys and no `"content_block_` marker (a substring check; the chunk is not parsed).
16
+ */
17
+ static isOpenAIFormat(chunk: string): boolean;
18
+ /**
19
+ * Append a raw SSE text chunk to the internal buffer and return the translated Anthropic-format SSE events as one string.
20
+ * Returns null when the buffer holds no complete event yet, or when the parsed events yield no output (payloads that are not valid JSON or carry no `choices` are skipped).
21
+ */
22
+ processChunk(rawChunk: string): string | null;
23
+ private parseSSEEvents;
24
+ private formatSSE;
25
+ private closeThinkingBlock;
26
+ private closeTextBlock;
27
+ private closeToolCalls;
28
+ private closeActiveBlocks;
29
+ }
@@ -0,0 +1,296 @@
1
+ /**
2
+ * SSE Event Translator: OpenAI → Anthropic Messages API format
3
+ *
4
+ * Handles three critical gaps in the streaming pipeline:
5
+ * 1. Tool calls: choice.delta.tool_calls → content_block_start/content_block_delta (tool_use)
6
+ * 2. Reasoning: reasoning_content → content_block_start/content_block_delta (thinking)
7
+ * 3. Ensures proper content_block_stop and message_stop events
8
+ */
9
+ // ─── SSE Translator ─────────────────────────────────────────────────────────
10
/**
 * OpenAI `finish_reason` values whose Anthropic `stop_reason` has a different
 * name. Values not listed here are forwarded unchanged.
 */
const ANTHROPIC_STOP_REASONS = new Map([
  ['stop', 'end_turn'],
  ['tool_calls', 'tool_use'],
  ['length', 'max_tokens'],
]);

/**
 * Stateful translator from an OpenAI chat-completions SSE stream to the
 * Anthropic Messages API SSE event sequence.
 *
 * One instance handles exactly one streamed message: feed every raw chunk to
 * `processChunk()` in arrival order and forward whatever it returns.
 *
 * Emitted sequence:
 *   message_start, ping,
 *   (content_block_start, content_block_delta*, content_block_stop)*,
 *   message_delta, message_stop
 *
 * Content blocks are strictly sequential: at most one block (thinking, text or
 * tool_use) is open at a time and every block gets its own index.
 */
export class SSETranslator {
  /** Mutable per-stream translation state (see constructor). */
  state;
  /** Raw SSE text received but not yet consumed as complete events. */
  buffer = '';

  /**
   * @param {string} [model='unknown'] Model name reported in `message_start`
   *   unless the first upstream chunk carries its own `model` field.
   */
  constructor(model = 'unknown') {
    this.state = {
      messageId: `msg_brcc_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
      model,
      // Index the next (or currently open) content block uses.
      blockIndex: 0,
      // OpenAI tool_call index -> { id, name, blockIndex } of the open tool_use block.
      activeToolCalls: new Map(),
      thinkingBlockActive: false,
      textBlockActive: false,
      messageStarted: false,
      // Set once message_delta went out so `[DONE]` cannot overwrite the stop reason.
      stopEmitted: false,
      inputTokens: 0,
      // Estimated as one per translated delta chunk; replaced by provider usage
      // when a usage-only chunk arrives.
      outputTokens: 0,
    };
  }

  /**
   * Detect whether an SSE chunk is in OpenAI format.
   * This is a substring check on the raw text; nothing is parsed.
   *
   * @param {string} chunk Raw SSE text.
   * @returns {boolean} True when the text contains `"choices"` and `"delta"`
   *   and no `"content_block_` marker.
   */
  static isOpenAIFormat(chunk) {
    return (
      chunk.includes('"choices"') &&
      chunk.includes('"delta"') &&
      !chunk.includes('"content_block_')
    );
  }

  /**
   * Process a raw SSE text chunk and return translated Anthropic-format SSE events.
   *
   * @param {string} rawChunk Raw upstream text; may contain partial events.
   * @returns {string|null} Concatenated Anthropic SSE frames, or null when the
   *   buffer holds no complete event yet or the events produced no output.
   */
  processChunk(rawChunk) {
    this.buffer += rawChunk;
    const events = this.parseSSEEvents();
    if (events.length === 0) {
      return null;
    }

    const translated = [];
    for (const event of events) {
      if (event.data.trim() === '[DONE]') {
        translated.push(...this.closeActiveBlocks());
        // Only synthesize a stop reason when no finish_reason was translated;
        // otherwise a `tool_use` stop would be overwritten by `end_turn`.
        translated.push(...this.emitMessageDelta('end_turn'));
        translated.push(this.formatSSE('message_stop', { type: 'message_stop' }));
        continue;
      }

      let parsed;
      try {
        parsed = JSON.parse(event.data);
      } catch {
        // Payloads that are not JSON are deliberately skipped.
        continue;
      }
      if (parsed === null || typeof parsed !== 'object') {
        continue;
      }

      if (!Array.isArray(parsed.choices) || parsed.choices.length === 0) {
        // Not a delta chunk; it may still be the usage-only trailer.
        if (parsed.usage) {
          this.state.inputTokens = parsed.usage.prompt_tokens || 0;
          this.state.outputTokens = parsed.usage.completion_tokens || 0;
        }
        continue;
      }

      translated.push(...this.startMessage(parsed));

      const choice = parsed.choices[0] ?? {};
      // Some providers send a final chunk that has finish_reason but no delta.
      const delta = choice.delta ?? {};

      // Reasoning content -> thinking block
      if (delta.reasoning_content) {
        if (!this.state.thinkingBlockActive) {
          translated.push(...this.closeTextBlock());
          translated.push(...this.closeToolCalls());
          this.state.thinkingBlockActive = true;
          translated.push(this.formatSSE('content_block_start', {
            type: 'content_block_start',
            index: this.state.blockIndex,
            content_block: { type: 'thinking', thinking: '' },
          }));
        }
        translated.push(this.formatSSE('content_block_delta', {
          type: 'content_block_delta',
          index: this.state.blockIndex,
          delta: { type: 'thinking_delta', thinking: delta.reasoning_content },
        }));
        this.state.outputTokens++;
      }

      // Text content -> text block
      if (delta.content) {
        translated.push(...this.closeThinkingBlock());
        if (!this.state.textBlockActive) {
          translated.push(...this.closeToolCalls());
          this.state.textBlockActive = true;
          translated.push(this.formatSSE('content_block_start', {
            type: 'content_block_start',
            index: this.state.blockIndex,
            content_block: { type: 'text', text: '' },
          }));
        }
        translated.push(this.formatSSE('content_block_delta', {
          type: 'content_block_delta',
          index: this.state.blockIndex,
          delta: { type: 'text_delta', text: delta.content },
        }));
        this.state.outputTokens++;
      }

      // Tool calls -> tool_use blocks
      if (Array.isArray(delta.tool_calls) && delta.tool_calls.length > 0) {
        translated.push(...this.closeThinkingBlock());
        translated.push(...this.closeTextBlock());
        for (const tc of delta.tool_calls) {
          translated.push(...this.translateToolCall(tc));
        }
        this.state.outputTokens++;
      }

      // finish_reason -> message_delta
      if (choice.finish_reason) {
        translated.push(...this.closeActiveBlocks());
        const stopReason =
          ANTHROPIC_STOP_REASONS.get(choice.finish_reason) ?? choice.finish_reason;
        translated.push(...this.emitMessageDelta(stopReason));
      }
    }

    return translated.length > 0 ? translated.join('') : null;
  }

  // ── Helpers ─────────────────────────────────────────────────────────────

  /**
   * Emit `message_start` + `ping` for the first delta chunk of the stream.
   *
   * @param {object} parsed Parsed upstream chunk; its `model`, when present,
   *   replaces the constructor-supplied model name.
   * @returns {string[]} Frames to emit (empty once the message has started).
   */
  startMessage(parsed) {
    if (this.state.messageStarted) {
      return [];
    }
    this.state.messageStarted = true;
    if (parsed.model) {
      this.state.model = parsed.model;
    }
    return [
      this.formatSSE('message_start', {
        type: 'message_start',
        message: {
          id: this.state.messageId,
          type: 'message',
          role: 'assistant',
          model: this.state.model,
          content: [],
          stop_reason: null,
          stop_sequence: null,
          usage: { input_tokens: this.state.inputTokens, output_tokens: 0 },
        },
      }),
      this.formatSSE('ping', { type: 'ping' }),
    ];
  }

  /**
   * Translate one entry of `delta.tool_calls`.
   *
   * @param {object} tc OpenAI tool-call delta (`index`, optional `id`,
   *   optional `function.name` / `function.arguments`).
   * @returns {string[]} Frames to emit.
   */
  translateToolCall(tc) {
    const frames = [];
    const fn = tc.function;
    const tcIndex = tc.index ?? 0;
    const open = this.state.activeToolCalls.get(tcIndex);

    // A chunk that repeats the id of the call already open at this index is a
    // continuation, not a new call.
    const startsNewCall = Boolean(tc.id && fn?.name) && open?.id !== tc.id;
    if (startsNewCall) {
      // Finish whichever tool_use block is still open (same or different
      // index) so the new block receives its own index.
      frames.push(...this.closeToolCalls());
      const blockIndex = this.state.blockIndex;
      this.state.activeToolCalls.set(tcIndex, { id: tc.id, name: fn.name, blockIndex });
      frames.push(this.formatSSE('content_block_start', {
        type: 'content_block_start',
        index: blockIndex,
        content_block: {
          type: 'tool_use',
          id: tc.id,
          name: fn.name,
          input: {},
        },
      }));
    }

    if (fn?.arguments) {
      const blockIndex =
        this.state.activeToolCalls.get(tcIndex)?.blockIndex ?? this.state.blockIndex;
      frames.push(this.formatSSE('content_block_delta', {
        type: 'content_block_delta',
        index: blockIndex,
        delta: {
          type: 'input_json_delta',
          partial_json: fn.arguments,
        },
      }));
    }
    return frames;
  }

  /**
   * Emit the single `message_delta` of the stream.
   *
   * @param {string} stopReason Anthropic stop reason to report.
   * @returns {string[]} One frame, or none when it was already emitted.
   */
  emitMessageDelta(stopReason) {
    if (this.state.stopEmitted) {
      return [];
    }
    this.state.stopEmitted = true;
    return [
      this.formatSSE('message_delta', {
        type: 'message_delta',
        delta: { stop_reason: stopReason, stop_sequence: null },
        usage: { output_tokens: this.state.outputTokens },
      }),
    ];
  }

  /**
   * Pull every complete event out of `this.buffer`.
   *
   * An event is complete once its terminating blank line has arrived; text
   * after the last blank line stays buffered for the next chunk. Accepts LF
   * and CRLF line endings and `field:` with or without the single space after
   * the colon. Other lines (comments, unknown fields) are ignored.
   *
   * @returns {{event: (string|undefined), data: string}[]} Parsed events.
   */
  parseSSEEvents() {
    const events = [];
    let currentEvent;
    let dataLines = [];
    let consumed = 0;
    let cursor = 0;

    for (;;) {
      const newlineAt = this.buffer.indexOf('\n', cursor);
      if (newlineAt === -1) {
        break; // trailing partial line: wait for more input
      }
      let line = this.buffer.slice(cursor, newlineAt);
      cursor = newlineAt + 1;
      if (line.endsWith('\r')) {
        line = line.slice(0, -1);
      }

      if (line === '') {
        if (dataLines.length > 0) {
          events.push({ event: currentEvent, data: dataLines.join('\n') });
        }
        currentEvent = undefined;
        dataLines = [];
        // Everything up to this blank line is fully handled.
        consumed = cursor;
      } else if (line.startsWith('event:')) {
        currentEvent = line.slice(6).trim();
      } else if (line.startsWith('data:')) {
        const value = line.slice(5);
        dataLines.push(value.startsWith(' ') ? value.slice(1) : value);
      }
    }

    if (consumed > 0) {
      this.buffer = this.buffer.slice(consumed);
    }
    return events;
  }

  /**
   * Serialize one Anthropic SSE frame.
   *
   * @param {string} event Event name.
   * @param {object} data JSON-serializable payload.
   * @returns {string} `event:` / `data:` frame terminated by a blank line.
   */
  formatSSE(event, data) {
    return `event: ${event}\ndata: ${JSON.stringify(data)}\n\n`;
  }

  /**
   * Close the open thinking block, if any, and advance the block index.
   *
   * @returns {string[]} Zero or one `content_block_stop` frame.
   */
  closeThinkingBlock() {
    if (!this.state.thinkingBlockActive) {
      return [];
    }
    this.state.thinkingBlockActive = false;
    const frame = this.formatSSE('content_block_stop', {
      type: 'content_block_stop',
      index: this.state.blockIndex,
    });
    this.state.blockIndex++;
    return [frame];
  }

  /**
   * Close the open text block, if any, and advance the block index.
   *
   * @returns {string[]} Zero or one `content_block_stop` frame.
   */
  closeTextBlock() {
    if (!this.state.textBlockActive) {
      return [];
    }
    this.state.textBlockActive = false;
    const frame = this.formatSSE('content_block_stop', {
      type: 'content_block_stop',
      index: this.state.blockIndex,
    });
    this.state.blockIndex++;
    return [frame];
  }

  /**
   * Close every open tool_use block at the index it was started with and move
   * the block index past it.
   *
   * @returns {string[]} One `content_block_stop` frame per open tool call.
   */
  closeToolCalls() {
    const { activeToolCalls } = this.state;
    if (activeToolCalls.size === 0) {
      return [];
    }
    const frames = [];
    for (const call of activeToolCalls.values()) {
      frames.push(this.formatSSE('content_block_stop', {
        type: 'content_block_stop',
        index: call.blockIndex,
      }));
      this.state.blockIndex = Math.max(this.state.blockIndex, call.blockIndex + 1);
    }
    activeToolCalls.clear();
    return frames;
  }

  /**
   * Close whichever block is currently open (thinking, text, tool_use).
   *
   * @returns {string[]} The resulting `content_block_stop` frames.
   */
  closeActiveBlocks() {
    return [
      ...this.closeThinkingBlock(),
      ...this.closeTextBlock(),
      ...this.closeToolCalls(),
    ];
  }
}
@@ -6,11 +6,11 @@
6
6
  const AUTO_TIERS = {
7
7
  SIMPLE: {
8
8
  primary: 'google/gemini-2.5-flash',
9
- fallback: ['deepseek/deepseek-chat', 'nvidia/gpt-oss-120b'],
9
+ fallback: ['deepseek/deepseek-chat', 'nvidia/nemotron-ultra-253b'],
10
10
  },
11
11
  MEDIUM: {
12
12
  primary: 'moonshot/kimi-k2.5',
13
- fallback: ['google/gemini-2.5-flash', 'deepseek/deepseek-chat'],
13
+ fallback: ['google/gemini-2.5-flash', 'minimax/minimax-m2.7'],
14
14
  },
15
15
  COMPLEX: {
16
16
  primary: 'google/gemini-3.1-pro',
@@ -23,20 +23,20 @@ const AUTO_TIERS = {
23
23
  };
24
24
  const ECO_TIERS = {
25
25
  SIMPLE: {
26
- primary: 'nvidia/gpt-oss-120b',
27
- fallback: ['google/gemini-2.5-flash-lite'],
26
+ primary: 'nvidia/nemotron-ultra-253b',
27
+ fallback: ['nvidia/gpt-oss-120b', 'nvidia/deepseek-v3.2'],
28
28
  },
29
29
  MEDIUM: {
30
30
  primary: 'google/gemini-2.5-flash-lite',
31
- fallback: ['nvidia/gpt-oss-120b'],
31
+ fallback: ['nvidia/nemotron-ultra-253b', 'nvidia/qwen3-coder-480b'],
32
32
  },
33
33
  COMPLEX: {
34
34
  primary: 'google/gemini-2.5-flash-lite',
35
- fallback: ['deepseek/deepseek-chat'],
35
+ fallback: ['deepseek/deepseek-chat', 'nvidia/mistral-large-3-675b'],
36
36
  },
37
37
  REASONING: {
38
38
  primary: 'xai/grok-4-1-fast-reasoning',
39
- fallback: ['deepseek/deepseek-reasoner'],
39
+ fallback: ['deepseek/deepseek-reasoner', 'nvidia/nemotron-ultra-253b'],
40
40
  },
41
41
  };
42
42
  const PREMIUM_TIERS = {
@@ -189,7 +189,7 @@ export function routeRequest(prompt, profile = 'auto') {
189
189
  // Free profile - always use free model
190
190
  if (profile === 'free') {
191
191
  return {
192
- model: 'nvidia/gpt-oss-120b',
192
+ model: 'nvidia/nemotron-ultra-253b',
193
193
  tier: 'SIMPLE',
194
194
  confidence: 1.0,
195
195
  signals: ['free-profile'],
@@ -217,22 +217,48 @@ export function routeRequest(prompt, profile = 'auto') {
217
217
  // Baseline: Claude Opus at $5/$25 per 1M tokens
218
218
  const OPUS_COST_PER_1K = 0.015; // rough average
219
219
  const modelCosts = {
220
+ // FREE
220
221
  'nvidia/gpt-oss-120b': 0,
221
- 'google/gemini-2.5-flash': 0.001,
222
+ 'nvidia/gpt-oss-20b': 0,
223
+ 'nvidia/nemotron-ultra-253b': 0,
224
+ 'nvidia/nemotron-3-super-120b': 0,
225
+ 'nvidia/nemotron-super-49b': 0,
226
+ 'nvidia/deepseek-v3.2': 0,
227
+ 'nvidia/mistral-large-3-675b': 0,
228
+ 'nvidia/qwen3-coder-480b': 0,
229
+ 'nvidia/devstral-2-123b': 0,
230
+ 'nvidia/glm-4.7': 0,
231
+ 'nvidia/llama-4-maverick': 0,
232
+ // Budget
233
+ 'openai/gpt-5-nano': 0.0002,
234
+ 'openai/gpt-4.1-nano': 0.0003,
222
235
  'google/gemini-2.5-flash-lite': 0.0003,
236
+ 'xai/grok-4-fast': 0.0004,
237
+ 'xai/grok-4-1-fast': 0.0004,
238
+ 'xai/grok-4-1-fast-reasoning': 0.0004,
223
239
  'deepseek/deepseek-chat': 0.0004,
224
- 'deepseek/deepseek-reasoner': 0.003,
225
- 'moonshot/kimi-k2.5': 0.002,
226
- 'google/gemini-2.5-pro': 0.006,
227
- 'google/gemini-3.1-pro': 0.007,
240
+ 'deepseek/deepseek-reasoner': 0.0004,
241
+ 'minimax/minimax-m2.7': 0.0008,
242
+ 'minimax/minimax-m2.5': 0.0008,
243
+ 'google/gemini-2.5-flash': 0.0014,
244
+ 'openai/gpt-5-mini': 0.0011,
245
+ 'moonshot/kimi-k2.5': 0.0018,
246
+ // Mid-range
228
247
  'anthropic/claude-haiku-4.5': 0.003,
248
+ 'zai/glm-5': 0.0021,
249
+ 'openai/o4-mini': 0.0028,
250
+ 'google/gemini-2.5-pro': 0.0056,
251
+ 'openai/gpt-5.3-codex': 0.0079,
252
+ 'openai/gpt-5.2': 0.0079,
253
+ 'openai/gpt-5.3': 0.0079,
254
+ 'openai/gpt-4.1': 0.005,
255
+ 'openai/o3': 0.005,
256
+ 'google/gemini-3.1-pro': 0.007,
257
+ 'openai/gpt-5.4': 0.0088,
258
+ // Premium
229
259
  'anthropic/claude-sonnet-4.6': 0.009,
260
+ 'xai/grok-3': 0.009,
230
261
  'anthropic/claude-opus-4.6': 0.015,
231
- 'openai/gpt-5.3-codex': 0.008,
232
- 'openai/gpt-5.4': 0.009,
233
- 'openai/o3': 0.012,
234
- 'openai/o4-mini': 0.006,
235
- 'xai/grok-4-1-fast-reasoning': 0.0004,
236
262
  };
237
263
  const modelCost = modelCosts[model] ?? 0.005;
238
264
  const savings = Math.max(0, (OPUS_COST_PER_1K - modelCost) / OPUS_COST_PER_1K);
@@ -257,7 +283,7 @@ export function getFallbackChain(tier, profile = 'auto') {
257
283
  tierConfigs = PREMIUM_TIERS;
258
284
  break;
259
285
  case 'free':
260
- return ['nvidia/gpt-oss-120b'];
286
+ return ['nvidia/nemotron-ultra-253b'];
261
287
  default:
262
288
  tierConfigs = AUTO_TIERS;
263
289
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/cc",
3
- "version": "0.9.2",
3
+ "version": "0.9.3",
4
4
  "description": "Run Claude Code with any model — no rate limits, no account locks, no phone verification. Pay per use with USDC.",
5
5
  "type": "module",
6
6
  "bin": {