@blockrun/cc 0.9.1 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -27,8 +27,14 @@ export async function balanceCommand() {
27
27
  }
28
28
  }
29
29
  }
30
- catch {
31
- console.log(chalk.red('No wallet found. Run `brcc setup` first.'));
30
+ catch (err) {
31
+ const msg = err instanceof Error ? err.message : '';
32
+ if (msg.includes('ENOENT') || msg.includes('wallet') || msg.includes('key')) {
33
+ console.log(chalk.red('No wallet found. Run `brcc setup` first.'));
34
+ }
35
+ else {
36
+ console.log(chalk.red(`Error checking balance: ${msg || 'unknown error'}`));
37
+ }
32
38
  process.exit(1);
33
39
  }
34
40
  }
@@ -37,6 +37,13 @@ export async function modelsCommand() {
37
37
  console.log(`\n${chalk.dim(`${models.length} models available. Use:`)} ${chalk.bold('brcc start --model <model-id>')}`);
38
38
  }
39
39
  catch (err) {
40
- console.log(chalk.red(`Error: ${err instanceof Error ? err.message : 'Failed to fetch models'}`));
40
+ const msg = err instanceof Error ? err.message : 'unknown error';
41
+ if (msg.includes('fetch') || msg.includes('ECONNREFUSED') || msg.includes('ENOTFOUND')) {
42
+ console.log(chalk.red(`Cannot reach BlockRun API at ${apiUrl}`));
43
+ console.log(chalk.dim('Check your internet connection or try again later.'));
44
+ }
45
+ else {
46
+ console.log(chalk.red(`Error: ${msg}`));
47
+ }
41
48
  }
42
49
  }
@@ -8,6 +8,11 @@ export async function startCommand(options) {
8
8
  const chain = loadChain();
9
9
  const apiUrl = API_URLS[chain];
10
10
  const fallbackEnabled = options.fallback !== false; // Default true
11
+ const port = parseInt(options.port || String(DEFAULT_PROXY_PORT));
12
+ if (isNaN(port) || port < 1 || port > 65535) {
13
+ console.log(chalk.red(`Invalid port: ${options.port}. Must be 1-65535.`));
14
+ process.exit(1);
15
+ }
11
16
  if (chain === 'solana') {
12
17
  const wallet = await getOrCreateSolanaWallet();
13
18
  if (wallet.isNew) {
@@ -16,7 +21,6 @@ export async function startCommand(options) {
16
21
  console.log(`\nSend USDC on Solana to this address, then run ${chalk.bold('brcc start')} again.\n`);
17
22
  return;
18
23
  }
19
- const port = parseInt(options.port || String(DEFAULT_PROXY_PORT));
20
24
  const shouldLaunch = options.launch !== false;
21
25
  const model = options.model;
22
26
  console.log(chalk.bold('brcc — BlockRun Claude Code\n'));
@@ -35,7 +39,7 @@ export async function startCommand(options) {
35
39
  debug: options.debug,
36
40
  fallbackEnabled,
37
41
  });
38
- launchServer(server, port, shouldLaunch, model);
42
+ launchServer(server, port, shouldLaunch, model, options.debug);
39
43
  }
40
44
  else {
41
45
  const wallet = getOrCreateWallet();
@@ -45,7 +49,6 @@ export async function startCommand(options) {
45
49
  console.log(`\nSend USDC on Base to this address, then run ${chalk.bold('brcc start')} again.\n`);
46
50
  return;
47
51
  }
48
- const port = parseInt(options.port || String(DEFAULT_PROXY_PORT));
49
52
  const shouldLaunch = options.launch !== false;
50
53
  const model = options.model;
51
54
  console.log(chalk.bold('brcc — BlockRun Claude Code\n'));
@@ -64,13 +67,15 @@ export async function startCommand(options) {
64
67
  debug: options.debug,
65
68
  fallbackEnabled,
66
69
  });
67
- launchServer(server, port, shouldLaunch, model);
70
+ launchServer(server, port, shouldLaunch, model, options.debug);
68
71
  }
69
72
  }
70
- function launchServer(server, port, shouldLaunch, model) {
73
+ function launchServer(server, port, shouldLaunch, model, debug) {
71
74
  server.listen(port, () => {
72
75
  console.log(chalk.green(`✓ Proxy running on port ${port}`));
73
76
  console.log(chalk.dim(` Usage tracking: ~/.blockrun/brcc-stats.json`));
77
+ if (debug)
78
+ console.log(chalk.dim(` Debug log: ~/.blockrun/brcc-debug.log`));
74
79
  console.log(chalk.dim(` Run 'brcc stats' to view statistics\n`));
75
80
  if (shouldLaunch) {
76
81
  console.log('Starting Claude Code...\n');
package/dist/index.js CHANGED
@@ -6,12 +6,22 @@ import { balanceCommand } from './commands/balance.js';
6
6
  import { modelsCommand } from './commands/models.js';
7
7
  import { configCommand } from './commands/config.js';
8
8
  import { statsCommand } from './commands/stats.js';
9
+ import fs from 'node:fs';
10
+ import path from 'node:path';
11
+ import { fileURLToPath } from 'node:url';
12
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
13
+ let version = '0.9.0';
14
+ try {
15
+ const pkg = JSON.parse(fs.readFileSync(path.resolve(__dirname, '../package.json'), 'utf-8'));
16
+ version = pkg.version || version;
17
+ }
18
+ catch { /* use default */ }
9
19
  const program = new Command();
10
20
  program
11
21
  .name('brcc')
12
22
  .description('BlockRun Claude Code — run Claude Code with any model, pay with USDC.\n\n' +
13
23
  'Use /model inside Claude Code to switch between models on the fly.')
14
- .version('0.9.0');
24
+ .version(version);
15
25
  program
16
26
  .command('setup [chain]')
17
27
  .description('Create a new wallet for payments (base or solana)')
@@ -7,7 +7,7 @@ export const DEFAULT_FALLBACK_CONFIG = {
7
7
  'blockrun/auto', // Smart routing (default)
8
8
  'blockrun/eco', // Cheapest capable model
9
9
  'deepseek/deepseek-chat', // Direct fallback
10
- 'nvidia/gpt-oss-120b', // Free model as ultimate fallback
10
+ 'nvidia/nemotron-ultra-253b', // Free model as ultimate fallback
11
11
  ],
12
12
  retryOn: [429, 500, 502, 503, 504, 529],
13
13
  maxRetries: 5,
@@ -40,24 +40,56 @@ const DEFAULT_MAX_TOKENS = 4096;
40
40
  let lastOutputTokens = 0;
41
41
  // Model shortcuts for quick switching
42
42
// Model shortcuts for quick switching: short alias → fully-qualified model id.
// Aliases are what users type after `--model` or `/model`; values are the
// provider-prefixed ids the BlockRun backend expects.
const MODEL_SHORTCUTS = {
    // Routing profiles
    auto: 'blockrun/auto',
    smart: 'blockrun/auto',
    eco: 'blockrun/eco',
    premium: 'blockrun/premium',
    // Anthropic
    sonnet: 'anthropic/claude-sonnet-4.6',
    claude: 'anthropic/claude-sonnet-4.6',
    opus: 'anthropic/claude-opus-4.6',
    haiku: 'anthropic/claude-haiku-4.5',
    // OpenAI
    gpt: 'openai/gpt-5.4',
    gpt5: 'openai/gpt-5.4',
    'gpt-5': 'openai/gpt-5.4',
    'gpt-5.4': 'openai/gpt-5.4',
    'gpt-5.4-pro': 'openai/gpt-5.4-pro',
    'gpt-5.3': 'openai/gpt-5.3',
    'gpt-5.2': 'openai/gpt-5.2',
    'gpt-5.2-pro': 'openai/gpt-5.2-pro',
    'gpt-4.1': 'openai/gpt-4.1',
    codex: 'openai/gpt-5.3-codex',
    nano: 'openai/gpt-5-nano',
    mini: 'openai/gpt-5-mini',
    o3: 'openai/o3',
    o4: 'openai/o4-mini',
    'o4-mini': 'openai/o4-mini',
    o1: 'openai/o1',
    // Google
    gemini: 'google/gemini-2.5-pro',
    flash: 'google/gemini-2.5-flash',
    'gemini-3': 'google/gemini-3.1-pro',
    // xAI
    grok: 'xai/grok-3',
    'grok-4': 'xai/grok-4-0709',
    'grok-fast': 'xai/grok-4-1-fast-reasoning',
    // DeepSeek
    deepseek: 'deepseek/deepseek-chat',
    r1: 'deepseek/deepseek-reasoner',
    // Free models
    free: 'nvidia/nemotron-ultra-253b',
    nemotron: 'nvidia/nemotron-ultra-253b',
    'deepseek-free': 'nvidia/deepseek-v3.2',
    devstral: 'nvidia/devstral-2-123b',
    'qwen-coder': 'nvidia/qwen3-coder-480b',
    maverick: 'nvidia/llama-4-maverick',
    // Minimax
    minimax: 'minimax/minimax-m2.7',
    // Others
    glm: 'zai/glm-5',
    kimi: 'moonshot/kimi-k2.5',
};
62
94
  // Model pricing (per 1M tokens) - used for stats
63
95
  const MODEL_PRICING = {
@@ -66,20 +98,68 @@ const MODEL_PRICING = {
66
98
  'blockrun/eco': { input: 0.2, output: 1.0 },
67
99
  'blockrun/premium': { input: 3.0, output: 15.0 },
68
100
  'blockrun/free': { input: 0, output: 0 },
69
- // Individual models
101
+ // FREE - NVIDIA models
102
+ 'nvidia/gpt-oss-120b': { input: 0, output: 0 },
103
+ 'nvidia/gpt-oss-20b': { input: 0, output: 0 },
104
+ 'nvidia/nemotron-ultra-253b': { input: 0, output: 0 },
105
+ 'nvidia/nemotron-3-super-120b': { input: 0, output: 0 },
106
+ 'nvidia/nemotron-super-49b': { input: 0, output: 0 },
107
+ 'nvidia/deepseek-v3.2': { input: 0, output: 0 },
108
+ 'nvidia/mistral-large-3-675b': { input: 0, output: 0 },
109
+ 'nvidia/qwen3-coder-480b': { input: 0, output: 0 },
110
+ 'nvidia/devstral-2-123b': { input: 0, output: 0 },
111
+ 'nvidia/glm-4.7': { input: 0, output: 0 },
112
+ 'nvidia/llama-4-maverick': { input: 0, output: 0 },
113
+ // Anthropic
70
114
  'anthropic/claude-sonnet-4.6': { input: 3.0, output: 15.0 },
71
115
  'anthropic/claude-opus-4.6': { input: 5.0, output: 25.0 },
72
116
  'anthropic/claude-haiku-4.5': { input: 1.0, output: 5.0 },
73
- 'openai/gpt-5.4': { input: 2.5, output: 15.0 },
117
+ // OpenAI
118
+ 'openai/gpt-5-nano': { input: 0.05, output: 0.4 },
119
+ 'openai/gpt-4.1-nano': { input: 0.1, output: 0.4 },
120
+ 'openai/gpt-4o-mini': { input: 0.15, output: 0.6 },
74
121
  'openai/gpt-5-mini': { input: 0.25, output: 2.0 },
75
- 'google/gemini-2.5-pro': { input: 1.25, output: 10.0 },
122
+ 'openai/gpt-4.1-mini': { input: 0.4, output: 1.6 },
123
+ 'openai/gpt-5.2': { input: 1.75, output: 14.0 },
124
+ 'openai/gpt-5.3': { input: 1.75, output: 14.0 },
125
+ 'openai/gpt-5.3-codex': { input: 1.75, output: 14.0 },
126
+ 'openai/gpt-4.1': { input: 2.0, output: 8.0 },
127
+ 'openai/o3': { input: 2.0, output: 8.0 },
128
+ 'openai/gpt-4o': { input: 2.5, output: 10.0 },
129
+ 'openai/gpt-5.4': { input: 2.5, output: 15.0 },
130
+ 'openai/o1-mini': { input: 1.1, output: 4.4 },
131
+ 'openai/o3-mini': { input: 1.1, output: 4.4 },
132
+ 'openai/o4-mini': { input: 1.1, output: 4.4 },
133
+ 'openai/o1': { input: 15.0, output: 60.0 },
134
+ 'openai/gpt-5.2-pro': { input: 21.0, output: 168.0 },
135
+ 'openai/gpt-5.4-pro': { input: 30.0, output: 180.0 },
136
+ // Google
137
+ 'google/gemini-2.5-flash-lite': { input: 0.1, output: 0.4 },
76
138
  'google/gemini-2.5-flash': { input: 0.3, output: 2.5 },
77
- 'deepseek/deepseek-chat': { input: 0.28, output: 0.42 },
78
- 'xai/grok-3': { input: 3.0, output: 15.0 },
139
+ 'google/gemini-3-flash-preview': { input: 0.5, output: 3.0 },
140
+ 'google/gemini-2.5-pro': { input: 1.25, output: 10.0 },
141
+ 'google/gemini-3-pro-preview': { input: 2.0, output: 12.0 },
142
+ 'google/gemini-3.1-pro': { input: 2.0, output: 12.0 },
143
+ // xAI
79
144
  'xai/grok-4-fast': { input: 0.2, output: 0.5 },
80
- 'nvidia/gpt-oss-120b': { input: 0, output: 0 },
81
- 'zai/glm-5': { input: 1.0, output: 3.2 },
145
+ 'xai/grok-4-fast-reasoning': { input: 0.2, output: 0.5 },
146
+ 'xai/grok-4-1-fast': { input: 0.2, output: 0.5 },
147
+ 'xai/grok-4-1-fast-reasoning': { input: 0.2, output: 0.5 },
148
+ 'xai/grok-4-0709': { input: 0.2, output: 1.5 },
149
+ 'xai/grok-3-mini': { input: 0.3, output: 0.5 },
150
+ 'xai/grok-2-vision': { input: 2.0, output: 10.0 },
151
+ 'xai/grok-3': { input: 3.0, output: 15.0 },
152
+ // DeepSeek
153
+ 'deepseek/deepseek-chat': { input: 0.28, output: 0.42 },
154
+ 'deepseek/deepseek-reasoner': { input: 0.28, output: 0.42 },
155
+ // Minimax
156
+ 'minimax/minimax-m2.7': { input: 0.3, output: 1.2 },
157
+ 'minimax/minimax-m2.5': { input: 0.3, output: 1.2 },
158
+ // Others
82
159
  'moonshot/kimi-k2.5': { input: 0.6, output: 3.0 },
160
+ 'nvidia/kimi-k2.5': { input: 0.55, output: 2.5 },
161
+ 'zai/glm-5': { input: 1.0, output: 3.2 },
162
+ 'zai/glm-5-turbo': { input: 1.2, output: 4.0 },
83
163
  };
84
164
  function estimateCost(model, inputTokens, outputTokens) {
85
165
  const pricing = MODEL_PRICING[model] || { input: 2.0, output: 10.0 };
@@ -301,23 +381,63 @@ export function createProxy(options) {
301
381
  response.headers.forEach((v, k) => {
302
382
  responseHeaders[k] = v;
303
383
  });
384
+ // Intercept error responses and ensure Anthropic-format errors
385
+ // so Claude Code doesn't fall back to showing a login page
386
+ if (response.status >= 400 && !responseHeaders['content-type']?.includes('text/event-stream')) {
387
+ let errorBody;
388
+ try {
389
+ const rawText = await response.text();
390
+ const parsed = JSON.parse(rawText);
391
+ // Already has Anthropic error shape? Pass through
392
+ if (parsed.type === 'error' && parsed.error) {
393
+ errorBody = rawText;
394
+ }
395
+ else {
396
+ // Wrap in Anthropic error format
397
+ const errorMsg = parsed.error?.message || parsed.message || rawText.slice(0, 500);
398
+ errorBody = JSON.stringify({
399
+ type: 'error',
400
+ error: {
401
+ type: response.status === 401 ? 'authentication_error'
402
+ : response.status === 402 ? 'invalid_request_error'
403
+ : response.status === 429 ? 'rate_limit_error'
404
+ : response.status === 400 ? 'invalid_request_error'
405
+ : 'api_error',
406
+ message: `[${finalModel}] ${errorMsg}`,
407
+ },
408
+ });
409
+ }
410
+ }
411
+ catch {
412
+ errorBody = JSON.stringify({
413
+ type: 'error',
414
+ error: { type: 'api_error', message: `Backend returned ${response.status}` },
415
+ });
416
+ }
417
+ res.writeHead(response.status, { 'Content-Type': 'application/json' });
418
+ res.end(errorBody);
419
+ log(`⚠️ ${response.status} from backend for ${finalModel}`);
420
+ return;
421
+ }
304
422
  res.writeHead(response.status, responseHeaders);
305
423
  const isStreaming = responseHeaders['content-type']?.includes('text/event-stream');
306
424
  if (response.body) {
307
425
  const reader = response.body.getReader();
308
426
  const decoder = new TextDecoder();
309
- let lastChunkText = '';
310
427
  let fullResponse = '';
428
+ const STREAM_CAP = 5_000_000; // 5MB cap on accumulated stream
311
429
  const pump = async () => {
312
430
  while (true) {
313
431
  const { done, value } = await reader.read();
314
432
  if (done) {
315
433
  // Record stats from streaming response
316
- if (isStreaming && lastChunkText) {
317
- const outputMatch = lastChunkText.match(/"output_tokens"\s*:\s*(\d+)/);
434
+ if (isStreaming && fullResponse) {
435
+ // Search full response for the last output_tokens value
436
+ const allOutputMatches = [...fullResponse.matchAll(/"output_tokens"\s*:\s*(\d+)/g)];
437
+ const lastOutputMatch = allOutputMatches[allOutputMatches.length - 1];
318
438
  const inputMatch = fullResponse.match(/"input_tokens"\s*:\s*(\d+)/);
319
- if (outputMatch) {
320
- lastOutputTokens = parseInt(outputMatch[1], 10);
439
+ if (lastOutputMatch) {
440
+ lastOutputTokens = parseInt(lastOutputMatch[1], 10);
321
441
  const inputTokens = inputMatch
322
442
  ? parseInt(inputMatch[1], 10)
323
443
  : 0;
@@ -330,9 +450,8 @@ export function createProxy(options) {
330
450
  res.end();
331
451
  break;
332
452
  }
333
- if (isStreaming) {
453
+ if (isStreaming && fullResponse.length < STREAM_CAP) {
334
454
  const chunk = decoder.decode(value, { stream: true });
335
- lastChunkText = chunk;
336
455
  fullResponse += chunk;
337
456
  }
338
457
  res.write(value);
@@ -0,0 +1,29 @@
1
+ /**
2
+ * SSE Event Translator: OpenAI → Anthropic Messages API format
3
+ *
4
+ * Handles three critical gaps in the streaming pipeline:
5
+ * 1. Tool calls: choice.delta.tool_calls → content_block_start/content_block_delta (tool_use)
6
+ * 2. Reasoning: reasoning_content → content_block_start/content_block_delta (thinking)
7
+ * 3. Ensures proper content_block_stop and message_stop events
8
+ */
9
+ export declare class SSETranslator {
10
+ private state;
11
+ private buffer;
12
+ constructor(model?: string);
13
+ /**
14
+ * Detect whether an SSE chunk is in OpenAI format.
15
+ * Returns true if it contains OpenAI-style `choices[].delta` structure.
16
+ */
17
+ static isOpenAIFormat(chunk: string): boolean;
18
+ /**
19
+ * Process a raw SSE text chunk and return translated Anthropic-format SSE events.
20
+ * Returns null if no translation needed (already Anthropic format or not parseable).
21
+ */
22
+ processChunk(rawChunk: string): string | null;
23
+ private parseSSEEvents;
24
+ private formatSSE;
25
+ private closeThinkingBlock;
26
+ private closeTextBlock;
27
+ private closeToolCalls;
28
+ private closeActiveBlocks;
29
+ }
@@ -0,0 +1,296 @@
1
/**
 * SSE Event Translator: OpenAI → Anthropic Messages API format
 *
 * Handles three critical gaps in the streaming pipeline:
 * 1. Tool calls: choice.delta.tool_calls → content_block_start/content_block_delta (tool_use)
 * 2. Reasoning: reasoning_content → content_block_start/content_block_delta (thinking)
 * 3. Ensures proper content_block_stop and message_stop events
 */
// ─── SSE Translator ─────────────────────────────────────────────────────────
export class SSETranslator {
    state;
    buffer = '';
    constructor(model = 'unknown') {
        this.state = {
            // Synthetic Anthropic-style message id; uniqueness only, not crypto.
            messageId: `msg_brcc_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
            model,
            blockIndex: 0,
            activeToolCalls: new Map(),
            thinkingBlockActive: false,
            textBlockActive: false,
            messageStarted: false,
            inputTokens: 0,
            outputTokens: 0,
        };
    }
    /**
     * Detect whether an SSE chunk is in OpenAI format.
     * Returns true if it contains OpenAI-style `choices[].delta` structure.
     */
    static isOpenAIFormat(chunk) {
        // Look for OpenAI-specific patterns in the SSE data
        return (chunk.includes('"choices"') &&
            chunk.includes('"delta"') &&
            !chunk.includes('"content_block_'));
    }
    /**
     * Process a raw SSE text chunk and return translated Anthropic-format SSE events.
     * Returns null if no translation needed (already Anthropic format or not parseable).
     */
    processChunk(rawChunk) {
        this.buffer += rawChunk;
        const events = this.parseSSEEvents();
        if (events.length === 0)
            return null;
        const translated = [];
        for (const event of events) {
            if (event.data === '[DONE]') {
                // Close any active blocks, then emit message_stop
                translated.push(...this.closeActiveBlocks());
                translated.push(this.formatSSE('message_delta', {
                    type: 'message_delta',
                    delta: { stop_reason: 'end_turn', stop_sequence: null },
                    usage: { output_tokens: this.state.outputTokens },
                }));
                translated.push(this.formatSSE('message_stop', { type: 'message_stop' }));
                continue;
            }
            let parsed;
            try {
                parsed = JSON.parse(event.data);
            }
            catch {
                continue;
            }
            // Skip if this doesn't look like OpenAI format
            if (!parsed.choices || parsed.choices.length === 0) {
                // Could be a usage-only event
                if (parsed.usage) {
                    this.state.inputTokens = parsed.usage.prompt_tokens || 0;
                    this.state.outputTokens = parsed.usage.completion_tokens || 0;
                }
                continue;
            }
            // Emit message_start on first chunk
            if (!this.state.messageStarted) {
                this.state.messageStarted = true;
                if (parsed.model)
                    this.state.model = parsed.model;
                translated.push(this.formatSSE('message_start', {
                    type: 'message_start',
                    message: {
                        id: this.state.messageId,
                        type: 'message',
                        role: 'assistant',
                        model: this.state.model,
                        content: [],
                        stop_reason: null,
                        stop_sequence: null,
                        usage: { input_tokens: this.state.inputTokens, output_tokens: 0 },
                    },
                }));
                translated.push(this.formatSSE('ping', { type: 'ping' }));
            }
            const choice = parsed.choices[0];
            const delta = choice.delta;
            // ── Reasoning content → thinking block ──
            if (delta.reasoning_content) {
                if (!this.state.thinkingBlockActive) {
                    // Close text block if active
                    if (this.state.textBlockActive) {
                        translated.push(...this.closeTextBlock());
                    }
                    this.state.thinkingBlockActive = true;
                    translated.push(this.formatSSE('content_block_start', {
                        type: 'content_block_start',
                        index: this.state.blockIndex,
                        content_block: { type: 'thinking', thinking: '' },
                    }));
                }
                translated.push(this.formatSSE('content_block_delta', {
                    type: 'content_block_delta',
                    index: this.state.blockIndex,
                    delta: { type: 'thinking_delta', thinking: delta.reasoning_content },
                }));
                // Rough token estimate: one per delta (used for the usage field only).
                this.state.outputTokens++;
            }
            // ── Text content → text block ──
            if (delta.content) {
                // Close thinking block if transitioning
                if (this.state.thinkingBlockActive) {
                    translated.push(...this.closeThinkingBlock());
                }
                if (!this.state.textBlockActive) {
                    // Close any active tool calls first
                    translated.push(...this.closeToolCalls());
                    this.state.textBlockActive = true;
                    translated.push(this.formatSSE('content_block_start', {
                        type: 'content_block_start',
                        index: this.state.blockIndex,
                        content_block: { type: 'text', text: '' },
                    }));
                }
                translated.push(this.formatSSE('content_block_delta', {
                    type: 'content_block_delta',
                    index: this.state.blockIndex,
                    delta: { type: 'text_delta', text: delta.content },
                }));
                this.state.outputTokens++;
            }
            // ── Tool calls → tool_use blocks ──
            if (delta.tool_calls && delta.tool_calls.length > 0) {
                // Close thinking/text blocks first
                if (this.state.thinkingBlockActive) {
                    translated.push(...this.closeThinkingBlock());
                }
                if (this.state.textBlockActive) {
                    translated.push(...this.closeTextBlock());
                }
                for (const tc of delta.tool_calls) {
                    const tcIndex = tc.index;
                    if (tc.id && tc.function?.name) {
                        // New tool call start
                        // Close previous tool call at same index if exists
                        if (this.state.activeToolCalls.has(tcIndex)) {
                            translated.push(this.formatSSE('content_block_stop', {
                                type: 'content_block_stop',
                                index: this.state.blockIndex,
                            }));
                            this.state.blockIndex++;
                        }
                        const toolId = tc.id;
                        const toolName = tc.function.name;
                        this.state.activeToolCalls.set(tcIndex, { id: toolId, name: toolName });
                        translated.push(this.formatSSE('content_block_start', {
                            type: 'content_block_start',
                            index: this.state.blockIndex,
                            content_block: {
                                type: 'tool_use',
                                id: toolId,
                                name: toolName,
                                input: {},
                            },
                        }));
                        // If there are initial arguments, send them
                        if (tc.function.arguments) {
                            translated.push(this.formatSSE('content_block_delta', {
                                type: 'content_block_delta',
                                index: this.state.blockIndex,
                                delta: {
                                    type: 'input_json_delta',
                                    partial_json: tc.function.arguments,
                                },
                            }));
                        }
                    }
                    else if (tc.function?.arguments) {
                        // Continuation of existing tool call - stream arguments
                        translated.push(this.formatSSE('content_block_delta', {
                            type: 'content_block_delta',
                            index: this.state.blockIndex,
                            delta: {
                                type: 'input_json_delta',
                                partial_json: tc.function.arguments,
                            },
                        }));
                    }
                }
                this.state.outputTokens++;
            }
            // ── Handle finish_reason ──
            if (choice.finish_reason) {
                translated.push(...this.closeActiveBlocks());
                const stopReason = choice.finish_reason === 'tool_calls'
                    ? 'tool_use'
                    : choice.finish_reason === 'stop'
                        ? 'end_turn'
                        : choice.finish_reason;
                translated.push(this.formatSSE('message_delta', {
                    type: 'message_delta',
                    delta: { stop_reason: stopReason, stop_sequence: null },
                    usage: { output_tokens: this.state.outputTokens },
                }));
            }
        }
        return translated.length > 0 ? translated.join('') : null;
    }
    // ── Helpers ─────────────────────────────────────────────────────────────
    // Drain complete `event:`/`data:` records from the buffer; partial trailing
    // text is kept in `this.buffer` for the next chunk.
    parseSSEEvents() {
        const events = [];
        const lines = this.buffer.split('\n');
        let currentEvent;
        let dataLines = [];
        let consumed = 0;
        for (let i = 0; i < lines.length; i++) {
            const line = lines[i];
            if (line.startsWith('event: ')) {
                currentEvent = line.slice(7).trim();
            }
            else if (line.startsWith('data: ')) {
                dataLines.push(line.slice(6));
            }
            else if (line === '' && dataLines.length > 0) {
                // End of event
                events.push({ event: currentEvent, data: dataLines.join('\n') });
                currentEvent = undefined;
                dataLines = [];
                consumed = lines.slice(0, i + 1).join('\n').length + 1;
            }
        }
        // Keep unconsumed text in buffer
        if (consumed > 0) {
            this.buffer = this.buffer.slice(consumed);
        }
        return events;
    }
    formatSSE(event, data) {
        return `event: ${event}\ndata: ${JSON.stringify(data)}\n\n`;
    }
    closeThinkingBlock() {
        if (!this.state.thinkingBlockActive)
            return [];
        this.state.thinkingBlockActive = false;
        const events = [
            this.formatSSE('content_block_stop', {
                type: 'content_block_stop',
                index: this.state.blockIndex,
            }),
        ];
        this.state.blockIndex++;
        return events;
    }
    closeTextBlock() {
        if (!this.state.textBlockActive)
            return [];
        this.state.textBlockActive = false;
        const events = [
            this.formatSSE('content_block_stop', {
                type: 'content_block_stop',
                index: this.state.blockIndex,
            }),
        ];
        this.state.blockIndex++;
        return events;
    }
    closeToolCalls() {
        if (this.state.activeToolCalls.size === 0)
            return [];
        const events = [];
        for (const [_index] of this.state.activeToolCalls) {
            events.push(this.formatSSE('content_block_stop', {
                type: 'content_block_stop',
                index: this.state.blockIndex,
            }));
            this.state.blockIndex++;
        }
        this.state.activeToolCalls.clear();
        return events;
    }
    closeActiveBlocks() {
        const events = [];
        events.push(...this.closeThinkingBlock());
        events.push(...this.closeTextBlock());
        events.push(...this.closeToolCalls());
        return events;
    }
}
@@ -6,11 +6,11 @@
6
6
  const AUTO_TIERS = {
7
7
  SIMPLE: {
8
8
  primary: 'google/gemini-2.5-flash',
9
- fallback: ['deepseek/deepseek-chat', 'nvidia/gpt-oss-120b'],
9
+ fallback: ['deepseek/deepseek-chat', 'nvidia/nemotron-ultra-253b'],
10
10
  },
11
11
  MEDIUM: {
12
12
  primary: 'moonshot/kimi-k2.5',
13
- fallback: ['google/gemini-2.5-flash', 'deepseek/deepseek-chat'],
13
+ fallback: ['google/gemini-2.5-flash', 'minimax/minimax-m2.7'],
14
14
  },
15
15
  COMPLEX: {
16
16
  primary: 'google/gemini-3.1-pro',
@@ -23,20 +23,20 @@ const AUTO_TIERS = {
23
23
  };
24
24
// Eco profile: cheapest-first routing. Each tier lists a primary model and
// ordered fallbacks tried when the primary fails.
const ECO_TIERS = {
    SIMPLE: {
        primary: 'nvidia/nemotron-ultra-253b',
        fallback: ['nvidia/gpt-oss-120b', 'nvidia/deepseek-v3.2'],
    },
    MEDIUM: {
        primary: 'google/gemini-2.5-flash-lite',
        fallback: ['nvidia/nemotron-ultra-253b', 'nvidia/qwen3-coder-480b'],
    },
    COMPLEX: {
        primary: 'google/gemini-2.5-flash-lite',
        fallback: ['deepseek/deepseek-chat', 'nvidia/mistral-large-3-675b'],
    },
    REASONING: {
        primary: 'xai/grok-4-1-fast-reasoning',
        fallback: ['deepseek/deepseek-reasoner', 'nvidia/nemotron-ultra-253b'],
    },
};
42
42
  const PREMIUM_TIERS = {
@@ -189,7 +189,7 @@ export function routeRequest(prompt, profile = 'auto') {
189
189
  // Free profile - always use free model
190
190
  if (profile === 'free') {
191
191
  return {
192
- model: 'nvidia/gpt-oss-120b',
192
+ model: 'nvidia/nemotron-ultra-253b',
193
193
  tier: 'SIMPLE',
194
194
  confidence: 1.0,
195
195
  signals: ['free-profile'],
@@ -217,15 +217,48 @@ export function routeRequest(prompt, profile = 'auto') {
217
217
  // Baseline: Claude Opus at $5/$25 per 1M tokens
218
218
  const OPUS_COST_PER_1K = 0.015; // rough average
219
219
  const modelCosts = {
220
+ // FREE
220
221
  'nvidia/gpt-oss-120b': 0,
221
- 'google/gemini-2.5-flash': 0.001,
222
+ 'nvidia/gpt-oss-20b': 0,
223
+ 'nvidia/nemotron-ultra-253b': 0,
224
+ 'nvidia/nemotron-3-super-120b': 0,
225
+ 'nvidia/nemotron-super-49b': 0,
226
+ 'nvidia/deepseek-v3.2': 0,
227
+ 'nvidia/mistral-large-3-675b': 0,
228
+ 'nvidia/qwen3-coder-480b': 0,
229
+ 'nvidia/devstral-2-123b': 0,
230
+ 'nvidia/glm-4.7': 0,
231
+ 'nvidia/llama-4-maverick': 0,
232
+ // Budget
233
+ 'openai/gpt-5-nano': 0.0002,
234
+ 'openai/gpt-4.1-nano': 0.0003,
222
235
  'google/gemini-2.5-flash-lite': 0.0003,
236
+ 'xai/grok-4-fast': 0.0004,
237
+ 'xai/grok-4-1-fast': 0.0004,
238
+ 'xai/grok-4-1-fast-reasoning': 0.0004,
223
239
  'deepseek/deepseek-chat': 0.0004,
224
- 'moonshot/kimi-k2.5': 0.002,
240
+ 'deepseek/deepseek-reasoner': 0.0004,
241
+ 'minimax/minimax-m2.7': 0.0008,
242
+ 'minimax/minimax-m2.5': 0.0008,
243
+ 'google/gemini-2.5-flash': 0.0014,
244
+ 'openai/gpt-5-mini': 0.0011,
245
+ 'moonshot/kimi-k2.5': 0.0018,
246
+ // Mid-range
247
+ 'anthropic/claude-haiku-4.5': 0.003,
248
+ 'zai/glm-5': 0.0021,
249
+ 'openai/o4-mini': 0.0028,
250
+ 'google/gemini-2.5-pro': 0.0056,
251
+ 'openai/gpt-5.3-codex': 0.0079,
252
+ 'openai/gpt-5.2': 0.0079,
253
+ 'openai/gpt-5.3': 0.0079,
254
+ 'openai/gpt-4.1': 0.005,
255
+ 'openai/o3': 0.005,
225
256
  'google/gemini-3.1-pro': 0.007,
257
+ 'openai/gpt-5.4': 0.0088,
258
+ // Premium
226
259
  'anthropic/claude-sonnet-4.6': 0.009,
260
+ 'xai/grok-3': 0.009,
227
261
  'anthropic/claude-opus-4.6': 0.015,
228
- 'xai/grok-4-1-fast-reasoning': 0.0004,
229
262
  };
230
263
  const modelCost = modelCosts[model] ?? 0.005;
231
264
  const savings = Math.max(0, (OPUS_COST_PER_1K - modelCost) / OPUS_COST_PER_1K);
@@ -250,7 +283,7 @@ export function getFallbackChain(tier, profile = 'auto') {
250
283
  tierConfigs = PREMIUM_TIERS;
251
284
  break;
252
285
  case 'free':
253
- return ['nvidia/gpt-oss-120b'];
286
+ return ['nvidia/nemotron-ultra-253b'];
254
287
  default:
255
288
  tierConfigs = AUTO_TIERS;
256
289
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/cc",
3
- "version": "0.9.1",
3
+ "version": "0.9.3",
4
4
  "description": "Run Claude Code with any model — no rate limits, no account locks, no phone verification. Pay per use with USDC.",
5
5
  "type": "module",
6
6
  "bin": {