npm - @blockrun/cc - Versions diffs - 0.9.2 → 0.9.4 - Mend

@blockrun/cc 0.9.2 → 0.9.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/README.md +52 -18
package/dist/proxy/fallback.js +1 -1
package/dist/proxy/server.js +101 -29
package/dist/proxy/sse-translator.d.ts +29 -0
package/dist/proxy/sse-translator.js +296 -0
package/dist/router/index.js +45 -19
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -10,7 +10,7 @@ You're paying $200/month and still can't work.<br><br>
 <img src="https://img.shields.io/badge/🚀_No_Rate_Limits-black?style=for-the-badge" alt="No rate limits">&nbsp;
 <img src="https://img.shields.io/badge/🔑_No_Account_Needed-blue?style=for-the-badge" alt="No account">&nbsp;
-<img src="https://img.shields.io/badge/🤖_40+_Models-yellow?style=for-the-badge" alt="40+ models">&nbsp;
+<img src="https://img.shields.io/badge/🤖_50+_Models-yellow?style=for-the-badge" alt="50+ models">&nbsp;
 <img src="https://img.shields.io/badge/💰_Pay_Per_Use-purple?style=for-the-badge" alt="Pay per use">&nbsp;
 <img src="https://img.shields.io/badge/⛓_Base_+_Solana-green?style=for-the-badge" alt="Base + Solana">
@@ -28,7 +28,7 @@ You're paying $200/month and still can't work.<br><br>
 </div>
-> **brcc** is a local proxy that lets you run Claude Code with any LLM model — GPT-5, Claude, Gemini, DeepSeek, Grok, and 40+ more — without rate limits, without an Anthropic account, and without phone verification. You pay per request with USDC via the [x402](https://x402.org) protocol. Your wallet is your identity. Your private key never leaves your machine.
+> **brcc** is a local proxy that lets you run Claude Code with any LLM model — GPT-5, Claude, Gemini, DeepSeek, Grok, and 50+ more — without rate limits, without an Anthropic account, and without phone verification. You pay per request with USDC via the [x402](https://x402.org) protocol. Your wallet is your identity. Your private key never leaves your machine.
 ---
@@ -62,7 +62,7 @@ brcc eliminates all of these problems:
 |--|---------------------|------------|----------|
 | **Rate limits** | Constantly hit | Per-model limits | **None** |
 | **Account required** | Yes + phone | Yes + email | **No** |
-| **Models** | Claude only | 200+ (manual select) | **40+ (auto or manual)** |
+| **Models** | Claude only | 200+ (manual select) | **50+ (auto or manual)** |
 | **Payment** | Credit card, subscription | Credit card, pre-pay | **USDC per-request** |
 | **Auth** | OAuth + API key conflicts | API key | **Wallet signature** |
 | **Pricing** | Opaque | Transparent | **Transparent** |
@@ -101,7 +101,7 @@ brcc setup solana    # Solana chain
 brcc start
 ```
-That's it. Claude Code opens with access to 40+ models, no rate limits.
+That's it. Claude Code opens with access to 50+ models, no rate limits.
 ---
@@ -144,7 +144,21 @@ use eco       # Switch to cheapest
 use premium   # Switch to best quality
 use free      # Switch to free models
 use sonnet    # Direct Claude Sonnet
-use deepseek  # Direct DeepSeek
+use gpt       # GPT-5.4
+use codex     # GPT-5.3 Codex
+use o3        # OpenAI o3
+use gemini    # Gemini 2.5 Pro
+use flash     # Gemini 2.5 Flash
+use grok      # Grok 3
+use grok-4    # Grok 4
+use deepseek  # DeepSeek Chat
+use r1        # DeepSeek Reasoner
+use minimax   # Minimax M2.7
+use kimi      # Kimi K2.5
+use free      # Nemotron Ultra 253B (free)
+use nemotron  # Nemotron Ultra 253B (free)
+use devstral  # Devstral 2 123B (free)
+use qwen-coder # Qwen3 Coder 480B (free)
 ```
 ### Inside Claude Code
@@ -165,18 +179,34 @@ $ brcc models
 Free Models (no USDC needed)
 ──────────────────────────────────────────────────────────────────────
+  nvidia/nemotron-ultra-253b
   nvidia/gpt-oss-120b
-  nvidia/gpt-oss-20b
+  nvidia/deepseek-v3.2
+  nvidia/mistral-large-3-675b
+  nvidia/qwen3-coder-480b
+  nvidia/devstral-2-123b
+  nvidia/llama-4-maverick
+  nvidia/glm-4.7
+  ... (11 free models)
 Paid Models
 ──────────────────────────────────────────────────────────────────────
   Model                               Input        Output
+  openai/gpt-5-nano                   $0.05/M      $0.40/M
+  xai/grok-4-1-fast-reasoning         $0.20/M      $0.50/M
   deepseek/deepseek-chat              $0.28/M      $0.42/M
+  minimax/minimax-m2.7                $0.30/M      $1.20/M
+  google/gemini-2.5-flash             $0.30/M      $2.50/M
+  moonshot/kimi-k2.5                  $0.60/M      $3.00/M
   anthropic/claude-haiku-4.5          $1.00/M      $5.00/M
+  openai/gpt-5.3-codex                $1.75/M      $14.00/M
+  google/gemini-2.5-pro               $1.25/M      $10.00/M
+  google/gemini-3.1-pro               $2.00/M      $12.00/M
   openai/gpt-5.4                      $2.50/M      $15.00/M
   anthropic/claude-sonnet-4.6         $3.00/M      $15.00/M
   anthropic/claude-opus-4.6           $5.00/M      $25.00/M
-  ... (31 models total)
+  openai/gpt-5.4-pro                  $30.00/M     $180.00/M
+  ... (50+ models total)
 ```
 ---
@@ -185,12 +215,16 @@ Paid Models
 | Model | ~Requests per $5 | Best For |
 |-------|-------------------|----------|
+| Nemotron Ultra 253B | **Unlimited** | Free tier |
+| Grok 4.1 Fast | ~3,500 | Fast reasoning (budget) |
 | DeepSeek V3 | ~5,000 | Budget coding |
+| Gemini 2.5 Flash | ~1,200 | Balanced speed/cost |
+| Kimi K2.5 | ~800 | Mid-range coding |
 | Claude Haiku 4.5 | ~500 | Fast tasks |
+| GPT-5.3 Codex | ~180 | Code generation |
 | Claude Sonnet 4.6 | ~100 | General coding |
 | GPT-5.4 | ~80 | Reasoning |
 | Claude Opus 4.6 | ~50 | Most capable |
-| GPT-OSS 120B | **Unlimited** | Free tier |
 ---
@@ -220,7 +254,7 @@ Your wallet is saved to `~/.blockrun/` and shared with all BlockRun tools.
 ```bash
 brcc start                              # Default model
-brcc start --model nvidia/gpt-oss-120b  # Free model
+brcc start --model nvidia/nemotron-ultra-253b  # Free model
 brcc start --model openai/gpt-5.4       # Specific model
 brcc start --no-launch                  # Proxy only mode
 brcc start --no-fallback                # Disable auto-fallback
@@ -254,7 +288,7 @@ $ brcc stats
     deepseek/deepseek-chat
       620 req · $0.8901 (19.5%) · 180ms avg
       ↳ 12 fallback recoveries
-    nvidia/gpt-oss-120b
+    nvidia/nemotron-ultra-253b
       164 req · $0.0000 (0%) · 320ms avg
   💰 Savings vs Claude Opus
@@ -273,7 +307,7 @@ $ brcc stats --json    # Output as JSON (for scripts)
 ### `brcc config`
 ```bash
-brcc config set default-model nvidia/gpt-oss-120b
+brcc config set default-model nvidia/nemotron-ultra-253b
 brcc config set sonnet-model openai/gpt-5.4
 brcc config set opus-model anthropic/claude-opus-4.6
 brcc config set haiku-model deepseek/deepseek-chat
@@ -288,15 +322,15 @@ When a model returns an error (429 rate limit, 500+ server error), brcc automati
 **Default fallback chain:**
 ```
-anthropic/claude-sonnet-4.6
+your-selected-model
     ↓ (if 429/500/502/503/504)
-google/gemini-2.5-pro
+blockrun/auto (smart routing)
     ↓
-deepseek/deepseek-chat
+blockrun/eco (cheapest capable)
     ↓
-xai/grok-4-fast
+deepseek/deepseek-chat
     ↓
-nvidia/gpt-oss-120b (free, always available)
+nvidia/nemotron-ultra-253b (free, always available)
 ```
 **How it looks:**
@@ -336,7 +370,7 @@ brcc start --no-fallback
 │  BlockRun API (blockrun.ai or sol.blockrun.ai)              │
 │                                                              │
 │  Routes to: GPT-5 · Claude · Gemini · DeepSeek · Grok ·    │
-│             NVIDIA · MiniMax · Moonshot · 40+ models         │
+│             NVIDIA · MiniMax · Moonshot · 50+ models         │
 └─────────────────────────────────────────────────────────────┘
 ```
@@ -368,7 +402,7 @@ Typical cost: **$0.001–$0.05 per interaction**. $5 lasts most developers a wee
 No. brcc connects Claude Code to BlockRun instead of Anthropic.
 **Can I use non-Claude models?**
-Yes. GPT-5, Gemini, DeepSeek, Grok, and 30+ others work through Claude Code via brcc.
+Yes. GPT-5, Gemini, DeepSeek, Grok, and 50+ others work through Claude Code via brcc.
 ---

package/dist/proxy/fallback.js CHANGED Viewed

@@ -7,7 +7,7 @@ export const DEFAULT_FALLBACK_CONFIG = {
         'blockrun/auto', // Smart routing (default)
         'blockrun/eco', // Cheapest capable model
         'deepseek/deepseek-chat', // Direct fallback
-        'nvidia/gpt-oss-120b', // Free model as ultimate fallback
+        'nvidia/nemotron-ultra-253b', // Free model as ultimate fallback
     ],
     retryOn: [429, 500, 502, 503, 504, 529],
     maxRetries: 5,

package/dist/proxy/server.js CHANGED Viewed

@@ -40,24 +40,56 @@ const DEFAULT_MAX_TOKENS = 4096;
 let lastOutputTokens = 0;
 // Model shortcuts for quick switching
 const MODEL_SHORTCUTS = {
+    // Routing profiles
     auto: 'blockrun/auto',
     smart: 'blockrun/auto',
     eco: 'blockrun/eco',
     premium: 'blockrun/premium',
-    gpt: 'openai/gpt-5.4',
-    gpt5: 'openai/gpt-5.4',
-    'gpt-5': 'openai/gpt-5.4',
-    'gpt-5.4': 'openai/gpt-5.4',
+    // Anthropic
     sonnet: 'anthropic/claude-sonnet-4.6',
     claude: 'anthropic/claude-sonnet-4.6',
     opus: 'anthropic/claude-opus-4.6',
     haiku: 'anthropic/claude-haiku-4.5',
-    deepseek: 'deepseek/deepseek-chat',
+    // OpenAI
+    gpt: 'openai/gpt-5.4',
+    gpt5: 'openai/gpt-5.4',
+    'gpt-5': 'openai/gpt-5.4',
+    'gpt-5.4': 'openai/gpt-5.4',
+    'gpt-5.4-pro': 'openai/gpt-5.4-pro',
+    'gpt-5.3': 'openai/gpt-5.3',
+    'gpt-5.2': 'openai/gpt-5.2',
+    'gpt-5.2-pro': 'openai/gpt-5.2-pro',
+    'gpt-4.1': 'openai/gpt-4.1',
+    codex: 'openai/gpt-5.3-codex',
+    nano: 'openai/gpt-5-nano',
+    mini: 'openai/gpt-5-mini',
+    o3: 'openai/o3',
+    o4: 'openai/o4-mini',
+    'o4-mini': 'openai/o4-mini',
+    o1: 'openai/o1',
+    // Google
     gemini: 'google/gemini-2.5-pro',
+    flash: 'google/gemini-2.5-flash',
+    'gemini-3': 'google/gemini-3.1-pro',
+    // xAI
     grok: 'xai/grok-3',
-    free: 'nvidia/gpt-oss-120b',
-    mini: 'openai/gpt-5-mini',
+    'grok-4': 'xai/grok-4-0709',
+    'grok-fast': 'xai/grok-4-1-fast-reasoning',
+    // DeepSeek
+    deepseek: 'deepseek/deepseek-chat',
+    r1: 'deepseek/deepseek-reasoner',
+    // Free models
+    free: 'nvidia/nemotron-ultra-253b',
+    nemotron: 'nvidia/nemotron-ultra-253b',
+    'deepseek-free': 'nvidia/deepseek-v3.2',
+    devstral: 'nvidia/devstral-2-123b',
+    'qwen-coder': 'nvidia/qwen3-coder-480b',
+    maverick: 'nvidia/llama-4-maverick',
+    // Minimax
+    minimax: 'minimax/minimax-m2.7',
+    // Others
     glm: 'zai/glm-5',
+    kimi: 'moonshot/kimi-k2.5',
 };
 // Model pricing (per 1M tokens) - used for stats
 const MODEL_PRICING = {
@@ -66,27 +98,68 @@ const MODEL_PRICING = {
     'blockrun/eco': { input: 0.2, output: 1.0 },
     'blockrun/premium': { input: 3.0, output: 15.0 },
     'blockrun/free': { input: 0, output: 0 },
-    // Individual models
+    // FREE - NVIDIA models
+    'nvidia/gpt-oss-120b': { input: 0, output: 0 },
+    'nvidia/gpt-oss-20b': { input: 0, output: 0 },
+    'nvidia/nemotron-ultra-253b': { input: 0, output: 0 },
+    'nvidia/nemotron-3-super-120b': { input: 0, output: 0 },
+    'nvidia/nemotron-super-49b': { input: 0, output: 0 },
+    'nvidia/deepseek-v3.2': { input: 0, output: 0 },
+    'nvidia/mistral-large-3-675b': { input: 0, output: 0 },
+    'nvidia/qwen3-coder-480b': { input: 0, output: 0 },
+    'nvidia/devstral-2-123b': { input: 0, output: 0 },
+    'nvidia/glm-4.7': { input: 0, output: 0 },
+    'nvidia/llama-4-maverick': { input: 0, output: 0 },
+    // Anthropic
     'anthropic/claude-sonnet-4.6': { input: 3.0, output: 15.0 },
     'anthropic/claude-opus-4.6': { input: 5.0, output: 25.0 },
     'anthropic/claude-haiku-4.5': { input: 1.0, output: 5.0 },
-    'openai/gpt-5.4': { input: 2.5, output: 15.0 },
+    // OpenAI
+    'openai/gpt-5-nano': { input: 0.05, output: 0.4 },
+    'openai/gpt-4.1-nano': { input: 0.1, output: 0.4 },
+    'openai/gpt-4o-mini': { input: 0.15, output: 0.6 },
     'openai/gpt-5-mini': { input: 0.25, output: 2.0 },
-    'google/gemini-2.5-pro': { input: 1.25, output: 10.0 },
+    'openai/gpt-4.1-mini': { input: 0.4, output: 1.6 },
+    'openai/gpt-5.2': { input: 1.75, output: 14.0 },
+    'openai/gpt-5.3': { input: 1.75, output: 14.0 },
+    'openai/gpt-5.3-codex': { input: 1.75, output: 14.0 },
+    'openai/gpt-4.1': { input: 2.0, output: 8.0 },
+    'openai/o3': { input: 2.0, output: 8.0 },
+    'openai/gpt-4o': { input: 2.5, output: 10.0 },
+    'openai/gpt-5.4': { input: 2.5, output: 15.0 },
+    'openai/o1-mini': { input: 1.1, output: 4.4 },
+    'openai/o3-mini': { input: 1.1, output: 4.4 },
+    'openai/o4-mini': { input: 1.1, output: 4.4 },
+    'openai/o1': { input: 15.0, output: 60.0 },
+    'openai/gpt-5.2-pro': { input: 21.0, output: 168.0 },
+    'openai/gpt-5.4-pro': { input: 30.0, output: 180.0 },
+    // Google
+    'google/gemini-2.5-flash-lite': { input: 0.1, output: 0.4 },
     'google/gemini-2.5-flash': { input: 0.3, output: 2.5 },
-    'deepseek/deepseek-chat': { input: 0.28, output: 0.42 },
-    'deepseek/deepseek-reasoner': { input: 0.55, output: 2.19 },
-    'xai/grok-3': { input: 3.0, output: 15.0 },
+    'google/gemini-3-flash-preview': { input: 0.5, output: 3.0 },
+    'google/gemini-2.5-pro': { input: 1.25, output: 10.0 },
+    'google/gemini-3-pro-preview': { input: 2.0, output: 12.0 },
+    'google/gemini-3.1-pro': { input: 2.0, output: 12.0 },
+    // xAI
     'xai/grok-4-fast': { input: 0.2, output: 0.5 },
+    'xai/grok-4-fast-reasoning': { input: 0.2, output: 0.5 },
+    'xai/grok-4-1-fast': { input: 0.2, output: 0.5 },
     'xai/grok-4-1-fast-reasoning': { input: 0.2, output: 0.5 },
-    'nvidia/gpt-oss-120b': { input: 0, output: 0 },
-    'zai/glm-5': { input: 1.0, output: 3.2 },
+    'xai/grok-4-0709': { input: 0.2, output: 1.5 },
+    'xai/grok-3-mini': { input: 0.3, output: 0.5 },
+    'xai/grok-2-vision': { input: 2.0, output: 10.0 },
+    'xai/grok-3': { input: 3.0, output: 15.0 },
+    // DeepSeek
+    'deepseek/deepseek-chat': { input: 0.28, output: 0.42 },
+    'deepseek/deepseek-reasoner': { input: 0.28, output: 0.42 },
+    // Minimax
+    'minimax/minimax-m2.7': { input: 0.3, output: 1.2 },
+    'minimax/minimax-m2.5': { input: 0.3, output: 1.2 },
+    // Others
     'moonshot/kimi-k2.5': { input: 0.6, output: 3.0 },
-    'openai/gpt-5.3-codex': { input: 2.5, output: 10.0 },
-    'openai/o3': { input: 2.0, output: 8.0 },
-    'openai/o4-mini': { input: 1.1, output: 4.4 },
-    'google/gemini-2.5-flash-lite': { input: 0.08, output: 0.3 },
-    'google/gemini-3.1-pro': { input: 1.25, output: 10.0 },
+    'nvidia/kimi-k2.5': { input: 0.55, output: 2.5 },
+    'zai/glm-5': { input: 1.0, output: 3.2 },
+    'zai/glm-5-turbo': { input: 1.2, output: 4.0 },
 };
 function estimateCost(model, inputTokens, outputTokens) {
     const pricing = MODEL_PRICING[model] || { input: 2.0, output: 10.0 };
@@ -191,19 +264,18 @@ export function createProxy(options) {
                             res.end(JSON.stringify(fakeResponse));
                             return;
                         }
-                        // Apply model override only if:
-                        // 1. User specified --model on CLI (options.modelOverride)
-                        // 2. User switched model in-session (currentModel set by "use X" command)
-                        // 3. Request has no model specified
-                        if (options.modelOverride && currentModel) {
-                            // CLI --model flag: always use this
+                        // Model override logic:
+                        // - Claude Code sends native Anthropic IDs (e.g. "claude-sonnet-4-6-20250514")
+                        //   which don't contain "/" — these MUST be replaced with currentModel.
+                        // - BlockRun model IDs always contain "/" (e.g. "blockrun/auto", "nvidia/nemotron-ultra-253b")
+                        //   — these should be passed through as-is.
+                        // - If --model CLI flag is set, always override regardless.
+                        if (options.modelOverride) {
                             parsed.model = currentModel;
                         }
-                        else if (!parsed.model) {
-                            // No model in request: use default
+                        else if (!parsed.model || !parsed.model.includes('/')) {
                             parsed.model = currentModel || DEFAULT_MODEL;
                         }
-                        // Otherwise: use the model from the request as-is
                         requestModel = parsed.model || DEFAULT_MODEL;
                         // Smart routing: if model is a routing profile, classify and route
                         const routingProfile = parseRoutingProfile(requestModel);

package/dist/proxy/sse-translator.d.ts ADDED Viewed

@@ -0,0 +1,29 @@
+/**
+ * SSE Event Translator: OpenAI → Anthropic Messages API format
+ *
+ * Handles three critical gaps in the streaming pipeline:
+ * 1. Tool calls: choice.delta.tool_calls → content_block_start/content_block_delta (tool_use)
+ * 2. Reasoning: reasoning_content → content_block_start/content_block_delta (thinking)
+ * 3. Ensures proper content_block_stop and message_stop events
+ *
+ * The translator is stateful: it buffers partial SSE text between calls and
+ * tracks which content block is open, so the same instance must receive every
+ * chunk of a response, in order.
+ */
+export declare class SSETranslator {
+    /** Per-message translation state (message id, model, block index, open blocks, token counts). */
+    private state;
+    /** Raw SSE text received so far that does not yet form a complete event. */
+    private buffer;
+    /**
+     * @param model Model name reported in `message_start`; the implementation
+     *              defaults it to 'unknown' and replaces it with the `model`
+     *              field of the first translated chunk when that field is present.
+     */
+    constructor(model?: string);
+    /**
+     * Detect whether an SSE chunk is in OpenAI format.
+     * Returns true if it contains OpenAI-style `choices[].delta` structure.
+     * This is a substring check on the raw text, not a parse.
+     */
+    static isOpenAIFormat(chunk: string): boolean;
+    /**
+     * Process a raw SSE text chunk and return translated Anthropic-format SSE events.
+     * Returns null if no translation needed (already Anthropic format or not parseable).
+     * Also returns null while the buffered text does not yet contain a complete event.
+     */
+    processChunk(rawChunk: string): string | null;
+    /** Extracts the complete events currently held in `buffer`, leaving any partial tail buffered. */
+    private parseSSEEvents;
+    /** Serializes one event as an `event:` line, a `data:` line with the JSON payload, and a blank line. */
+    private formatSSE;
+    /** Emits `content_block_stop` for the open thinking block, if there is one. */
+    private closeThinkingBlock;
+    /** Emits `content_block_stop` for the open text block, if there is one. */
+    private closeTextBlock;
+    /** Emits `content_block_stop` for every tracked tool call and forgets them. */
+    private closeToolCalls;
+    /** Closes the thinking block, the text block and the tool calls, in that order. */
+    private closeActiveBlocks;
+}

package/dist/proxy/sse-translator.js ADDED Viewed

@@ -0,0 +1,296 @@
/**
 * SSE Event Translator: OpenAI → Anthropic Messages API format
 *
 * Handles three critical gaps in the streaming pipeline:
 * 1. Tool calls: choice.delta.tool_calls → content_block_start/content_block_delta (tool_use)
 * 2. Reasoning: reasoning_content → content_block_start/content_block_delta (thinking)
 * 3. Ensures proper content_block_stop and message_stop events
 *
 * Invariant maintained throughout: at most one content block is open at a
 * time, and `state.blockIndex` is the index of that open block (or of the next
 * block to open). Every close helper emits `content_block_stop` and then
 * advances `blockIndex`.
 */
// OpenAI `finish_reason` → Anthropic `stop_reason`. A Map (not an object
// literal) so that inherited keys such as "constructor" can never match.
// Reasons that are not listed are forwarded unchanged.
const STOP_REASON_BY_FINISH_REASON = new Map([
    ['stop', 'end_turn'],
    ['tool_calls', 'tool_use'],
    ['function_call', 'tool_use'],
    ['length', 'max_tokens'],
]);
// Event separator: a blank line, with either LF or CRLF line endings.
const EVENT_BOUNDARY = /\r?\n\r?\n/g;
// ─── SSE Translator ─────────────────────────────────────────────────────────
export class SSETranslator {
    state;
    buffer = '';
    /**
     * @param {string} [model='unknown'] Model name reported in `message_start`;
     *   replaced by the `model` field of the first translated chunk, if any.
     */
    constructor(model = 'unknown') {
        this.state = {
            messageId: `msg_brcc_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
            model,
            blockIndex: 0,
            // OpenAI tool-call index → { id, name } of the tool_use block that is open.
            activeToolCalls: new Map(),
            thinkingBlockActive: false,
            textBlockActive: false,
            messageStarted: false,
            inputTokens: 0,
            // Estimated as one per translated chunk until upstream reports real usage.
            outputTokens: 0,
            // Anthropic stop_reason derived from the upstream finish_reason, once seen.
            stopReason: null,
            // output_tokens value carried by the last message_delta (null = none sent yet).
            reportedOutputTokens: null,
            messageStopped: false,
        };
    }
    /**
     * Detect whether an SSE chunk is in OpenAI format.
     * Returns true if it contains OpenAI-style `choices[].delta` structure.
     * This is a cheap substring test on the raw text, not a parse.
     *
     * @param {string} chunk Raw SSE text.
     * @returns {boolean}
     */
    static isOpenAIFormat(chunk) {
        // Look for OpenAI-specific patterns in the SSE data
        return (chunk.includes('"choices"') &&
            chunk.includes('"delta"') &&
            !chunk.includes('"content_block_'));
    }
    /**
     * Process a raw SSE text chunk and return translated Anthropic-format SSE events.
     * Returns null if no translation needed (already Anthropic format or not parseable),
     * or if the buffered text does not yet contain a complete event.
     *
     * @param {string} rawChunk Next piece of the upstream stream; may end mid-event.
     * @returns {string | null} Concatenated Anthropic SSE frames, or null.
     */
    processChunk(rawChunk) {
        this.buffer += rawChunk;
        const events = this.parseSSEEvents();
        if (events.length === 0)
            return null;
        const translated = [];
        for (const event of events) {
            if (event.data.trim() === '[DONE]') {
                translated.push(...this.finishMessage());
                continue;
            }
            let parsed;
            try {
                parsed = JSON.parse(event.data);
            }
            catch {
                // Deliberate best effort: a payload that is not JSON cannot be
                // translated, and failing the whole stream over it would be worse.
                continue;
            }
            if (parsed === null || typeof parsed !== 'object')
                continue;
            // Usage may ride on a usage-only event or on the final choice chunk.
            this.recordUsage(parsed.usage);
            // Skip if this doesn't look like OpenAI format
            if (!Array.isArray(parsed.choices) || parsed.choices.length === 0)
                continue;
            const choice = parsed.choices[0];
            if (!choice)
                continue;
            // Emit message_start on first chunk
            translated.push(...this.startMessage(parsed.model));
            // Some providers send the finish chunk without a delta object.
            const delta = choice.delta ?? {};
            if (delta.reasoning_content) {
                translated.push(...this.translateReasoning(delta.reasoning_content));
            }
            if (delta.content) {
                translated.push(...this.translateText(delta.content));
            }
            if (Array.isArray(delta.tool_calls) && delta.tool_calls.length > 0) {
                translated.push(...this.translateToolCalls(delta.tool_calls));
            }
            if (choice.finish_reason) {
                translated.push(...this.closeActiveBlocks());
                this.state.stopReason =
                    STOP_REASON_BY_FINISH_REASON.get(choice.finish_reason) ?? choice.finish_reason;
                translated.push(this.messageDelta());
            }
        }
        return translated.length > 0 ? translated.join('') : null;
    }
    // ── Translation steps ───────────────────────────────────────────────────
    /** Stores upstream token counts; fields that are absent keep the current value. */
    recordUsage(usage) {
        if (!usage || typeof usage !== 'object')
            return;
        this.state.inputTokens = usage.prompt_tokens ?? this.state.inputTokens;
        this.state.outputTokens = usage.completion_tokens ?? this.state.outputTokens;
    }
    /** Emits `message_start` and `ping` once per message; later calls return []. */
    startMessage(model) {
        if (this.state.messageStarted)
            return [];
        this.state.messageStarted = true;
        if (model)
            this.state.model = model;
        return [
            this.formatSSE('message_start', {
                type: 'message_start',
                message: {
                    id: this.state.messageId,
                    type: 'message',
                    role: 'assistant',
                    model: this.state.model,
                    content: [],
                    stop_reason: null,
                    stop_sequence: null,
                    usage: { input_tokens: this.state.inputTokens, output_tokens: 0 },
                },
            }),
            this.formatSSE('ping', { type: 'ping' }),
        ];
    }
    /** Reasoning content → thinking block (opened on demand). */
    translateReasoning(text) {
        const out = [];
        if (!this.state.thinkingBlockActive) {
            // Any other open block owns the current index and must be closed first.
            out.push(...this.closeTextBlock(), ...this.closeToolCalls());
            this.state.thinkingBlockActive = true;
            out.push(this.formatSSE('content_block_start', {
                type: 'content_block_start',
                index: this.state.blockIndex,
                content_block: { type: 'thinking', thinking: '' },
            }));
        }
        out.push(this.formatSSE('content_block_delta', {
            type: 'content_block_delta',
            index: this.state.blockIndex,
            delta: { type: 'thinking_delta', thinking: text },
        }));
        this.state.outputTokens++;
        return out;
    }
    /** Text content → text block (opened on demand). */
    translateText(text) {
        const out = [...this.closeThinkingBlock()];
        if (!this.state.textBlockActive) {
            out.push(...this.closeToolCalls());
            this.state.textBlockActive = true;
            out.push(this.formatSSE('content_block_start', {
                type: 'content_block_start',
                index: this.state.blockIndex,
                content_block: { type: 'text', text: '' },
            }));
        }
        out.push(this.formatSSE('content_block_delta', {
            type: 'content_block_delta',
            index: this.state.blockIndex,
            delta: { type: 'text_delta', text },
        }));
        this.state.outputTokens++;
        return out;
    }
    /**
     * Tool calls → tool_use blocks.
     *
     * A fragment carrying both `id` and `function.name` starts a tool call,
     * unless it merely repeats the id of the call already open at that index.
     * Starting a call first closes whichever tool block is open, so each call
     * gets its own content-block index. Argument fragments are forwarded only
     * to the call that is currently open; a fragment addressed to a call that
     * is not open has no block to land in and is skipped.
     */
    translateToolCalls(toolCalls) {
        const out = [...this.closeThinkingBlock(), ...this.closeTextBlock()];
        for (const tc of toolCalls) {
            const tcIndex = tc.index ?? 0;
            const open = this.state.activeToolCalls.get(tcIndex);
            const startsNewCall = Boolean(tc.id && tc.function?.name) && open?.id !== tc.id;
            if (startsNewCall) {
                out.push(...this.closeToolCalls());
                this.state.activeToolCalls.set(tcIndex, { id: tc.id, name: tc.function.name });
                out.push(this.formatSSE('content_block_start', {
                    type: 'content_block_start',
                    index: this.state.blockIndex,
                    content_block: {
                        type: 'tool_use',
                        id: tc.id,
                        name: tc.function.name,
                        input: {},
                    },
                }));
            }
            else if (!open) {
                continue;
            }
            if (tc.function?.arguments) {
                out.push(this.formatSSE('content_block_delta', {
                    type: 'content_block_delta',
                    index: this.state.blockIndex,
                    delta: {
                        type: 'input_json_delta',
                        partial_json: tc.function.arguments,
                    },
                }));
            }
        }
        this.state.outputTokens++;
        return out;
    }
    /** Builds a `message_delta` with the known stop reason and current output token count. */
    messageDelta() {
        this.state.reportedOutputTokens = this.state.outputTokens;
        return this.formatSSE('message_delta', {
            type: 'message_delta',
            delta: { stop_reason: this.state.stopReason ?? 'end_turn', stop_sequence: null },
            usage: { output_tokens: this.state.outputTokens },
        });
    }
    /**
     * Handles `[DONE]`: closes open blocks and emits `message_stop` exactly once.
     * A `message_delta` is emitted here only if none was sent yet or the token
     * count changed since the last one (usage that arrived after the finish
     * chunk); it always repeats the stop reason already derived from upstream.
     */
    finishMessage() {
        if (this.state.messageStopped)
            return [];
        this.state.messageStopped = true;
        const out = [...this.startMessage(), ...this.closeActiveBlocks()];
        if (this.state.reportedOutputTokens !== this.state.outputTokens) {
            out.push(this.messageDelta());
        }
        out.push(this.formatSSE('message_stop', { type: 'message_stop' }));
        return out;
    }
    // ── Helpers ─────────────────────────────────────────────────────────────
    /**
     * Extracts every complete event from `buffer` and leaves the unterminated
     * tail buffered. Accepts LF or CRLF line endings and `data:`/`event:`
     * fields with or without the single space after the colon. Blocks without
     * any `data` line (comments, keep-alives) are dropped.
     *
     * @returns {{ event: string | undefined, data: string }[]}
     */
    parseSSEEvents() {
        const events = [];
        let consumed = 0;
        for (const boundary of this.buffer.matchAll(EVENT_BOUNDARY)) {
            const block = this.buffer.slice(consumed, boundary.index);
            consumed = boundary.index + boundary[0].length;
            let eventName;
            const dataLines = [];
            for (const line of block.split(/\r?\n/)) {
                if (line.startsWith('data:')) {
                    const value = line.slice(5);
                    dataLines.push(value.startsWith(' ') ? value.slice(1) : value);
                }
                else if (line.startsWith('event:')) {
                    eventName = line.slice(6).trim();
                }
            }
            if (dataLines.length > 0) {
                events.push({ event: eventName, data: dataLines.join('\n') });
            }
        }
        // Keep unconsumed text in buffer
        if (consumed > 0) {
            this.buffer = this.buffer.slice(consumed);
        }
        return events;
    }
    /** Serializes one SSE frame: `event:` line, `data:` line with the JSON payload, blank line. */
    formatSSE(event, data) {
        return `event: ${event}\ndata: ${JSON.stringify(data)}\n\n`;
    }
    /** Closes the thinking block if one is open; returns the emitted frames. */
    closeThinkingBlock() {
        if (!this.state.thinkingBlockActive)
            return [];
        this.state.thinkingBlockActive = false;
        return [this.stopCurrentBlock()];
    }
    /** Closes the text block if one is open; returns the emitted frames. */
    closeTextBlock() {
        if (!this.state.textBlockActive)
            return [];
        this.state.textBlockActive = false;
        return [this.stopCurrentBlock()];
    }
    /** Closes every tracked tool call and forgets them; returns the emitted frames. */
    closeToolCalls() {
        const events = [];
        for (let remaining = this.state.activeToolCalls.size; remaining > 0; remaining--) {
            events.push(this.stopCurrentBlock());
        }
        this.state.activeToolCalls.clear();
        return events;
    }
    /** Closes the thinking block, the text block and the tool calls, in that order. */
    closeActiveBlocks() {
        return [
            ...this.closeThinkingBlock(),
            ...this.closeTextBlock(),
            ...this.closeToolCalls(),
        ];
    }
    /** Emits `content_block_stop` for the current index, then advances to the next index. */
    stopCurrentBlock() {
        const frame = this.formatSSE('content_block_stop', {
            type: 'content_block_stop',
            index: this.state.blockIndex,
        });
        this.state.blockIndex++;
        return frame;
    }
}

package/dist/router/index.js CHANGED Viewed

@@ -6,11 +6,11 @@
 const AUTO_TIERS = {
     SIMPLE: {
         primary: 'google/gemini-2.5-flash',
-        fallback: ['deepseek/deepseek-chat', 'nvidia/gpt-oss-120b'],
+        fallback: ['deepseek/deepseek-chat', 'nvidia/nemotron-ultra-253b'],
     },
     MEDIUM: {
         primary: 'moonshot/kimi-k2.5',
-        fallback: ['google/gemini-2.5-flash', 'deepseek/deepseek-chat'],
+        fallback: ['google/gemini-2.5-flash', 'minimax/minimax-m2.7'],
     },
     COMPLEX: {
         primary: 'google/gemini-3.1-pro',
@@ -23,20 +23,20 @@ const AUTO_TIERS = {
 };
 const ECO_TIERS = {
     SIMPLE: {
-        primary: 'nvidia/gpt-oss-120b',
-        fallback: ['google/gemini-2.5-flash-lite'],
+        primary: 'nvidia/nemotron-ultra-253b',
+        fallback: ['nvidia/gpt-oss-120b', 'nvidia/deepseek-v3.2'],
     },
     MEDIUM: {
         primary: 'google/gemini-2.5-flash-lite',
-        fallback: ['nvidia/gpt-oss-120b'],
+        fallback: ['nvidia/nemotron-ultra-253b', 'nvidia/qwen3-coder-480b'],
     },
     COMPLEX: {
         primary: 'google/gemini-2.5-flash-lite',
-        fallback: ['deepseek/deepseek-chat'],
+        fallback: ['deepseek/deepseek-chat', 'nvidia/mistral-large-3-675b'],
     },
     REASONING: {
         primary: 'xai/grok-4-1-fast-reasoning',
-        fallback: ['deepseek/deepseek-reasoner'],
+        fallback: ['deepseek/deepseek-reasoner', 'nvidia/nemotron-ultra-253b'],
     },
 };
 const PREMIUM_TIERS = {
@@ -189,7 +189,7 @@ export function routeRequest(prompt, profile = 'auto') {
     // Free profile - always use free model
     if (profile === 'free') {
         return {
-            model: 'nvidia/gpt-oss-120b',
+            model: 'nvidia/nemotron-ultra-253b',
             tier: 'SIMPLE',
             confidence: 1.0,
             signals: ['free-profile'],
@@ -217,22 +217,48 @@ export function routeRequest(prompt, profile = 'auto') {
     // Baseline: Claude Opus at $5/$25 per 1M tokens
     const OPUS_COST_PER_1K = 0.015; // rough average
     const modelCosts = {
+        // FREE
         'nvidia/gpt-oss-120b': 0,
-        'google/gemini-2.5-flash': 0.001,
+        'nvidia/gpt-oss-20b': 0,
+        'nvidia/nemotron-ultra-253b': 0,
+        'nvidia/nemotron-3-super-120b': 0,
+        'nvidia/nemotron-super-49b': 0,
+        'nvidia/deepseek-v3.2': 0,
+        'nvidia/mistral-large-3-675b': 0,
+        'nvidia/qwen3-coder-480b': 0,
+        'nvidia/devstral-2-123b': 0,
+        'nvidia/glm-4.7': 0,
+        'nvidia/llama-4-maverick': 0,
+        // Budget
+        'openai/gpt-5-nano': 0.0002,
+        'openai/gpt-4.1-nano': 0.0003,
         'google/gemini-2.5-flash-lite': 0.0003,
+        'xai/grok-4-fast': 0.0004,
+        'xai/grok-4-1-fast': 0.0004,
+        'xai/grok-4-1-fast-reasoning': 0.0004,
         'deepseek/deepseek-chat': 0.0004,
-        'deepseek/deepseek-reasoner': 0.003,
-        'moonshot/kimi-k2.5': 0.002,
-        'google/gemini-2.5-pro': 0.006,
-        'google/gemini-3.1-pro': 0.007,
+        'deepseek/deepseek-reasoner': 0.0004,
+        'minimax/minimax-m2.7': 0.0008,
+        'minimax/minimax-m2.5': 0.0008,
+        'google/gemini-2.5-flash': 0.0014,
+        'openai/gpt-5-mini': 0.0011,
+        'moonshot/kimi-k2.5': 0.0018,
+        // Mid-range
         'anthropic/claude-haiku-4.5': 0.003,
+        'zai/glm-5': 0.0021,
+        'openai/o4-mini': 0.0028,
+        'google/gemini-2.5-pro': 0.0056,
+        'openai/gpt-5.3-codex': 0.0079,
+        'openai/gpt-5.2': 0.0079,
+        'openai/gpt-5.3': 0.0079,
+        'openai/gpt-4.1': 0.005,
+        'openai/o3': 0.005,
+        'google/gemini-3.1-pro': 0.007,
+        'openai/gpt-5.4': 0.0088,
+        // Premium
         'anthropic/claude-sonnet-4.6': 0.009,
+        'xai/grok-3': 0.009,
         'anthropic/claude-opus-4.6': 0.015,
-        'openai/gpt-5.3-codex': 0.008,
-        'openai/gpt-5.4': 0.009,
-        'openai/o3': 0.012,
-        'openai/o4-mini': 0.006,
-        'xai/grok-4-1-fast-reasoning': 0.0004,
     };
     const modelCost = modelCosts[model] ?? 0.005;
     const savings = Math.max(0, (OPUS_COST_PER_1K - modelCost) / OPUS_COST_PER_1K);
@@ -257,7 +283,7 @@ export function getFallbackChain(tier, profile = 'auto') {
             tierConfigs = PREMIUM_TIERS;
             break;
         case 'free':
-            return ['nvidia/gpt-oss-120b'];
+            return ['nvidia/nemotron-ultra-253b'];
         default:
             tierConfigs = AUTO_TIERS;
     }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@blockrun/cc",
-  "version": "0.9.2",
+  "version": "0.9.4",
   "description": "Run Claude Code with any model — no rate limits, no account locks, no phone verification. Pay per use with USDC.",
   "type": "module",
   "bin": {