npm - @blockrun/cc - Versions diffs - 0.9.2 → 0.9.3 - Mend

@blockrun/cc 0.9.2 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/dist/proxy/fallback.js +1 -1
package/dist/proxy/server.js +93 -20
package/dist/proxy/sse-translator.d.ts +29 -0
package/dist/proxy/sse-translator.js +296 -0
package/dist/router/index.js +45 -19
package/package.json +1 -1

package/dist/proxy/fallback.js CHANGED Viewed

@@ -7,7 +7,7 @@ export const DEFAULT_FALLBACK_CONFIG = {
         'blockrun/auto', // Smart routing (default)
         'blockrun/eco', // Cheapest capable model
         'deepseek/deepseek-chat', // Direct fallback
-        'nvidia/gpt-oss-120b', // Free model as ultimate fallback
+        'nvidia/nemotron-ultra-253b', // Free model as ultimate fallback
     ],
     retryOn: [429, 500, 502, 503, 504, 529],
     maxRetries: 5,

package/dist/proxy/server.js CHANGED Viewed

@@ -40,24 +40,56 @@ const DEFAULT_MAX_TOKENS = 4096;
 let lastOutputTokens = 0;
 // Model shortcuts for quick switching
 const MODEL_SHORTCUTS = {
+    // Routing profiles
     auto: 'blockrun/auto',
     smart: 'blockrun/auto',
     eco: 'blockrun/eco',
     premium: 'blockrun/premium',
-    gpt: 'openai/gpt-5.4',
-    gpt5: 'openai/gpt-5.4',
-    'gpt-5': 'openai/gpt-5.4',
-    'gpt-5.4': 'openai/gpt-5.4',
+    // Anthropic
     sonnet: 'anthropic/claude-sonnet-4.6',
     claude: 'anthropic/claude-sonnet-4.6',
     opus: 'anthropic/claude-opus-4.6',
     haiku: 'anthropic/claude-haiku-4.5',
-    deepseek: 'deepseek/deepseek-chat',
+    // OpenAI
+    gpt: 'openai/gpt-5.4',
+    gpt5: 'openai/gpt-5.4',
+    'gpt-5': 'openai/gpt-5.4',
+    'gpt-5.4': 'openai/gpt-5.4',
+    'gpt-5.4-pro': 'openai/gpt-5.4-pro',
+    'gpt-5.3': 'openai/gpt-5.3',
+    'gpt-5.2': 'openai/gpt-5.2',
+    'gpt-5.2-pro': 'openai/gpt-5.2-pro',
+    'gpt-4.1': 'openai/gpt-4.1',
+    codex: 'openai/gpt-5.3-codex',
+    nano: 'openai/gpt-5-nano',
+    mini: 'openai/gpt-5-mini',
+    o3: 'openai/o3',
+    o4: 'openai/o4-mini',
+    'o4-mini': 'openai/o4-mini',
+    o1: 'openai/o1',
+    // Google
     gemini: 'google/gemini-2.5-pro',
+    flash: 'google/gemini-2.5-flash',
+    'gemini-3': 'google/gemini-3.1-pro',
+    // xAI
     grok: 'xai/grok-3',
-    free: 'nvidia/gpt-oss-120b',
-    mini: 'openai/gpt-5-mini',
+    'grok-4': 'xai/grok-4-0709',
+    'grok-fast': 'xai/grok-4-1-fast-reasoning',
+    // DeepSeek
+    deepseek: 'deepseek/deepseek-chat',
+    r1: 'deepseek/deepseek-reasoner',
+    // Free models
+    free: 'nvidia/nemotron-ultra-253b',
+    nemotron: 'nvidia/nemotron-ultra-253b',
+    'deepseek-free': 'nvidia/deepseek-v3.2',
+    devstral: 'nvidia/devstral-2-123b',
+    'qwen-coder': 'nvidia/qwen3-coder-480b',
+    maverick: 'nvidia/llama-4-maverick',
+    // Minimax
+    minimax: 'minimax/minimax-m2.7',
+    // Others
     glm: 'zai/glm-5',
+    kimi: 'moonshot/kimi-k2.5',
 };
 // Model pricing (per 1M tokens) - used for stats
 const MODEL_PRICING = {
@@ -66,27 +98,68 @@ const MODEL_PRICING = {
     'blockrun/eco': { input: 0.2, output: 1.0 },
     'blockrun/premium': { input: 3.0, output: 15.0 },
     'blockrun/free': { input: 0, output: 0 },
-    // Individual models
+    // FREE - NVIDIA models
+    'nvidia/gpt-oss-120b': { input: 0, output: 0 },
+    'nvidia/gpt-oss-20b': { input: 0, output: 0 },
+    'nvidia/nemotron-ultra-253b': { input: 0, output: 0 },
+    'nvidia/nemotron-3-super-120b': { input: 0, output: 0 },
+    'nvidia/nemotron-super-49b': { input: 0, output: 0 },
+    'nvidia/deepseek-v3.2': { input: 0, output: 0 },
+    'nvidia/mistral-large-3-675b': { input: 0, output: 0 },
+    'nvidia/qwen3-coder-480b': { input: 0, output: 0 },
+    'nvidia/devstral-2-123b': { input: 0, output: 0 },
+    'nvidia/glm-4.7': { input: 0, output: 0 },
+    'nvidia/llama-4-maverick': { input: 0, output: 0 },
+    // Anthropic
     'anthropic/claude-sonnet-4.6': { input: 3.0, output: 15.0 },
     'anthropic/claude-opus-4.6': { input: 5.0, output: 25.0 },
     'anthropic/claude-haiku-4.5': { input: 1.0, output: 5.0 },
-    'openai/gpt-5.4': { input: 2.5, output: 15.0 },
+    // OpenAI
+    'openai/gpt-5-nano': { input: 0.05, output: 0.4 },
+    'openai/gpt-4.1-nano': { input: 0.1, output: 0.4 },
+    'openai/gpt-4o-mini': { input: 0.15, output: 0.6 },
     'openai/gpt-5-mini': { input: 0.25, output: 2.0 },
-    'google/gemini-2.5-pro': { input: 1.25, output: 10.0 },
+    'openai/gpt-4.1-mini': { input: 0.4, output: 1.6 },
+    'openai/gpt-5.2': { input: 1.75, output: 14.0 },
+    'openai/gpt-5.3': { input: 1.75, output: 14.0 },
+    'openai/gpt-5.3-codex': { input: 1.75, output: 14.0 },
+    'openai/gpt-4.1': { input: 2.0, output: 8.0 },
+    'openai/o3': { input: 2.0, output: 8.0 },
+    'openai/gpt-4o': { input: 2.5, output: 10.0 },
+    'openai/gpt-5.4': { input: 2.5, output: 15.0 },
+    'openai/o1-mini': { input: 1.1, output: 4.4 },
+    'openai/o3-mini': { input: 1.1, output: 4.4 },
+    'openai/o4-mini': { input: 1.1, output: 4.4 },
+    'openai/o1': { input: 15.0, output: 60.0 },
+    'openai/gpt-5.2-pro': { input: 21.0, output: 168.0 },
+    'openai/gpt-5.4-pro': { input: 30.0, output: 180.0 },
+    // Google
+    'google/gemini-2.5-flash-lite': { input: 0.1, output: 0.4 },
     'google/gemini-2.5-flash': { input: 0.3, output: 2.5 },
-    'deepseek/deepseek-chat': { input: 0.28, output: 0.42 },
-    'deepseek/deepseek-reasoner': { input: 0.55, output: 2.19 },
-    'xai/grok-3': { input: 3.0, output: 15.0 },
+    'google/gemini-3-flash-preview': { input: 0.5, output: 3.0 },
+    'google/gemini-2.5-pro': { input: 1.25, output: 10.0 },
+    'google/gemini-3-pro-preview': { input: 2.0, output: 12.0 },
+    'google/gemini-3.1-pro': { input: 2.0, output: 12.0 },
+    // xAI
     'xai/grok-4-fast': { input: 0.2, output: 0.5 },
+    'xai/grok-4-fast-reasoning': { input: 0.2, output: 0.5 },
+    'xai/grok-4-1-fast': { input: 0.2, output: 0.5 },
     'xai/grok-4-1-fast-reasoning': { input: 0.2, output: 0.5 },
-    'nvidia/gpt-oss-120b': { input: 0, output: 0 },
-    'zai/glm-5': { input: 1.0, output: 3.2 },
+    'xai/grok-4-0709': { input: 0.2, output: 1.5 },
+    'xai/grok-3-mini': { input: 0.3, output: 0.5 },
+    'xai/grok-2-vision': { input: 2.0, output: 10.0 },
+    'xai/grok-3': { input: 3.0, output: 15.0 },
+    // DeepSeek
+    'deepseek/deepseek-chat': { input: 0.28, output: 0.42 },
+    'deepseek/deepseek-reasoner': { input: 0.28, output: 0.42 },
+    // Minimax
+    'minimax/minimax-m2.7': { input: 0.3, output: 1.2 },
+    'minimax/minimax-m2.5': { input: 0.3, output: 1.2 },
+    // Others
     'moonshot/kimi-k2.5': { input: 0.6, output: 3.0 },
-    'openai/gpt-5.3-codex': { input: 2.5, output: 10.0 },
-    'openai/o3': { input: 2.0, output: 8.0 },
-    'openai/o4-mini': { input: 1.1, output: 4.4 },
-    'google/gemini-2.5-flash-lite': { input: 0.08, output: 0.3 },
-    'google/gemini-3.1-pro': { input: 1.25, output: 10.0 },
+    'nvidia/kimi-k2.5': { input: 0.55, output: 2.5 },
+    'zai/glm-5': { input: 1.0, output: 3.2 },
+    'zai/glm-5-turbo': { input: 1.2, output: 4.0 },
 };
 function estimateCost(model, inputTokens, outputTokens) {
     const pricing = MODEL_PRICING[model] || { input: 2.0, output: 10.0 };

package/dist/proxy/sse-translator.d.ts ADDED Viewed

@@ -0,0 +1,29 @@
/**
 * Type surface of the streaming translator that rewrites OpenAI
 * chat-completion SSE chunks as Anthropic Messages API SSE events.
 *
 * The translation covers:
 * - `choice.delta.tool_calls`  → `tool_use` content blocks
 * - `reasoning_content`        → `thinking` content blocks
 * - block / message termination (`content_block_stop`, `message_stop`)
 */
export declare class SSETranslator {
    /** Per-stream translation state (message id, block index, open blocks, token counters). */
    private state;
    /** Raw SSE text received so far that has not yet formed a complete event. */
    private buffer;
    /**
     * @param model Model name reported in `message_start`; the implementation
     *              defaults it to `'unknown'` when omitted.
     */
    constructor(model?: string);
    /**
     * Cheap substring sniff for OpenAI-style payloads.
     * True when the chunk mentions both `"choices"` and `"delta"` and does not
     * mention any `"content_block_` event type.
     */
    static isOpenAIFormat(chunk: string): boolean;
    /**
     * Append a raw SSE text chunk to the internal buffer and translate every
     * complete event it now contains.
     *
     * @returns The translated Anthropic-format SSE text, or `null` when the
     *          buffer holds no complete event yet or nothing was translated
     *          (for example unparseable or non-OpenAI payloads).
     */
    processChunk(rawChunk: string): string | null;
    /** Splits buffered text into complete events, keeping the unfinished tail buffered. */
    private parseSSEEvents;
    /** Serializes one `event:` / `data:` pair. */
    private formatSSE;
    /** Emits `content_block_stop` for an open thinking block, if any. */
    private closeThinkingBlock;
    /** Emits `content_block_stop` for an open text block, if any. */
    private closeTextBlock;
    /** Emits `content_block_stop` for every open tool_use block. */
    private closeToolCalls;
    /** Closes thinking, text and tool_use blocks, in that order. */
    private closeActiveBlocks;
}

package/dist/proxy/sse-translator.js ADDED Viewed

@@ -0,0 +1,296 @@
/**
 * SSE Event Translator: OpenAI → Anthropic Messages API format
 *
 * Handles three critical gaps in the streaming pipeline:
 * 1. Tool calls: choice.delta.tool_calls → content_block_start/content_block_delta (tool_use)
 * 2. Reasoning: reasoning_content → content_block_start/content_block_delta (thinking)
 * 3. Ensures proper content_block_stop and message_stop events
 */
// ─── Module-private helpers ─────────────────────────────────────────────────
/** OpenAI `finish_reason` values that have a direct Anthropic `stop_reason` counterpart. */
const STOP_REASON_BY_FINISH_REASON = new Map([
    ['stop', 'end_turn'],
    ['tool_calls', 'tool_use'],
    ['function_call', 'tool_use'],
    ['length', 'max_tokens'],
]);
/**
 * Map an OpenAI `finish_reason` to an Anthropic `stop_reason`.
 * Values without a known counterpart are passed through unchanged.
 */
function toAnthropicStopReason(finishReason) {
    return STOP_REASON_BY_FINISH_REASON.get(finishReason) ?? finishReason;
}
/**
 * Read the value of an SSE field line (`field:value` or `field: value`).
 * Returns undefined when the line is not that field. A single leading space
 * after the colon is not part of the value.
 */
function readField(line, field) {
    if (!line.startsWith(`${field}:`))
        return undefined;
    const value = line.slice(field.length + 1);
    return value.startsWith(' ') ? value.slice(1) : value;
}
// ─── SSE Translator ─────────────────────────────────────────────────────────
export class SSETranslator {
    state;
    buffer = '';
    /**
     * @param {string} [model='unknown'] Model name reported in `message_start`
     *   until an upstream chunk carries its own `model` field.
     */
    constructor(model = 'unknown') {
        this.state = {
            messageId: `msg_brcc_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
            model,
            // Index of the open text/thinking block, or the next unused index.
            blockIndex: 0,
            // OpenAI tool_call index → { id, name, blockIndex } of the open tool_use block.
            activeToolCalls: new Map(),
            thinkingBlockActive: false,
            textBlockActive: false,
            messageStarted: false,
            messageStopped: false,
            // Anthropic stop_reason derived from finish_reason; null until one is seen.
            stopReason: null,
            // output_tokens value carried by the most recent message_delta.
            reportedOutputTokens: null,
            // True once upstream supplied completion_tokens; disables the per-chunk estimate.
            usageReported: false,
            inputTokens: 0,
            outputTokens: 0,
        };
    }
    /**
     * Detect whether an SSE chunk is in OpenAI format.
     * Returns true if it contains OpenAI-style `choices[].delta` structure.
     */
    static isOpenAIFormat(chunk) {
        // Look for OpenAI-specific patterns in the SSE data
        return (chunk.includes('"choices"') &&
            chunk.includes('"delta"') &&
            !chunk.includes('"content_block_'));
    }
    /**
     * Process a raw SSE text chunk and return translated Anthropic-format SSE events.
     *
     * The chunk is appended to an internal buffer, so events split across
     * chunk boundaries are translated once they are complete.
     *
     * @param {string} rawChunk Raw SSE text as received from upstream.
     * @returns {string | null} Concatenated Anthropic SSE events, or null when
     *   no complete event is buffered yet or nothing was translated.
     */
    processChunk(rawChunk) {
        this.buffer += rawChunk;
        const events = this.parseSSEEvents();
        if (events.length === 0)
            return null;
        const state = this.state;
        const translated = [];
        const emit = (event, data) => {
            translated.push(this.formatSSE(event, data));
        };
        // Emits message_start + ping exactly once per stream.
        const startMessage = (model) => {
            if (state.messageStarted)
                return;
            state.messageStarted = true;
            if (model)
                state.model = model;
            emit('message_start', {
                type: 'message_start',
                message: {
                    id: state.messageId,
                    type: 'message',
                    role: 'assistant',
                    model: state.model,
                    content: [],
                    stop_reason: null,
                    stop_sequence: null,
                    usage: { input_tokens: state.inputTokens, output_tokens: 0 },
                },
            });
            emit('ping', { type: 'ping' });
        };
        const emitMessageDelta = (stopReason) => {
            state.reportedOutputTokens = state.outputTokens;
            emit('message_delta', {
                type: 'message_delta',
                delta: { stop_reason: stopReason, stop_sequence: null },
                usage: { output_tokens: state.outputTokens },
            });
        };
        // Only overwrite counters with values upstream actually supplied.
        const recordUsage = (usage) => {
            if (usage === null || typeof usage !== 'object')
                return;
            if (typeof usage.prompt_tokens === 'number')
                state.inputTokens = usage.prompt_tokens;
            if (typeof usage.completion_tokens === 'number') {
                state.outputTokens = usage.completion_tokens;
                state.usageReported = true;
            }
        };
        // One-per-chunk estimate, used only until real usage numbers arrive.
        const countOutputChunk = () => {
            if (!state.usageReported)
                state.outputTokens++;
        };
        for (const event of events) {
            // Nothing may follow message_stop.
            if (state.messageStopped)
                break;
            if (event.data.trim() === '[DONE]') {
                startMessage();
                translated.push(...this.closeActiveBlocks());
                // A finish_reason chunk normally already produced the message_delta.
                // Repeat it only if none was sent or the token count changed since
                // (usage-only chunk after finish_reason), and never replace the
                // recorded stop reason with a hard-coded one.
                if (state.stopReason === null ||
                    state.reportedOutputTokens !== state.outputTokens) {
                    emitMessageDelta(state.stopReason ?? 'end_turn');
                }
                emit('message_stop', { type: 'message_stop' });
                state.messageStopped = true;
                continue;
            }
            let parsed;
            try {
                parsed = JSON.parse(event.data);
            }
            catch {
                // Best effort: an unparseable payload is skipped, the stream goes on.
                continue;
            }
            if (parsed === null || typeof parsed !== 'object')
                continue;
            // Usage may come alone or attached to a chunk that also has choices.
            recordUsage(parsed.usage);
            // Skip if this doesn't look like OpenAI format
            if (!Array.isArray(parsed.choices) || parsed.choices.length === 0)
                continue;
            // Emit message_start on first chunk
            startMessage(parsed.model);
            const choice = parsed.choices[0];
            const delta = choice.delta ?? {};
            // ── Reasoning content → thinking block ──
            if (delta.reasoning_content) {
                if (!state.thinkingBlockActive) {
                    // Only one content block is kept open besides tool calls,
                    // and tool calls are finished before thinking starts.
                    translated.push(...this.closeTextBlock());
                    translated.push(...this.closeToolCalls());
                    state.thinkingBlockActive = true;
                    emit('content_block_start', {
                        type: 'content_block_start',
                        index: state.blockIndex,
                        content_block: { type: 'thinking', thinking: '' },
                    });
                }
                emit('content_block_delta', {
                    type: 'content_block_delta',
                    index: state.blockIndex,
                    delta: { type: 'thinking_delta', thinking: delta.reasoning_content },
                });
                countOutputChunk();
            }
            // ── Text content → text block ──
            if (delta.content) {
                // Close thinking block if transitioning
                translated.push(...this.closeThinkingBlock());
                if (!state.textBlockActive) {
                    // Close any active tool calls first
                    translated.push(...this.closeToolCalls());
                    state.textBlockActive = true;
                    emit('content_block_start', {
                        type: 'content_block_start',
                        index: state.blockIndex,
                        content_block: { type: 'text', text: '' },
                    });
                }
                emit('content_block_delta', {
                    type: 'content_block_delta',
                    index: state.blockIndex,
                    delta: { type: 'text_delta', text: delta.content },
                });
                countOutputChunk();
            }
            // ── Tool calls → tool_use blocks ──
            if (Array.isArray(delta.tool_calls) && delta.tool_calls.length > 0) {
                // Close thinking/text blocks first
                translated.push(...this.closeThinkingBlock());
                translated.push(...this.closeTextBlock());
                for (const tc of delta.tool_calls) {
                    const tcIndex = tc.index ?? 0;
                    const fn = tc.function;
                    const known = state.activeToolCalls.get(tcIndex);
                    // A chunk that repeats the id of the call already open at this
                    // index is a continuation, not a new call.
                    const startsNewCall = Boolean(tc.id && fn?.name) && known?.id !== tc.id;
                    let targetBlock = known?.blockIndex;
                    if (startsNewCall) {
                        // Close previous tool call at same index if exists
                        if (known) {
                            emit('content_block_stop', {
                                type: 'content_block_stop',
                                index: known.blockIndex,
                            });
                        }
                        // Every tool call gets its own block index, reserved right
                        // away so parallel calls never share an index.
                        targetBlock = state.blockIndex++;
                        state.activeToolCalls.set(tcIndex, {
                            id: tc.id,
                            name: fn.name,
                            blockIndex: targetBlock,
                        });
                        emit('content_block_start', {
                            type: 'content_block_start',
                            index: targetBlock,
                            content_block: {
                                type: 'tool_use',
                                id: tc.id,
                                name: fn.name,
                                input: {},
                            },
                        });
                    }
                    // Stream arguments to the block owned by this tool call. Arguments
                    // for a call that was never started have no block and are dropped.
                    if (fn?.arguments && targetBlock !== undefined) {
                        emit('content_block_delta', {
                            type: 'content_block_delta',
                            index: targetBlock,
                            delta: {
                                type: 'input_json_delta',
                                partial_json: fn.arguments,
                            },
                        });
                    }
                }
                countOutputChunk();
            }
            // ── Handle finish_reason ──
            if (choice.finish_reason) {
                translated.push(...this.closeActiveBlocks());
                state.stopReason = toAnthropicStopReason(choice.finish_reason);
                emitMessageDelta(state.stopReason);
            }
        }
        return translated.length > 0 ? translated.join('') : null;
    }
    // ── Helpers ─────────────────────────────────────────────────────────────
    /**
     * Extract every complete event from the buffer.
     *
     * A line counts only once its terminating newline has arrived (LF or
     * CRLF), and an event only once its blank terminator line has arrived.
     * Everything after the last terminator stays buffered for the next call.
     *
     * @returns {{ event: string | undefined, data: string }[]}
     */
    parseSSEEvents() {
        const events = [];
        let currentEvent;
        let dataLines = [];
        let consumed = 0;
        let cursor = 0;
        let newline = this.buffer.indexOf('\n', cursor);
        while (newline !== -1) {
            const rawLine = this.buffer.slice(cursor, newline);
            const line = rawLine.endsWith('\r') ? rawLine.slice(0, -1) : rawLine;
            cursor = newline + 1;
            if (line === '') {
                // End of event; a blank line without data just resets the fields.
                if (dataLines.length > 0) {
                    events.push({ event: currentEvent, data: dataLines.join('\n') });
                }
                currentEvent = undefined;
                dataLines = [];
                consumed = cursor;
            }
            else {
                const eventName = readField(line, 'event');
                const data = readField(line, 'data');
                if (eventName !== undefined) {
                    currentEvent = eventName.trim();
                }
                else if (data !== undefined) {
                    dataLines.push(data);
                }
            }
            newline = this.buffer.indexOf('\n', cursor);
        }
        // Keep unconsumed text in buffer
        if (consumed > 0) {
            this.buffer = this.buffer.slice(consumed);
        }
        return events;
    }
    /** Serialize one SSE event with a JSON payload. */
    formatSSE(event, data) {
        return `event: ${event}\ndata: ${JSON.stringify(data)}\n\n`;
    }
    /** Stop the open thinking block (if any) and advance to the next block index. */
    closeThinkingBlock() {
        if (!this.state.thinkingBlockActive)
            return [];
        this.state.thinkingBlockActive = false;
        const events = [
            this.formatSSE('content_block_stop', {
                type: 'content_block_stop',
                index: this.state.blockIndex,
            }),
        ];
        this.state.blockIndex++;
        return events;
    }
    /** Stop the open text block (if any) and advance to the next block index. */
    closeTextBlock() {
        if (!this.state.textBlockActive)
            return [];
        this.state.textBlockActive = false;
        const events = [
            this.formatSSE('content_block_stop', {
                type: 'content_block_stop',
                index: this.state.blockIndex,
            }),
        ];
        this.state.blockIndex++;
        return events;
    }
    /**
     * Stop every open tool_use block at the index it was started with.
     * The shared block index is not advanced here because each tool call
     * reserved its index when it started.
     */
    closeToolCalls() {
        if (this.state.activeToolCalls.size === 0)
            return [];
        const events = [];
        for (const { blockIndex } of this.state.activeToolCalls.values()) {
            events.push(this.formatSSE('content_block_stop', {
                type: 'content_block_stop',
                index: blockIndex,
            }));
        }
        this.state.activeToolCalls.clear();
        return events;
    }
    /** Close thinking, text and tool_use blocks, in that order. */
    closeActiveBlocks() {
        const events = [];
        events.push(...this.closeThinkingBlock());
        events.push(...this.closeTextBlock());
        events.push(...this.closeToolCalls());
        return events;
    }
}

package/dist/router/index.js CHANGED Viewed

@@ -6,11 +6,11 @@
 const AUTO_TIERS = {
     SIMPLE: {
         primary: 'google/gemini-2.5-flash',
-        fallback: ['deepseek/deepseek-chat', 'nvidia/gpt-oss-120b'],
+        fallback: ['deepseek/deepseek-chat', 'nvidia/nemotron-ultra-253b'],
     },
     MEDIUM: {
         primary: 'moonshot/kimi-k2.5',
-        fallback: ['google/gemini-2.5-flash', 'deepseek/deepseek-chat'],
+        fallback: ['google/gemini-2.5-flash', 'minimax/minimax-m2.7'],
     },
     COMPLEX: {
         primary: 'google/gemini-3.1-pro',
@@ -23,20 +23,20 @@ const AUTO_TIERS = {
 };
 const ECO_TIERS = {
     SIMPLE: {
-        primary: 'nvidia/gpt-oss-120b',
-        fallback: ['google/gemini-2.5-flash-lite'],
+        primary: 'nvidia/nemotron-ultra-253b',
+        fallback: ['nvidia/gpt-oss-120b', 'nvidia/deepseek-v3.2'],
     },
     MEDIUM: {
         primary: 'google/gemini-2.5-flash-lite',
-        fallback: ['nvidia/gpt-oss-120b'],
+        fallback: ['nvidia/nemotron-ultra-253b', 'nvidia/qwen3-coder-480b'],
     },
     COMPLEX: {
         primary: 'google/gemini-2.5-flash-lite',
-        fallback: ['deepseek/deepseek-chat'],
+        fallback: ['deepseek/deepseek-chat', 'nvidia/mistral-large-3-675b'],
     },
     REASONING: {
         primary: 'xai/grok-4-1-fast-reasoning',
-        fallback: ['deepseek/deepseek-reasoner'],
+        fallback: ['deepseek/deepseek-reasoner', 'nvidia/nemotron-ultra-253b'],
     },
 };
 const PREMIUM_TIERS = {
@@ -189,7 +189,7 @@ export function routeRequest(prompt, profile = 'auto') {
     // Free profile - always use free model
     if (profile === 'free') {
         return {
-            model: 'nvidia/gpt-oss-120b',
+            model: 'nvidia/nemotron-ultra-253b',
             tier: 'SIMPLE',
             confidence: 1.0,
             signals: ['free-profile'],
@@ -217,22 +217,48 @@ export function routeRequest(prompt, profile = 'auto') {
     // Baseline: Claude Opus at $5/$25 per 1M tokens
     const OPUS_COST_PER_1K = 0.015; // rough average
     const modelCosts = {
+        // FREE
         'nvidia/gpt-oss-120b': 0,
-        'google/gemini-2.5-flash': 0.001,
+        'nvidia/gpt-oss-20b': 0,
+        'nvidia/nemotron-ultra-253b': 0,
+        'nvidia/nemotron-3-super-120b': 0,
+        'nvidia/nemotron-super-49b': 0,
+        'nvidia/deepseek-v3.2': 0,
+        'nvidia/mistral-large-3-675b': 0,
+        'nvidia/qwen3-coder-480b': 0,
+        'nvidia/devstral-2-123b': 0,
+        'nvidia/glm-4.7': 0,
+        'nvidia/llama-4-maverick': 0,
+        // Budget
+        'openai/gpt-5-nano': 0.0002,
+        'openai/gpt-4.1-nano': 0.0003,
         'google/gemini-2.5-flash-lite': 0.0003,
+        'xai/grok-4-fast': 0.0004,
+        'xai/grok-4-1-fast': 0.0004,
+        'xai/grok-4-1-fast-reasoning': 0.0004,
         'deepseek/deepseek-chat': 0.0004,
-        'deepseek/deepseek-reasoner': 0.003,
-        'moonshot/kimi-k2.5': 0.002,
-        'google/gemini-2.5-pro': 0.006,
-        'google/gemini-3.1-pro': 0.007,
+        'deepseek/deepseek-reasoner': 0.0004,
+        'minimax/minimax-m2.7': 0.0008,
+        'minimax/minimax-m2.5': 0.0008,
+        'google/gemini-2.5-flash': 0.0014,
+        'openai/gpt-5-mini': 0.0011,
+        'moonshot/kimi-k2.5': 0.0018,
+        // Mid-range
         'anthropic/claude-haiku-4.5': 0.003,
+        'zai/glm-5': 0.0021,
+        'openai/o4-mini': 0.0028,
+        'google/gemini-2.5-pro': 0.0056,
+        'openai/gpt-5.3-codex': 0.0079,
+        'openai/gpt-5.2': 0.0079,
+        'openai/gpt-5.3': 0.0079,
+        'openai/gpt-4.1': 0.005,
+        'openai/o3': 0.005,
+        'google/gemini-3.1-pro': 0.007,
+        'openai/gpt-5.4': 0.0088,
+        // Premium
         'anthropic/claude-sonnet-4.6': 0.009,
+        'xai/grok-3': 0.009,
         'anthropic/claude-opus-4.6': 0.015,
-        'openai/gpt-5.3-codex': 0.008,
-        'openai/gpt-5.4': 0.009,
-        'openai/o3': 0.012,
-        'openai/o4-mini': 0.006,
-        'xai/grok-4-1-fast-reasoning': 0.0004,
     };
     const modelCost = modelCosts[model] ?? 0.005;
     const savings = Math.max(0, (OPUS_COST_PER_1K - modelCost) / OPUS_COST_PER_1K);
@@ -257,7 +283,7 @@ export function getFallbackChain(tier, profile = 'auto') {
             tierConfigs = PREMIUM_TIERS;
             break;
         case 'free':
-            return ['nvidia/gpt-oss-120b'];
+            return ['nvidia/nemotron-ultra-253b'];
         default:
             tierConfigs = AUTO_TIERS;
     }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@blockrun/cc",
-  "version": "0.9.2",
+  "version": "0.9.3",
   "description": "Run Claude Code with any model — no rate limits, no account locks, no phone verification. Pay per use with USDC.",
   "type": "module",
   "bin": {