modelmix 4.4.14 → 4.4.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -135,9 +135,10 @@ Here's a comprehensive list of available methods:
135
135
 
136
136
  | Method | Provider | Model | Price (I/O) per 1 M tokens |
137
137
  | ------------------ | ---------- | ------------------------------ | -------------------------- |
138
+ | `gpt54()` | OpenAI | gpt-5.4 | [\$2.50 / \$15.00][1] |
139
+ | `gpt53codex()` | OpenAI | gpt-5.3-codex | [\$1.75 / \$14.00][1] |
138
140
  | `gpt52()` | OpenAI | gpt-5.2 | [\$1.75 / \$14.00][1] |
139
141
  | `gpt51()` | OpenAI | gpt-5.1 | [\$1.25 / \$10.00][1] |
140
- | `gpt5()` | OpenAI | gpt-5 | [\$1.25 / \$10.00][1] |
141
142
  | `gpt5mini()` | OpenAI | gpt-5-mini | [\$0.25 / \$2.00][1] |
142
143
  | `gpt5nano()` | OpenAI | gpt-5-nano | [\$0.05 / \$0.40][1] |
143
144
  | `gpt41()` | OpenAI | gpt-4.1 | [\$2.00 / \$8.00][1] |
@@ -405,6 +406,7 @@ Every response from `raw()` now includes a `tokens` object with the following st
405
406
  input: 150, // Number of tokens in the prompt/input
406
407
  output: 75, // Number of tokens in the completion/output
407
408
  total: 225, // Total tokens used (input + output)
409
+ cached: 100, // Cached input tokens reported by the provider (0 when absent)
408
410
  cost: 0.0012, // Estimated cost in USD (null if model not in pricing table)
409
411
  speed: 42 // Output tokens per second (int)
410
412
  }
@@ -418,10 +420,10 @@ After calling `message()` or `json()`, use `lastRaw` to access the complete resp
418
420
  ```javascript
419
421
  const text = await model.message();
420
422
  console.log(model.lastRaw.tokens);
421
- // { input: 122, output: 86, total: 541, cost: 0.000319, speed: 38 }
423
+ // { input: 122, output: 86, total: 208, cached: 41, cost: 0.000319, speed: 38 }
422
424
  ```
423
425
 
424
- The `cost` field is the estimated cost in USD based on the model's pricing per 1M tokens (input/output). If the model is not found in the pricing table, `cost` will be `null`. The `speed` field is the generation speed measured in output tokens per second (integer).
426
+ The `cached` field is a single aggregated count of cached input tokens reported by the provider. The `cost` field is the estimated cost in USD based on the model's pricing per 1M tokens (input/output). If the model is not found in the pricing table, `cost` will be `null`. The `speed` field is the generation speed measured in output tokens per second (integer).
425
427
 
426
428
  ## 🐛 Enabling Debug Mode
427
429
 
@@ -515,7 +517,7 @@ new ModelMix(args = { options: {}, config: {} })
515
517
  - `message`: The text response from the model
516
518
  - `think`: Reasoning/thinking content (if available)
517
519
  - `toolCalls`: Array of tool calls made by the model (if any)
518
- - `tokens`: Object with `input`, `output`, `total` token counts, `cost` (USD), and `speed` (output tokens/sec)
520
+ - `tokens`: Object with `input`, `output`, `total`, and `cached` token counts, plus `cost` (USD) and `speed` (output tokens/sec)
519
521
  - `response`: The raw API response
520
522
  - `stream(callback)`: Sends the message and streams the response, invoking the callback with each streamed part.
521
523
  - `json(schemaExample, descriptions = {}, options = {})`: Forces the model to return a response in a specific JSON format.
package/demo/cache.js ADDED
@@ -0,0 +1,52 @@
1
+ import { ModelMix } from '../index.js';
2
+ try { process.loadEnvFile(); } catch {}
3
+
4
+ console.log("\n" + '--------| gpt54() prompt cache |--------');
5
+
6
+ // Keep the reusable prefix first and only vary the question at the end.
7
+ const sharedPrefix = [
8
+ "You are a concise science tutor.",
9
+ "The repeated block below is intentionally long so OpenAI can reuse cached prompt tokens on the second request.",
10
+ Array.from({ length: 80 }, (_, index) =>
11
+ `Reference ${String(index + 1).padStart(3, '0')}: Quantum systems are described with probabilities, measurements collapse possibilities into outcomes, and explanations must stay concrete, brief, and easy to understand.`
12
+ ).join("\n")
13
+ ].join("\n\n");
14
+
15
+ const buildPrompt = (question) => `${sharedPrefix}\n\nQuestion: ${question}`;
16
+
17
+ const createModel = () => ModelMix.new({
18
+ config: {
19
+ debug: 3,
20
+ }
21
+ }).gpt54({
22
+ options: {
23
+ reasoning_effort: "none",
24
+ verbosity: "low",
25
+ prompt_cache_key: "demo-gpt54-prompt-cache",
26
+ prompt_cache_retention: "24h"
27
+ }
28
+ });
29
+
30
+ const runRequest = async (label, question) => {
31
+ const model = createModel();
32
+ model.addText(buildPrompt(question));
33
+
34
+ const result = await model.raw();
35
+
36
+ console.log(`\n${label}`);
37
+ console.log("message:", result.message);
38
+ console.log("tokens:", result.tokens);
39
+
40
+ return result;
41
+ };
42
+
43
+ await runRequest(
44
+ "Request 1 (warms the cache)",
45
+ "Explain quantum entanglement in simple Spanish in 3 short bullet points."
46
+ );
47
+
48
+ await runRequest(
49
+ "Request 2 (reuses the cached prefix)",
50
+ "Now explain quantum entanglement in simple Spanish with a different analogy and 3 short bullet points."
51
+ );
52
+
package/demo/gemini.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import { ModelMix, MixGoogle } from '../index.js';
2
- try { process.loadEnvFile(); } catch {}
2
+ try { process.loadEnvFile(); } catch { }
3
3
 
4
4
  const mmix = new ModelMix({
5
5
  options: {
@@ -12,9 +12,9 @@ const mmix = new ModelMix({
12
12
  }
13
13
  });
14
14
 
15
- // Using gemini25flash (Gemini 2.5 Flash) with built-in method
15
+ // Using gemini3flash (Gemini 3 Flash) with built-in method
16
16
  console.log("\n" + '--------| gemini25flash() |--------');
17
- const flash = await mmix.gemini25flash()
17
+ const flash = await mmix.gemini3flash()
18
18
  .addText('Hi there! Do you like cats?')
19
19
  .message();
20
20
 
@@ -22,20 +22,23 @@ console.log(flash);
22
22
 
23
23
  // Using gemini3pro (Gemini 3 Pro) with custom config
24
24
  console.log("\n" + '--------| gemini3pro() with JSON response |--------');
25
- const pro = mmix.new().gemini3pro();
25
+ const pro = mmix.new().gemini31pro();
26
26
 
27
27
  pro.addText('Give me a fun fact about cats');
28
- const jsonResponse = await pro.json({
28
+
29
+ const jsonExampleAndSchema = {
29
30
  fact: 'A fun fact about cats',
30
- category: 'animal behavior'
31
- });
31
+ category: 'animal behavior'
32
+ };
33
+
34
+ const jsonResponse = await pro.json(jsonExampleAndSchema, jsonExampleAndSchema);
32
35
 
33
36
  console.log(jsonResponse);
34
37
 
35
38
  // Using attach method with MixGoogle for custom model
36
39
  console.log("\n" + '--------| Custom Gemini with attach() |--------');
37
- mmix.attach('gemini-2.5-flash', new MixGoogle());
40
+ const customModel = mmix.new().attach('gemini-2.5-flash', new MixGoogle());
38
41
 
39
- const custom = await mmix.addText('Tell me a short joke about cats.').message();
42
+ const custom = await customModel.addText('Tell me a short joke about cats.').message();
40
43
  console.log(custom);
41
44
 
@@ -0,0 +1,22 @@
1
+ import { ModelMix } from '../index.js';
2
+ try { process.loadEnvFile(); } catch {}
3
+
4
+ const mmix = new ModelMix({
5
+ config: {
6
+ debug: 3
7
+ }
8
+ });
9
+
10
+ console.log('\n--------| gptRealtime() |--------');
11
+
12
+ const realtime = mmix.gptRealtimeMini({
13
+ options: {
14
+ stream: true
15
+ }
16
+ });
17
+
18
+ realtime.addText('Explain quantum entanglement in simple terms.');
19
+ const response = await realtime.stream(({ delta }) => {
20
+ process.stdout.write(delta || '');
21
+ });
22
+ console.log('\n\n[done]\n', response.tokens);
@@ -8,10 +8,10 @@ const mmix = new ModelMix({
8
8
  }
9
9
  });
10
10
 
11
- console.log("\n" + '--------| gpt51() |--------');
11
+ console.log("\n" + '--------| gpt54() |--------');
12
12
 
13
13
  const gptArgs = { options: { reasoning_effort: "none", verbosity: "low" } };
14
- const gpt = mmix.gpt51(gptArgs);
14
+ const gpt = mmix.gpt54(gptArgs);
15
15
 
16
16
  gpt.addText("Explain quantum entanglement in simple terms.");
17
17
  const response = await gpt.message();
package/index.js CHANGED
@@ -5,6 +5,7 @@ const { inspect } = require('util');
5
5
  const log = require('lemonlog')('ModelMix');
6
6
  const Bottleneck = require('bottleneck');
7
7
  const path = require('path');
8
+ const WebSocket = require('ws');
8
9
  const generateJsonSchema = require('./schema');
9
10
  const { Client } = require("@modelcontextprotocol/sdk/client/index.js");
10
11
  const { StdioClientTransport } = require("@modelcontextprotocol/sdk/client/stdio.js");
@@ -14,6 +15,11 @@ const { MCPToolsManager } = require('./mcp-tools');
14
15
  // Based on provider pricing pages linked in README
15
16
  const MODEL_PRICING = {
16
17
  // OpenAI
18
+ 'gpt-realtime-mini': [0.60, 2.40],
19
+ 'gpt-realtime': [4.00, 16.00],
20
+ 'gpt-5.4': [2.50, 15.00],
21
+ 'gpt-5.4-pro': [30.00, 180.00],
22
+ 'gpt-5.3-codex': [1.75, 14.00],
17
23
  'gpt-5.2': [1.75, 14.00],
18
24
  'gpt-5.2-chat-latest': [1.75, 14.00],
19
25
  'gpt-5.1': [1.25, 10.00],
@@ -175,6 +181,15 @@ class ModelMix {
175
181
  return (tokens.input * inputPerMillion / 1_000_000) + (tokens.output * outputPerMillion / 1_000_000);
176
182
  }
177
183
 
184
+ static extractCacheTokens(usage = {}) {
185
+ return usage.input_tokens_details?.cached_tokens
186
+ || usage.prompt_tokens_details?.cached_tokens
187
+ || usage.cache_read_input_tokens
188
+ || usage.cachedContentTokenCount
189
+ || usage.cached_content_token_count
190
+ || 0;
191
+ }
192
+
178
193
  static formatInputSummary(messages, system, debug = 2) {
179
194
  const lastMessage = messages[messages.length - 1];
180
195
  let inputText = '';
@@ -244,15 +259,6 @@ class ModelMix {
244
259
  gpt41nano({ options = {}, config = {} } = {}) {
245
260
  return this.attach('gpt-4.1-nano', new MixOpenAI({ options, config }));
246
261
  }
247
- o4mini({ options = {}, config = {} } = {}) {
248
- return this.attach('o4-mini', new MixOpenAI({ options, config }));
249
- }
250
- o3({ options = {}, config = {} } = {}) {
251
- return this.attach('o3', new MixOpenAI({ options, config }));
252
- }
253
- gpt45({ options = {}, config = {} } = {}) {
254
- return this.attach('gpt-4.5-preview', new MixOpenAI({ options, config }));
255
- }
256
262
  gpt5({ options = {}, config = {} } = {}) {
257
263
  return this.attach('gpt-5', new MixOpenAI({ options, config }));
258
264
  }
@@ -263,13 +269,28 @@ class ModelMix {
263
269
  return this.attach('gpt-5-nano', new MixOpenAI({ options, config }));
264
270
  }
265
271
  gpt51({ options = {}, config = {} } = {}) {
266
- return this.attach('gpt-5.1', new MixOpenAI({ options, config }));
272
+ return this.attach('gpt-5.1', new MixOpenAIResponses({ options, config }));
267
273
  }
268
274
  gpt52({ options = {}, config = {} } = {}) {
269
- return this.attach('gpt-5.2', new MixOpenAI({ options, config }));
275
+ return this.attach('gpt-5.2', new MixOpenAIResponses({ options, config }));
270
276
  }
271
- gpt52chat({ options = {}, config = {} } = {}) {
272
- return this.attach('gpt-5.2-chat-latest', new MixOpenAI({ options, config }));
277
+ gpt54({ options = {}, config = {} } = {}) {
278
+ return this.attach('gpt-5.4', new MixOpenAIResponses({ options, config }));
279
+ }
280
+ gpt54pro({ options = {}, config = {} } = {}) {
281
+ return this.attach('gpt-5.4-pro', new MixOpenAIResponses({ options, config }));
282
+ }
283
+ gptRealtime({ options = {}, config = {} } = {}) {
284
+ return this.attach('gpt-realtime', new MixOpenAIWebSocket({ options, config }));
285
+ }
286
+ gptRealtimeMini({ options = {}, config = {} } = {}) {
287
+ return this.attach('gpt-realtime-mini', new MixOpenAIWebSocket({ options, config }));
288
+ }
289
+ gpt53codex({ options = {}, config = {} } = {}) {
290
+ return this.attach('gpt-5.3-codex', new MixOpenAIResponses({ options, config }));
291
+ }
292
+ gpt53chat({ options = {}, config = {} } = {}) {
293
+ return this.attach('gpt-5.3-chat-latest', new MixOpenAIResponses({ options, config }));
273
294
  }
274
295
  gptOss({ options = {}, config = {}, mix = {} } = {}) {
275
296
  mix = { ...this.mix, ...mix };
@@ -942,7 +963,10 @@ class ModelMix {
942
963
  // debug level 2: Readable summary of output
943
964
  if (currentConfig.debug >= 2) {
944
965
  const tokenInfo = result.tokens
945
- ? ` ${result.tokens.input} → ${result.tokens.output} tok` + (result.tokens.speed ? ` ${result.tokens.speed} t/s` : '') + (result.tokens.cost != null ? ` $${result.tokens.cost.toFixed(4)}` : '')
966
+ ? ` ${result.tokens.input} → ${result.tokens.output} tok`
967
+ + (result.tokens.cached ? ` (cached:${result.tokens.cached})` : '')
968
+ + (result.tokens.speed ? ` | ${result.tokens.speed} t/s` : '')
969
+ + (result.tokens.cost != null ? ` $${result.tokens.cost.toFixed(4)}` : '')
946
970
  : '';
947
971
  console.log(`✓${tokenInfo}\n${ModelMix.formatOutputSummary(result, currentConfig.debug).trim()}`);
948
972
  }
@@ -1306,7 +1330,7 @@ class MixCustom {
1306
1330
  message: message.trim(),
1307
1331
  toolCalls: [],
1308
1332
  think: null,
1309
- tokens: raw.length > 0 ? MixCustom.extractTokens(raw[raw.length - 1]) : { input: 0, output: 0, total: 0 }
1333
+ tokens: raw.length > 0 ? MixCustom.extractTokens(raw[raw.length - 1]) : { input: 0, output: 0, total: 0, cached: 0 }
1310
1334
  }));
1311
1335
  response.data.on('error', reject);
1312
1336
  });
@@ -1358,13 +1382,15 @@ class MixCustom {
1358
1382
  return {
1359
1383
  input: data.usage.prompt_tokens || 0,
1360
1384
  output: data.usage.completion_tokens || 0,
1361
- total: data.usage.total_tokens || 0
1385
+ total: data.usage.total_tokens || 0,
1386
+ cached: ModelMix.extractCacheTokens(data.usage)
1362
1387
  };
1363
1388
  }
1364
1389
  return {
1365
1390
  input: 0,
1366
1391
  output: 0,
1367
- total: 0
1392
+ total: 0,
1393
+ cached: 0
1368
1394
  };
1369
1395
  }
1370
1396
 
@@ -1499,6 +1525,343 @@ class MixOpenAI extends MixCustom {
1499
1525
  }
1500
1526
  }
1501
1527
 
1528
+ class MixOpenAIResponses extends MixOpenAI {
1529
+ async create({ config = {}, options = {} } = {}) {
1530
+
1531
+ // Keep GPT/o-model option normalization behavior
1532
+ if (options.model?.startsWith('o')) {
1533
+ delete options.max_tokens;
1534
+ delete options.temperature;
1535
+ }
1536
+ if (options.model?.includes('gpt-5')) {
1537
+ if (options.max_tokens) {
1538
+ options.max_completion_tokens = options.max_tokens;
1539
+ delete options.max_tokens;
1540
+ }
1541
+ delete options.temperature;
1542
+ }
1543
+
1544
+ const responsesUrl = this.config.url.replace('/chat/completions', '/responses');
1545
+ const request = MixOpenAIResponses.buildResponsesRequest(options);
1546
+ const response = await axios.post(responsesUrl, request, {
1547
+ headers: this.headers
1548
+ });
1549
+
1550
+ return MixOpenAIResponses.processResponsesResponse(response);
1551
+ }
1552
+
1553
+ static buildResponsesRequest(options = {}) {
1554
+ const request = {
1555
+ model: options.model,
1556
+ input: MixOpenAIResponses.messagesToResponsesInput(options.messages),
1557
+ stream: false
1558
+ };
1559
+
1560
+ if (options.reasoning_effort) request.reasoning = { effort: options.reasoning_effort };
1561
+ if (options.verbosity) request.text = { verbosity: options.verbosity };
1562
+
1563
+ if (typeof options.max_completion_tokens === 'number') {
1564
+ request.max_output_tokens = options.max_completion_tokens;
1565
+ } else if (typeof options.max_tokens === 'number') {
1566
+ request.max_output_tokens = options.max_tokens;
1567
+ }
1568
+
1569
+ if (typeof options.temperature === 'number') request.temperature = options.temperature;
1570
+ if (typeof options.top_p === 'number') request.top_p = options.top_p;
1571
+ if (typeof options.presence_penalty === 'number') request.presence_penalty = options.presence_penalty;
1572
+ if (typeof options.frequency_penalty === 'number') request.frequency_penalty = options.frequency_penalty;
1573
+ if (options.stop !== undefined) request.stop = options.stop;
1574
+ if (typeof options.n === 'number') request.n = options.n;
1575
+ if (options.logit_bias !== undefined) request.logit_bias = options.logit_bias;
1576
+ if (options.user !== undefined) request.user = options.user;
1577
+ if (options.prompt_cache_key !== undefined) request.prompt_cache_key = options.prompt_cache_key;
1578
+ if (options.prompt_cache_retention !== undefined) request.prompt_cache_retention = options.prompt_cache_retention;
1579
+
1580
+ return request;
1581
+ }
1582
+
1583
+ static processResponsesResponse(response) {
1584
+ const message = MixOpenAIResponses.extractResponsesMessage(response.data);
1585
+ return {
1586
+ message,
1587
+ think: null,
1588
+ toolCalls: [],
1589
+ tokens: MixOpenAIResponses.extractResponsesTokens(response.data),
1590
+ response: response.data
1591
+ };
1592
+ }
1593
+
1594
+ static extractResponsesTokens(data) {
1595
+ if (data.usage) {
1596
+ return {
1597
+ input: data.usage.input_tokens || 0,
1598
+ output: data.usage.output_tokens || 0,
1599
+ total: data.usage.total_tokens || ((data.usage.input_tokens || 0) + (data.usage.output_tokens || 0)),
1600
+ cached: ModelMix.extractCacheTokens(data.usage)
1601
+ };
1602
+ }
1603
+ return {
1604
+ input: 0,
1605
+ output: 0,
1606
+ total: 0,
1607
+ cached: 0
1608
+ };
1609
+ }
1610
+
1611
+ static extractResponsesMessage(data) {
1612
+ if (!Array.isArray(data.output)) return '';
1613
+ return data.output
1614
+ .filter(item => item.type === 'message')
1615
+ .flatMap(item => Array.isArray(item.content) ? item.content : [])
1616
+ .filter(content => content.type === 'output_text' && typeof content.text === 'string')
1617
+ .map(content => content.text)
1618
+ .join('\n')
1619
+ .trim();
1620
+ }
1621
+
1622
+ static messagesToResponsesInput(messages = []) {
1623
+ const mapped = [];
1624
+
1625
+ for (const message of messages) {
1626
+ if (!message || !message.role) continue;
1627
+ if (message.tool_calls || message.role === 'tool') continue;
1628
+
1629
+ let text = '';
1630
+ if (typeof message.content === 'string') {
1631
+ text = message.content;
1632
+ } else if (Array.isArray(message.content)) {
1633
+ text = message.content
1634
+ .filter(item => item && item.type === 'text' && typeof item.text === 'string')
1635
+ .map(item => item.text)
1636
+ .join('\n');
1637
+ }
1638
+
1639
+ if (!text) continue;
1640
+ mapped.push({
1641
+ role: message.role,
1642
+ content: [{ type: 'input_text', text }]
1643
+ });
1644
+ }
1645
+
1646
+ return mapped;
1647
+ }
1648
+ }
1649
+
1650
+ class MixOpenAIWebSocket extends MixOpenAIResponses {
1651
+ getDefaultConfig(customConfig) {
1652
+ return super.getDefaultConfig({
1653
+ realtimeUrl: 'wss://api.openai.com/v1/realtime',
1654
+ websocketTimeoutMs: 120000,
1655
+ ...customConfig
1656
+ });
1657
+ }
1658
+
1659
+ async create({ config = {}, options = {} } = {}) {
1660
+ if (options.model?.startsWith('o')) {
1661
+ delete options.max_tokens;
1662
+ delete options.temperature;
1663
+ }
1664
+ if (options.model?.includes('gpt-5')) {
1665
+ if (options.max_tokens) {
1666
+ options.max_completion_tokens = options.max_tokens;
1667
+ delete options.max_tokens;
1668
+ }
1669
+ delete options.temperature;
1670
+ }
1671
+
1672
+ const mergedConfig = { ...this.config, ...config };
1673
+ const realtimeUrl = `${mergedConfig.realtimeUrl}?model=${encodeURIComponent(options.model)}`;
1674
+ const timeoutMs = mergedConfig.websocketTimeoutMs || 120000;
1675
+
1676
+ return await new Promise((resolve, reject) => {
1677
+ const ws = new WebSocket(realtimeUrl, {
1678
+ headers: {
1679
+ authorization: `Bearer ${mergedConfig.apiKey}`
1680
+ }
1681
+ });
1682
+
1683
+ const events = [];
1684
+ let message = '';
1685
+ let settled = false;
1686
+ let finalResponse = null;
1687
+
1688
+ const timeout = setTimeout(() => {
1689
+ if (settled) return;
1690
+ settled = true;
1691
+ ws.close();
1692
+ reject({
1693
+ message: `Realtime WebSocket timed out after ${timeoutMs}ms`,
1694
+ statusCode: null,
1695
+ details: null,
1696
+ config: mergedConfig,
1697
+ options
1698
+ });
1699
+ }, timeoutMs);
1700
+
1701
+ const cleanUp = () => clearTimeout(timeout);
1702
+
1703
+ ws.on('open', () => {
1704
+ const session = {
1705
+ type: 'realtime',
1706
+ output_modalities: ['text']
1707
+ };
1708
+
1709
+ if (mergedConfig.system) session.instructions = mergedConfig.system;
1710
+ if (Array.isArray(options.tools) && options.tools.length > 0) {
1711
+ session.tools = options.tools;
1712
+ }
1713
+
1714
+ ws.send(JSON.stringify({ type: 'session.update', session }));
1715
+
1716
+ const items = MixOpenAIWebSocket.messagesToConversationItems(options.messages);
1717
+ for (const item of items) {
1718
+ ws.send(JSON.stringify({
1719
+ type: 'conversation.item.create',
1720
+ item
1721
+ }));
1722
+ }
1723
+
1724
+ const responseConfig = { output_modalities: ['text'] };
1725
+ if (typeof options.max_completion_tokens === 'number') {
1726
+ responseConfig.max_output_tokens = Math.min(options.max_completion_tokens, 4096);
1727
+ } else if (typeof options.max_tokens === 'number') {
1728
+ responseConfig.max_output_tokens = Math.min(options.max_tokens, 4096);
1729
+ }
1730
+ if (Array.isArray(options.tools) && options.tools.length > 0) responseConfig.tools = options.tools;
1731
+
1732
+ ws.send(JSON.stringify({
1733
+ type: 'response.create',
1734
+ response: responseConfig
1735
+ }));
1736
+ });
1737
+
1738
+ ws.on('message', raw => {
1739
+ let event;
1740
+ try {
1741
+ event = JSON.parse(raw.toString());
1742
+ } catch {
1743
+ return;
1744
+ }
1745
+
1746
+ events.push(event);
1747
+
1748
+ const isTextDeltaEvent = event.type === 'response.text.delta' || event.type === 'response.output_text.delta';
1749
+ if (isTextDeltaEvent) {
1750
+ const delta = MixOpenAIWebSocket.extractDelta(event);
1751
+ if (delta) {
1752
+ message += delta;
1753
+ if (this.streamCallback) {
1754
+ this.streamCallback({ response: event, message, delta });
1755
+ }
1756
+ }
1757
+ return;
1758
+ }
1759
+
1760
+ if (event.type === 'response.done') {
1761
+ finalResponse = event.response || null;
1762
+ if (!message && finalResponse) {
1763
+ message = MixOpenAIResponses.extractResponsesMessage(finalResponse);
1764
+ }
1765
+
1766
+ if (!settled) {
1767
+ settled = true;
1768
+ cleanUp();
1769
+ ws.close();
1770
+ resolve({
1771
+ message: message.trim(),
1772
+ think: null,
1773
+ toolCalls: [],
1774
+ tokens: MixOpenAIResponses.extractResponsesTokens(finalResponse || {}),
1775
+ response: {
1776
+ response: finalResponse,
1777
+ events
1778
+ }
1779
+ });
1780
+ }
1781
+ return;
1782
+ }
1783
+
1784
+ if (event.type === 'error' && !settled) {
1785
+ settled = true;
1786
+ cleanUp();
1787
+ ws.close();
1788
+ reject({
1789
+ message: event.error?.message || 'Realtime WebSocket error',
1790
+ statusCode: null,
1791
+ details: event.error || event,
1792
+ config: mergedConfig,
1793
+ options
1794
+ });
1795
+ }
1796
+ });
1797
+
1798
+ ws.on('error', error => {
1799
+ if (settled) return;
1800
+ settled = true;
1801
+ cleanUp();
1802
+ reject({
1803
+ message: error.message || 'Realtime WebSocket connection error',
1804
+ statusCode: null,
1805
+ details: null,
1806
+ stack: error.stack,
1807
+ config: mergedConfig,
1808
+ options
1809
+ });
1810
+ });
1811
+
1812
+ ws.on('close', () => {
1813
+ if (settled) return;
1814
+ settled = true;
1815
+ cleanUp();
1816
+ reject({
1817
+ message: 'Realtime WebSocket closed before response.done',
1818
+ statusCode: null,
1819
+ details: null,
1820
+ config: mergedConfig,
1821
+ options
1822
+ });
1823
+ });
1824
+ });
1825
+ }
1826
+
1827
+ static messagesToConversationItems(messages = []) {
1828
+ const items = [];
1829
+
1830
+ for (const message of messages) {
1831
+ if (!message || !message.role) continue;
1832
+ if (message.role === 'tool' || message.tool_calls) continue;
1833
+
1834
+ const role = message.role === 'assistant' ? 'assistant' : (message.role === 'system' ? 'system' : 'user');
1835
+ const content = [];
1836
+
1837
+ if (typeof message.content === 'string') {
1838
+ content.push({
1839
+ type: role === 'assistant' ? 'text' : 'input_text',
1840
+ text: message.content
1841
+ });
1842
+ } else if (Array.isArray(message.content)) {
1843
+ for (const item of message.content) {
1844
+ if (!item || item.type !== 'text' || typeof item.text !== 'string') continue;
1845
+ content.push({
1846
+ type: role === 'assistant' ? 'text' : 'input_text',
1847
+ text: item.text
1848
+ });
1849
+ }
1850
+ }
1851
+
1852
+ if (content.length === 0) continue;
1853
+ items.push({ type: 'message', role, content });
1854
+ }
1855
+
1856
+ return items;
1857
+ }
1858
+
1859
+ static extractDelta(event) {
1860
+ if (typeof event.delta === 'string') return event.delta;
1861
+ return '';
1862
+ }
1863
+ }
1864
+
1502
1865
  class MixOpenRouter extends MixOpenAI {
1503
1866
  getDefaultConfig(customConfig) {
1504
1867
 
@@ -1684,13 +2047,15 @@ class MixAnthropic extends MixCustom {
1684
2047
  return {
1685
2048
  input: data.usage.input_tokens || 0,
1686
2049
  output: data.usage.output_tokens || 0,
1687
- total: (data.usage.input_tokens || 0) + (data.usage.output_tokens || 0)
2050
+ total: (data.usage.input_tokens || 0) + (data.usage.output_tokens || 0),
2051
+ cached: ModelMix.extractCacheTokens(data.usage)
1688
2052
  };
1689
2053
  }
1690
2054
  return {
1691
2055
  input: 0,
1692
2056
  output: 0,
1693
- total: 0
2057
+ total: 0,
2058
+ cached: 0
1694
2059
  };
1695
2060
  }
1696
2061
 
@@ -2219,13 +2584,15 @@ class MixGoogle extends MixCustom {
2219
2584
  return {
2220
2585
  input: data.usageMetadata.promptTokenCount || 0,
2221
2586
  output: data.usageMetadata.candidatesTokenCount || 0,
2222
- total: data.usageMetadata.totalTokenCount || 0
2587
+ total: data.usageMetadata.totalTokenCount || 0,
2588
+ cached: ModelMix.extractCacheTokens(data.usageMetadata)
2223
2589
  };
2224
2590
  }
2225
2591
  return {
2226
2592
  input: 0,
2227
2593
  output: 0,
2228
- total: 0
2594
+ total: 0,
2595
+ cached: 0
2229
2596
  };
2230
2597
  }
2231
2598
 
@@ -2273,4 +2640,4 @@ class MixGoogle extends MixCustom {
2273
2640
  }
2274
2641
  }
2275
2642
 
2276
- module.exports = { MixCustom, ModelMix, MixAnthropic, MixMiniMax, MixOpenAI, MixOpenRouter, MixPerplexity, MixOllama, MixLMStudio, MixGroq, MixTogether, MixGrok, MixCerebras, MixGoogle, MixFireworks };
2643
+ module.exports = { MixCustom, ModelMix, MixAnthropic, MixMiniMax, MixOpenAI, MixOpenAIResponses, MixOpenAIWebSocket, MixOpenRouter, MixPerplexity, MixOllama, MixLMStudio, MixGroq, MixTogether, MixGrok, MixCerebras, MixGoogle, MixFireworks };