npm - modelmix - Versions diffs - 4.4.16 → 4.4.20 - Mend

modelmix 4.4.16 → 4.4.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/README.md CHANGED Viewed

@@ -136,9 +136,9 @@ Here's a comprehensive list of available methods:
 | Method             | Provider   | Model                          | Price (I/O) per 1 M tokens |
 | ------------------ | ---------- | ------------------------------ | -------------------------- |
 | `gpt54()`          | OpenAI     | gpt-5.4                        | [\$2.50 / \$15.00][1]      |
+| `gpt53codex()`     | OpenAI     | gpt-5.3-codex                  | [\$1.25 / \$14.00][1]      |
 | `gpt52()`          | OpenAI     | gpt-5.2                        | [\$1.75 / \$14.00][1]      |
 | `gpt51()`          | OpenAI     | gpt-5.1                        | [\$1.25 / \$10.00][1]      |
-| `gpt53codex()`     | OpenAI     | gpt-5.3-codex                  | [\$1.25 / \$14.00][1]      |
 | `gpt5mini()`       | OpenAI     | gpt-5-mini                     | [\$0.25 / \$2.00][1]       |
 | `gpt5nano()`       | OpenAI     | gpt-5-nano                     | [\$0.05 / \$0.40][1]       |
 | `gpt41()`          | OpenAI     | gpt-4.1                        | [\$2.00 / \$8.00][1]       |
@@ -185,7 +185,7 @@ const result = await ModelMix.new({
         options: { temperature: 0.7 },
         config: { system: "You are a helpful assistant" }
     })
-    .sonnet37()
+    .sonnet46()
     .addText("Tell me a story about a cat")
     .message();
 ```
@@ -406,6 +406,7 @@ Every response from `raw()` now includes a `tokens` object with the following st
     input: 150,    // Number of tokens in the prompt/input
     output: 75,    // Number of tokens in the completion/output
     total: 225,    // Total tokens used (input + output)
+    cached: 100,   // Cached input tokens reported by the provider (0 when absent)
     cost: 0.0012,  // Estimated cost in USD (null if model not in pricing table)
     speed: 42      // Output tokens per second (int)
   }
@@ -419,10 +420,10 @@ After calling `message()` or `json()`, use `lastRaw` to access the complete resp
 ```javascript
 const text = await model.message();
 console.log(model.lastRaw.tokens);
-// { input: 122, output: 86, total: 541, cost: 0.000319, speed: 38 }
+// { input: 122, output: 86, total: 208, cached: 41, cost: 0.000319, speed: 38 }
 ```
-The `cost` field is the estimated cost in USD based on the model's pricing per 1M tokens (input/output). If the model is not found in the pricing table, `cost` will be `null`. The `speed` field is the generation speed measured in output tokens per second (integer).
+The `cached` field is a single aggregated count of cached input tokens reported by the provider. The `cost` field is the estimated cost in USD based on the model's pricing per 1M tokens (input/output). If the model is not found in the pricing table, `cost` will be `null`. The `speed` field is the generation speed measured in output tokens per second (integer).
 ## 🐛 Enabling Debug Mode
@@ -516,7 +517,7 @@ new ModelMix(args = { options: {}, config: {} })
   - `message`: The text response from the model
   - `think`: Reasoning/thinking content (if available)
   - `toolCalls`: Array of tool calls made by the model (if any)
-  - `tokens`: Object with `input`, `output`, `total` token counts, `cost` (USD), and `speed` (output tokens/sec)
+  - `tokens`: Object with `input`, `output`, `total`, and `cached` token counts, plus `cost` (USD) and `speed` (output tokens/sec)
   - `response`: The raw API response
 - `stream(callback)`: Sends the message and streams the response, invoking the callback with each streamed part.
 - `json(schemaExample, descriptions = {}, options = {})`: Forces the model to return a response in a specific JSON format.

package/demo/cache.js ADDED Viewed

@@ -0,0 +1,52 @@
+import { ModelMix } from '../index.js';
+try { process.loadEnvFile(); } catch {}
+console.log("\n" + '--------| gpt54() prompt cache |--------');
+// Keep the reusable prefix first and only vary the question at the end.
+const sharedPrefix = [
+    "You are a concise science tutor.",
+    "The repeated block below is intentionally long so OpenAI can reuse cached prompt tokens on the second request.",
+    Array.from({ length: 80 }, (_, index) =>
+        `Reference ${String(index + 1).padStart(3, '0')}: Quantum systems are described with probabilities, measurements collapse possibilities into outcomes, and explanations must stay concrete, brief, and easy to understand.`
+    ).join("\n")
+].join("\n\n");
+const buildPrompt = (question) => `${sharedPrefix}\n\nQuestion: ${question}`;
+const createModel = () => ModelMix.new({
+    config: {
+        debug: 3,
+    }
+}).gpt54({
+    options: {
+        reasoning_effort: "none",
+        verbosity: "low",
+        prompt_cache_key: "demo-gpt54-prompt-cache",
+        prompt_cache_retention: "24h"
+    }
+});
+const runRequest = async (label, question) => {
+    const model = createModel();
+    model.addText(buildPrompt(question));
+    const result = await model.raw();
+    console.log(`\n${label}`);
+    console.log("message:", result.message);
+    console.log("tokens:", result.tokens);
+    return result;
+};
+await runRequest(
+    "Request 1 (warms the cache)",
+    "Explain quantum entanglement in simple Spanish in 3 short bullet points."
+);
+await runRequest(
+    "Request 2 (reuses the cached prefix)",
+    "Now explain quantum entanglement in simple Spanish with a different analogy and 3 short bullet points."
+);

package/demo/fallback.js CHANGED Viewed

@@ -15,7 +15,7 @@ const mmix = new ModelMix({
     }
 });
-mmix.sonnet37({ config: { url: 'fail' } }).gpt41nano();
+mmix.sonnet46({ config: { url: 'fail' } }).gpt41nano();
 async function main() {
     mmix.addText('hola, como estas?');

package/demo/json.js CHANGED Viewed

@@ -6,7 +6,6 @@ const model = await ModelMix.new({ options: { max_tokens: 10000 }, config: { deb
     // .gptOss()
     // .scout({ config: { temperature: 0 } })
     // .o4mini()
-    // .sonnet37think()
     // .gpt45()
     // .gemini25flash()
     .addText("Name and capital of 3 South American countries.")

package/index.js CHANGED Viewed

@@ -181,6 +181,15 @@ class ModelMix {
         return (tokens.input * inputPerMillion / 1_000_000) + (tokens.output * outputPerMillion / 1_000_000);
     }
+    static extractCacheTokens(usage = {}) {
+        return usage.input_tokens_details?.cached_tokens
+            || usage.prompt_tokens_details?.cached_tokens
+            || usage.cache_read_input_tokens
+            || usage.cachedContentTokenCount
+            || usage.cached_content_token_count
+            || 0;
+    }
     static formatInputSummary(messages, system, debug = 2) {
         const lastMessage = messages[messages.length - 1];
         let inputText = '';
@@ -250,15 +259,6 @@ class ModelMix {
     gpt41nano({ options = {}, config = {} } = {}) {
         return this.attach('gpt-4.1-nano', new MixOpenAI({ options, config }));
     }
-    o4mini({ options = {}, config = {} } = {}) {
-        return this.attach('o4-mini', new MixOpenAI({ options, config }));
-    }
-    o3({ options = {}, config = {} } = {}) {
-        return this.attach('o3', new MixOpenAI({ options, config }));
-    }
-    gpt45({ options = {}, config = {} } = {}) {
-        return this.attach('gpt-4.5-preview', new MixOpenAI({ options, config }));
-    }
     gpt5({ options = {}, config = {} } = {}) {
         return this.attach('gpt-5', new MixOpenAI({ options, config }));
     }
@@ -269,10 +269,10 @@ class ModelMix {
         return this.attach('gpt-5-nano', new MixOpenAI({ options, config }));
     }
     gpt51({ options = {}, config = {} } = {}) {
-        return this.attach('gpt-5.1', new MixOpenAI({ options, config }));
+        return this.attach('gpt-5.1', new MixOpenAIResponses({ options, config }));
     }
     gpt52({ options = {}, config = {} } = {}) {
-        return this.attach('gpt-5.2', new MixOpenAI({ options, config }));
+        return this.attach('gpt-5.2', new MixOpenAIResponses({ options, config }));
     }
     gpt54({ options = {}, config = {} } = {}) {
         return this.attach('gpt-5.4', new MixOpenAIResponses({ options, config }));
@@ -289,8 +289,8 @@ class ModelMix {
     gpt53codex({ options = {}, config = {} } = {}) {
         return this.attach('gpt-5.3-codex', new MixOpenAIResponses({ options, config }));
     }
-    gpt52chat({ options = {}, config = {} } = {}) {
-        return this.attach('gpt-5.2-chat-latest', new MixOpenAI({ options, config }));
+    gpt53chat({ options = {}, config = {} } = {}) {
+        return this.attach('gpt-5.3-chat-latest', new MixOpenAIResponses({ options, config }));
     }
     gptOss({ options = {}, config = {}, mix = {} } = {}) {
         mix = { ...this.mix, ...mix };
@@ -343,13 +343,6 @@ class ModelMix {
         options = { ...MixAnthropic.thinkingOptions, ...options };
         return this.attach('claude-sonnet-4-5-20250929', new MixAnthropic({ options, config }));
     }
-    sonnet37({ options = {}, config = {} } = {}) {
-        return this.attach('claude-3-7-sonnet-20250219', new MixAnthropic({ options, config }));
-    }
-    sonnet37think({ options = {}, config = {} } = {}) {
-        options = { ...MixAnthropic.thinkingOptions, ...options };
-        return this.attach('claude-3-7-sonnet-20250219', new MixAnthropic({ options, config }));
-    }
     haiku35({ options = {}, config = {} } = {}) {
         return this.attach('claude-3-5-haiku-20241022', new MixAnthropic({ options, config }));
     }
@@ -963,7 +956,10 @@ class ModelMix {
                     // debug level 2: Readable summary of output
                     if (currentConfig.debug >= 2) {
                         const tokenInfo = result.tokens
-                            ? ` ${result.tokens.input} → ${result.tokens.output} tok` + (result.tokens.speed ? ` ${result.tokens.speed} t/s` : '') + (result.tokens.cost != null ? ` $${result.tokens.cost.toFixed(4)}` : '')
+                            ? ` ${result.tokens.input} → ${result.tokens.output} tok`
+                                + (result.tokens.cached ? ` (cached:${result.tokens.cached})` : '')
+                                + (result.tokens.speed ? `| ${result.tokens.speed} t/s` : '')
+                                + (result.tokens.cost != null ? ` $${result.tokens.cost.toFixed(4)}` : '')
                             : '';
                         console.log(`✓${tokenInfo}\n${ModelMix.formatOutputSummary(result, currentConfig.debug).trim()}`);
                     }
@@ -1327,7 +1323,7 @@ class MixCustom {
                 message: message.trim(),
                 toolCalls: [],
                 think: null,
-                tokens: raw.length > 0 ? MixCustom.extractTokens(raw[raw.length - 1]) : { input: 0, output: 0, total: 0 }
+                tokens: raw.length > 0 ? MixCustom.extractTokens(raw[raw.length - 1]) : { input: 0, output: 0, total: 0, cached: 0 }
             }));
             response.data.on('error', reject);
         });
@@ -1379,13 +1375,15 @@ class MixCustom {
             return {
                 input: data.usage.prompt_tokens || 0,
                 output: data.usage.completion_tokens || 0,
-                total: data.usage.total_tokens || 0
+                total: data.usage.total_tokens || 0,
+                cached: ModelMix.extractCacheTokens(data.usage)
             };
         }
         return {
             input: 0,
             output: 0,
-            total: 0
+            total: 0,
+            cached: 0
         };
     }
@@ -1569,6 +1567,8 @@ class MixOpenAIResponses extends MixOpenAI {
         if (typeof options.n === 'number') request.n = options.n;
         if (options.logit_bias !== undefined) request.logit_bias = options.logit_bias;
         if (options.user !== undefined) request.user = options.user;
+        if (options.prompt_cache_key !== undefined) request.prompt_cache_key = options.prompt_cache_key;
+        if (options.prompt_cache_retention !== undefined) request.prompt_cache_retention = options.prompt_cache_retention;
         return request;
     }
@@ -1589,13 +1589,15 @@ class MixOpenAIResponses extends MixOpenAI {
             return {
                 input: data.usage.input_tokens || 0,
                 output: data.usage.output_tokens || 0,
-                total: data.usage.total_tokens || ((data.usage.input_tokens || 0) + (data.usage.output_tokens || 0))
+                total: data.usage.total_tokens || ((data.usage.input_tokens || 0) + (data.usage.output_tokens || 0)),
+                cached: ModelMix.extractCacheTokens(data.usage)
             };
         }
         return {
             input: 0,
             output: 0,
-            total: 0
+            total: 0,
+            cached: 0
         };
     }
@@ -2038,13 +2040,15 @@ class MixAnthropic extends MixCustom {
             return {
                 input: data.usage.input_tokens || 0,
                 output: data.usage.output_tokens || 0,
-                total: (data.usage.input_tokens || 0) + (data.usage.output_tokens || 0)
+                total: (data.usage.input_tokens || 0) + (data.usage.output_tokens || 0),
+                cached: ModelMix.extractCacheTokens(data.usage)
             };
         }
         return {
             input: 0,
             output: 0,
-            total: 0
+            total: 0,
+            cached: 0
         };
     }
@@ -2573,13 +2577,15 @@ class MixGoogle extends MixCustom {
             return {
                 input: data.usageMetadata.promptTokenCount || 0,
                 output: data.usageMetadata.candidatesTokenCount || 0,
-                total: data.usageMetadata.totalTokenCount || 0
+                total: data.usageMetadata.totalTokenCount || 0,
+                cached: ModelMix.extractCacheTokens(data.usageMetadata)
             };
         }
         return {
             input: 0,
             output: 0,
-            total: 0
+            total: 0,
+            cached: 0
         };
     }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "modelmix",
-  "version": "4.4.16",
+  "version": "4.4.20",
   "description": "🧬 Reliable interface with automatic fallback for AI LLMs.",
   "main": "index.js",
   "repository": {
@@ -50,7 +50,7 @@
     "@modelcontextprotocol/sdk": "^1.27.1",
     "axios": "^1.13.5",
     "bottleneck": "^2.19.5",
-    "file-type": "^16.5.4",
+    "file-type": "^21.3.1",
     "form-data": "^4.0.4",
     "lemonlog": "^1.2.0",
     "ws": "^8.19.0"
@@ -61,6 +61,10 @@
     "nock": "^14.0.9",
     "sinon": "^21.0.0"
   },
+  "overrides": {
+    "diff": ">=8.0.3",
+    "serialize-javascript": ">=7.0.3"
+  },
   "scripts": {
     "test": "mocha test/**/*.js --timeout 10000 --require test/setup.js",
     "test:watch": "mocha test/**/*.js --watch --timeout 10000 --require test/setup.js",
@@ -71,6 +75,7 @@
     "test:bottleneck": "mocha test/bottleneck.test.js --timeout 10000 --require test/setup.js",
     "test:live": "mocha test/live.test.js --timeout 10000 --require test/setup.js",
     "test:live.mcp": "mocha test/live.mcp.js --timeout 60000 --require test/setup.js",
-    "test:tokens": "mocha test/tokens.test.js --timeout 10000 --require test/setup.js"
+    "test:tokens": "mocha test/tokens.test.js --timeout 10000 --require test/setup.js",
+    "test:offline": "mocha test/json.test.js test/fallback.test.js test/templates.test.js test/images.test.js test/bottleneck.test.js test/tokens.test.js test/history.test.js --timeout 10000 --require test/setup.js"
   }
 }

package/skills/modelmix/SKILL.md CHANGED Viewed

@@ -96,9 +96,9 @@ If `sonnet46` fails, it automatically tries `gpt52`, then `gemini3flash`.
 `gpt52()` `gpt53chat()` `gpt51()` `gpt5()` `gpt5mini()` `gpt5nano()` `gpt41()` `gpt41mini()` `gpt41nano()`
 ### Anthropic
-`opus46()` `opus45()` `opus41()` `sonnet46()` `sonnet45()` `sonnet4()` `sonnet37()` `haiku45()` `haiku35()`
+`opus46()` `opus45()` `opus41()` `sonnet46()` `sonnet45()` `sonnet4()` `haiku45()` `haiku35()`
-Thinking variants: append `think` — e.g. `opus46think()` `sonnet46think()` `sonnet45think()` `sonnet4think()` `sonnet37think()` `opus45think()` `opus41think()` `haiku45think()`
+Thinking variants: append `think` — e.g. `opus46think()` `sonnet46think()` `sonnet45think()` `sonnet4think()` `opus45think()` `opus41think()` `haiku45think()`
 ### Google
 `gemini3pro()` `gemini3flash()` `gemini25pro()` `gemini25flash()`

package/test/bottleneck.test.js CHANGED Viewed

@@ -74,20 +74,13 @@ describe('Rate Limiting with Bottleneck Tests', () => {
             model.gpt51();
-            // Mock API responses
+            // Mock API responses (gpt51 uses /v1/responses)
             nock('https://api.openai.com')
-                .post('/v1/chat/completions')
+                .post('/v1/responses')
                 .times(3)
                 .reply(function() {
                     startTimes.push(Date.now());
-                    return [200, {
-                        choices: [{
-                            message: {
-                                role: 'assistant',
-                                content: `Response ${startTimes.length}`
-                            }
-                        }]
-                    }];
+                    return [200, testUtils.createMockResponse('openai-responses', `Response ${startTimes.length}`)];
                 });
             // Start three requests sequentially to test rate limiting
@@ -125,9 +118,9 @@ describe('Rate Limiting with Bottleneck Tests', () => {
             model.gpt51();
-            // Mock API with delay to simulate concurrent requests
+            // Mock API with delay to simulate concurrent requests (gpt51 uses /v1/responses)
             nock('https://api.openai.com')
-                .post('/v1/chat/completions')
+                .post('/v1/responses')
                 .times(5)
                 .reply(function() {
                     concurrentCount++;
@@ -136,14 +129,7 @@ describe('Rate Limiting with Bottleneck Tests', () => {
                     return new Promise(resolve => {
                         setTimeout(() => {
                             concurrentCount--;
-                            resolve([200, {
-                                choices: [{
-                                    message: {
-                                        role: 'assistant',
-                                        content: 'Concurrent response'
-                                    }
-                                }]
-                            }]);
+                            resolve([200, testUtils.createMockResponse('openai-responses', 'Concurrent response')]);
                         }, 100);
                     });
                 });
@@ -188,18 +174,11 @@ describe('Rate Limiting with Bottleneck Tests', () => {
             model.gpt51();
             nock('https://api.openai.com')
-                .post('/v1/chat/completions')
+                .post('/v1/responses')
                 .times(2)
                 .reply(function() {
                     requestTimes.push(Date.now());
-                    return [200, {
-                        choices: [{
-                            message: {
-                                role: 'assistant',
-                                content: 'OpenAI rate limited response'
-                            }
-                        }]
-                    }];
+                    return [200, testUtils.createMockResponse('openai-responses', 'OpenAI rate limited response')];
                 });
             const start = Date.now();
@@ -271,7 +250,7 @@ describe('Rate Limiting with Bottleneck Tests', () => {
             model.gpt51();
             nock('https://api.openai.com')
-                .post('/v1/chat/completions')
+                .post('/v1/responses')
                 .reply(429, {
                     error: {
                         message: 'Rate limit exceeded',
@@ -294,7 +273,7 @@ describe('Rate Limiting with Bottleneck Tests', () => {
             // First request fails
             nock('https://api.openai.com')
-                .post('/v1/chat/completions')
+                .post('/v1/responses')
                 .reply(function() {
                     requestTimes.push(Date.now());
                     return [500, { error: 'Server error' }];
@@ -302,17 +281,10 @@ describe('Rate Limiting with Bottleneck Tests', () => {
             // Second request succeeds
             nock('https://api.openai.com')
-                .post('/v1/chat/completions')
+                .post('/v1/responses')
                 .reply(function() {
                     requestTimes.push(Date.now());
-                    return [200, {
-                        choices: [{
-                            message: {
-                                role: 'assistant',
-                                content: 'Success after error'
-                            }
-                        }]
-                    }];
+                    return [200, testUtils.createMockResponse('openai-responses', 'Success after error')];
                 });
             const start = Date.now();
@@ -352,18 +324,11 @@ describe('Rate Limiting with Bottleneck Tests', () => {
             let requestCount = 0;
             nock('https://api.openai.com')
-                .post('/v1/chat/completions')
+                .post('/v1/responses')
                 .times(5)
                 .reply(function() {
                     requestCount++;
-                    return [200, {
-                        choices: [{
-                            message: {
-                                role: 'assistant',
-                                content: `Response ${requestCount}`
-                            }
-                        }]
-                    }];
+                    return [200, testUtils.createMockResponse('openai-responses', `Response ${requestCount}`)];
                 });
             const startTime = Date.now();
@@ -400,19 +365,13 @@ describe('Rate Limiting with Bottleneck Tests', () => {
             const results = [];
             nock('https://api.openai.com')
-                .post('/v1/chat/completions')
+                .post('/v1/responses')
                 .times(3)
                 .reply(function(uri, body) {
-                    const content = body.messages[0].content;
-                    results.push(content);
-                    return [200, {
-                        choices: [{
-                            message: {
-                                role: 'assistant',
-                                content: `Processed: ${content}`
-                            }
-                        }]
-                    }];
+                    const lastInput = body.input[body.input.length - 1];
+                    const text = lastInput?.content?.[0]?.text ?? '';
+                    results.push(text);
+                    return [200, testUtils.createMockResponse('openai-responses', `Processed: ${text}`)];
                 });
             // Submit requests with different priorities
@@ -447,16 +406,9 @@ describe('Rate Limiting with Bottleneck Tests', () => {
             model.gpt51();
             nock('https://api.openai.com')
-                .post('/v1/chat/completions')
+                .post('/v1/responses')
                 .times(3)
-                .reply(200, {
-                    choices: [{
-                        message: {
-                            role: 'assistant',
-                            content: 'Statistics tracking response'
-                        }
-                    }]
-                });
+                .reply(200, testUtils.createMockResponse('openai-responses', 'Statistics tracking response'));
             // Make multiple requests
             await Promise.all([
@@ -496,15 +448,8 @@ describe('Rate Limiting with Bottleneck Tests', () => {
             model.gpt51();
             nock('https://api.openai.com')
-                .post('/v1/chat/completions')
-                .reply(200, {
-                    choices: [{
-                        message: {
-                            role: 'assistant',
-                            content: 'Event handling response'
-                        }
-                    }]
-                });
+                .post('/v1/responses')
+                .reply(200, testUtils.createMockResponse('openai-responses', 'Event handling response'));
             // Make a request to trigger events
             model.addText('Event test').message();

package/test/images.test.js CHANGED Viewed

@@ -25,7 +25,8 @@ describe('Image Processing and Multimodal Support Tests', () => {
         it('should handle base64 image data correctly', async () => {
             const base64Image = 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8z8BQz0AEYBxVSF+FABJADveWkH6oAAAAAElFTkSuQmCC';
-            model.gpt52()
+            // Use gpt5mini (chat/completions): gpt52 now uses the Responses API, which expects a different image format
+            model.gpt5mini()
                 .addText('What do you see in this image?')
                 .addImageFromUrl(base64Image);
@@ -62,8 +63,6 @@ describe('Image Processing and Multimodal Support Tests', () => {
             nock('https://api.anthropic.com')
                 .post('/v1/messages')
                 .reply(function (uri, body) {
-                    console.log(body.messages);
-                    // body is already parsed as JSON by nock
                     expect(body.messages).to.be.an('array');
                     // Find the message with the image
                     const userMsg = body.messages.find(m => m.role === 'user');

package/test/json.test.js CHANGED Viewed

@@ -357,23 +357,15 @@ describe('JSON Schema and Structured Output Tests', () => {
             model.gpt52().addText('List 3 countries');
-            // Mock the API response
             nock('https://api.openai.com')
-                .post('/v1/chat/completions')
-                .reply(200, {
-                    choices: [{
-                        message: {
-                            role: 'assistant',
-                            content: JSON.stringify({
-                                countries: [
-                                    { name: 'France', capital: 'Paris' },
-                                    { name: 'Germany', capital: 'Berlin' },
-                                    { name: 'Spain', capital: 'Madrid' }
-                                ]
-                            })
-                        }
-                    }]
-                });
+                .post('/v1/responses')
+                .reply(200, testUtils.createMockResponse('openai-responses', JSON.stringify({
+                    countries: [
+                        { name: 'France', capital: 'Paris' },
+                        { name: 'Germany', capital: 'Berlin' },
+                        { name: 'Spain', capital: 'Madrid' }
+                    ]
+                })));
             const result = await model.json(example);
@@ -429,17 +421,9 @@ describe('JSON Schema and Structured Output Tests', () => {
         it('should handle JSON parsing errors gracefully', async () => {
             model.gpt52().addText('Generate invalid JSON');
-            // Mock invalid JSON response
             nock('https://api.openai.com')
-                .post('/v1/chat/completions')
-                .reply(200, {
-                    choices: [{
-                        message: {
-                            role: 'assistant',
-                            content: 'This is not valid JSON'
-                        }
-                    }]
-                });
+                .post('/v1/responses')
+                .reply(200, testUtils.createMockResponse('openai-responses', 'This is not valid JSON'));
             try {
                 await model.json({ name: 'test' });
@@ -453,21 +437,14 @@ describe('JSON Schema and Structured Output Tests', () => {
             model.gpt52().addText('List 3 countries');
             nock('https://api.openai.com')
-                .post('/v1/chat/completions')
-                .reply(200, {
-                    choices: [{
-                        message: {
-                            role: 'assistant',
-                            content: JSON.stringify({
-                                out: [
-                                    { name: 'France' },
-                                    { name: 'Germany' },
-                                    { name: 'Spain' }
-                                ]
-                            })
-                        }
-                    }]
-                });
+                .post('/v1/responses')
+                .reply(200, testUtils.createMockResponse('openai-responses', JSON.stringify({
+                    out: [
+                        { name: 'France' },
+                        { name: 'Germany' },
+                        { name: 'Spain' }
+                    ]
+                })));
             const result = await model.json([{ name: 'France' }]);

package/test/setup.js CHANGED Viewed

@@ -104,6 +104,18 @@ global.testUtils = {
                         }
                     }]
                 };
+            case 'openai-responses':
+                return {
+                    output: [{
+                        type: 'message',
+                        content: [{ type: 'output_text', text: content }]
+                    }],
+                    usage: {
+                        input_tokens: 10,
+                        output_tokens: 5,
+                        total_tokens: 15
+                    }
+                };
             case 'anthropic':
                 return {

package/test/templates.test.js CHANGED Viewed

@@ -36,19 +36,11 @@ describe('Template and File Operations Tests', () => {
                 .addText('Hello {{name}}, you are {{age}} years old and live in {{city}}.');
             nock('https://api.openai.com')
-                .post('/v1/chat/completions')
+                .post('/v1/responses')
                 .reply(function (uri, body) {
-                    expect(body.messages[1].content[0].text).to.equal('Hello Alice, you are 30 years old and live in New York.');
-                    return [200, {
-                        choices: [{
-                            message: {
-                                role: 'assistant',
-                                content: 'Template processed successfully'
-                            }
-                        }]
-                    }];
+                    const userMsg = body.input.find(m => m.role === 'user');
+                    expect(userMsg.content[0].text).to.equal('Hello Alice, you are 30 years old and live in New York.');
+                    return [200, testUtils.createMockResponse('openai-responses', 'Template processed successfully')];
                 });
             const response = await model.message();
@@ -63,18 +55,11 @@ describe('Template and File Operations Tests', () => {
                 .addText('{{greeting}} {{name}}, {{action}} to our platform!');
             nock('https://api.openai.com')
-                .post('/v1/chat/completions')
+                .post('/v1/responses')
                 .reply(function (uri, body) {
-                    expect(body.messages[1].content[0].text).to.equal('Hello Bob, welcome to our platform!');
-                    return [200, {
-                        choices: [{
-                            message: {
-                                role: 'assistant',
-                                content: 'Multiple templates replaced'
-                            }
-                        }]
-                    }];
+                    const userMsg = body.input.find(m => m.role === 'user');
+                    expect(userMsg.content[0].text).to.equal('Hello Bob, welcome to our platform!');
+                    return [200, testUtils.createMockResponse('openai-responses', 'Multiple templates replaced')];
                 });
             const response = await model.message();
@@ -92,18 +77,11 @@ describe('Template and File Operations Tests', () => {
                 .addText('User {{user_name}} with role {{user_role}} works at {{company_name}} ({{company_domain}})');
             nock('https://api.openai.com')
-                .post('/v1/chat/completions')
+                .post('/v1/responses')
                 .reply(function (uri, body) {
-                    expect(body.messages[1].content[0].text).to.equal('User Charlie with role admin works at TechCorp (techcorp.com)');
-                    return [200, {
-                        choices: [{
-                            message: {
-                                role: 'assistant',
-                                content: 'Nested templates working'
-                            }
-                        }]
-                    }];
+                    const userMsg = body.input.find(m => m.role === 'user');
+                    expect(userMsg.content[0].text).to.equal('User Charlie with role admin works at TechCorp (techcorp.com)');
+                    return [200, testUtils.createMockResponse('openai-responses', 'Nested templates working')];
                 });
             const response = await model.message();
@@ -116,18 +94,11 @@ describe('Template and File Operations Tests', () => {
                 .addText('Hello {{name}}, your ID is {{user_id}} and status is {{status}}');
             nock('https://api.openai.com')
-                .post('/v1/chat/completions')
+                .post('/v1/responses')
                 .reply(function (uri, body) {
-                    expect(body.messages[1].content[0].text).to.equal('Hello David, your ID is {{user_id}} and status is {{status}}');
-                    return [200, {
-                        choices: [{
-                            message: {
-                                role: 'assistant',
-                                content: 'Partial template replacement'
-                            }
-                        }]
-                    }];
+                    const userMsg = body.input.find(m => m.role === 'user');
+                    expect(userMsg.content[0].text).to.equal('Hello David, your ID is {{user_id}} and status is {{status}}');
+                    return [200, testUtils.createMockResponse('openai-responses', 'Partial template replacement')];
                 });
             const response = await model.message();
@@ -145,18 +116,11 @@ describe('Template and File Operations Tests', () => {
                 .addText('Empty: {{empty}}, Special: {{special}}, Number: {{number}}, Boolean: {{boolean}}');
             nock('https://api.openai.com')
-                .post('/v1/chat/completions')
+                .post('/v1/responses')
                 .reply(function (uri, body) {
-                    expect(body.messages[1].content[0].text).to.equal('Empty: , Special: Hello & "World" <test>, Number: 42, Boolean: true');
-                    return [200, {
-                        choices: [{
-                            message: {
-                                role: 'assistant',
-                                content: 'Special characters handled'
-                            }
-                        }]
-                    }];
+                    const userMsg = body.input.find(m => m.role === 'user');
+                    expect(userMsg.content[0].text).to.equal('Empty: , Special: Hello & "World" <test>, Number: 42, Boolean: true');
+                    return [200, testUtils.createMockResponse('openai-responses', 'Special characters handled')];
                 });
             const response = await model.message();
@@ -189,24 +153,16 @@ describe('Template and File Operations Tests', () => {
                 .addText('Process this template: {{template}}');
             nock('https://api.openai.com')
-                .post('/v1/chat/completions')
+                .post('/v1/responses')
                 .reply(function (uri, body) {
-                    const content = body.messages[1].content[0].text;
+                    const userMsg = body.input.find(m => m.role === 'user');
+                    const content = userMsg.content[0].text;
                     expect(content).to.include('Hello Eve, welcome to ModelMix!');
                     expect(content).to.include('Username: eve_user');
                     expect(content).to.include('Role: developer');
                     expect(content).to.include('Created: 2023-12-01');
                     expect(content).to.include('The AI Solutions Team');
-                    return [200, {
-                        choices: [{
-                            message: {
-                                role: 'assistant',
-                                content: 'Template file processed'
-                            }
-                        }]
-                    }];
+                    return [200, testUtils.createMockResponse('openai-responses', 'Template file processed')];
                 });
             const response = await model.message();
@@ -219,10 +175,10 @@ describe('Template and File Operations Tests', () => {
                 .addText('Process this data: {{data}}');
             nock('https://api.openai.com')
-                .post('/v1/chat/completions')
+                .post('/v1/responses')
                 .reply(function (uri, body) {
-                    const content = body.messages[1].content[0].text;
+                    const userMsg = body.input.find(m => m.role === 'user');
+                    const content = userMsg.content[0].text;
                     expect(content).to.include('Alice Smith');
                     expect(content).to.include('alice@example.com');
                     expect(content).to.include('admin');
@@ -230,15 +186,7 @@ describe('Template and File Operations Tests', () => {
                     expect(content).to.include('Carol Davis');
                     expect(content).to.include('"theme": "dark"');
                     expect(content).to.include('"version": "1.0.0"');
-                    return [200, {
-                        choices: [{
-                            message: {
-                                role: 'assistant',
-                                content: 'JSON data processed'
-                            }
-                        }]
-                    }];
+                    return [200, testUtils.createMockResponse('openai-responses', 'JSON data processed')];
                 });
             const response = await model.message();
@@ -251,19 +199,11 @@ describe('Template and File Operations Tests', () => {
                 .addText('This should contain: {{missing}}');
             nock('https://api.openai.com')
-                .post('/v1/chat/completions')
+                .post('/v1/responses')
                 .reply(function (uri, body) {
-                    // The template should remain unreplaced if file doesn't exist
-                    expect(body.messages[1].content[0].text).to.equal('This should contain: {{missing}}');
-                    return [200, {
-                        choices: [{
-                            message: {
-                                role: 'assistant',
-                                content: 'File not found handled'
-                            }
-                        }]
-                    }];
+                    const userMsg = body.input.find(m => m.role === 'user');
+                    expect(userMsg.content[0].text).to.equal('This should contain: {{missing}}');
+                    return [200, testUtils.createMockResponse('openai-responses', 'File not found handled')];
                 });
             const response = await model.message();
@@ -286,26 +226,15 @@ describe('Template and File Operations Tests', () => {
                 .addText('Template: {{template}}\n\nData: {{data}}');
             nock('https://api.openai.com')
-                .post('/v1/chat/completions')
+                .post('/v1/responses')
                 .reply(function (uri, body) {
-                    const content = body.messages[1].content[0].text;
-                    // Should contain processed template
+                    const userMsg = body.input.find(m => m.role === 'user');
+                    const content = userMsg.content[0].text;
                     expect(content).to.include('Hello Frank, welcome to TestPlatform!');
                     expect(content).to.include('Username: frank_test');
-                    // Should contain JSON data
                     expect(content).to.include('Alice Smith');
                     expect(content).to.include('"theme": "dark"');
-                    return [200, {
-                        choices: [{
-                            message: {
-                                role: 'assistant',
-                                content: 'Multiple files processed'
-                            }
-                        }]
-                    }];
+                    return [200, testUtils.createMockResponse('openai-responses', 'Multiple files processed')];
                 });
             const response = await model.message();
@@ -329,21 +258,13 @@ describe('Template and File Operations Tests', () => {
                 .addText('Absolute path content: {{absolute}}');
             nock('https://api.openai.com')
-                .post('/v1/chat/completions')
+                .post('/v1/responses')
                 .reply(function (uri, body) {
-                    const content = body.messages[1].content[0].text;
+                    const userMsg = body.input.find(m => m.role === 'user');
+                    const content = userMsg.content[0].text;
                     expect(content).to.include('Hello Grace, welcome to AbsolutePath!');
                     expect(content).to.include('The Absolute Corp Team');
-                    return [200, {
-                        choices: [{
-                            message: {
-                                role: 'assistant',
-                                content: 'Absolute path works'
-                            }
-                        }]
-                    }];
+                    return [200, testUtils.createMockResponse('openai-responses', 'Absolute path works')];
                 });
             const response = await model.message();
@@ -372,22 +293,14 @@ describe('Template and File Operations Tests', () => {
                 .addText('Please {{action}} the following {{target}} and generate a {{format}}:\n\n{{user_data}}');
             nock('https://api.openai.com')
-                .post('/v1/chat/completions')
+                .post('/v1/responses')
                 .reply(function (uri, body) {
-                    const content = body.messages[1].content[0].text;
+                    const userMsg = body.input.find(m => m.role === 'user');
+                    const content = userMsg.content[0].text;
                     expect(content).to.include('Please analyze the following user behavior patterns and generate a detailed report:');
                     expect(content).to.include('Alice Smith');
                     expect(content).to.include('total_users');
-                    return [200, {
-                        choices: [{
-                            message: {
-                                role: 'assistant',
-                                content: 'Complex template integration successful'
-                            }
-                        }]
-                    }];
+                    return [200, testUtils.createMockResponse('openai-responses', 'Complex template integration successful')];
                 });
             const response = await model.message();
@@ -408,23 +321,22 @@ describe('Template and File Operations Tests', () => {
                 .addText('{{instruction}} from this data: {{data}}');
             nock('https://api.openai.com')
-                .post('/v1/chat/completions')
+                .post('/v1/responses')
                 .reply(function (uri, body) {
-                    expect(body.messages[1].content[0].text).to.include('Count active users by role');
-                    expect(body.messages[1].content[0].text).to.include('Alice Smith');
+                    const userMsg = body.input.find(m => m.role === 'user');
+                    expect(userMsg.content[0].text).to.include('Count active users by role');
+                    expect(userMsg.content[0].text).to.include('Alice Smith');
                     return [200, {
-                        choices: [{
-                            message: {
-                                role: 'assistant',
-                                content: JSON.stringify({
-                                    summary: 'User analysis completed',
-                                    user_count: 3,
-                                    active_users: 2,
-                                    roles: ['admin', 'user', 'moderator']
-                                })
-                            }
-                        }]
+                        output: [{
+                            type: 'message',
+                            content: [{ type: 'output_text', text: JSON.stringify({
+                                summary: 'User analysis completed',
+                                user_count: 3,
+                                active_users: 2,
+                                roles: ['admin', 'user', 'moderator']
+                            }) }]
+                        }],
+                        usage: { input_tokens: 10, output_tokens: 5, total_tokens: 15 }
                     }];
                 });
@@ -461,15 +373,8 @@ describe('Template and File Operations Tests', () => {
                 .addText('Content: {{bad_file}}');
             nock('https://api.openai.com')
-                .post('/v1/chat/completions')
-                .reply(200, {
-                    choices: [{
-                        message: {
-                            role: 'assistant',
-                            content: 'Error handled gracefully'
-                        }
-                    }]
-                });
+                .post('/v1/responses')
+                .reply(200, testUtils.createMockResponse('openai-responses', 'Error handled gracefully'));
             const response = await model.message();
             expect(response).to.include('Error handled gracefully');

package/test/tokens.test.js CHANGED Viewed

@@ -1,5 +1,5 @@
 import { expect } from 'chai';
-import { ModelMix } from '../index.js';
+import { ModelMix, MixAnthropic, MixCustom, MixGoogle, MixOpenAIResponses } from '../index.js';
 import { createRequire } from 'module';
 const require = createRequire(import.meta.url);
@@ -18,6 +18,64 @@ describe('Token Usage Tracking', () => {
         nock.activate();
     });
+    it('should extract cached tokens from supported provider usage formats', function () {
+        const openAIChatTokens = MixCustom.extractTokens({
+            usage: {
+                prompt_tokens: 120,
+                completion_tokens: 30,
+                total_tokens: 150,
+                prompt_tokens_details: {
+                    cached_tokens: 80
+                }
+            }
+        });
+        const openAIResponsesTokens = MixOpenAIResponses.extractResponsesTokens({
+            usage: {
+                input_tokens: 90,
+                output_tokens: 20,
+                total_tokens: 110,
+                input_tokens_details: {
+                    cached_tokens: 45
+                }
+            }
+        });
+        const anthropicTokens = MixAnthropic.extractTokens({
+            usage: {
+                input_tokens: 60,
+                output_tokens: 15,
+                cache_read_input_tokens: 25
+            }
+        });
+        const googleTokens = MixGoogle.extractTokens({
+            usageMetadata: {
+                promptTokenCount: 70,
+                candidatesTokenCount: 10,
+                totalTokenCount: 80,
+                cachedContentTokenCount: 35
+            }
+        });
+        expect(openAIChatTokens.cached).to.equal(80);
+        expect(openAIResponsesTokens.cached).to.equal(45);
+        expect(anthropicTokens.cached).to.equal(25);
+        expect(googleTokens.cached).to.equal(35);
+    });
+    it('should pass OpenAI Responses prompt cache options through the request body', function () {
+        const request = MixOpenAIResponses.buildResponsesRequest({
+            model: 'gpt-5.4',
+            messages: [{
+                role: 'user',
+                content: [{ type: 'text', text: 'Explain caching briefly.' }]
+            }],
+            prompt_cache_key: 'demo-gpt54-cache',
+            prompt_cache_retention: '24h'
+        });
+        expect(request.prompt_cache_key).to.equal('demo-gpt54-cache');
+        expect(request.prompt_cache_retention).to.equal('24h');
+    });
     it('should track tokens in OpenAI response', async function () {
         this.timeout(30000);
@@ -31,10 +89,12 @@ describe('Token Usage Tracking', () => {
         expect(result.tokens).to.have.property('input');
         expect(result.tokens).to.have.property('output');
         expect(result.tokens).to.have.property('total');
+        expect(result.tokens).to.have.property('cached');
         expect(result.tokens.input).to.be.a('number');
         expect(result.tokens.output).to.be.a('number');
         expect(result.tokens.total).to.be.a('number');
+        expect(result.tokens.cached).to.be.a('number');
         expect(result.tokens.input).to.be.greaterThan(0);
         expect(result.tokens.output).to.be.greaterThan(0);
@@ -54,6 +114,7 @@ describe('Token Usage Tracking', () => {
         expect(result.tokens).to.have.property('input');
         expect(result.tokens).to.have.property('output');
         expect(result.tokens).to.have.property('total');
+        expect(result.tokens).to.have.property('cached');
         expect(result.tokens.input).to.be.greaterThan(0);
         expect(result.tokens.output).to.be.greaterThan(0);
@@ -73,6 +134,7 @@ describe('Token Usage Tracking', () => {
         expect(result.tokens).to.have.property('input');
         expect(result.tokens).to.have.property('output');
         expect(result.tokens).to.have.property('total');
+        expect(result.tokens).to.have.property('cached');
         expect(result.tokens.input).to.be.greaterThan(0);
         expect(result.tokens.output).to.be.greaterThan(0);
@@ -140,6 +202,7 @@ describe('Token Usage Tracking', () => {
             expect(result.tokens.input, `${provider.name} should have input`).to.be.a('number');
             expect(result.tokens.output, `${provider.name} should have output`).to.be.a('number');
             expect(result.tokens.total, `${provider.name} should have total`).to.be.a('number');
+            expect(result.tokens.cached, `${provider.name} should have cached`).to.be.a('number');
             // Verify values are positive
             expect(result.tokens.input, `${provider.name} input should be > 0`).to.be.greaterThan(0);