modelmix 4.4.16 → 4.4.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -136,9 +136,9 @@ Here's a comprehensive list of available methods:
136
136
  | Method | Provider | Model | Price (I/O) per 1 M tokens |
137
137
  | ------------------ | ---------- | ------------------------------ | -------------------------- |
138
138
  | `gpt54()` | OpenAI | gpt-5.4 | [\$2.50 / \$15.00][1] |
139
+ | `gpt53codex()` | OpenAI | gpt-5.3-codex | [\$1.25 / \$14.00][1] |
139
140
  | `gpt52()` | OpenAI | gpt-5.2 | [\$1.75 / \$14.00][1] |
140
141
  | `gpt51()` | OpenAI | gpt-5.1 | [\$1.25 / \$10.00][1] |
141
- | `gpt53codex()` | OpenAI | gpt-5.3-codex | [\$1.25 / \$14.00][1] |
142
142
  | `gpt5mini()` | OpenAI | gpt-5-mini | [\$0.25 / \$2.00][1] |
143
143
  | `gpt5nano()` | OpenAI | gpt-5-nano | [\$0.05 / \$0.40][1] |
144
144
  | `gpt41()` | OpenAI | gpt-4.1 | [\$2.00 / \$8.00][1] |
@@ -406,6 +406,7 @@ Every response from `raw()` now includes a `tokens` object with the following st
406
406
  input: 150, // Number of tokens in the prompt/input
407
407
  output: 75, // Number of tokens in the completion/output
408
408
  total: 225, // Total tokens used (input + output)
409
+ cached: 100, // Cached input tokens reported by the provider (0 when absent)
409
410
  cost: 0.0012, // Estimated cost in USD (null if model not in pricing table)
410
411
  speed: 42 // Output tokens per second (int)
411
412
  }
@@ -419,10 +420,10 @@ After calling `message()` or `json()`, use `lastRaw` to access the complete resp
419
420
  ```javascript
420
421
  const text = await model.message();
421
422
  console.log(model.lastRaw.tokens);
422
- // { input: 122, output: 86, total: 541, cost: 0.000319, speed: 38 }
423
+ // { input: 122, output: 86, total: 208, cached: 41, cost: 0.000319, speed: 38 }
423
424
  ```
424
425
 
425
- The `cost` field is the estimated cost in USD based on the model's pricing per 1M tokens (input/output). If the model is not found in the pricing table, `cost` will be `null`. The `speed` field is the generation speed measured in output tokens per second (integer).
426
+ The `cached` field is a single aggregated count of cached input tokens reported by the provider. The `cost` field is the estimated cost in USD based on the model's pricing per 1M tokens (input/output). If the model is not found in the pricing table, `cost` will be `null`. The `speed` field is the generation speed measured in output tokens per second (integer).
426
427
 
427
428
  ## 🐛 Enabling Debug Mode
428
429
 
@@ -516,7 +517,7 @@ new ModelMix(args = { options: {}, config: {} })
516
517
  - `message`: The text response from the model
517
518
  - `think`: Reasoning/thinking content (if available)
518
519
  - `toolCalls`: Array of tool calls made by the model (if any)
519
- - `tokens`: Object with `input`, `output`, `total` token counts, `cost` (USD), and `speed` (output tokens/sec)
520
+ - `tokens`: Object with `input`, `output`, `total`, and `cached` token counts, plus `cost` (USD) and `speed` (output tokens/sec)
520
521
  - `response`: The raw API response
521
522
  - `stream(callback)`: Sends the message and streams the response, invoking the callback with each streamed part.
522
523
  - `json(schemaExample, descriptions = {}, options = {})`: Forces the model to return a response in a specific JSON format.
package/demo/cache.js ADDED
@@ -0,0 +1,52 @@
1
+ import { ModelMix } from '../index.js';
2
// Best-effort: read API keys from a local .env file when one exists.
// A missing file is not an error for this demo, so the failure is ignored.
try {
    process.loadEnvFile();
} catch {}

console.log("\n" + '--------| gpt54() prompt cache |--------');

// Prompt layout: the reusable prefix comes first and only the trailing
// question varies, so the second request can share the first one's prefix.
const referenceLines = [];
for (let n = 1; n <= 80; n++) {
    referenceLines.push(
        `Reference ${String(n).padStart(3, '0')}: Quantum systems are described with probabilities, measurements collapse possibilities into outcomes, and explanations must stay concrete, brief, and easy to understand.`
    );
}

const sharedPrefix = [
    "You are a concise science tutor.",
    "The repeated block below is intentionally long so OpenAI can reuse cached prompt tokens on the second request.",
    referenceLines.join("\n")
].join("\n\n");

/** Appends the varying question to the shared (cacheable) prefix. */
function buildPrompt(question) {
    return `${sharedPrefix}\n\nQuestion: ${question}`;
}

/** Builds a fresh ModelMix instance wired to gpt54() with the prompt-cache options. */
function createModel() {
    const mix = ModelMix.new({
        config: {
            debug: 3,
        }
    });

    return mix.gpt54({
        options: {
            reasoning_effort: "none",
            verbosity: "low",
            prompt_cache_key: "demo-gpt54-prompt-cache",
            prompt_cache_retention: "24h"
        }
    });
}

/**
 * Sends one prompt through a new model instance, prints the reply and the
 * token usage, and returns the raw result.
 */
async function runRequest(label, question) {
    const model = createModel();
    model.addText(buildPrompt(question));

    const result = await model.raw();

    console.log(`\n${label}`);
    console.log("message:", result.message);
    console.log("tokens:", result.tokens);

    return result;
}

// Run strictly one after the other: the first call has to finish before the
// second is sent.
const requests = [
    [
        "Request 1 (warms the cache)",
        "Explain quantum entanglement in simple Spanish in 3 short bullet points."
    ],
    [
        "Request 2 (reuses the cached prefix)",
        "Now explain quantum entanglement in simple Spanish with a different analogy and 3 short bullet points."
    ]
];

for (const [label, question] of requests) {
    await runRequest(label, question);
}
52
+
package/index.js CHANGED
@@ -181,6 +181,15 @@ class ModelMix {
181
181
  return (tokens.input * inputPerMillion / 1_000_000) + (tokens.output * outputPerMillion / 1_000_000);
182
182
  }
183
183
 
184
+ static extractCacheTokens(usage = {}) {
185
+ return usage.input_tokens_details?.cached_tokens
186
+ || usage.prompt_tokens_details?.cached_tokens
187
+ || usage.cache_read_input_tokens
188
+ || usage.cachedContentTokenCount
189
+ || usage.cached_content_token_count
190
+ || 0;
191
+ }
192
+
184
193
  static formatInputSummary(messages, system, debug = 2) {
185
194
  const lastMessage = messages[messages.length - 1];
186
195
  let inputText = '';
@@ -250,15 +259,6 @@ class ModelMix {
250
259
  gpt41nano({ options = {}, config = {} } = {}) {
251
260
  return this.attach('gpt-4.1-nano', new MixOpenAI({ options, config }));
252
261
  }
253
- o4mini({ options = {}, config = {} } = {}) {
254
- return this.attach('o4-mini', new MixOpenAI({ options, config }));
255
- }
256
- o3({ options = {}, config = {} } = {}) {
257
- return this.attach('o3', new MixOpenAI({ options, config }));
258
- }
259
- gpt45({ options = {}, config = {} } = {}) {
260
- return this.attach('gpt-4.5-preview', new MixOpenAI({ options, config }));
261
- }
262
262
  gpt5({ options = {}, config = {} } = {}) {
263
263
  return this.attach('gpt-5', new MixOpenAI({ options, config }));
264
264
  }
@@ -269,10 +269,10 @@ class ModelMix {
269
269
  return this.attach('gpt-5-nano', new MixOpenAI({ options, config }));
270
270
  }
271
271
  gpt51({ options = {}, config = {} } = {}) {
272
- return this.attach('gpt-5.1', new MixOpenAI({ options, config }));
272
+ return this.attach('gpt-5.1', new MixOpenAIResponses({ options, config }));
273
273
  }
274
274
  gpt52({ options = {}, config = {} } = {}) {
275
- return this.attach('gpt-5.2', new MixOpenAI({ options, config }));
275
+ return this.attach('gpt-5.2', new MixOpenAIResponses({ options, config }));
276
276
  }
277
277
  gpt54({ options = {}, config = {} } = {}) {
278
278
  return this.attach('gpt-5.4', new MixOpenAIResponses({ options, config }));
@@ -289,8 +289,8 @@ class ModelMix {
289
289
  gpt53codex({ options = {}, config = {} } = {}) {
290
290
  return this.attach('gpt-5.3-codex', new MixOpenAIResponses({ options, config }));
291
291
  }
292
- gpt52chat({ options = {}, config = {} } = {}) {
293
- return this.attach('gpt-5.2-chat-latest', new MixOpenAI({ options, config }));
292
+ gpt53chat({ options = {}, config = {} } = {}) {
293
+ return this.attach('gpt-5.3-chat-latest', new MixOpenAIResponses({ options, config }));
294
294
  }
295
295
  gptOss({ options = {}, config = {}, mix = {} } = {}) {
296
296
  mix = { ...this.mix, ...mix };
@@ -963,7 +963,10 @@ class ModelMix {
963
963
  // debug level 2: Readable summary of output
964
964
  if (currentConfig.debug >= 2) {
965
965
  const tokenInfo = result.tokens
966
- ? ` ${result.tokens.input} → ${result.tokens.output} tok` + (result.tokens.speed ? ` ${result.tokens.speed} t/s` : '') + (result.tokens.cost != null ? ` $${result.tokens.cost.toFixed(4)}` : '')
966
+ ? ` ${result.tokens.input} → ${result.tokens.output} tok`
967
+ + (result.tokens.cached ? ` (cached:${result.tokens.cached})` : '')
968
+ + (result.tokens.speed ? `| ${result.tokens.speed} t/s` : '')
969
+ + (result.tokens.cost != null ? ` $${result.tokens.cost.toFixed(4)}` : '')
967
970
  : '';
968
971
  console.log(`✓${tokenInfo}\n${ModelMix.formatOutputSummary(result, currentConfig.debug).trim()}`);
969
972
  }
@@ -1327,7 +1330,7 @@ class MixCustom {
1327
1330
  message: message.trim(),
1328
1331
  toolCalls: [],
1329
1332
  think: null,
1330
- tokens: raw.length > 0 ? MixCustom.extractTokens(raw[raw.length - 1]) : { input: 0, output: 0, total: 0 }
1333
+ tokens: raw.length > 0 ? MixCustom.extractTokens(raw[raw.length - 1]) : { input: 0, output: 0, total: 0, cached: 0 }
1331
1334
  }));
1332
1335
  response.data.on('error', reject);
1333
1336
  });
@@ -1379,13 +1382,15 @@ class MixCustom {
1379
1382
  return {
1380
1383
  input: data.usage.prompt_tokens || 0,
1381
1384
  output: data.usage.completion_tokens || 0,
1382
- total: data.usage.total_tokens || 0
1385
+ total: data.usage.total_tokens || 0,
1386
+ cached: ModelMix.extractCacheTokens(data.usage)
1383
1387
  };
1384
1388
  }
1385
1389
  return {
1386
1390
  input: 0,
1387
1391
  output: 0,
1388
- total: 0
1392
+ total: 0,
1393
+ cached: 0
1389
1394
  };
1390
1395
  }
1391
1396
 
@@ -1569,6 +1574,8 @@ class MixOpenAIResponses extends MixOpenAI {
1569
1574
  if (typeof options.n === 'number') request.n = options.n;
1570
1575
  if (options.logit_bias !== undefined) request.logit_bias = options.logit_bias;
1571
1576
  if (options.user !== undefined) request.user = options.user;
1577
+ if (options.prompt_cache_key !== undefined) request.prompt_cache_key = options.prompt_cache_key;
1578
+ if (options.prompt_cache_retention !== undefined) request.prompt_cache_retention = options.prompt_cache_retention;
1572
1579
 
1573
1580
  return request;
1574
1581
  }
@@ -1589,13 +1596,15 @@ class MixOpenAIResponses extends MixOpenAI {
1589
1596
  return {
1590
1597
  input: data.usage.input_tokens || 0,
1591
1598
  output: data.usage.output_tokens || 0,
1592
- total: data.usage.total_tokens || ((data.usage.input_tokens || 0) + (data.usage.output_tokens || 0))
1599
+ total: data.usage.total_tokens || ((data.usage.input_tokens || 0) + (data.usage.output_tokens || 0)),
1600
+ cached: ModelMix.extractCacheTokens(data.usage)
1593
1601
  };
1594
1602
  }
1595
1603
  return {
1596
1604
  input: 0,
1597
1605
  output: 0,
1598
- total: 0
1606
+ total: 0,
1607
+ cached: 0
1599
1608
  };
1600
1609
  }
1601
1610
 
@@ -2038,13 +2047,15 @@ class MixAnthropic extends MixCustom {
2038
2047
  return {
2039
2048
  input: data.usage.input_tokens || 0,
2040
2049
  output: data.usage.output_tokens || 0,
2041
- total: (data.usage.input_tokens || 0) + (data.usage.output_tokens || 0)
2050
+ total: (data.usage.input_tokens || 0) + (data.usage.output_tokens || 0),
2051
+ cached: ModelMix.extractCacheTokens(data.usage)
2042
2052
  };
2043
2053
  }
2044
2054
  return {
2045
2055
  input: 0,
2046
2056
  output: 0,
2047
- total: 0
2057
+ total: 0,
2058
+ cached: 0
2048
2059
  };
2049
2060
  }
2050
2061
 
@@ -2573,13 +2584,15 @@ class MixGoogle extends MixCustom {
2573
2584
  return {
2574
2585
  input: data.usageMetadata.promptTokenCount || 0,
2575
2586
  output: data.usageMetadata.candidatesTokenCount || 0,
2576
- total: data.usageMetadata.totalTokenCount || 0
2587
+ total: data.usageMetadata.totalTokenCount || 0,
2588
+ cached: ModelMix.extractCacheTokens(data.usageMetadata)
2577
2589
  };
2578
2590
  }
2579
2591
  return {
2580
2592
  input: 0,
2581
2593
  output: 0,
2582
- total: 0
2594
+ total: 0,
2595
+ cached: 0
2583
2596
  };
2584
2597
  }
2585
2598
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "modelmix",
3
- "version": "4.4.16",
3
+ "version": "4.4.18",
4
4
  "description": "🧬 Reliable interface with automatic fallback for AI LLMs.",
5
5
  "main": "index.js",
6
6
  "repository": {
@@ -50,7 +50,7 @@
50
50
  "@modelcontextprotocol/sdk": "^1.27.1",
51
51
  "axios": "^1.13.5",
52
52
  "bottleneck": "^2.19.5",
53
- "file-type": "^16.5.4",
53
+ "file-type": "^21.3.1",
54
54
  "form-data": "^4.0.4",
55
55
  "lemonlog": "^1.2.0",
56
56
  "ws": "^8.19.0"
@@ -61,6 +61,10 @@
61
61
  "nock": "^14.0.9",
62
62
  "sinon": "^21.0.0"
63
63
  },
64
+ "overrides": {
65
+ "diff": ">=8.0.3",
66
+ "serialize-javascript": ">=7.0.3"
67
+ },
64
68
  "scripts": {
65
69
  "test": "mocha test/**/*.js --timeout 10000 --require test/setup.js",
66
70
  "test:watch": "mocha test/**/*.js --watch --timeout 10000 --require test/setup.js",
@@ -71,6 +75,7 @@
71
75
  "test:bottleneck": "mocha test/bottleneck.test.js --timeout 10000 --require test/setup.js",
72
76
  "test:live": "mocha test/live.test.js --timeout 10000 --require test/setup.js",
73
77
  "test:live.mcp": "mocha test/live.mcp.js --timeout 60000 --require test/setup.js",
74
- "test:tokens": "mocha test/tokens.test.js --timeout 10000 --require test/setup.js"
78
+ "test:tokens": "mocha test/tokens.test.js --timeout 10000 --require test/setup.js",
79
+ "test:offline": "mocha test/json.test.js test/fallback.test.js test/templates.test.js test/images.test.js test/bottleneck.test.js test/tokens.test.js test/history.test.js --timeout 10000 --require test/setup.js"
75
80
  }
76
81
  }
@@ -74,20 +74,13 @@ describe('Rate Limiting with Bottleneck Tests', () => {
74
74
 
75
75
  model.gpt51();
76
76
 
77
- // Mock API responses
77
+ // Mock API responses (gpt51 uses /v1/responses)
78
78
  nock('https://api.openai.com')
79
- .post('/v1/chat/completions')
79
+ .post('/v1/responses')
80
80
  .times(3)
81
81
  .reply(function() {
82
82
  startTimes.push(Date.now());
83
- return [200, {
84
- choices: [{
85
- message: {
86
- role: 'assistant',
87
- content: `Response ${startTimes.length}`
88
- }
89
- }]
90
- }];
83
+ return [200, testUtils.createMockResponse('openai-responses', `Response ${startTimes.length}`)];
91
84
  });
92
85
 
93
86
  // Start three requests sequentially to test rate limiting
@@ -125,9 +118,9 @@ describe('Rate Limiting with Bottleneck Tests', () => {
125
118
 
126
119
  model.gpt51();
127
120
 
128
- // Mock API with delay to simulate concurrent requests
121
+ // Mock API with delay to simulate concurrent requests (gpt51 uses /v1/responses)
129
122
  nock('https://api.openai.com')
130
- .post('/v1/chat/completions')
123
+ .post('/v1/responses')
131
124
  .times(5)
132
125
  .reply(function() {
133
126
  concurrentCount++;
@@ -136,14 +129,7 @@ describe('Rate Limiting with Bottleneck Tests', () => {
136
129
  return new Promise(resolve => {
137
130
  setTimeout(() => {
138
131
  concurrentCount--;
139
- resolve([200, {
140
- choices: [{
141
- message: {
142
- role: 'assistant',
143
- content: 'Concurrent response'
144
- }
145
- }]
146
- }]);
132
+ resolve([200, testUtils.createMockResponse('openai-responses', 'Concurrent response')]);
147
133
  }, 100);
148
134
  });
149
135
  });
@@ -188,18 +174,11 @@ describe('Rate Limiting with Bottleneck Tests', () => {
188
174
  model.gpt51();
189
175
 
190
176
  nock('https://api.openai.com')
191
- .post('/v1/chat/completions')
177
+ .post('/v1/responses')
192
178
  .times(2)
193
179
  .reply(function() {
194
180
  requestTimes.push(Date.now());
195
- return [200, {
196
- choices: [{
197
- message: {
198
- role: 'assistant',
199
- content: 'OpenAI rate limited response'
200
- }
201
- }]
202
- }];
181
+ return [200, testUtils.createMockResponse('openai-responses', 'OpenAI rate limited response')];
203
182
  });
204
183
 
205
184
  const start = Date.now();
@@ -271,7 +250,7 @@ describe('Rate Limiting with Bottleneck Tests', () => {
271
250
  model.gpt51();
272
251
 
273
252
  nock('https://api.openai.com')
274
- .post('/v1/chat/completions')
253
+ .post('/v1/responses')
275
254
  .reply(429, {
276
255
  error: {
277
256
  message: 'Rate limit exceeded',
@@ -294,7 +273,7 @@ describe('Rate Limiting with Bottleneck Tests', () => {
294
273
 
295
274
  // First request fails
296
275
  nock('https://api.openai.com')
297
- .post('/v1/chat/completions')
276
+ .post('/v1/responses')
298
277
  .reply(function() {
299
278
  requestTimes.push(Date.now());
300
279
  return [500, { error: 'Server error' }];
@@ -302,17 +281,10 @@ describe('Rate Limiting with Bottleneck Tests', () => {
302
281
 
303
282
  // Second request succeeds
304
283
  nock('https://api.openai.com')
305
- .post('/v1/chat/completions')
284
+ .post('/v1/responses')
306
285
  .reply(function() {
307
286
  requestTimes.push(Date.now());
308
- return [200, {
309
- choices: [{
310
- message: {
311
- role: 'assistant',
312
- content: 'Success after error'
313
- }
314
- }]
315
- }];
287
+ return [200, testUtils.createMockResponse('openai-responses', 'Success after error')];
316
288
  });
317
289
 
318
290
  const start = Date.now();
@@ -352,18 +324,11 @@ describe('Rate Limiting with Bottleneck Tests', () => {
352
324
  let requestCount = 0;
353
325
 
354
326
  nock('https://api.openai.com')
355
- .post('/v1/chat/completions')
327
+ .post('/v1/responses')
356
328
  .times(5)
357
329
  .reply(function() {
358
330
  requestCount++;
359
- return [200, {
360
- choices: [{
361
- message: {
362
- role: 'assistant',
363
- content: `Response ${requestCount}`
364
- }
365
- }]
366
- }];
331
+ return [200, testUtils.createMockResponse('openai-responses', `Response ${requestCount}`)];
367
332
  });
368
333
 
369
334
  const startTime = Date.now();
@@ -400,19 +365,13 @@ describe('Rate Limiting with Bottleneck Tests', () => {
400
365
  const results = [];
401
366
 
402
367
  nock('https://api.openai.com')
403
- .post('/v1/chat/completions')
368
+ .post('/v1/responses')
404
369
  .times(3)
405
370
  .reply(function(uri, body) {
406
- const content = body.messages[0].content;
407
- results.push(content);
408
- return [200, {
409
- choices: [{
410
- message: {
411
- role: 'assistant',
412
- content: `Processed: ${content}`
413
- }
414
- }]
415
- }];
371
+ const lastInput = body.input[body.input.length - 1];
372
+ const text = lastInput?.content?.[0]?.text ?? '';
373
+ results.push(text);
374
+ return [200, testUtils.createMockResponse('openai-responses', `Processed: ${text}`)];
416
375
  });
417
376
 
418
377
  // Submit requests with different priorities
@@ -447,16 +406,9 @@ describe('Rate Limiting with Bottleneck Tests', () => {
447
406
  model.gpt51();
448
407
 
449
408
  nock('https://api.openai.com')
450
- .post('/v1/chat/completions')
409
+ .post('/v1/responses')
451
410
  .times(3)
452
- .reply(200, {
453
- choices: [{
454
- message: {
455
- role: 'assistant',
456
- content: 'Statistics tracking response'
457
- }
458
- }]
459
- });
411
+ .reply(200, testUtils.createMockResponse('openai-responses', 'Statistics tracking response'));
460
412
 
461
413
  // Make multiple requests
462
414
  await Promise.all([
@@ -496,15 +448,8 @@ describe('Rate Limiting with Bottleneck Tests', () => {
496
448
  model.gpt51();
497
449
 
498
450
  nock('https://api.openai.com')
499
- .post('/v1/chat/completions')
500
- .reply(200, {
501
- choices: [{
502
- message: {
503
- role: 'assistant',
504
- content: 'Event handling response'
505
- }
506
- }]
507
- });
451
+ .post('/v1/responses')
452
+ .reply(200, testUtils.createMockResponse('openai-responses', 'Event handling response'));
508
453
 
509
454
  // Make a request to trigger events
510
455
  model.addText('Event test').message();
@@ -25,7 +25,8 @@ describe('Image Processing and Multimodal Support Tests', () => {
25
25
  it('should handle base64 image data correctly', async () => {
26
26
  const base64Image = 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8z8BQz0AEYBxVSF+FABJADveWkH6oAAAAAElFTkSuQmCC';
27
27
 
28
- model.gpt52()
28
+ // Use gpt5mini (chat/completions) - gpt52 uses Responses API which has different image format
29
+ model.gpt5mini()
29
30
  .addText('What do you see in this image?')
30
31
  .addImageFromUrl(base64Image);
31
32
 
@@ -62,8 +63,6 @@ describe('Image Processing and Multimodal Support Tests', () => {
62
63
  nock('https://api.anthropic.com')
63
64
  .post('/v1/messages')
64
65
  .reply(function (uri, body) {
65
- console.log(body.messages);
66
- // body is already parsed as JSON by nock
67
66
  expect(body.messages).to.be.an('array');
68
67
  // Find the message with the image
69
68
  const userMsg = body.messages.find(m => m.role === 'user');
package/test/json.test.js CHANGED
@@ -357,23 +357,15 @@ describe('JSON Schema and Structured Output Tests', () => {
357
357
 
358
358
  model.gpt52().addText('List 3 countries');
359
359
 
360
- // Mock the API response
361
360
  nock('https://api.openai.com')
362
- .post('/v1/chat/completions')
363
- .reply(200, {
364
- choices: [{
365
- message: {
366
- role: 'assistant',
367
- content: JSON.stringify({
368
- countries: [
369
- { name: 'France', capital: 'Paris' },
370
- { name: 'Germany', capital: 'Berlin' },
371
- { name: 'Spain', capital: 'Madrid' }
372
- ]
373
- })
374
- }
375
- }]
376
- });
361
+ .post('/v1/responses')
362
+ .reply(200, testUtils.createMockResponse('openai-responses', JSON.stringify({
363
+ countries: [
364
+ { name: 'France', capital: 'Paris' },
365
+ { name: 'Germany', capital: 'Berlin' },
366
+ { name: 'Spain', capital: 'Madrid' }
367
+ ]
368
+ })));
377
369
 
378
370
  const result = await model.json(example);
379
371
 
@@ -429,17 +421,9 @@ describe('JSON Schema and Structured Output Tests', () => {
429
421
  it('should handle JSON parsing errors gracefully', async () => {
430
422
  model.gpt52().addText('Generate invalid JSON');
431
423
 
432
- // Mock invalid JSON response
433
424
  nock('https://api.openai.com')
434
- .post('/v1/chat/completions')
435
- .reply(200, {
436
- choices: [{
437
- message: {
438
- role: 'assistant',
439
- content: 'This is not valid JSON'
440
- }
441
- }]
442
- });
425
+ .post('/v1/responses')
426
+ .reply(200, testUtils.createMockResponse('openai-responses', 'This is not valid JSON'));
443
427
 
444
428
  try {
445
429
  await model.json({ name: 'test' });
@@ -453,21 +437,14 @@ describe('JSON Schema and Structured Output Tests', () => {
453
437
  model.gpt52().addText('List 3 countries');
454
438
 
455
439
  nock('https://api.openai.com')
456
- .post('/v1/chat/completions')
457
- .reply(200, {
458
- choices: [{
459
- message: {
460
- role: 'assistant',
461
- content: JSON.stringify({
462
- out: [
463
- { name: 'France' },
464
- { name: 'Germany' },
465
- { name: 'Spain' }
466
- ]
467
- })
468
- }
469
- }]
470
- });
440
+ .post('/v1/responses')
441
+ .reply(200, testUtils.createMockResponse('openai-responses', JSON.stringify({
442
+ out: [
443
+ { name: 'France' },
444
+ { name: 'Germany' },
445
+ { name: 'Spain' }
446
+ ]
447
+ })));
471
448
 
472
449
  const result = await model.json([{ name: 'France' }]);
473
450
 
package/test/setup.js CHANGED
@@ -104,6 +104,18 @@ global.testUtils = {
104
104
  }
105
105
  }]
106
106
  };
107
+ case 'openai-responses':
108
+ return {
109
+ output: [{
110
+ type: 'message',
111
+ content: [{ type: 'output_text', text: content }]
112
+ }],
113
+ usage: {
114
+ input_tokens: 10,
115
+ output_tokens: 5,
116
+ total_tokens: 15
117
+ }
118
+ };
107
119
 
108
120
  case 'anthropic':
109
121
  return {
@@ -36,19 +36,11 @@ describe('Template and File Operations Tests', () => {
36
36
  .addText('Hello {{name}}, you are {{age}} years old and live in {{city}}.');
37
37
 
38
38
  nock('https://api.openai.com')
39
- .post('/v1/chat/completions')
39
+ .post('/v1/responses')
40
40
  .reply(function (uri, body) {
41
-
42
- expect(body.messages[1].content[0].text).to.equal('Hello Alice, you are 30 years old and live in New York.');
43
-
44
- return [200, {
45
- choices: [{
46
- message: {
47
- role: 'assistant',
48
- content: 'Template processed successfully'
49
- }
50
- }]
51
- }];
41
+ const userMsg = body.input.find(m => m.role === 'user');
42
+ expect(userMsg.content[0].text).to.equal('Hello Alice, you are 30 years old and live in New York.');
43
+ return [200, testUtils.createMockResponse('openai-responses', 'Template processed successfully')];
52
44
  });
53
45
 
54
46
  const response = await model.message();
@@ -63,18 +55,11 @@ describe('Template and File Operations Tests', () => {
63
55
  .addText('{{greeting}} {{name}}, {{action}} to our platform!');
64
56
 
65
57
  nock('https://api.openai.com')
66
- .post('/v1/chat/completions')
58
+ .post('/v1/responses')
67
59
  .reply(function (uri, body) {
68
- expect(body.messages[1].content[0].text).to.equal('Hello Bob, welcome to our platform!');
69
-
70
- return [200, {
71
- choices: [{
72
- message: {
73
- role: 'assistant',
74
- content: 'Multiple templates replaced'
75
- }
76
- }]
77
- }];
60
+ const userMsg = body.input.find(m => m.role === 'user');
61
+ expect(userMsg.content[0].text).to.equal('Hello Bob, welcome to our platform!');
62
+ return [200, testUtils.createMockResponse('openai-responses', 'Multiple templates replaced')];
78
63
  });
79
64
 
80
65
  const response = await model.message();
@@ -92,18 +77,11 @@ describe('Template and File Operations Tests', () => {
92
77
  .addText('User {{user_name}} with role {{user_role}} works at {{company_name}} ({{company_domain}})');
93
78
 
94
79
  nock('https://api.openai.com')
95
- .post('/v1/chat/completions')
80
+ .post('/v1/responses')
96
81
  .reply(function (uri, body) {
97
- expect(body.messages[1].content[0].text).to.equal('User Charlie with role admin works at TechCorp (techcorp.com)');
98
-
99
- return [200, {
100
- choices: [{
101
- message: {
102
- role: 'assistant',
103
- content: 'Nested templates working'
104
- }
105
- }]
106
- }];
82
+ const userMsg = body.input.find(m => m.role === 'user');
83
+ expect(userMsg.content[0].text).to.equal('User Charlie with role admin works at TechCorp (techcorp.com)');
84
+ return [200, testUtils.createMockResponse('openai-responses', 'Nested templates working')];
107
85
  });
108
86
 
109
87
  const response = await model.message();
@@ -116,18 +94,11 @@ describe('Template and File Operations Tests', () => {
116
94
  .addText('Hello {{name}}, your ID is {{user_id}} and status is {{status}}');
117
95
 
118
96
  nock('https://api.openai.com')
119
- .post('/v1/chat/completions')
97
+ .post('/v1/responses')
120
98
  .reply(function (uri, body) {
121
- expect(body.messages[1].content[0].text).to.equal('Hello David, your ID is {{user_id}} and status is {{status}}');
122
-
123
- return [200, {
124
- choices: [{
125
- message: {
126
- role: 'assistant',
127
- content: 'Partial template replacement'
128
- }
129
- }]
130
- }];
99
+ const userMsg = body.input.find(m => m.role === 'user');
100
+ expect(userMsg.content[0].text).to.equal('Hello David, your ID is {{user_id}} and status is {{status}}');
101
+ return [200, testUtils.createMockResponse('openai-responses', 'Partial template replacement')];
131
102
  });
132
103
 
133
104
  const response = await model.message();
@@ -145,18 +116,11 @@ describe('Template and File Operations Tests', () => {
145
116
  .addText('Empty: {{empty}}, Special: {{special}}, Number: {{number}}, Boolean: {{boolean}}');
146
117
 
147
118
  nock('https://api.openai.com')
148
- .post('/v1/chat/completions')
119
+ .post('/v1/responses')
149
120
  .reply(function (uri, body) {
150
- expect(body.messages[1].content[0].text).to.equal('Empty: , Special: Hello & "World" <test>, Number: 42, Boolean: true');
151
-
152
- return [200, {
153
- choices: [{
154
- message: {
155
- role: 'assistant',
156
- content: 'Special characters handled'
157
- }
158
- }]
159
- }];
121
+ const userMsg = body.input.find(m => m.role === 'user');
122
+ expect(userMsg.content[0].text).to.equal('Empty: , Special: Hello & "World" <test>, Number: 42, Boolean: true');
123
+ return [200, testUtils.createMockResponse('openai-responses', 'Special characters handled')];
160
124
  });
161
125
 
162
126
  const response = await model.message();
@@ -189,24 +153,16 @@ describe('Template and File Operations Tests', () => {
189
153
  .addText('Process this template: {{template}}');
190
154
 
191
155
  nock('https://api.openai.com')
192
- .post('/v1/chat/completions')
156
+ .post('/v1/responses')
193
157
  .reply(function (uri, body) {
194
- const content = body.messages[1].content[0].text;
195
-
158
+ const userMsg = body.input.find(m => m.role === 'user');
159
+ const content = userMsg.content[0].text;
196
160
  expect(content).to.include('Hello Eve, welcome to ModelMix!');
197
161
  expect(content).to.include('Username: eve_user');
198
162
  expect(content).to.include('Role: developer');
199
163
  expect(content).to.include('Created: 2023-12-01');
200
164
  expect(content).to.include('The AI Solutions Team');
201
-
202
- return [200, {
203
- choices: [{
204
- message: {
205
- role: 'assistant',
206
- content: 'Template file processed'
207
- }
208
- }]
209
- }];
165
+ return [200, testUtils.createMockResponse('openai-responses', 'Template file processed')];
210
166
  });
211
167
 
212
168
  const response = await model.message();
@@ -219,10 +175,10 @@ describe('Template and File Operations Tests', () => {
219
175
  .addText('Process this data: {{data}}');
220
176
 
221
177
  nock('https://api.openai.com')
222
- .post('/v1/chat/completions')
178
+ .post('/v1/responses')
223
179
  .reply(function (uri, body) {
224
- const content = body.messages[1].content[0].text;
225
-
180
+ const userMsg = body.input.find(m => m.role === 'user');
181
+ const content = userMsg.content[0].text;
226
182
  expect(content).to.include('Alice Smith');
227
183
  expect(content).to.include('alice@example.com');
228
184
  expect(content).to.include('admin');
@@ -230,15 +186,7 @@ describe('Template and File Operations Tests', () => {
230
186
  expect(content).to.include('Carol Davis');
231
187
  expect(content).to.include('"theme": "dark"');
232
188
  expect(content).to.include('"version": "1.0.0"');
233
-
234
- return [200, {
235
- choices: [{
236
- message: {
237
- role: 'assistant',
238
- content: 'JSON data processed'
239
- }
240
- }]
241
- }];
189
+ return [200, testUtils.createMockResponse('openai-responses', 'JSON data processed')];
242
190
  });
243
191
 
244
192
  const response = await model.message();
@@ -251,19 +199,11 @@ describe('Template and File Operations Tests', () => {
251
199
  .addText('This should contain: {{missing}}');
252
200
 
253
201
  nock('https://api.openai.com')
254
- .post('/v1/chat/completions')
202
+ .post('/v1/responses')
255
203
  .reply(function (uri, body) {
256
- // The template should remain unreplaced if file doesn't exist
257
- expect(body.messages[1].content[0].text).to.equal('This should contain: {{missing}}');
258
-
259
- return [200, {
260
- choices: [{
261
- message: {
262
- role: 'assistant',
263
- content: 'File not found handled'
264
- }
265
- }]
266
- }];
204
+ const userMsg = body.input.find(m => m.role === 'user');
205
+ expect(userMsg.content[0].text).to.equal('This should contain: {{missing}}');
206
+ return [200, testUtils.createMockResponse('openai-responses', 'File not found handled')];
267
207
  });
268
208
 
269
209
  const response = await model.message();
@@ -286,26 +226,15 @@ describe('Template and File Operations Tests', () => {
286
226
  .addText('Template: {{template}}\n\nData: {{data}}');
287
227
 
288
228
  nock('https://api.openai.com')
289
- .post('/v1/chat/completions')
229
+ .post('/v1/responses')
290
230
  .reply(function (uri, body) {
291
- const content = body.messages[1].content[0].text;
292
-
293
- // Should contain processed template
231
+ const userMsg = body.input.find(m => m.role === 'user');
232
+ const content = userMsg.content[0].text;
294
233
  expect(content).to.include('Hello Frank, welcome to TestPlatform!');
295
234
  expect(content).to.include('Username: frank_test');
296
-
297
- // Should contain JSON data
298
235
  expect(content).to.include('Alice Smith');
299
236
  expect(content).to.include('"theme": "dark"');
300
-
301
- return [200, {
302
- choices: [{
303
- message: {
304
- role: 'assistant',
305
- content: 'Multiple files processed'
306
- }
307
- }]
308
- }];
237
+ return [200, testUtils.createMockResponse('openai-responses', 'Multiple files processed')];
309
238
  });
310
239
 
311
240
  const response = await model.message();
@@ -329,21 +258,13 @@ describe('Template and File Operations Tests', () => {
329
258
  .addText('Absolute path content: {{absolute}}');
330
259
 
331
260
  nock('https://api.openai.com')
332
- .post('/v1/chat/completions')
261
+ .post('/v1/responses')
333
262
  .reply(function (uri, body) {
334
- const content = body.messages[1].content[0].text;
335
-
263
+ const userMsg = body.input.find(m => m.role === 'user');
264
+ const content = userMsg.content[0].text;
336
265
  expect(content).to.include('Hello Grace, welcome to AbsolutePath!');
337
266
  expect(content).to.include('The Absolute Corp Team');
338
-
339
- return [200, {
340
- choices: [{
341
- message: {
342
- role: 'assistant',
343
- content: 'Absolute path works'
344
- }
345
- }]
346
- }];
267
+ return [200, testUtils.createMockResponse('openai-responses', 'Absolute path works')];
347
268
  });
348
269
 
349
270
  const response = await model.message();
@@ -372,22 +293,14 @@ describe('Template and File Operations Tests', () => {
372
293
  .addText('Please {{action}} the following {{target}} and generate a {{format}}:\n\n{{user_data}}');
373
294
 
374
295
  nock('https://api.openai.com')
375
- .post('/v1/chat/completions')
296
+ .post('/v1/responses')
376
297
  .reply(function (uri, body) {
377
- const content = body.messages[1].content[0].text;
378
-
298
+ const userMsg = body.input.find(m => m.role === 'user');
299
+ const content = userMsg.content[0].text;
379
300
  expect(content).to.include('Please analyze the following user behavior patterns and generate a detailed report:');
380
301
  expect(content).to.include('Alice Smith');
381
302
  expect(content).to.include('total_users');
382
-
383
- return [200, {
384
- choices: [{
385
- message: {
386
- role: 'assistant',
387
- content: 'Complex template integration successful'
388
- }
389
- }]
390
- }];
303
+ return [200, testUtils.createMockResponse('openai-responses', 'Complex template integration successful')];
391
304
  });
392
305
 
393
306
  const response = await model.message();
@@ -408,23 +321,22 @@ describe('Template and File Operations Tests', () => {
408
321
  .addText('{{instruction}} from this data: {{data}}');
409
322
 
410
323
  nock('https://api.openai.com')
411
- .post('/v1/chat/completions')
324
+ .post('/v1/responses')
412
325
  .reply(function (uri, body) {
413
- expect(body.messages[1].content[0].text).to.include('Count active users by role');
414
- expect(body.messages[1].content[0].text).to.include('Alice Smith');
415
-
326
+ const userMsg = body.input.find(m => m.role === 'user');
327
+ expect(userMsg.content[0].text).to.include('Count active users by role');
328
+ expect(userMsg.content[0].text).to.include('Alice Smith');
416
329
  return [200, {
417
- choices: [{
418
- message: {
419
- role: 'assistant',
420
- content: JSON.stringify({
421
- summary: 'User analysis completed',
422
- user_count: 3,
423
- active_users: 2,
424
- roles: ['admin', 'user', 'moderator']
425
- })
426
- }
427
- }]
330
+ output: [{
331
+ type: 'message',
332
+ content: [{ type: 'output_text', text: JSON.stringify({
333
+ summary: 'User analysis completed',
334
+ user_count: 3,
335
+ active_users: 2,
336
+ roles: ['admin', 'user', 'moderator']
337
+ }) }]
338
+ }],
339
+ usage: { input_tokens: 10, output_tokens: 5, total_tokens: 15 }
428
340
  }];
429
341
  });
430
342
 
@@ -461,15 +373,8 @@ describe('Template and File Operations Tests', () => {
461
373
  .addText('Content: {{bad_file}}');
462
374
 
463
375
  nock('https://api.openai.com')
464
- .post('/v1/chat/completions')
465
- .reply(200, {
466
- choices: [{
467
- message: {
468
- role: 'assistant',
469
- content: 'Error handled gracefully'
470
- }
471
- }]
472
- });
376
+ .post('/v1/responses')
377
+ .reply(200, testUtils.createMockResponse('openai-responses', 'Error handled gracefully'));
473
378
 
474
379
  const response = await model.message();
475
380
  expect(response).to.include('Error handled gracefully');
@@ -1,5 +1,5 @@
1
1
  import { expect } from 'chai';
2
- import { ModelMix } from '../index.js';
2
+ import { ModelMix, MixAnthropic, MixCustom, MixGoogle, MixOpenAIResponses } from '../index.js';
3
3
  import { createRequire } from 'module';
4
4
 
5
5
  const require = createRequire(import.meta.url);
@@ -18,6 +18,64 @@ describe('Token Usage Tracking', () => {
18
18
  nock.activate();
19
19
  });
20
20
 
21
+ it('should extract cached tokens from supported provider usage formats', function () {
22
+ const openAIChatTokens = MixCustom.extractTokens({
23
+ usage: {
24
+ prompt_tokens: 120,
25
+ completion_tokens: 30,
26
+ total_tokens: 150,
27
+ prompt_tokens_details: {
28
+ cached_tokens: 80
29
+ }
30
+ }
31
+ });
32
+ const openAIResponsesTokens = MixOpenAIResponses.extractResponsesTokens({
33
+ usage: {
34
+ input_tokens: 90,
35
+ output_tokens: 20,
36
+ total_tokens: 110,
37
+ input_tokens_details: {
38
+ cached_tokens: 45
39
+ }
40
+ }
41
+ });
42
+ const anthropicTokens = MixAnthropic.extractTokens({
43
+ usage: {
44
+ input_tokens: 60,
45
+ output_tokens: 15,
46
+ cache_read_input_tokens: 25
47
+ }
48
+ });
49
+ const googleTokens = MixGoogle.extractTokens({
50
+ usageMetadata: {
51
+ promptTokenCount: 70,
52
+ candidatesTokenCount: 10,
53
+ totalTokenCount: 80,
54
+ cachedContentTokenCount: 35
55
+ }
56
+ });
57
+
58
+ expect(openAIChatTokens.cached).to.equal(80);
59
+ expect(openAIResponsesTokens.cached).to.equal(45);
60
+ expect(anthropicTokens.cached).to.equal(25);
61
+ expect(googleTokens.cached).to.equal(35);
62
+ });
63
+
64
+ it('should pass OpenAI Responses prompt cache options through the request body', function () {
65
+ const request = MixOpenAIResponses.buildResponsesRequest({
66
+ model: 'gpt-5.4',
67
+ messages: [{
68
+ role: 'user',
69
+ content: [{ type: 'text', text: 'Explain caching briefly.' }]
70
+ }],
71
+ prompt_cache_key: 'demo-gpt54-cache',
72
+ prompt_cache_retention: '24h'
73
+ });
74
+
75
+ expect(request.prompt_cache_key).to.equal('demo-gpt54-cache');
76
+ expect(request.prompt_cache_retention).to.equal('24h');
77
+ });
78
+
21
79
  it('should track tokens in OpenAI response', async function () {
22
80
  this.timeout(30000);
23
81
 
@@ -31,10 +89,12 @@ describe('Token Usage Tracking', () => {
31
89
  expect(result.tokens).to.have.property('input');
32
90
  expect(result.tokens).to.have.property('output');
33
91
  expect(result.tokens).to.have.property('total');
92
+ expect(result.tokens).to.have.property('cached');
34
93
 
35
94
  expect(result.tokens.input).to.be.a('number');
36
95
  expect(result.tokens.output).to.be.a('number');
37
96
  expect(result.tokens.total).to.be.a('number');
97
+ expect(result.tokens.cached).to.be.a('number');
38
98
 
39
99
  expect(result.tokens.input).to.be.greaterThan(0);
40
100
  expect(result.tokens.output).to.be.greaterThan(0);
@@ -54,6 +114,7 @@ describe('Token Usage Tracking', () => {
54
114
  expect(result.tokens).to.have.property('input');
55
115
  expect(result.tokens).to.have.property('output');
56
116
  expect(result.tokens).to.have.property('total');
117
+ expect(result.tokens).to.have.property('cached');
57
118
 
58
119
  expect(result.tokens.input).to.be.greaterThan(0);
59
120
  expect(result.tokens.output).to.be.greaterThan(0);
@@ -73,6 +134,7 @@ describe('Token Usage Tracking', () => {
73
134
  expect(result.tokens).to.have.property('input');
74
135
  expect(result.tokens).to.have.property('output');
75
136
  expect(result.tokens).to.have.property('total');
137
+ expect(result.tokens).to.have.property('cached');
76
138
 
77
139
  expect(result.tokens.input).to.be.greaterThan(0);
78
140
  expect(result.tokens.output).to.be.greaterThan(0);
@@ -140,6 +202,7 @@ describe('Token Usage Tracking', () => {
140
202
  expect(result.tokens.input, `${provider.name} should have input`).to.be.a('number');
141
203
  expect(result.tokens.output, `${provider.name} should have output`).to.be.a('number');
142
204
  expect(result.tokens.total, `${provider.name} should have total`).to.be.a('number');
205
+ expect(result.tokens.cached, `${provider.name} should have cached`).to.be.a('number');
143
206
 
144
207
  // Verify values are positive
145
208
  expect(result.tokens.input, `${provider.name} input should be > 0`).to.be.greaterThan(0);