modelmix 4.4.14 → 4.4.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -135,9 +135,10 @@ Here's a comprehensive list of available methods:
135
135
 
136
136
  | Method | Provider | Model | Price (I/O) per 1 M tokens |
137
137
  | ------------------ | ---------- | ------------------------------ | -------------------------- |
138
+ | `gpt54()` | OpenAI | gpt-5.4 | [\$2.50 / \$15.00][1] |
139
+ | `gpt53codex()` | OpenAI | gpt-5.3-codex | [\$1.75 / \$14.00][1] |
138
140
  | `gpt52()` | OpenAI | gpt-5.2 | [\$1.75 / \$14.00][1] |
139
141
  | `gpt51()` | OpenAI | gpt-5.1 | [\$1.25 / \$10.00][1] |
140
- | `gpt5()` | OpenAI | gpt-5 | [\$1.25 / \$10.00][1] |
141
142
  | `gpt5mini()` | OpenAI | gpt-5-mini | [\$0.25 / \$2.00][1] |
142
143
  | `gpt5nano()` | OpenAI | gpt-5-nano | [\$0.05 / \$0.40][1] |
143
144
  | `gpt41()` | OpenAI | gpt-4.1 | [\$2.00 / \$8.00][1] |
@@ -405,6 +406,7 @@ Every response from `raw()` now includes a `tokens` object with the following st
405
406
  input: 150, // Number of tokens in the prompt/input
406
407
  output: 75, // Number of tokens in the completion/output
407
408
  total: 225, // Total tokens used (input + output)
409
+ cached: 100, // Cached input tokens reported by the provider (0 when absent)
408
410
  cost: 0.0012, // Estimated cost in USD (null if model not in pricing table)
409
411
  speed: 42 // Output tokens per second (int)
410
412
  }
@@ -418,10 +420,10 @@ After calling `message()` or `json()`, use `lastRaw` to access the complete resp
418
420
  ```javascript
419
421
  const text = await model.message();
420
422
  console.log(model.lastRaw.tokens);
421
- // { input: 122, output: 86, total: 541, cost: 0.000319, speed: 38 }
423
+ // { input: 122, output: 86, total: 208, cached: 41, cost: 0.000319, speed: 38 }
422
424
  ```
423
425
 
424
- The `cost` field is the estimated cost in USD based on the model's pricing per 1M tokens (input/output). If the model is not found in the pricing table, `cost` will be `null`. The `speed` field is the generation speed measured in output tokens per second (integer).
426
+ The `cached` field is a single aggregated count of cached input tokens reported by the provider. The `cost` field is the estimated cost in USD based on the model's pricing per 1M tokens (input/output). If the model is not found in the pricing table, `cost` will be `null`. The `speed` field is the generation speed measured in output tokens per second (integer).
425
427
 
426
428
  ## 🐛 Enabling Debug Mode
427
429
 
@@ -515,7 +517,7 @@ new ModelMix(args = { options: {}, config: {} })
515
517
  - `message`: The text response from the model
516
518
  - `think`: Reasoning/thinking content (if available)
517
519
  - `toolCalls`: Array of tool calls made by the model (if any)
518
- - `tokens`: Object with `input`, `output`, `total` token counts, `cost` (USD), and `speed` (output tokens/sec)
520
+ - `tokens`: Object with `input`, `output`, `total`, and `cached` token counts, plus `cost` (USD) and `speed` (output tokens/sec)
519
521
  - `response`: The raw API response
520
522
  - `stream(callback)`: Sends the message and streams the response, invoking the callback with each streamed part.
521
523
  - `json(schemaExample, descriptions = {}, options = {})`: Forces the model to return a response in a specific JSON format.
package/demo/cache.js ADDED
@@ -0,0 +1,52 @@
1
+ import { ModelMix } from '../index.js';
2
+ try { process.loadEnvFile(); } catch {}
3
+
4
+ console.log("\n" + '--------| gpt54() prompt cache |--------');
5
+
6
+ // Keep the reusable prefix first and only vary the question at the end.
7
+ const sharedPrefix = [
8
+ "You are a concise science tutor.",
9
+ "The repeated block below is intentionally long so OpenAI can reuse cached prompt tokens on the second request.",
10
+ Array.from({ length: 80 }, (_, index) =>
11
+ `Reference ${String(index + 1).padStart(3, '0')}: Quantum systems are described with probabilities, measurements collapse possibilities into outcomes, and explanations must stay concrete, brief, and easy to understand.`
12
+ ).join("\n")
13
+ ].join("\n\n");
14
+
15
+ const buildPrompt = (question) => `${sharedPrefix}\n\nQuestion: ${question}`;
16
+
17
+ const createModel = () => ModelMix.new({
18
+ config: {
19
+ debug: 3,
20
+ }
21
+ }).gpt54({
22
+ options: {
23
+ reasoning_effort: "none",
24
+ verbosity: "low",
25
+ prompt_cache_key: "demo-gpt54-prompt-cache",
26
+ prompt_cache_retention: "24h"
27
+ }
28
+ });
29
+
30
+ const runRequest = async (label, question) => {
31
+ const model = createModel();
32
+ model.addText(buildPrompt(question));
33
+
34
+ const result = await model.raw();
35
+
36
+ console.log(`\n${label}`);
37
+ console.log("message:", result.message);
38
+ console.log("tokens:", result.tokens);
39
+
40
+ return result;
41
+ };
42
+
43
+ await runRequest(
44
+ "Request 1 (warms the cache)",
45
+ "Explain quantum entanglement in simple Spanish in 3 short bullet points."
46
+ );
47
+
48
+ await runRequest(
49
+ "Request 2 (reuses the cached prefix)",
50
+ "Now explain quantum entanglement in simple Spanish with a different analogy and 3 short bullet points."
51
+ );
52
+
package/demo/gemini.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import { ModelMix, MixGoogle } from '../index.js';
2
- try { process.loadEnvFile(); } catch {}
2
+ try { process.loadEnvFile(); } catch { }
3
3
 
4
4
  const mmix = new ModelMix({
5
5
  options: {
@@ -12,9 +12,9 @@ const mmix = new ModelMix({
12
12
  }
13
13
  });
14
14
 
15
- // Using gemini25flash (Gemini 2.5 Flash) with built-in method
15
+ // Using gemini3flash (Gemini 3 Flash) with built-in method
16
16
  console.log("\n" + '--------| gemini25flash() |--------');
17
- const flash = await mmix.gemini25flash()
17
+ const flash = await mmix.gemini3flash()
18
18
  .addText('Hi there! Do you like cats?')
19
19
  .message();
20
20
 
@@ -22,20 +22,23 @@ console.log(flash);
22
22
 
23
23
  // Using gemini3pro (Gemini 3 Pro) with custom config
24
24
  console.log("\n" + '--------| gemini3pro() with JSON response |--------');
25
- const pro = mmix.new().gemini3pro();
25
+ const pro = mmix.new().gemini31pro();
26
26
 
27
27
  pro.addText('Give me a fun fact about cats');
28
- const jsonResponse = await pro.json({
28
+
29
+ const jsonExampleAndSchema = {
29
30
  fact: 'A fun fact about cats',
30
- category: 'animal behavior'
31
- });
31
+ category: 'animal behavior'
32
+ };
33
+
34
+ const jsonResponse = await pro.json(jsonExampleAndSchema, jsonExampleAndSchema);
32
35
 
33
36
  console.log(jsonResponse);
34
37
 
35
38
  // Using attach method with MixGoogle for custom model
36
39
  console.log("\n" + '--------| Custom Gemini with attach() |--------');
37
- mmix.attach('gemini-2.5-flash', new MixGoogle());
40
+ const customModel = mmix.new().attach('gemini-2.5-flash', new MixGoogle());
38
41
 
39
- const custom = await mmix.addText('Tell me a short joke about cats.').message();
42
+ const custom = await customModel.addText('Tell me a short joke about cats.').message();
40
43
  console.log(custom);
41
44
 
@@ -0,0 +1,22 @@
1
+ import { ModelMix } from '../index.js';
2
+ try { process.loadEnvFile(); } catch {}
3
+
4
+ const mmix = new ModelMix({
5
+ config: {
6
+ debug: 3
7
+ }
8
+ });
9
+
10
+ console.log('\n--------| gptRealtime() |--------');
11
+
12
+ const realtime = mmix.gptRealtimeMini({
13
+ options: {
14
+ stream: true
15
+ }
16
+ });
17
+
18
+ realtime.addText('Explain quantum entanglement in simple terms.');
19
+ const response = await realtime.stream(({ delta }) => {
20
+ process.stdout.write(delta || '');
21
+ });
22
+ console.log('\n\n[done]\n', response.tokens);
@@ -8,10 +8,10 @@ const mmix = new ModelMix({
8
8
  }
9
9
  });
10
10
 
11
- console.log("\n" + '--------| gpt51() |--------');
11
+ console.log("\n" + '--------| gpt54() |--------');
12
12
 
13
13
  const gptArgs = { options: { reasoning_effort: "none", verbosity: "low" } };
14
- const gpt = mmix.gpt51(gptArgs);
14
+ const gpt = mmix.gpt54(gptArgs);
15
15
 
16
16
  gpt.addText("Explain quantum entanglement in simple terms.");
17
17
  const response = await gpt.message();
package/index.js CHANGED
@@ -5,6 +5,7 @@ const { inspect } = require('util');
5
5
  const log = require('lemonlog')('ModelMix');
6
6
  const Bottleneck = require('bottleneck');
7
7
  const path = require('path');
8
+ const WebSocket = require('ws');
8
9
  const generateJsonSchema = require('./schema');
9
10
  const { Client } = require("@modelcontextprotocol/sdk/client/index.js");
10
11
  const { StdioClientTransport } = require("@modelcontextprotocol/sdk/client/stdio.js");
@@ -14,6 +15,11 @@ const { MCPToolsManager } = require('./mcp-tools');
14
15
  // Based on provider pricing pages linked in README
15
16
  const MODEL_PRICING = {
16
17
  // OpenAI
18
+ 'gpt-realtime-mini': [0.60, 2.40],
19
+ 'gpt-realtime': [4.00, 16.00],
20
+ 'gpt-5.4': [2.50, 15.00],
21
+ 'gpt-5.4-pro': [30.00, 180.00],
22
+ 'gpt-5.3-codex': [1.75, 14.00],
17
23
  'gpt-5.2': [1.75, 14.00],
18
24
  'gpt-5.2-chat-latest': [1.75, 14.00],
19
25
  'gpt-5.1': [1.25, 10.00],
@@ -175,6 +181,15 @@ class ModelMix {
175
181
  return (tokens.input * inputPerMillion / 1_000_000) + (tokens.output * outputPerMillion / 1_000_000);
176
182
  }
177
183
 
184
+ static extractCacheTokens(usage = {}) {
185
+ return usage.input_tokens_details?.cached_tokens
186
+ || usage.prompt_tokens_details?.cached_tokens
187
+ || usage.cache_read_input_tokens
188
+ || usage.cachedContentTokenCount
189
+ || usage.cached_content_token_count
190
+ || 0;
191
+ }
192
+
178
193
  static formatInputSummary(messages, system, debug = 2) {
179
194
  const lastMessage = messages[messages.length - 1];
180
195
  let inputText = '';
@@ -244,15 +259,6 @@ class ModelMix {
244
259
  gpt41nano({ options = {}, config = {} } = {}) {
245
260
  return this.attach('gpt-4.1-nano', new MixOpenAI({ options, config }));
246
261
  }
247
- o4mini({ options = {}, config = {} } = {}) {
248
- return this.attach('o4-mini', new MixOpenAI({ options, config }));
249
- }
250
- o3({ options = {}, config = {} } = {}) {
251
- return this.attach('o3', new MixOpenAI({ options, config }));
252
- }
253
- gpt45({ options = {}, config = {} } = {}) {
254
- return this.attach('gpt-4.5-preview', new MixOpenAI({ options, config }));
255
- }
256
262
  gpt5({ options = {}, config = {} } = {}) {
257
263
  return this.attach('gpt-5', new MixOpenAI({ options, config }));
258
264
  }
@@ -263,13 +269,28 @@ class ModelMix {
263
269
  return this.attach('gpt-5-nano', new MixOpenAI({ options, config }));
264
270
  }
265
271
  gpt51({ options = {}, config = {} } = {}) {
266
- return this.attach('gpt-5.1', new MixOpenAI({ options, config }));
272
+ return this.attach('gpt-5.1', new MixOpenAIResponses({ options, config }));
267
273
  }
268
274
  gpt52({ options = {}, config = {} } = {}) {
269
- return this.attach('gpt-5.2', new MixOpenAI({ options, config }));
275
+ return this.attach('gpt-5.2', new MixOpenAIResponses({ options, config }));
270
276
  }
271
- gpt52chat({ options = {}, config = {} } = {}) {
272
- return this.attach('gpt-5.2-chat-latest', new MixOpenAI({ options, config }));
277
+ gpt54({ options = {}, config = {} } = {}) {
278
+ return this.attach('gpt-5.4', new MixOpenAIResponses({ options, config }));
279
+ }
280
+ gpt54pro({ options = {}, config = {} } = {}) {
281
+ return this.attach('gpt-5.4-pro', new MixOpenAIResponses({ options, config }));
282
+ }
283
+ gptRealtime({ options = {}, config = {} } = {}) {
284
+ return this.attach('gpt-realtime', new MixOpenAIWebSocket({ options, config }));
285
+ }
286
+ gptRealtimeMini({ options = {}, config = {} } = {}) {
287
+ return this.attach('gpt-realtime-mini', new MixOpenAIWebSocket({ options, config }));
288
+ }
289
+ gpt53codex({ options = {}, config = {} } = {}) {
290
+ return this.attach('gpt-5.3-codex', new MixOpenAIResponses({ options, config }));
291
+ }
292
+ gpt53chat({ options = {}, config = {} } = {}) {
293
+ return this.attach('gpt-5.3-chat-latest', new MixOpenAIResponses({ options, config }));
273
294
  }
274
295
  gptOss({ options = {}, config = {}, mix = {} } = {}) {
275
296
  mix = { ...this.mix, ...mix };
@@ -942,7 +963,10 @@ class ModelMix {
942
963
  // debug level 2: Readable summary of output
943
964
  if (currentConfig.debug >= 2) {
944
965
  const tokenInfo = result.tokens
945
- ? ` ${result.tokens.input} → ${result.tokens.output} tok` + (result.tokens.speed ? ` ${result.tokens.speed} t/s` : '') + (result.tokens.cost != null ? ` $${result.tokens.cost.toFixed(4)}` : '')
966
+ ? ` ${result.tokens.input} → ${result.tokens.output} tok`
967
+ + (result.tokens.cached ? ` (cached:${result.tokens.cached})` : '')
968
+ + (result.tokens.speed ? ` | ${result.tokens.speed} t/s` : '')
969
+ + (result.tokens.cost != null ? ` $${result.tokens.cost.toFixed(4)}` : '')
946
970
  : '';
947
971
  console.log(`✓${tokenInfo}\n${ModelMix.formatOutputSummary(result, currentConfig.debug).trim()}`);
948
972
  }
@@ -1306,7 +1330,7 @@ class MixCustom {
1306
1330
  message: message.trim(),
1307
1331
  toolCalls: [],
1308
1332
  think: null,
1309
- tokens: raw.length > 0 ? MixCustom.extractTokens(raw[raw.length - 1]) : { input: 0, output: 0, total: 0 }
1333
+ tokens: raw.length > 0 ? MixCustom.extractTokens(raw[raw.length - 1]) : { input: 0, output: 0, total: 0, cached: 0 }
1310
1334
  }));
1311
1335
  response.data.on('error', reject);
1312
1336
  });
@@ -1358,13 +1382,15 @@ class MixCustom {
1358
1382
  return {
1359
1383
  input: data.usage.prompt_tokens || 0,
1360
1384
  output: data.usage.completion_tokens || 0,
1361
- total: data.usage.total_tokens || 0
1385
+ total: data.usage.total_tokens || 0,
1386
+ cached: ModelMix.extractCacheTokens(data.usage)
1362
1387
  };
1363
1388
  }
1364
1389
  return {
1365
1390
  input: 0,
1366
1391
  output: 0,
1367
- total: 0
1392
+ total: 0,
1393
+ cached: 0
1368
1394
  };
1369
1395
  }
1370
1396
 
@@ -1499,6 +1525,343 @@ class MixOpenAI extends MixCustom {
1499
1525
  }
1500
1526
  }
1501
1527
 
1528
+ class MixOpenAIResponses extends MixOpenAI {
1529
+ async create({ config = {}, options = {} } = {}) {
1530
+
1531
+ // Keep GPT/o-model option normalization behavior
1532
+ if (options.model?.startsWith('o')) {
1533
+ delete options.max_tokens;
1534
+ delete options.temperature;
1535
+ }
1536
+ if (options.model?.includes('gpt-5')) {
1537
+ if (options.max_tokens) {
1538
+ options.max_completion_tokens = options.max_tokens;
1539
+ delete options.max_tokens;
1540
+ }
1541
+ delete options.temperature;
1542
+ }
1543
+
1544
+ const responsesUrl = this.config.url.replace('/chat/completions', '/responses');
1545
+ const request = MixOpenAIResponses.buildResponsesRequest(options);
1546
+ const response = await axios.post(responsesUrl, request, {
1547
+ headers: this.headers
1548
+ });
1549
+
1550
+ return MixOpenAIResponses.processResponsesResponse(response);
1551
+ }
1552
+
1553
+ static buildResponsesRequest(options = {}) {
1554
+ const request = {
1555
+ model: options.model,
1556
+ input: MixOpenAIResponses.messagesToResponsesInput(options.messages),
1557
+ stream: false
1558
+ };
1559
+
1560
+ if (options.reasoning_effort) request.reasoning = { effort: options.reasoning_effort };
1561
+ if (options.verbosity) request.text = { verbosity: options.verbosity };
1562
+
1563
+ if (typeof options.max_completion_tokens === 'number') {
1564
+ request.max_output_tokens = options.max_completion_tokens;
1565
+ } else if (typeof options.max_tokens === 'number') {
1566
+ request.max_output_tokens = options.max_tokens;
1567
+ }
1568
+
1569
+ if (typeof options.temperature === 'number') request.temperature = options.temperature;
1570
+ if (typeof options.top_p === 'number') request.top_p = options.top_p;
1571
+ if (typeof options.presence_penalty === 'number') request.presence_penalty = options.presence_penalty;
1572
+ if (typeof options.frequency_penalty === 'number') request.frequency_penalty = options.frequency_penalty;
1573
+ if (options.stop !== undefined) request.stop = options.stop;
1574
+ if (typeof options.n === 'number') request.n = options.n;
1575
+ if (options.logit_bias !== undefined) request.logit_bias = options.logit_bias;
1576
+ if (options.user !== undefined) request.user = options.user;
1577
+ if (options.prompt_cache_key !== undefined) request.prompt_cache_key = options.prompt_cache_key;
1578
+ if (options.prompt_cache_retention !== undefined) request.prompt_cache_retention = options.prompt_cache_retention;
1579
+
1580
+ return request;
1581
+ }
1582
+
1583
+ static processResponsesResponse(response) {
1584
+ const message = MixOpenAIResponses.extractResponsesMessage(response.data);
1585
+ return {
1586
+ message,
1587
+ think: null,
1588
+ toolCalls: [],
1589
+ tokens: MixOpenAIResponses.extractResponsesTokens(response.data),
1590
+ response: response.data
1591
+ };
1592
+ }
1593
+
1594
+ static extractResponsesTokens(data) {
1595
+ if (data.usage) {
1596
+ return {
1597
+ input: data.usage.input_tokens || 0,
1598
+ output: data.usage.output_tokens || 0,
1599
+ total: data.usage.total_tokens || ((data.usage.input_tokens || 0) + (data.usage.output_tokens || 0)),
1600
+ cached: ModelMix.extractCacheTokens(data.usage)
1601
+ };
1602
+ }
1603
+ return {
1604
+ input: 0,
1605
+ output: 0,
1606
+ total: 0,
1607
+ cached: 0
1608
+ };
1609
+ }
1610
+
1611
+ static extractResponsesMessage(data) {
1612
+ if (!Array.isArray(data.output)) return '';
1613
+ return data.output
1614
+ .filter(item => item.type === 'message')
1615
+ .flatMap(item => Array.isArray(item.content) ? item.content : [])
1616
+ .filter(content => content.type === 'output_text' && typeof content.text === 'string')
1617
+ .map(content => content.text)
1618
+ .join('\n')
1619
+ .trim();
1620
+ }
1621
+
1622
+ static messagesToResponsesInput(messages = []) {
1623
+ const mapped = [];
1624
+
1625
+ for (const message of messages) {
1626
+ if (!message || !message.role) continue;
1627
+ if (message.tool_calls || message.role === 'tool') continue;
1628
+
1629
+ let text = '';
1630
+ if (typeof message.content === 'string') {
1631
+ text = message.content;
1632
+ } else if (Array.isArray(message.content)) {
1633
+ text = message.content
1634
+ .filter(item => item && item.type === 'text' && typeof item.text === 'string')
1635
+ .map(item => item.text)
1636
+ .join('\n');
1637
+ }
1638
+
1639
+ if (!text) continue;
1640
+ mapped.push({
1641
+ role: message.role,
1642
+ content: [{ type: 'input_text', text }]
1643
+ });
1644
+ }
1645
+
1646
+ return mapped;
1647
+ }
1648
+ }
1649
+
1650
+ class MixOpenAIWebSocket extends MixOpenAIResponses {
1651
+ getDefaultConfig(customConfig) {
1652
+ return super.getDefaultConfig({
1653
+ realtimeUrl: 'wss://api.openai.com/v1/realtime',
1654
+ websocketTimeoutMs: 120000,
1655
+ ...customConfig
1656
+ });
1657
+ }
1658
+
1659
+ async create({ config = {}, options = {} } = {}) {
1660
+ if (options.model?.startsWith('o')) {
1661
+ delete options.max_tokens;
1662
+ delete options.temperature;
1663
+ }
1664
+ if (options.model?.includes('gpt-5')) {
1665
+ if (options.max_tokens) {
1666
+ options.max_completion_tokens = options.max_tokens;
1667
+ delete options.max_tokens;
1668
+ }
1669
+ delete options.temperature;
1670
+ }
1671
+
1672
+ const mergedConfig = { ...this.config, ...config };
1673
+ const realtimeUrl = `${mergedConfig.realtimeUrl}?model=${encodeURIComponent(options.model)}`;
1674
+ const timeoutMs = mergedConfig.websocketTimeoutMs || 120000;
1675
+
1676
+ return await new Promise((resolve, reject) => {
1677
+ const ws = new WebSocket(realtimeUrl, {
1678
+ headers: {
1679
+ authorization: `Bearer ${mergedConfig.apiKey}`
1680
+ }
1681
+ });
1682
+
1683
+ const events = [];
1684
+ let message = '';
1685
+ let settled = false;
1686
+ let finalResponse = null;
1687
+
1688
+ const timeout = setTimeout(() => {
1689
+ if (settled) return;
1690
+ settled = true;
1691
+ ws.close();
1692
+ reject({
1693
+ message: `Realtime WebSocket timed out after ${timeoutMs}ms`,
1694
+ statusCode: null,
1695
+ details: null,
1696
+ config: mergedConfig,
1697
+ options
1698
+ });
1699
+ }, timeoutMs);
1700
+
1701
+ const cleanUp = () => clearTimeout(timeout);
1702
+
1703
+ ws.on('open', () => {
1704
+ const session = {
1705
+ type: 'realtime',
1706
+ output_modalities: ['text']
1707
+ };
1708
+
1709
+ if (mergedConfig.system) session.instructions = mergedConfig.system;
1710
+ if (Array.isArray(options.tools) && options.tools.length > 0) {
1711
+ session.tools = options.tools;
1712
+ }
1713
+
1714
+ ws.send(JSON.stringify({ type: 'session.update', session }));
1715
+
1716
+ const items = MixOpenAIWebSocket.messagesToConversationItems(options.messages);
1717
+ for (const item of items) {
1718
+ ws.send(JSON.stringify({
1719
+ type: 'conversation.item.create',
1720
+ item
1721
+ }));
1722
+ }
1723
+
1724
+ const responseConfig = { output_modalities: ['text'] };
1725
+ if (typeof options.max_completion_tokens === 'number') {
1726
+ responseConfig.max_output_tokens = Math.min(options.max_completion_tokens, 4096);
1727
+ } else if (typeof options.max_tokens === 'number') {
1728
+ responseConfig.max_output_tokens = Math.min(options.max_tokens, 4096);
1729
+ }
1730
+ if (Array.isArray(options.tools) && options.tools.length > 0) responseConfig.tools = options.tools;
1731
+
1732
+ ws.send(JSON.stringify({
1733
+ type: 'response.create',
1734
+ response: responseConfig
1735
+ }));
1736
+ });
1737
+
1738
+ ws.on('message', raw => {
1739
+ let event;
1740
+ try {
1741
+ event = JSON.parse(raw.toString());
1742
+ } catch {
1743
+ return;
1744
+ }
1745
+
1746
+ events.push(event);
1747
+
1748
+ const isTextDeltaEvent = event.type === 'response.text.delta' || event.type === 'response.output_text.delta';
1749
+ if (isTextDeltaEvent) {
1750
+ const delta = MixOpenAIWebSocket.extractDelta(event);
1751
+ if (delta) {
1752
+ message += delta;
1753
+ if (this.streamCallback) {
1754
+ this.streamCallback({ response: event, message, delta });
1755
+ }
1756
+ }
1757
+ return;
1758
+ }
1759
+
1760
+ if (event.type === 'response.done') {
1761
+ finalResponse = event.response || null;
1762
+ if (!message && finalResponse) {
1763
+ message = MixOpenAIResponses.extractResponsesMessage(finalResponse);
1764
+ }
1765
+
1766
+ if (!settled) {
1767
+ settled = true;
1768
+ cleanUp();
1769
+ ws.close();
1770
+ resolve({
1771
+ message: message.trim(),
1772
+ think: null,
1773
+ toolCalls: [],
1774
+ tokens: MixOpenAIResponses.extractResponsesTokens(finalResponse || {}),
1775
+ response: {
1776
+ response: finalResponse,
1777
+ events
1778
+ }
1779
+ });
1780
+ }
1781
+ return;
1782
+ }
1783
+
1784
+ if (event.type === 'error' && !settled) {
1785
+ settled = true;
1786
+ cleanUp();
1787
+ ws.close();
1788
+ reject({
1789
+ message: event.error?.message || 'Realtime WebSocket error',
1790
+ statusCode: null,
1791
+ details: event.error || event,
1792
+ config: mergedConfig,
1793
+ options
1794
+ });
1795
+ }
1796
+ });
1797
+
1798
+ ws.on('error', error => {
1799
+ if (settled) return;
1800
+ settled = true;
1801
+ cleanUp();
1802
+ reject({
1803
+ message: error.message || 'Realtime WebSocket connection error',
1804
+ statusCode: null,
1805
+ details: null,
1806
+ stack: error.stack,
1807
+ config: mergedConfig,
1808
+ options
1809
+ });
1810
+ });
1811
+
1812
+ ws.on('close', () => {
1813
+ if (settled) return;
1814
+ settled = true;
1815
+ cleanUp();
1816
+ reject({
1817
+ message: 'Realtime WebSocket closed before response.done',
1818
+ statusCode: null,
1819
+ details: null,
1820
+ config: mergedConfig,
1821
+ options
1822
+ });
1823
+ });
1824
+ });
1825
+ }
1826
+
1827
+ static messagesToConversationItems(messages = []) {
1828
+ const items = [];
1829
+
1830
+ for (const message of messages) {
1831
+ if (!message || !message.role) continue;
1832
+ if (message.role === 'tool' || message.tool_calls) continue;
1833
+
1834
+ const role = message.role === 'assistant' ? 'assistant' : (message.role === 'system' ? 'system' : 'user');
1835
+ const content = [];
1836
+
1837
+ if (typeof message.content === 'string') {
1838
+ content.push({
1839
+ type: role === 'assistant' ? 'text' : 'input_text',
1840
+ text: message.content
1841
+ });
1842
+ } else if (Array.isArray(message.content)) {
1843
+ for (const item of message.content) {
1844
+ if (!item || item.type !== 'text' || typeof item.text !== 'string') continue;
1845
+ content.push({
1846
+ type: role === 'assistant' ? 'text' : 'input_text',
1847
+ text: item.text
1848
+ });
1849
+ }
1850
+ }
1851
+
1852
+ if (content.length === 0) continue;
1853
+ items.push({ type: 'message', role, content });
1854
+ }
1855
+
1856
+ return items;
1857
+ }
1858
+
1859
+ static extractDelta(event) {
1860
+ if (typeof event.delta === 'string') return event.delta;
1861
+ return '';
1862
+ }
1863
+ }
1864
+
1502
1865
  class MixOpenRouter extends MixOpenAI {
1503
1866
  getDefaultConfig(customConfig) {
1504
1867
 
@@ -1684,13 +2047,15 @@ class MixAnthropic extends MixCustom {
1684
2047
  return {
1685
2048
  input: data.usage.input_tokens || 0,
1686
2049
  output: data.usage.output_tokens || 0,
1687
- total: (data.usage.input_tokens || 0) + (data.usage.output_tokens || 0)
2050
+ total: (data.usage.input_tokens || 0) + (data.usage.output_tokens || 0),
2051
+ cached: ModelMix.extractCacheTokens(data.usage)
1688
2052
  };
1689
2053
  }
1690
2054
  return {
1691
2055
  input: 0,
1692
2056
  output: 0,
1693
- total: 0
2057
+ total: 0,
2058
+ cached: 0
1694
2059
  };
1695
2060
  }
1696
2061
 
@@ -2219,13 +2584,15 @@ class MixGoogle extends MixCustom {
2219
2584
  return {
2220
2585
  input: data.usageMetadata.promptTokenCount || 0,
2221
2586
  output: data.usageMetadata.candidatesTokenCount || 0,
2222
- total: data.usageMetadata.totalTokenCount || 0
2587
+ total: data.usageMetadata.totalTokenCount || 0,
2588
+ cached: ModelMix.extractCacheTokens(data.usageMetadata)
2223
2589
  };
2224
2590
  }
2225
2591
  return {
2226
2592
  input: 0,
2227
2593
  output: 0,
2228
- total: 0
2594
+ total: 0,
2595
+ cached: 0
2229
2596
  };
2230
2597
  }
2231
2598
 
@@ -2273,4 +2640,4 @@ class MixGoogle extends MixCustom {
2273
2640
  }
2274
2641
  }
2275
2642
 
2276
- module.exports = { MixCustom, ModelMix, MixAnthropic, MixMiniMax, MixOpenAI, MixOpenRouter, MixPerplexity, MixOllama, MixLMStudio, MixGroq, MixTogether, MixGrok, MixCerebras, MixGoogle, MixFireworks };
2643
+ module.exports = { MixCustom, ModelMix, MixAnthropic, MixMiniMax, MixOpenAI, MixOpenAIResponses, MixOpenAIWebSocket, MixOpenRouter, MixPerplexity, MixOllama, MixLMStudio, MixGroq, MixTogether, MixGrok, MixCerebras, MixGoogle, MixFireworks };