modelmix 4.4.14 → 4.4.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -4
- package/demo/cache.js +52 -0
- package/demo/gemini.js +12 -9
- package/demo/gpt-realtime.js +22 -0
- package/demo/{gpt51.js → gpt54.js} +2 -2
- package/index.js +389 -22
- package/package.json +13 -7
- package/test/bottleneck.test.js +23 -78
- package/test/images.test.js +2 -3
- package/test/json.test.js +18 -41
- package/test/setup.js +12 -0
- package/test/templates.test.js +60 -155
- package/test/tokens.test.js +64 -1
- package/demo/save_the_cat-spanish.md +0 -109
- package/demo/story.md +0 -15
package/README.md
CHANGED
|
@@ -135,9 +135,10 @@ Here's a comprehensive list of available methods:
|
|
|
135
135
|
|
|
136
136
|
| Method | Provider | Model | Price (I/O) per 1 M tokens |
|
|
137
137
|
| ------------------ | ---------- | ------------------------------ | -------------------------- |
|
|
138
|
+
| `gpt54()` | OpenAI | gpt-5.4 | [\$2.50 / \$15.00][1] |
|
|
139
|
+
| `gpt53codex()` | OpenAI | gpt-5.3-codex | [\$1.75 / \$14.00][1] |
|
|
138
140
|
| `gpt52()` | OpenAI | gpt-5.2 | [\$1.75 / \$14.00][1] |
|
|
139
141
|
| `gpt51()` | OpenAI | gpt-5.1 | [\$1.25 / \$10.00][1] |
|
|
140
|
-
| `gpt5()` | OpenAI | gpt-5 | [\$1.25 / \$10.00][1] |
|
|
141
142
|
| `gpt5mini()` | OpenAI | gpt-5-mini | [\$0.25 / \$2.00][1] |
|
|
142
143
|
| `gpt5nano()` | OpenAI | gpt-5-nano | [\$0.05 / \$0.40][1] |
|
|
143
144
|
| `gpt41()` | OpenAI | gpt-4.1 | [\$2.00 / \$8.00][1] |
|
|
@@ -405,6 +406,7 @@ Every response from `raw()` now includes a `tokens` object with the following st
|
|
|
405
406
|
input: 150, // Number of tokens in the prompt/input
|
|
406
407
|
output: 75, // Number of tokens in the completion/output
|
|
407
408
|
total: 225, // Total tokens used (input + output)
|
|
409
|
+
cached: 100, // Cached input tokens reported by the provider (0 when absent)
|
|
408
410
|
cost: 0.0012, // Estimated cost in USD (null if model not in pricing table)
|
|
409
411
|
speed: 42 // Output tokens per second (int)
|
|
410
412
|
}
|
|
@@ -418,10 +420,10 @@ After calling `message()` or `json()`, use `lastRaw` to access the complete resp
|
|
|
418
420
|
```javascript
|
|
419
421
|
const text = await model.message();
|
|
420
422
|
console.log(model.lastRaw.tokens);
|
|
421
|
-
// { input: 122, output: 86, total:
|
|
423
|
+
// { input: 122, output: 86, total: 208, cached: 41, cost: 0.000319, speed: 38 }
|
|
422
424
|
```
|
|
423
425
|
|
|
424
|
-
The `cost` field is the estimated cost in USD based on the model's pricing per 1M tokens (input/output). If the model is not found in the pricing table, `cost` will be `null`. The `speed` field is the generation speed measured in output tokens per second (integer).
|
|
426
|
+
The `cached` field is a single aggregated count of cached input tokens reported by the provider. The `cost` field is the estimated cost in USD based on the model's pricing per 1M tokens (input/output). If the model is not found in the pricing table, `cost` will be `null`. The `speed` field is the generation speed measured in output tokens per second (integer).
|
|
425
427
|
|
|
426
428
|
## 🐛 Enabling Debug Mode
|
|
427
429
|
|
|
@@ -515,7 +517,7 @@ new ModelMix(args = { options: {}, config: {} })
|
|
|
515
517
|
- `message`: The text response from the model
|
|
516
518
|
- `think`: Reasoning/thinking content (if available)
|
|
517
519
|
- `toolCalls`: Array of tool calls made by the model (if any)
|
|
518
|
-
- `tokens`: Object with `input`, `output`, `total` token counts, `cost` (USD)
|
|
520
|
+
- `tokens`: Object with `input`, `output`, `total`, and `cached` token counts, plus `cost` (USD) and `speed` (output tokens/sec)
|
|
519
521
|
- `response`: The raw API response
|
|
520
522
|
- `stream(callback)`: Sends the message and streams the response, invoking the callback with each streamed part.
|
|
521
523
|
- `json(schemaExample, descriptions = {}, options = {})`: Forces the model to return a response in a specific JSON format.
|
package/demo/cache.js
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { ModelMix } from '../index.js';
|
|
2
|
+
try { process.loadEnvFile(); } catch {}
|
|
3
|
+
|
|
4
|
+
console.log("\n" + '--------| gpt54() prompt cache |--------');
|
|
5
|
+
|
|
6
|
+
// Keep the reusable prefix first and only vary the question at the end.
|
|
7
|
+
const sharedPrefix = [
|
|
8
|
+
"You are a concise science tutor.",
|
|
9
|
+
"The repeated block below is intentionally long so OpenAI can reuse cached prompt tokens on the second request.",
|
|
10
|
+
Array.from({ length: 80 }, (_, index) =>
|
|
11
|
+
`Reference ${String(index + 1).padStart(3, '0')}: Quantum systems are described with probabilities, measurements collapse possibilities into outcomes, and explanations must stay concrete, brief, and easy to understand.`
|
|
12
|
+
).join("\n")
|
|
13
|
+
].join("\n\n");
|
|
14
|
+
|
|
15
|
+
const buildPrompt = (question) => `${sharedPrefix}\n\nQuestion: ${question}`;
|
|
16
|
+
|
|
17
|
+
const createModel = () => ModelMix.new({
|
|
18
|
+
config: {
|
|
19
|
+
debug: 3,
|
|
20
|
+
}
|
|
21
|
+
}).gpt54({
|
|
22
|
+
options: {
|
|
23
|
+
reasoning_effort: "none",
|
|
24
|
+
verbosity: "low",
|
|
25
|
+
prompt_cache_key: "demo-gpt54-prompt-cache",
|
|
26
|
+
prompt_cache_retention: "24h"
|
|
27
|
+
}
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
const runRequest = async (label, question) => {
|
|
31
|
+
const model = createModel();
|
|
32
|
+
model.addText(buildPrompt(question));
|
|
33
|
+
|
|
34
|
+
const result = await model.raw();
|
|
35
|
+
|
|
36
|
+
console.log(`\n${label}`);
|
|
37
|
+
console.log("message:", result.message);
|
|
38
|
+
console.log("tokens:", result.tokens);
|
|
39
|
+
|
|
40
|
+
return result;
|
|
41
|
+
};
|
|
42
|
+
|
|
43
|
+
await runRequest(
|
|
44
|
+
"Request 1 (warms the cache)",
|
|
45
|
+
"Explain quantum entanglement in simple Spanish in 3 short bullet points."
|
|
46
|
+
);
|
|
47
|
+
|
|
48
|
+
await runRequest(
|
|
49
|
+
"Request 2 (reuses the cached prefix)",
|
|
50
|
+
"Now explain quantum entanglement in simple Spanish with a different analogy and 3 short bullet points."
|
|
51
|
+
);
|
|
52
|
+
|
package/demo/gemini.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { ModelMix, MixGoogle } from '../index.js';
|
|
2
|
-
try { process.loadEnvFile(); } catch {}
|
|
2
|
+
try { process.loadEnvFile(); } catch { }
|
|
3
3
|
|
|
4
4
|
const mmix = new ModelMix({
|
|
5
5
|
options: {
|
|
@@ -12,9 +12,9 @@ const mmix = new ModelMix({
|
|
|
12
12
|
}
|
|
13
13
|
});
|
|
14
14
|
|
|
15
|
-
// Using
|
|
15
|
+
// Using gemini3flash (Gemini 3 Flash) with built-in method
|
|
16
16
|
console.log("\n" + '--------| gemini25flash() |--------');
|
|
17
|
-
const flash = await mmix.
|
|
17
|
+
const flash = await mmix.gemini3flash()
|
|
18
18
|
.addText('Hi there! Do you like cats?')
|
|
19
19
|
.message();
|
|
20
20
|
|
|
@@ -22,20 +22,23 @@ console.log(flash);
|
|
|
22
22
|
|
|
23
23
|
// Using gemini3pro (Gemini 3 Pro) with custom config
|
|
24
24
|
console.log("\n" + '--------| gemini3pro() with JSON response |--------');
|
|
25
|
-
const pro = mmix.new().
|
|
25
|
+
const pro = mmix.new().gemini31pro();
|
|
26
26
|
|
|
27
27
|
pro.addText('Give me a fun fact about cats');
|
|
28
|
-
|
|
28
|
+
|
|
29
|
+
const jsonExampleAndSchema = {
|
|
29
30
|
fact: 'A fun fact about cats',
|
|
30
|
-
category: 'animal behavior'
|
|
31
|
-
}
|
|
31
|
+
category: 'animal behavior'
|
|
32
|
+
};
|
|
33
|
+
|
|
34
|
+
const jsonResponse = await pro.json(jsonExampleAndSchema, jsonExampleAndSchema);
|
|
32
35
|
|
|
33
36
|
console.log(jsonResponse);
|
|
34
37
|
|
|
35
38
|
// Using attach method with MixGoogle for custom model
|
|
36
39
|
console.log("\n" + '--------| Custom Gemini with attach() |--------');
|
|
37
|
-
mmix.attach('gemini-2.5-flash', new MixGoogle());
|
|
40
|
+
const customModel = mmix.new().attach('gemini-2.5-flash', new MixGoogle());
|
|
38
41
|
|
|
39
|
-
const custom = await
|
|
42
|
+
const custom = await customModel.addText('Tell me a short joke about cats.').message();
|
|
40
43
|
console.log(custom);
|
|
41
44
|
|
|
package/demo/gpt-realtime.js
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { ModelMix } from '../index.js';
|
|
2
|
+
try { process.loadEnvFile(); } catch {}
|
|
3
|
+
|
|
4
|
+
const mmix = new ModelMix({
|
|
5
|
+
config: {
|
|
6
|
+
debug: 3
|
|
7
|
+
}
|
|
8
|
+
});
|
|
9
|
+
|
|
10
|
+
console.log('\n--------| gptRealtime() |--------');
|
|
11
|
+
|
|
12
|
+
const realtime = mmix.gptRealtimeMini({
|
|
13
|
+
options: {
|
|
14
|
+
stream: true
|
|
15
|
+
}
|
|
16
|
+
});
|
|
17
|
+
|
|
18
|
+
realtime.addText('Explain quantum entanglement in simple terms.');
|
|
19
|
+
const response = await realtime.stream(({ delta }) => {
|
|
20
|
+
process.stdout.write(delta || '');
|
|
21
|
+
});
|
|
22
|
+
console.log('\n\n[done]\n', response.tokens);
|
|
package/demo/{gpt51.js → gpt54.js}
CHANGED
|
@@ -8,10 +8,10 @@ const mmix = new ModelMix({
|
|
|
8
8
|
}
|
|
9
9
|
});
|
|
10
10
|
|
|
11
|
-
console.log("\n" + '--------|
|
|
11
|
+
console.log("\n" + '--------| gpt54() |--------');
|
|
12
12
|
|
|
13
13
|
const gptArgs = { options: { reasoning_effort: "none", verbosity: "low" } };
|
|
14
|
-
const gpt = mmix.
|
|
14
|
+
const gpt = mmix.gpt54(gptArgs);
|
|
15
15
|
|
|
16
16
|
gpt.addText("Explain quantum entanglement in simple terms.");
|
|
17
17
|
const response = await gpt.message();
|
package/index.js
CHANGED
|
@@ -5,6 +5,7 @@ const { inspect } = require('util');
|
|
|
5
5
|
const log = require('lemonlog')('ModelMix');
|
|
6
6
|
const Bottleneck = require('bottleneck');
|
|
7
7
|
const path = require('path');
|
|
8
|
+
const WebSocket = require('ws');
|
|
8
9
|
const generateJsonSchema = require('./schema');
|
|
9
10
|
const { Client } = require("@modelcontextprotocol/sdk/client/index.js");
|
|
10
11
|
const { StdioClientTransport } = require("@modelcontextprotocol/sdk/client/stdio.js");
|
|
@@ -14,6 +15,11 @@ const { MCPToolsManager } = require('./mcp-tools');
|
|
|
14
15
|
// Based on provider pricing pages linked in README
|
|
15
16
|
const MODEL_PRICING = {
|
|
16
17
|
// OpenAI
|
|
18
|
+
'gpt-realtime-mini': [0.60, 2.40],
|
|
19
|
+
'gpt-realtime': [4.00, 16.00],
|
|
20
|
+
'gpt-5.4': [2.50, 15.00],
|
|
21
|
+
'gpt-5.4-pro': [30, 180.00],
|
|
22
|
+
'gpt-5.3-codex': [1.75, 14.00],
|
|
17
23
|
'gpt-5.2': [1.75, 14.00],
|
|
18
24
|
'gpt-5.2-chat-latest': [1.75, 14.00],
|
|
19
25
|
'gpt-5.1': [1.25, 10.00],
|
|
@@ -175,6 +181,15 @@ class ModelMix {
|
|
|
175
181
|
return (tokens.input * inputPerMillion / 1_000_000) + (tokens.output * outputPerMillion / 1_000_000);
|
|
176
182
|
}
|
|
177
183
|
|
|
184
|
+
static extractCacheTokens(usage = {}) {
|
|
185
|
+
return usage.input_tokens_details?.cached_tokens
|
|
186
|
+
|| usage.prompt_tokens_details?.cached_tokens
|
|
187
|
+
|| usage.cache_read_input_tokens
|
|
188
|
+
|| usage.cachedContentTokenCount
|
|
189
|
+
|| usage.cached_content_token_count
|
|
190
|
+
|| 0;
|
|
191
|
+
}
|
|
192
|
+
|
|
178
193
|
static formatInputSummary(messages, system, debug = 2) {
|
|
179
194
|
const lastMessage = messages[messages.length - 1];
|
|
180
195
|
let inputText = '';
|
|
@@ -244,15 +259,6 @@ class ModelMix {
|
|
|
244
259
|
gpt41nano({ options = {}, config = {} } = {}) {
|
|
245
260
|
return this.attach('gpt-4.1-nano', new MixOpenAI({ options, config }));
|
|
246
261
|
}
|
|
247
|
-
o4mini({ options = {}, config = {} } = {}) {
|
|
248
|
-
return this.attach('o4-mini', new MixOpenAI({ options, config }));
|
|
249
|
-
}
|
|
250
|
-
o3({ options = {}, config = {} } = {}) {
|
|
251
|
-
return this.attach('o3', new MixOpenAI({ options, config }));
|
|
252
|
-
}
|
|
253
|
-
gpt45({ options = {}, config = {} } = {}) {
|
|
254
|
-
return this.attach('gpt-4.5-preview', new MixOpenAI({ options, config }));
|
|
255
|
-
}
|
|
256
262
|
gpt5({ options = {}, config = {} } = {}) {
|
|
257
263
|
return this.attach('gpt-5', new MixOpenAI({ options, config }));
|
|
258
264
|
}
|
|
@@ -263,13 +269,28 @@ class ModelMix {
|
|
|
263
269
|
return this.attach('gpt-5-nano', new MixOpenAI({ options, config }));
|
|
264
270
|
}
|
|
265
271
|
gpt51({ options = {}, config = {} } = {}) {
|
|
266
|
-
return this.attach('gpt-5.1', new
|
|
272
|
+
return this.attach('gpt-5.1', new MixOpenAIResponses({ options, config }));
|
|
267
273
|
}
|
|
268
274
|
gpt52({ options = {}, config = {} } = {}) {
|
|
269
|
-
return this.attach('gpt-5.2', new
|
|
275
|
+
return this.attach('gpt-5.2', new MixOpenAIResponses({ options, config }));
|
|
270
276
|
}
|
|
271
|
-
|
|
272
|
-
return this.attach('gpt-5.
|
|
277
|
+
gpt54({ options = {}, config = {} } = {}) {
|
|
278
|
+
return this.attach('gpt-5.4', new MixOpenAIResponses({ options, config }));
|
|
279
|
+
}
|
|
280
|
+
gpt54pro({ options = {}, config = {} } = {}) {
|
|
281
|
+
return this.attach('gpt-5.4-pro', new MixOpenAIResponses({ options, config }));
|
|
282
|
+
}
|
|
283
|
+
gptRealtime({ options = {}, config = {} } = {}) {
|
|
284
|
+
return this.attach('gpt-realtime', new MixOpenAIWebSocket({ options, config }));
|
|
285
|
+
}
|
|
286
|
+
gptRealtimeMini({ options = {}, config = {} } = {}) {
|
|
287
|
+
return this.attach('gpt-realtime-mini', new MixOpenAIWebSocket({ options, config }));
|
|
288
|
+
}
|
|
289
|
+
gpt53codex({ options = {}, config = {} } = {}) {
|
|
290
|
+
return this.attach('gpt-5.3-codex', new MixOpenAIResponses({ options, config }));
|
|
291
|
+
}
|
|
292
|
+
gpt53chat({ options = {}, config = {} } = {}) {
|
|
293
|
+
return this.attach('gpt-5.3-chat-latest', new MixOpenAIResponses({ options, config }));
|
|
273
294
|
}
|
|
274
295
|
gptOss({ options = {}, config = {}, mix = {} } = {}) {
|
|
275
296
|
mix = { ...this.mix, ...mix };
|
|
@@ -942,7 +963,10 @@ class ModelMix {
|
|
|
942
963
|
// debug level 2: Readable summary of output
|
|
943
964
|
if (currentConfig.debug >= 2) {
|
|
944
965
|
const tokenInfo = result.tokens
|
|
945
|
-
? ` ${result.tokens.input} → ${result.tokens.output} tok`
|
|
966
|
+
? ` ${result.tokens.input} → ${result.tokens.output} tok`
|
|
967
|
+
+ (result.tokens.cached ? ` (cached:${result.tokens.cached})` : '')
|
|
968
|
+
+ (result.tokens.speed ? `| ${result.tokens.speed} t/s` : '')
|
|
969
|
+
+ (result.tokens.cost != null ? ` $${result.tokens.cost.toFixed(4)}` : '')
|
|
946
970
|
: '';
|
|
947
971
|
console.log(`✓${tokenInfo}\n${ModelMix.formatOutputSummary(result, currentConfig.debug).trim()}`);
|
|
948
972
|
}
|
|
@@ -1306,7 +1330,7 @@ class MixCustom {
|
|
|
1306
1330
|
message: message.trim(),
|
|
1307
1331
|
toolCalls: [],
|
|
1308
1332
|
think: null,
|
|
1309
|
-
tokens: raw.length > 0 ? MixCustom.extractTokens(raw[raw.length - 1]) : { input: 0, output: 0, total: 0 }
|
|
1333
|
+
tokens: raw.length > 0 ? MixCustom.extractTokens(raw[raw.length - 1]) : { input: 0, output: 0, total: 0, cached: 0 }
|
|
1310
1334
|
}));
|
|
1311
1335
|
response.data.on('error', reject);
|
|
1312
1336
|
});
|
|
@@ -1358,13 +1382,15 @@ class MixCustom {
|
|
|
1358
1382
|
return {
|
|
1359
1383
|
input: data.usage.prompt_tokens || 0,
|
|
1360
1384
|
output: data.usage.completion_tokens || 0,
|
|
1361
|
-
total: data.usage.total_tokens || 0
|
|
1385
|
+
total: data.usage.total_tokens || 0,
|
|
1386
|
+
cached: ModelMix.extractCacheTokens(data.usage)
|
|
1362
1387
|
};
|
|
1363
1388
|
}
|
|
1364
1389
|
return {
|
|
1365
1390
|
input: 0,
|
|
1366
1391
|
output: 0,
|
|
1367
|
-
total: 0
|
|
1392
|
+
total: 0,
|
|
1393
|
+
cached: 0
|
|
1368
1394
|
};
|
|
1369
1395
|
}
|
|
1370
1396
|
|
|
@@ -1499,6 +1525,343 @@ class MixOpenAI extends MixCustom {
|
|
|
1499
1525
|
}
|
|
1500
1526
|
}
|
|
1501
1527
|
|
|
1528
|
+
class MixOpenAIResponses extends MixOpenAI {
|
|
1529
|
+
async create({ config = {}, options = {} } = {}) {
|
|
1530
|
+
|
|
1531
|
+
// Keep GPT/o-model option normalization behavior
|
|
1532
|
+
if (options.model?.startsWith('o')) {
|
|
1533
|
+
delete options.max_tokens;
|
|
1534
|
+
delete options.temperature;
|
|
1535
|
+
}
|
|
1536
|
+
if (options.model?.includes('gpt-5')) {
|
|
1537
|
+
if (options.max_tokens) {
|
|
1538
|
+
options.max_completion_tokens = options.max_tokens;
|
|
1539
|
+
delete options.max_tokens;
|
|
1540
|
+
}
|
|
1541
|
+
delete options.temperature;
|
|
1542
|
+
}
|
|
1543
|
+
|
|
1544
|
+
const responsesUrl = this.config.url.replace('/chat/completions', '/responses');
|
|
1545
|
+
const request = MixOpenAIResponses.buildResponsesRequest(options);
|
|
1546
|
+
const response = await axios.post(responsesUrl, request, {
|
|
1547
|
+
headers: this.headers
|
|
1548
|
+
});
|
|
1549
|
+
|
|
1550
|
+
return MixOpenAIResponses.processResponsesResponse(response);
|
|
1551
|
+
}
|
|
1552
|
+
|
|
1553
|
+
static buildResponsesRequest(options = {}) {
|
|
1554
|
+
const request = {
|
|
1555
|
+
model: options.model,
|
|
1556
|
+
input: MixOpenAIResponses.messagesToResponsesInput(options.messages),
|
|
1557
|
+
stream: false
|
|
1558
|
+
};
|
|
1559
|
+
|
|
1560
|
+
if (options.reasoning_effort) request.reasoning = { effort: options.reasoning_effort };
|
|
1561
|
+
if (options.verbosity) request.text = { verbosity: options.verbosity };
|
|
1562
|
+
|
|
1563
|
+
if (typeof options.max_completion_tokens === 'number') {
|
|
1564
|
+
request.max_output_tokens = options.max_completion_tokens;
|
|
1565
|
+
} else if (typeof options.max_tokens === 'number') {
|
|
1566
|
+
request.max_output_tokens = options.max_tokens;
|
|
1567
|
+
}
|
|
1568
|
+
|
|
1569
|
+
if (typeof options.temperature === 'number') request.temperature = options.temperature;
|
|
1570
|
+
if (typeof options.top_p === 'number') request.top_p = options.top_p;
|
|
1571
|
+
if (typeof options.presence_penalty === 'number') request.presence_penalty = options.presence_penalty;
|
|
1572
|
+
if (typeof options.frequency_penalty === 'number') request.frequency_penalty = options.frequency_penalty;
|
|
1573
|
+
if (options.stop !== undefined) request.stop = options.stop;
|
|
1574
|
+
if (typeof options.n === 'number') request.n = options.n;
|
|
1575
|
+
if (options.logit_bias !== undefined) request.logit_bias = options.logit_bias;
|
|
1576
|
+
if (options.user !== undefined) request.user = options.user;
|
|
1577
|
+
if (options.prompt_cache_key !== undefined) request.prompt_cache_key = options.prompt_cache_key;
|
|
1578
|
+
if (options.prompt_cache_retention !== undefined) request.prompt_cache_retention = options.prompt_cache_retention;
|
|
1579
|
+
|
|
1580
|
+
return request;
|
|
1581
|
+
}
|
|
1582
|
+
|
|
1583
|
+
static processResponsesResponse(response) {
|
|
1584
|
+
const message = MixOpenAIResponses.extractResponsesMessage(response.data);
|
|
1585
|
+
return {
|
|
1586
|
+
message,
|
|
1587
|
+
think: null,
|
|
1588
|
+
toolCalls: [],
|
|
1589
|
+
tokens: MixOpenAIResponses.extractResponsesTokens(response.data),
|
|
1590
|
+
response: response.data
|
|
1591
|
+
};
|
|
1592
|
+
}
|
|
1593
|
+
|
|
1594
|
+
static extractResponsesTokens(data) {
|
|
1595
|
+
if (data.usage) {
|
|
1596
|
+
return {
|
|
1597
|
+
input: data.usage.input_tokens || 0,
|
|
1598
|
+
output: data.usage.output_tokens || 0,
|
|
1599
|
+
total: data.usage.total_tokens || ((data.usage.input_tokens || 0) + (data.usage.output_tokens || 0)),
|
|
1600
|
+
cached: ModelMix.extractCacheTokens(data.usage)
|
|
1601
|
+
};
|
|
1602
|
+
}
|
|
1603
|
+
return {
|
|
1604
|
+
input: 0,
|
|
1605
|
+
output: 0,
|
|
1606
|
+
total: 0,
|
|
1607
|
+
cached: 0
|
|
1608
|
+
};
|
|
1609
|
+
}
|
|
1610
|
+
|
|
1611
|
+
static extractResponsesMessage(data) {
|
|
1612
|
+
if (!Array.isArray(data.output)) return '';
|
|
1613
|
+
return data.output
|
|
1614
|
+
.filter(item => item.type === 'message')
|
|
1615
|
+
.flatMap(item => Array.isArray(item.content) ? item.content : [])
|
|
1616
|
+
.filter(content => content.type === 'output_text' && typeof content.text === 'string')
|
|
1617
|
+
.map(content => content.text)
|
|
1618
|
+
.join('\n')
|
|
1619
|
+
.trim();
|
|
1620
|
+
}
|
|
1621
|
+
|
|
1622
|
+
static messagesToResponsesInput(messages = []) {
|
|
1623
|
+
const mapped = [];
|
|
1624
|
+
|
|
1625
|
+
for (const message of messages) {
|
|
1626
|
+
if (!message || !message.role) continue;
|
|
1627
|
+
if (message.tool_calls || message.role === 'tool') continue;
|
|
1628
|
+
|
|
1629
|
+
let text = '';
|
|
1630
|
+
if (typeof message.content === 'string') {
|
|
1631
|
+
text = message.content;
|
|
1632
|
+
} else if (Array.isArray(message.content)) {
|
|
1633
|
+
text = message.content
|
|
1634
|
+
.filter(item => item && item.type === 'text' && typeof item.text === 'string')
|
|
1635
|
+
.map(item => item.text)
|
|
1636
|
+
.join('\n');
|
|
1637
|
+
}
|
|
1638
|
+
|
|
1639
|
+
if (!text) continue;
|
|
1640
|
+
mapped.push({
|
|
1641
|
+
role: message.role,
|
|
1642
|
+
content: [{ type: 'input_text', text }]
|
|
1643
|
+
});
|
|
1644
|
+
}
|
|
1645
|
+
|
|
1646
|
+
return mapped;
|
|
1647
|
+
}
|
|
1648
|
+
}
|
|
1649
|
+
|
|
1650
|
+
class MixOpenAIWebSocket extends MixOpenAIResponses {
|
|
1651
|
+
getDefaultConfig(customConfig) {
|
|
1652
|
+
return super.getDefaultConfig({
|
|
1653
|
+
realtimeUrl: 'wss://api.openai.com/v1/realtime',
|
|
1654
|
+
websocketTimeoutMs: 120000,
|
|
1655
|
+
...customConfig
|
|
1656
|
+
});
|
|
1657
|
+
}
|
|
1658
|
+
|
|
1659
|
+
async create({ config = {}, options = {} } = {}) {
|
|
1660
|
+
if (options.model?.startsWith('o')) {
|
|
1661
|
+
delete options.max_tokens;
|
|
1662
|
+
delete options.temperature;
|
|
1663
|
+
}
|
|
1664
|
+
if (options.model?.includes('gpt-5')) {
|
|
1665
|
+
if (options.max_tokens) {
|
|
1666
|
+
options.max_completion_tokens = options.max_tokens;
|
|
1667
|
+
delete options.max_tokens;
|
|
1668
|
+
}
|
|
1669
|
+
delete options.temperature;
|
|
1670
|
+
}
|
|
1671
|
+
|
|
1672
|
+
const mergedConfig = { ...this.config, ...config };
|
|
1673
|
+
const realtimeUrl = `${mergedConfig.realtimeUrl}?model=${encodeURIComponent(options.model)}`;
|
|
1674
|
+
const timeoutMs = mergedConfig.websocketTimeoutMs || 120000;
|
|
1675
|
+
|
|
1676
|
+
return await new Promise((resolve, reject) => {
|
|
1677
|
+
const ws = new WebSocket(realtimeUrl, {
|
|
1678
|
+
headers: {
|
|
1679
|
+
authorization: `Bearer ${mergedConfig.apiKey}`
|
|
1680
|
+
}
|
|
1681
|
+
});
|
|
1682
|
+
|
|
1683
|
+
const events = [];
|
|
1684
|
+
let message = '';
|
|
1685
|
+
let settled = false;
|
|
1686
|
+
let finalResponse = null;
|
|
1687
|
+
|
|
1688
|
+
const timeout = setTimeout(() => {
|
|
1689
|
+
if (settled) return;
|
|
1690
|
+
settled = true;
|
|
1691
|
+
ws.close();
|
|
1692
|
+
reject({
|
|
1693
|
+
message: `Realtime WebSocket timed out after ${timeoutMs}ms`,
|
|
1694
|
+
statusCode: null,
|
|
1695
|
+
details: null,
|
|
1696
|
+
config: mergedConfig,
|
|
1697
|
+
options
|
|
1698
|
+
});
|
|
1699
|
+
}, timeoutMs);
|
|
1700
|
+
|
|
1701
|
+
const cleanUp = () => clearTimeout(timeout);
|
|
1702
|
+
|
|
1703
|
+
ws.on('open', () => {
|
|
1704
|
+
const session = {
|
|
1705
|
+
type: 'realtime',
|
|
1706
|
+
output_modalities: ['text']
|
|
1707
|
+
};
|
|
1708
|
+
|
|
1709
|
+
if (mergedConfig.system) session.instructions = mergedConfig.system;
|
|
1710
|
+
if (Array.isArray(options.tools) && options.tools.length > 0) {
|
|
1711
|
+
session.tools = options.tools;
|
|
1712
|
+
}
|
|
1713
|
+
|
|
1714
|
+
ws.send(JSON.stringify({ type: 'session.update', session }));
|
|
1715
|
+
|
|
1716
|
+
const items = MixOpenAIWebSocket.messagesToConversationItems(options.messages);
|
|
1717
|
+
for (const item of items) {
|
|
1718
|
+
ws.send(JSON.stringify({
|
|
1719
|
+
type: 'conversation.item.create',
|
|
1720
|
+
item
|
|
1721
|
+
}));
|
|
1722
|
+
}
|
|
1723
|
+
|
|
1724
|
+
const responseConfig = { output_modalities: ['text'] };
|
|
1725
|
+
if (typeof options.max_completion_tokens === 'number') {
|
|
1726
|
+
responseConfig.max_output_tokens = Math.min(options.max_completion_tokens, 4096);
|
|
1727
|
+
} else if (typeof options.max_tokens === 'number') {
|
|
1728
|
+
responseConfig.max_output_tokens = Math.min(options.max_tokens, 4096);
|
|
1729
|
+
}
|
|
1730
|
+
if (Array.isArray(options.tools) && options.tools.length > 0) responseConfig.tools = options.tools;
|
|
1731
|
+
|
|
1732
|
+
ws.send(JSON.stringify({
|
|
1733
|
+
type: 'response.create',
|
|
1734
|
+
response: responseConfig
|
|
1735
|
+
}));
|
|
1736
|
+
});
|
|
1737
|
+
|
|
1738
|
+
ws.on('message', raw => {
|
|
1739
|
+
let event;
|
|
1740
|
+
try {
|
|
1741
|
+
event = JSON.parse(raw.toString());
|
|
1742
|
+
} catch {
|
|
1743
|
+
return;
|
|
1744
|
+
}
|
|
1745
|
+
|
|
1746
|
+
events.push(event);
|
|
1747
|
+
|
|
1748
|
+
const isTextDeltaEvent = event.type === 'response.text.delta' || event.type === 'response.output_text.delta';
|
|
1749
|
+
if (isTextDeltaEvent) {
|
|
1750
|
+
const delta = MixOpenAIWebSocket.extractDelta(event);
|
|
1751
|
+
if (delta) {
|
|
1752
|
+
message += delta;
|
|
1753
|
+
if (this.streamCallback) {
|
|
1754
|
+
this.streamCallback({ response: event, message, delta });
|
|
1755
|
+
}
|
|
1756
|
+
}
|
|
1757
|
+
return;
|
|
1758
|
+
}
|
|
1759
|
+
|
|
1760
|
+
if (event.type === 'response.done') {
|
|
1761
|
+
finalResponse = event.response || null;
|
|
1762
|
+
if (!message && finalResponse) {
|
|
1763
|
+
message = MixOpenAIResponses.extractResponsesMessage(finalResponse);
|
|
1764
|
+
}
|
|
1765
|
+
|
|
1766
|
+
if (!settled) {
|
|
1767
|
+
settled = true;
|
|
1768
|
+
cleanUp();
|
|
1769
|
+
ws.close();
|
|
1770
|
+
resolve({
|
|
1771
|
+
message: message.trim(),
|
|
1772
|
+
think: null,
|
|
1773
|
+
toolCalls: [],
|
|
1774
|
+
tokens: MixOpenAIResponses.extractResponsesTokens(finalResponse || {}),
|
|
1775
|
+
response: {
|
|
1776
|
+
response: finalResponse,
|
|
1777
|
+
events
|
|
1778
|
+
}
|
|
1779
|
+
});
|
|
1780
|
+
}
|
|
1781
|
+
return;
|
|
1782
|
+
}
|
|
1783
|
+
|
|
1784
|
+
if (event.type === 'error' && !settled) {
|
|
1785
|
+
settled = true;
|
|
1786
|
+
cleanUp();
|
|
1787
|
+
ws.close();
|
|
1788
|
+
reject({
|
|
1789
|
+
message: event.error?.message || 'Realtime WebSocket error',
|
|
1790
|
+
statusCode: null,
|
|
1791
|
+
details: event.error || event,
|
|
1792
|
+
config: mergedConfig,
|
|
1793
|
+
options
|
|
1794
|
+
});
|
|
1795
|
+
}
|
|
1796
|
+
});
|
|
1797
|
+
|
|
1798
|
+
ws.on('error', error => {
|
|
1799
|
+
if (settled) return;
|
|
1800
|
+
settled = true;
|
|
1801
|
+
cleanUp();
|
|
1802
|
+
reject({
|
|
1803
|
+
message: error.message || 'Realtime WebSocket connection error',
|
|
1804
|
+
statusCode: null,
|
|
1805
|
+
details: null,
|
|
1806
|
+
stack: error.stack,
|
|
1807
|
+
config: mergedConfig,
|
|
1808
|
+
options
|
|
1809
|
+
});
|
|
1810
|
+
});
|
|
1811
|
+
|
|
1812
|
+
ws.on('close', () => {
|
|
1813
|
+
if (settled) return;
|
|
1814
|
+
settled = true;
|
|
1815
|
+
cleanUp();
|
|
1816
|
+
reject({
|
|
1817
|
+
message: 'Realtime WebSocket closed before response.done',
|
|
1818
|
+
statusCode: null,
|
|
1819
|
+
details: null,
|
|
1820
|
+
config: mergedConfig,
|
|
1821
|
+
options
|
|
1822
|
+
});
|
|
1823
|
+
});
|
|
1824
|
+
});
|
|
1825
|
+
}
|
|
1826
|
+
|
|
1827
|
+
static messagesToConversationItems(messages = []) {
|
|
1828
|
+
const items = [];
|
|
1829
|
+
|
|
1830
|
+
for (const message of messages) {
|
|
1831
|
+
if (!message || !message.role) continue;
|
|
1832
|
+
if (message.role === 'tool' || message.tool_calls) continue;
|
|
1833
|
+
|
|
1834
|
+
const role = message.role === 'assistant' ? 'assistant' : (message.role === 'system' ? 'system' : 'user');
|
|
1835
|
+
const content = [];
|
|
1836
|
+
|
|
1837
|
+
if (typeof message.content === 'string') {
|
|
1838
|
+
content.push({
|
|
1839
|
+
type: role === 'assistant' ? 'text' : 'input_text',
|
|
1840
|
+
text: message.content
|
|
1841
|
+
});
|
|
1842
|
+
} else if (Array.isArray(message.content)) {
|
|
1843
|
+
for (const item of message.content) {
|
|
1844
|
+
if (!item || item.type !== 'text' || typeof item.text !== 'string') continue;
|
|
1845
|
+
content.push({
|
|
1846
|
+
type: role === 'assistant' ? 'text' : 'input_text',
|
|
1847
|
+
text: item.text
|
|
1848
|
+
});
|
|
1849
|
+
}
|
|
1850
|
+
}
|
|
1851
|
+
|
|
1852
|
+
if (content.length === 0) continue;
|
|
1853
|
+
items.push({ type: 'message', role, content });
|
|
1854
|
+
}
|
|
1855
|
+
|
|
1856
|
+
return items;
|
|
1857
|
+
}
|
|
1858
|
+
|
|
1859
|
+
static extractDelta(event) {
|
|
1860
|
+
if (typeof event.delta === 'string') return event.delta;
|
|
1861
|
+
return '';
|
|
1862
|
+
}
|
|
1863
|
+
}
|
|
1864
|
+
|
|
1502
1865
|
class MixOpenRouter extends MixOpenAI {
|
|
1503
1866
|
getDefaultConfig(customConfig) {
|
|
1504
1867
|
|
|
@@ -1684,13 +2047,15 @@ class MixAnthropic extends MixCustom {
|
|
|
1684
2047
|
return {
|
|
1685
2048
|
input: data.usage.input_tokens || 0,
|
|
1686
2049
|
output: data.usage.output_tokens || 0,
|
|
1687
|
-
total: (data.usage.input_tokens || 0) + (data.usage.output_tokens || 0)
|
|
2050
|
+
total: (data.usage.input_tokens || 0) + (data.usage.output_tokens || 0),
|
|
2051
|
+
cached: ModelMix.extractCacheTokens(data.usage)
|
|
1688
2052
|
};
|
|
1689
2053
|
}
|
|
1690
2054
|
return {
|
|
1691
2055
|
input: 0,
|
|
1692
2056
|
output: 0,
|
|
1693
|
-
total: 0
|
|
2057
|
+
total: 0,
|
|
2058
|
+
cached: 0
|
|
1694
2059
|
};
|
|
1695
2060
|
}
|
|
1696
2061
|
|
|
@@ -2219,13 +2584,15 @@ class MixGoogle extends MixCustom {
|
|
|
2219
2584
|
return {
|
|
2220
2585
|
input: data.usageMetadata.promptTokenCount || 0,
|
|
2221
2586
|
output: data.usageMetadata.candidatesTokenCount || 0,
|
|
2222
|
-
total: data.usageMetadata.totalTokenCount || 0
|
|
2587
|
+
total: data.usageMetadata.totalTokenCount || 0,
|
|
2588
|
+
cached: ModelMix.extractCacheTokens(data.usageMetadata)
|
|
2223
2589
|
};
|
|
2224
2590
|
}
|
|
2225
2591
|
return {
|
|
2226
2592
|
input: 0,
|
|
2227
2593
|
output: 0,
|
|
2228
|
-
total: 0
|
|
2594
|
+
total: 0,
|
|
2595
|
+
cached: 0
|
|
2229
2596
|
};
|
|
2230
2597
|
}
|
|
2231
2598
|
|
|
@@ -2273,4 +2640,4 @@ class MixGoogle extends MixCustom {
|
|
|
2273
2640
|
}
|
|
2274
2641
|
}
|
|
2275
2642
|
|
|
2276
|
-
module.exports = { MixCustom, ModelMix, MixAnthropic, MixMiniMax, MixOpenAI, MixOpenRouter, MixPerplexity, MixOllama, MixLMStudio, MixGroq, MixTogether, MixGrok, MixCerebras, MixGoogle, MixFireworks };
|
|
2643
|
+
module.exports = { MixCustom, ModelMix, MixAnthropic, MixMiniMax, MixOpenAI, MixOpenAIResponses, MixOpenAIWebSocket, MixOpenRouter, MixPerplexity, MixOllama, MixLMStudio, MixGroq, MixTogether, MixGrok, MixCerebras, MixGoogle, MixFireworks };
|