ai-speedometer 1.2.5 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -1
- package/cli.js +406 -97
- package/dist/ai-speedometer +58 -74
- package/package.json +4 -7
package/cli.js
CHANGED
|
@@ -6,7 +6,6 @@ import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
|
|
|
6
6
|
import { createAnthropic } from '@ai-sdk/anthropic';
|
|
7
7
|
import { streamText } from 'ai'; // AI SDK streaming entry point (streamText is what is imported here, not generateText)
|
|
8
8
|
import { testPrompt } from './test-prompt.js';
|
|
9
|
-
import { LLMBenchmark } from './benchmark-rest.js';
|
|
10
9
|
import { getAllProviders, searchProviders, getModelsForProvider } from './models-dev.js';
|
|
11
10
|
import {
|
|
12
11
|
getAllAvailableProviders,
|
|
@@ -29,8 +28,64 @@ import {
|
|
|
29
28
|
import 'dotenv/config';
|
|
30
29
|
import Table from 'cli-table3';
|
|
31
30
|
|
|
32
|
-
//
|
|
33
|
-
|
|
31
|
+
// Parse command line arguments
|
|
32
|
+
/**
 * Parse the command-line flags understood by ai-speedometer.
 *
 * Recognised switches: --debug, --ai-sdk, --formatted, --help / -h.
 * Recognised value flags: --bench <provider:model>, --api-key <key>
 * (both also accepted as --flag=value).
 * Unrecognised tokens are ignored.
 *
 * @param {string[]} [argv=process.argv.slice(2)] - Raw argument tokens.
 * @returns {{debug: boolean, bench: (string|null), apiKey: (string|null),
 *            useAiSdk: boolean, formatted: boolean, help: boolean}}
 *          Parsed options; value flags stay `null` when no usable value was given.
 */
function parseCliArgs(argv = process.argv.slice(2)) {
  // Maps are used instead of plain objects so that tokens such as
  // "constructor" are never mistaken for a known flag.
  const switchFlags = new Map([
    ['--debug', 'debug'],
    ['--ai-sdk', 'useAiSdk'],
    ['--formatted', 'formatted'],
    ['--help', 'help'],
    ['-h', 'help'],
  ]);
  const valueFlags = new Map([
    ['--bench', 'bench'],
    ['--api-key', 'apiKey'],
  ]);

  // "--bench=foo" -> "--bench"; tokens without "=" are returned unchanged.
  const flagName = (token) => {
    const eq = token.indexOf('=');
    return eq === -1 ? token : token.slice(0, eq);
  };
  const isKnownFlag = (token) =>
    switchFlags.has(token) || valueFlags.has(flagName(token));

  const parsed = {
    debug: false,
    bench: null,
    apiKey: null,
    useAiSdk: false,
    formatted: false,
    help: false,
  };

  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];

    if (switchFlags.has(arg)) {
      parsed[switchFlags.get(arg)] = true;
      continue;
    }

    const name = flagName(arg);
    if (!valueFlags.has(name)) {
      continue; // unknown token: ignored
    }
    const key = valueFlags.get(name);

    // Inline form: --flag=value
    if (name !== arg) {
      const inlineValue = arg.slice(name.length + 1);
      if (inlineValue) {
        parsed[key] = inlineValue;
      } else {
        console.error(`Warning: ${name} requires a value`);
      }
      continue;
    }

    // Separate form: --flag value. Do not consume the next token when it is
    // absent or is itself a flag; otherwise "--bench --debug" would silently
    // turn "--debug" into the benchmark spec.
    const next = argv[i + 1];
    if (next === undefined || isKnownFlag(next)) {
      console.error(`Warning: ${name} requires a value`);
      continue;
    }
    parsed[key] = next;
    i++;
  }

  return parsed;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Print the CLI usage text (title, usage lines, option list, examples)
 * to stdout, one console.log call per line.
 */
function showHelp() {
  // Small local printers keep each help line to a single readable statement.
  const blank = () => console.log('');
  const heading = (title) => console.log(colorText(title, 'yellow'));
  const entry = (left, note) => console.log(left + colorText(note, 'dim'));

  console.log(colorText('ai-speedometer - Benchmark AI models', 'cyan'));
  blank();

  heading('Usage:');
  entry(' ai-speedometer ', '# Interactive mode');
  entry(' ai-speedometer --bench <provider:model> ', '# Headless benchmark');
  blank();

  heading('Options:');
  entry(' --bench <provider:model> ', 'Run benchmark in headless mode');
  entry(' --api-key <key> ', 'Override API key (optional)');
  entry(' --ai-sdk ', 'Use AI SDK instead of REST API');
  entry(' --formatted ', 'Format JSON output for human readability');
  entry(' --debug ', 'Enable debug logging');
  entry(' --help, -h ', 'Show this help message');
  blank();

  heading('Examples:');
  const examples = [
    ' ai-speedometer --bench openai:gpt-4',
    ' ai-speedometer --bench anthropic:claude-3-opus --api-key "sk-..."',
    ' ai-speedometer --bench openai:gpt-4 --ai-sdk',
    ' ai-speedometer --bench openai:gpt-4 --formatted',
  ];
  for (const example of examples) {
    console.log(example);
  }
  blank();
}
|
|
86
|
+
|
|
87
|
+
const cliArgs = parseCliArgs();
|
|
88
|
+
const debugMode = cliArgs.debug;
|
|
34
89
|
let logFile = null;
|
|
35
90
|
|
|
36
91
|
function log(message) {
|
|
@@ -125,7 +180,6 @@ function clearScreen() {
|
|
|
125
180
|
/**
 * Print the application banner: the title, an underline, then a blank line.
 */
function showHeader() {
  // Title and underline are both rendered in cyan.
  for (const bannerLine of ['Ai-speedometer', '=============================']) {
    console.log(colorText(bannerLine, 'cyan'));
  }
  console.log('');
}
|
|
131
185
|
|
|
@@ -302,7 +356,6 @@ async function selectModelsCircular() {
|
|
|
302
356
|
// Add header
|
|
303
357
|
screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
|
|
304
358
|
screenContent += colorText('=============================', 'cyan') + '\n';
|
|
305
|
-
screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
|
|
306
359
|
screenContent += '\n';
|
|
307
360
|
|
|
308
361
|
screenContent += colorText('Select Models for Benchmark', 'magenta') + '\n';
|
|
@@ -805,8 +858,9 @@ async function displayColorfulResults(results, method = 'AI SDK', models = []) {
|
|
|
805
858
|
console.log(colorText('COMPREHENSIVE PERFORMANCE SUMMARY', 'yellow'));
|
|
806
859
|
|
|
807
860
|
// Add note about method differences
|
|
808
|
-
console.log(colorText('Note: ', 'cyan') + colorText('
|
|
809
|
-
console.log(colorText(' ', 'cyan') + colorText('
|
|
861
|
+
console.log(colorText('Note: ', 'cyan') + colorText('REST API with streaming now supports TTFT measurement. AI SDK also supports streaming.', 'dim'));
|
|
862
|
+
console.log(colorText(' ', 'cyan') + colorText('For thinking models, TTFT will be higher as thinking tokens are processed before output tokens.', 'dim'));
|
|
863
|
+
console.log(colorText(' ', 'cyan') + colorText('[est] markers indicate token counts were estimated (API did not provide usage metadata).', 'dim'));
|
|
810
864
|
console.log('');
|
|
811
865
|
|
|
812
866
|
const table = new Table({
|
|
@@ -833,15 +887,19 @@ async function displayColorfulResults(results, method = 'AI SDK', models = []) {
|
|
|
833
887
|
|
|
834
888
|
// Add data rows (already ranked by sort order)
|
|
835
889
|
sortedResults.forEach((result) => {
|
|
890
|
+
const outputTokenDisplay = result.tokenCount.toString() + (result.usedEstimateForOutput ? ' [est]' : '');
|
|
891
|
+
const promptTokenDisplay = result.promptTokens.toString() + (result.usedEstimateForInput ? ' [est]' : '');
|
|
892
|
+
const totalTokenDisplay = result.totalTokens.toString() + ((result.usedEstimateForInput || result.usedEstimateForOutput) ? ' [est]' : '');
|
|
893
|
+
|
|
836
894
|
table.push([
|
|
837
895
|
colorText(result.model, 'white'),
|
|
838
896
|
colorText(result.provider, 'white'),
|
|
839
897
|
colorText((result.totalTime / 1000).toFixed(2), 'green'),
|
|
840
898
|
colorText((result.timeToFirstToken / 1000).toFixed(2), 'yellow'),
|
|
841
899
|
colorText(result.tokensPerSecond.toFixed(1), 'magenta'),
|
|
842
|
-
colorText(result.
|
|
843
|
-
colorText(result.
|
|
844
|
-
colorText(result.
|
|
900
|
+
colorText(outputTokenDisplay, result.usedEstimateForOutput ? 'yellow' : 'blue'),
|
|
901
|
+
colorText(promptTokenDisplay, result.usedEstimateForInput ? 'yellow' : 'blue'),
|
|
902
|
+
colorText(totalTokenDisplay, (result.usedEstimateForInput || result.usedEstimateForOutput) ? 'yellow' : 'bright')
|
|
845
903
|
]);
|
|
846
904
|
});
|
|
847
905
|
|
|
@@ -1007,7 +1065,6 @@ async function addVerifiedProvider() {
|
|
|
1007
1065
|
// Add header
|
|
1008
1066
|
screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
|
|
1009
1067
|
screenContent += colorText('=============================', 'cyan') + '\n';
|
|
1010
|
-
screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
|
|
1011
1068
|
screenContent += '\n';
|
|
1012
1069
|
|
|
1013
1070
|
screenContent += colorText('Add Verified Provider', 'magenta') + '\n';
|
|
@@ -1223,7 +1280,6 @@ async function addCustomProviderCLI() {
|
|
|
1223
1280
|
// Add header
|
|
1224
1281
|
screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
|
|
1225
1282
|
screenContent += colorText('=============================', 'cyan') + '\n';
|
|
1226
|
-
screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
|
|
1227
1283
|
screenContent += '\n';
|
|
1228
1284
|
|
|
1229
1285
|
screenContent += colorText('Add Custom Provider', 'magenta') + '\n';
|
|
@@ -1486,7 +1542,6 @@ async function addCustomModelsMenu() {
|
|
|
1486
1542
|
// Add header
|
|
1487
1543
|
screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
|
|
1488
1544
|
screenContent += colorText('=============================', 'cyan') + '\n';
|
|
1489
|
-
screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
|
|
1490
1545
|
screenContent += '\n';
|
|
1491
1546
|
|
|
1492
1547
|
screenContent += colorText('Add Custom Models', 'magenta') + '\n';
|
|
@@ -1553,7 +1608,6 @@ async function addModelsToExistingProvider() {
|
|
|
1553
1608
|
// Add header
|
|
1554
1609
|
screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
|
|
1555
1610
|
screenContent += colorText('=============================', 'cyan') + '\n';
|
|
1556
|
-
screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
|
|
1557
1611
|
screenContent += '\n';
|
|
1558
1612
|
|
|
1559
1613
|
screenContent += colorText('Add Models to Existing Provider', 'magenta') + '\n';
|
|
@@ -1665,23 +1719,9 @@ async function addModelsToExistingProvider() {
|
|
|
1665
1719
|
await question(colorText('\nPress Enter to continue...', 'yellow'));
|
|
1666
1720
|
}
|
|
1667
1721
|
|
|
1668
|
-
//
|
|
1669
|
-
async function
|
|
1670
|
-
|
|
1671
|
-
console.log(colorText('No models selected for benchmarking.', 'red'));
|
|
1672
|
-
return;
|
|
1673
|
-
}
|
|
1674
|
-
|
|
1675
|
-
clearScreen();
|
|
1676
|
-
showHeader();
|
|
1677
|
-
console.log(colorText('Running REST API Benchmark...', 'green'));
|
|
1678
|
-
console.log(colorText(`Running ${models.length} models in parallel...`, 'cyan'));
|
|
1679
|
-
console.log(colorText('Note: This uses direct REST API calls instead of AI SDK', 'dim'));
|
|
1680
|
-
console.log('');
|
|
1681
|
-
|
|
1682
|
-
// Create a function to benchmark a single model using REST API
|
|
1683
|
-
const benchmarkModelRest = async (model) => {
|
|
1684
|
-
console.log(colorText(`Testing ${model.name} (${model.providerName}) via REST API...`, 'yellow'));
|
|
1722
|
+
// Silent benchmark helper (returns raw result without UI)
|
|
1723
|
+
async function benchmarkSingleModelRest(model) {
|
|
1724
|
+
|
|
1685
1725
|
|
|
1686
1726
|
try {
|
|
1687
1727
|
// Validate required configuration
|
|
@@ -1693,14 +1733,24 @@ async function runRestApiBenchmark(models) {
|
|
|
1693
1733
|
throw new Error(`Missing base URL for provider ${model.providerName}`);
|
|
1694
1734
|
}
|
|
1695
1735
|
|
|
1736
|
+
// Extract the actual model ID for API calls; this must happen before the endpoint is built, because the Google endpoint embeds the model ID
|
|
1737
|
+
let actualModelId = model.name;
|
|
1738
|
+
if (model.id && model.id.includes('_')) {
|
|
1739
|
+
actualModelId = model.id.split('_')[1];
|
|
1740
|
+
}
|
|
1741
|
+
actualModelId = actualModelId.trim();
|
|
1742
|
+
|
|
1696
1743
|
const startTime = Date.now();
|
|
1744
|
+
let firstTokenTime = null;
|
|
1745
|
+
let streamedText = '';
|
|
1746
|
+
let tokenCount = 0;
|
|
1697
1747
|
|
|
1698
1748
|
// Use correct endpoint based on provider type
|
|
1699
1749
|
let endpoint;
|
|
1700
1750
|
if (model.providerType === 'anthropic') {
|
|
1701
1751
|
endpoint = '/messages';
|
|
1702
1752
|
} else if (model.providerType === 'google') {
|
|
1703
|
-
endpoint = '/models/' + actualModelId + ':
|
|
1753
|
+
endpoint = '/models/' + actualModelId + ':streamGenerateContent';
|
|
1704
1754
|
} else {
|
|
1705
1755
|
endpoint = '/chat/completions';
|
|
1706
1756
|
}
|
|
@@ -1709,17 +1759,7 @@ async function runRestApiBenchmark(models) {
|
|
|
1709
1759
|
const baseUrl = model.providerConfig.baseUrl.replace(/\/$/, '');
|
|
1710
1760
|
const url = `${baseUrl}${endpoint}`;
|
|
1711
1761
|
|
|
1712
|
-
|
|
1713
|
-
let actualModelId = model.name;
|
|
1714
|
-
if (model.id && model.id.includes('_')) {
|
|
1715
|
-
// For models with provider prefix, extract the actual model ID
|
|
1716
|
-
actualModelId = model.id.split('_')[1];
|
|
1717
|
-
console.log(colorText(` Using extracted model ID: ${actualModelId}`, 'cyan'));
|
|
1718
|
-
}
|
|
1719
|
-
|
|
1720
|
-
// Trim any trailing spaces from model names
|
|
1721
|
-
actualModelId = actualModelId.trim();
|
|
1722
|
-
console.log(colorText(` Using final model ID: "${actualModelId}"`, 'cyan'));
|
|
1762
|
+
|
|
1723
1763
|
|
|
1724
1764
|
const headers = {
|
|
1725
1765
|
'Content-Type': 'application/json',
|
|
@@ -1731,7 +1771,6 @@ async function runRestApiBenchmark(models) {
|
|
|
1731
1771
|
headers['x-api-key'] = model.providerConfig.apiKey;
|
|
1732
1772
|
headers['anthropic-version'] = '2023-06-01';
|
|
1733
1773
|
} else if (model.providerType === 'google') {
|
|
1734
|
-
// Google uses different auth
|
|
1735
1774
|
delete headers['Authorization'];
|
|
1736
1775
|
headers['x-goog-api-key'] = model.providerConfig.apiKey;
|
|
1737
1776
|
}
|
|
@@ -1742,14 +1781,15 @@ async function runRestApiBenchmark(models) {
|
|
|
1742
1781
|
{ role: 'user', content: testPrompt }
|
|
1743
1782
|
],
|
|
1744
1783
|
max_tokens: 500,
|
|
1745
|
-
temperature: 0.7
|
|
1784
|
+
temperature: 0.7,
|
|
1785
|
+
stream: true
|
|
1746
1786
|
};
|
|
1747
1787
|
|
|
1748
1788
|
// Adjust for provider-specific formats
|
|
1749
1789
|
if (model.providerType === 'anthropic') {
|
|
1750
1790
|
body.max_tokens = 500;
|
|
1791
|
+
body.stream = true;
|
|
1751
1792
|
} else if (model.providerType === 'google') {
|
|
1752
|
-
// Google format is slightly different
|
|
1753
1793
|
body.contents = [{ parts: [{ text: testPrompt }] }];
|
|
1754
1794
|
body.generationConfig = {
|
|
1755
1795
|
maxOutputTokens: 500,
|
|
@@ -1757,10 +1797,10 @@ async function runRestApiBenchmark(models) {
|
|
|
1757
1797
|
};
|
|
1758
1798
|
delete body.messages;
|
|
1759
1799
|
delete body.max_tokens;
|
|
1800
|
+
delete body.stream;
|
|
1760
1801
|
}
|
|
1761
1802
|
|
|
1762
|
-
|
|
1763
|
-
console.log(colorText(` Using model: ${actualModelId}`, 'cyan'));
|
|
1803
|
+
|
|
1764
1804
|
|
|
1765
1805
|
const response = await fetch(url, {
|
|
1766
1806
|
method: 'POST',
|
|
@@ -1768,56 +1808,154 @@ async function runRestApiBenchmark(models) {
|
|
|
1768
1808
|
body: JSON.stringify(body)
|
|
1769
1809
|
});
|
|
1770
1810
|
|
|
1771
|
-
|
|
1811
|
+
|
|
1772
1812
|
|
|
1773
1813
|
if (!response.ok) {
|
|
1774
1814
|
const errorText = await response.text();
|
|
1775
|
-
console.log(colorText(` Error: ${errorText.slice(0, 200)}...`, 'red'));
|
|
1776
1815
|
throw new Error(`API request failed: ${response.status} ${response.statusText}`);
|
|
1777
1816
|
}
|
|
1778
1817
|
|
|
1779
|
-
|
|
1818
|
+
// Process streaming response
|
|
1819
|
+
const reader = response.body.getReader();
|
|
1820
|
+
const decoder = new TextDecoder();
|
|
1821
|
+
let buffer = '';
|
|
1822
|
+
let inputTokens = 0;
|
|
1823
|
+
let outputTokens = 0;
|
|
1824
|
+
let isFirstChunk = true;
|
|
1825
|
+
|
|
1826
|
+
while (true) {
|
|
1827
|
+
const { done, value } = await reader.read();
|
|
1828
|
+
if (done) break;
|
|
1829
|
+
|
|
1830
|
+
// Capture TTFT on first chunk arrival (network level)
|
|
1831
|
+
if (isFirstChunk && !firstTokenTime) {
|
|
1832
|
+
firstTokenTime = Date.now();
|
|
1833
|
+
isFirstChunk = false;
|
|
1834
|
+
// Show live TTFT result (only in interactive mode, not headless)
|
|
1835
|
+
const ttftSeconds = ((firstTokenTime - startTime) / 1000).toFixed(2);
|
|
1836
|
+
if (!cliArgs.bench) {
|
|
1837
|
+
console.log(colorText(`TTFT received at ${ttftSeconds}s for ${model.name}`, 'green'));
|
|
1838
|
+
}
|
|
1839
|
+
}
|
|
1840
|
+
|
|
1841
|
+
buffer += decoder.decode(value, { stream: true });
|
|
1842
|
+
const lines = buffer.split('\n');
|
|
1843
|
+
buffer = lines.pop() || '';
|
|
1844
|
+
|
|
1845
|
+
for (const line of lines) {
|
|
1846
|
+
const trimmedLine = line.trim();
|
|
1847
|
+
if (!trimmedLine) continue;
|
|
1848
|
+
|
|
1849
|
+
try {
|
|
1850
|
+
if (model.providerType === 'anthropic') {
|
|
1851
|
+
// Anthropic streams Server-Sent Events: `event:` lines followed by `data:` lines whose JSON payload carries a `type` field
|
|
1852
|
+
if (trimmedLine.startsWith('data: ')) {
|
|
1853
|
+
const jsonStr = trimmedLine.slice(6);
|
|
1854
|
+
if (jsonStr === '[DONE]') break;
|
|
1855
|
+
|
|
1856
|
+
const chunk = JSON.parse(jsonStr);
|
|
1857
|
+
|
|
1858
|
+
if (chunk.type === 'content_block_delta' && chunk.delta?.text) {
|
|
1859
|
+
streamedText += chunk.delta.text;
|
|
1860
|
+
} else if (chunk.type === 'message_start' && chunk.message?.usage) {
|
|
1861
|
+
inputTokens = chunk.message.usage.input_tokens || 0;
|
|
1862
|
+
} else if (chunk.type === 'message_delta') {
|
|
1863
|
+
// Capture output tokens from message_delta
|
|
1864
|
+
if (chunk.usage?.output_tokens) {
|
|
1865
|
+
outputTokens = chunk.usage.output_tokens;
|
|
1866
|
+
}
|
|
1867
|
+
// Some implementations put input_tokens here too
|
|
1868
|
+
if (chunk.usage?.input_tokens && !inputTokens) {
|
|
1869
|
+
inputTokens = chunk.usage.input_tokens;
|
|
1870
|
+
}
|
|
1871
|
+
}
|
|
1872
|
+
} else if (trimmedLine.startsWith('event: ')) {
|
|
1873
|
+
// Skip event lines (Anthropic SSE format uses separate event and data lines)
|
|
1874
|
+
continue;
|
|
1875
|
+
} else {
|
|
1876
|
+
// Try parsing as raw JSON (some Anthropic-compatible APIs don't use SSE format)
|
|
1877
|
+
const chunk = JSON.parse(trimmedLine);
|
|
1878
|
+
|
|
1879
|
+
if (chunk.type === 'content_block_delta' && chunk.delta?.text) {
|
|
1880
|
+
streamedText += chunk.delta.text;
|
|
1881
|
+
} else if (chunk.type === 'message_start' && chunk.message?.usage) {
|
|
1882
|
+
inputTokens = chunk.message.usage.input_tokens || 0;
|
|
1883
|
+
} else if (chunk.type === 'message_delta') {
|
|
1884
|
+
if (chunk.usage?.output_tokens) {
|
|
1885
|
+
outputTokens = chunk.usage.output_tokens;
|
|
1886
|
+
}
|
|
1887
|
+
if (chunk.usage?.input_tokens && !inputTokens) {
|
|
1888
|
+
inputTokens = chunk.usage.input_tokens;
|
|
1889
|
+
}
|
|
1890
|
+
}
|
|
1891
|
+
}
|
|
1892
|
+
} else if (model.providerType === 'google') {
|
|
1893
|
+
// Google streaming format
|
|
1894
|
+
const chunk = JSON.parse(trimmedLine);
|
|
1895
|
+
if (chunk.candidates?.[0]?.content?.parts?.[0]?.text) {
|
|
1896
|
+
const text = chunk.candidates[0].content.parts[0].text;
|
|
1897
|
+
streamedText += text;
|
|
1898
|
+
}
|
|
1899
|
+
if (chunk.usageMetadata?.promptTokenCount) {
|
|
1900
|
+
inputTokens = chunk.usageMetadata.promptTokenCount;
|
|
1901
|
+
}
|
|
1902
|
+
if (chunk.usageMetadata?.candidatesTokenCount) {
|
|
1903
|
+
outputTokens = chunk.usageMetadata.candidatesTokenCount;
|
|
1904
|
+
}
|
|
1905
|
+
} else {
|
|
1906
|
+
// OpenAI-compatible SSE format
|
|
1907
|
+
if (trimmedLine.startsWith('data: ')) {
|
|
1908
|
+
const jsonStr = trimmedLine.slice(6);
|
|
1909
|
+
if (jsonStr === '[DONE]') break;
|
|
1910
|
+
|
|
1911
|
+
const chunk = JSON.parse(jsonStr);
|
|
1912
|
+
|
|
1913
|
+
if (chunk.choices?.[0]?.delta?.content) {
|
|
1914
|
+
streamedText += chunk.choices[0].delta.content;
|
|
1915
|
+
}
|
|
1916
|
+
|
|
1917
|
+
if (chunk.usage?.prompt_tokens) {
|
|
1918
|
+
inputTokens = chunk.usage.prompt_tokens;
|
|
1919
|
+
}
|
|
1920
|
+
if (chunk.usage?.completion_tokens) {
|
|
1921
|
+
outputTokens = chunk.usage.completion_tokens;
|
|
1922
|
+
}
|
|
1923
|
+
}
|
|
1924
|
+
}
|
|
1925
|
+
} catch (parseError) {
|
|
1926
|
+
// Skip invalid JSON lines
|
|
1927
|
+
continue;
|
|
1928
|
+
}
|
|
1929
|
+
}
|
|
1930
|
+
}
|
|
1931
|
+
|
|
1780
1932
|
const endTime = Date.now();
|
|
1781
1933
|
const totalTime = endTime - startTime;
|
|
1934
|
+
const timeToFirstToken = firstTokenTime ? firstTokenTime - startTime : totalTime;
|
|
1782
1935
|
|
|
1783
|
-
// Calculate
|
|
1784
|
-
|
|
1785
|
-
|
|
1786
|
-
|
|
1787
|
-
|
|
1788
|
-
|
|
1789
|
-
} else if (model.providerType === 'google') {
|
|
1790
|
-
inputTokens = data.usageMetadata?.promptTokenCount || Math.round(testPrompt.length / 4);
|
|
1791
|
-
outputTokens = data.usageMetadata?.candidatesTokenCount || Math.round(data.candidates?.[0]?.content?.parts?.[0]?.text?.length / 4 || 0);
|
|
1792
|
-
} else {
|
|
1793
|
-
inputTokens = data.usage?.prompt_tokens || Math.round(testPrompt.length / 4);
|
|
1794
|
-
outputTokens = data.usage?.completion_tokens || Math.round(data.choices?.[0]?.message?.content?.length / 4 || 0);
|
|
1795
|
-
}
|
|
1796
|
-
|
|
1797
|
-
const totalTokens = inputTokens + outputTokens;
|
|
1936
|
+
// Calculate token counts - use provider's count if available, otherwise estimate
|
|
1937
|
+
const usedEstimateForOutput = !outputTokens;
|
|
1938
|
+
const usedEstimateForInput = !inputTokens;
|
|
1939
|
+
const finalOutputTokens = outputTokens || Math.round(streamedText.length / 4);
|
|
1940
|
+
const finalInputTokens = inputTokens || Math.round(testPrompt.length / 4);
|
|
1941
|
+
const totalTokens = finalInputTokens + finalOutputTokens;
|
|
1798
1942
|
const tokensPerSecond = totalTime > 0 ? (totalTokens / totalTime) * 1000 : 0;
|
|
1799
|
-
|
|
1800
|
-
console.log(colorText('Completed!', 'green'));
|
|
1801
|
-
console.log(colorText(` Total Time: ${(totalTime / 1000).toFixed(2)}s`, 'cyan'));
|
|
1802
|
-
console.log(colorText(` Tokens/Sec: ${tokensPerSecond.toFixed(1)}`, 'cyan'));
|
|
1803
|
-
console.log(colorText(` Input Tokens: ${inputTokens}`, 'cyan'));
|
|
1804
|
-
console.log(colorText(` Output Tokens: ${outputTokens}`, 'cyan'));
|
|
1805
|
-
console.log(colorText(` Total Tokens: ${totalTokens}`, 'cyan'));
|
|
1806
1943
|
|
|
1807
1944
|
return {
|
|
1808
1945
|
model: model.name,
|
|
1809
1946
|
provider: model.providerName,
|
|
1810
1947
|
totalTime: totalTime,
|
|
1811
|
-
timeToFirstToken:
|
|
1812
|
-
tokenCount:
|
|
1948
|
+
timeToFirstToken: timeToFirstToken,
|
|
1949
|
+
tokenCount: finalOutputTokens,
|
|
1813
1950
|
tokensPerSecond: tokensPerSecond,
|
|
1814
|
-
promptTokens:
|
|
1951
|
+
promptTokens: finalInputTokens,
|
|
1815
1952
|
totalTokens: totalTokens,
|
|
1953
|
+
usedEstimateForOutput: usedEstimateForOutput,
|
|
1954
|
+
usedEstimateForInput: usedEstimateForInput,
|
|
1816
1955
|
success: true
|
|
1817
1956
|
};
|
|
1818
1957
|
|
|
1819
1958
|
} catch (error) {
|
|
1820
|
-
console.log(colorText('Failed: ', 'red') + error.message);
|
|
1821
1959
|
return {
|
|
1822
1960
|
model: model.name,
|
|
1823
1961
|
provider: model.providerName,
|
|
@@ -1831,17 +1969,49 @@ async function runRestApiBenchmark(models) {
|
|
|
1831
1969
|
error: error.message
|
|
1832
1970
|
};
|
|
1833
1971
|
}
|
|
1834
|
-
|
|
1972
|
+
}
|
|
1973
|
+
|
|
1974
|
+
// REST API benchmark function using direct API calls (with UI)
|
|
1975
|
+
async function runRestApiBenchmark(models) {
|
|
1976
|
+
if (models.length === 0) {
|
|
1977
|
+
console.log(colorText('No models selected for benchmarking.', 'red'));
|
|
1978
|
+
return;
|
|
1979
|
+
}
|
|
1835
1980
|
|
|
1836
|
-
|
|
1981
|
+
clearScreen();
|
|
1982
|
+
showHeader();
|
|
1983
|
+
console.log(colorText('Running REST API Benchmark with Streaming...', 'green'));
|
|
1984
|
+
console.log(colorText(`Running ${models.length} models in parallel...`, 'cyan'));
|
|
1985
|
+
console.log(colorText('Note: This uses direct REST API calls with streaming support', 'dim'));
|
|
1986
|
+
console.log('');
|
|
1987
|
+
|
|
1988
|
+
// Run all benchmarks in parallel with UI feedback
|
|
1837
1989
|
console.log(colorText('Starting parallel REST API benchmark execution...', 'cyan'));
|
|
1838
|
-
|
|
1990
|
+
|
|
1991
|
+
// Start all benchmarks in parallel
|
|
1992
|
+
const promises = models.map(model => {
|
|
1993
|
+
console.log(colorText(`Testing ${model.name} (${model.providerName}) via REST API with streaming...`, 'yellow'));
|
|
1994
|
+
return benchmarkSingleModelRest(model);
|
|
1995
|
+
});
|
|
1996
|
+
|
|
1839
1997
|
const results = await Promise.all(promises);
|
|
1840
1998
|
|
|
1999
|
+
// Show individual results after all complete
|
|
2000
|
+
results.forEach((result, index) => {
|
|
2001
|
+
if (result.success) {
|
|
2002
|
+
console.log(colorText(`✓ ${result.model} (${result.provider}) completed!`, 'green'));
|
|
2003
|
+
console.log(colorText(` Total Time: ${(result.totalTime / 1000).toFixed(2)}s`, 'cyan'));
|
|
2004
|
+
console.log(colorText(` TTFT: ${(result.timeToFirstToken / 1000).toFixed(2)}s`, 'cyan'));
|
|
2005
|
+
console.log(colorText(` Tokens/Sec: ${result.tokensPerSecond.toFixed(1)}`, 'cyan'));
|
|
2006
|
+
} else {
|
|
2007
|
+
console.log(colorText(`✗ ${result.model} (${result.provider}) failed: `, 'red') + result.error);
|
|
2008
|
+
}
|
|
2009
|
+
});
|
|
2010
|
+
|
|
1841
2011
|
console.log('');
|
|
1842
2012
|
console.log(colorText('All REST API benchmarks completed!', 'green'));
|
|
1843
2013
|
|
|
1844
|
-
await displayColorfulResults(results, 'REST API', models);
|
|
2014
|
+
await displayColorfulResults(results, 'REST API (Streaming)', models);
|
|
1845
2015
|
|
|
1846
2016
|
// Add successful models to recent models list
|
|
1847
2017
|
const successfulModels = results
|
|
@@ -1868,16 +2038,16 @@ async function runRestApiBenchmark(models) {
|
|
|
1868
2038
|
async function showMainMenu() {
|
|
1869
2039
|
const menuOptions = [
|
|
1870
2040
|
{ id: 1, text: 'Set Model', action: () => showModelMenu() },
|
|
1871
|
-
{ id: 2, text: 'Run Benchmark (
|
|
2041
|
+
{ id: 2, text: 'Run Benchmark (REST API)', action: async () => {
|
|
1872
2042
|
const selectedModels = await selectModelsCircular();
|
|
1873
2043
|
if (selectedModels.length > 0) {
|
|
1874
|
-
await
|
|
2044
|
+
await runRestApiBenchmark(selectedModels);
|
|
1875
2045
|
}
|
|
1876
2046
|
}},
|
|
1877
|
-
{ id: 3, text: 'Run Benchmark (
|
|
2047
|
+
{ id: 3, text: 'Run Benchmark (AI SDK - Legacy)', action: async () => {
|
|
1878
2048
|
const selectedModels = await selectModelsCircular();
|
|
1879
2049
|
if (selectedModels.length > 0) {
|
|
1880
|
-
await
|
|
2050
|
+
await runStreamingBenchmark(selectedModels);
|
|
1881
2051
|
}
|
|
1882
2052
|
}},
|
|
1883
2053
|
{ id: 4, text: 'Exit', action: () => {
|
|
@@ -1896,7 +2066,6 @@ async function showMainMenu() {
|
|
|
1896
2066
|
// Add header
|
|
1897
2067
|
screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
|
|
1898
2068
|
screenContent += colorText('=============================', 'cyan') + '\n';
|
|
1899
|
-
screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
|
|
1900
2069
|
screenContent += '\n';
|
|
1901
2070
|
|
|
1902
2071
|
screenContent += colorText('Main Menu:', 'cyan') + '\n';
|
|
@@ -1953,7 +2122,6 @@ async function showModelMenu() {
|
|
|
1953
2122
|
// Add header
|
|
1954
2123
|
screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
|
|
1955
2124
|
screenContent += colorText('=============================', 'cyan') + '\n';
|
|
1956
|
-
screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
|
|
1957
2125
|
screenContent += '\n';
|
|
1958
2126
|
|
|
1959
2127
|
screenContent += colorText('Model Management:', 'cyan') + '\n';
|
|
@@ -2002,17 +2170,158 @@ process.on('SIGINT', () => {
|
|
|
2002
2170
|
process.exit(0);
|
|
2003
2171
|
});
|
|
2004
2172
|
|
|
2173
|
+
// Headless benchmark mode
|
|
2174
|
+
/**
 * Run a single benchmark without the interactive UI and print the result as
 * one JSON document via console.log.
 *
 * The function always ends the process with process.exit(): code 0 when the
 * benchmark result reports success, code 1 for an invalid spec, an unknown
 * provider or model, a missing API key, the (not yet implemented) AI SDK
 * path, a failed benchmark, or any unexpected error. Diagnostics are written
 * with console.error.
 *
 * @param {string} benchSpec - Target in `provider:model` form. Only the FIRST
 *   colon separates provider from model, so model IDs that themselves contain
 *   colons (e.g. `ollama:llama3:8b`) are supported.
 * @param {string|null} apiKey - When truthy, used instead of the API key
 *   stored on the provider.
 * @param {boolean} useAiSdk - Requests the AI SDK path, which currently just
 *   reports "not yet implemented" and exits with code 1.
 * @returns {Promise<void>}
 */
async function runHeadlessBenchmark(benchSpec, apiKey, useAiSdk) {
  // Stored model IDs look like "<provider>_<model>"; return the part after
  // the first underscore, or the ID unchanged when there is no underscore.
  const stripProviderPrefix = (id) =>
    (id?.includes('_') ? id.slice(id.indexOf('_') + 1) : id);

  try {
    // Split on the first colon only: String.prototype.split(':') with a
    // two-element destructuring would silently drop everything after a
    // second colon and make such models impossible to select.
    const separatorIndex = benchSpec.indexOf(':');
    const providerSpec = separatorIndex === -1 ? benchSpec : benchSpec.slice(0, separatorIndex);
    const modelName = separatorIndex === -1 ? '' : benchSpec.slice(separatorIndex + 1);

    if (!providerSpec || !modelName) {
      console.error(colorText('Error: Invalid --bench format. Use: provider:model', 'red'));
      console.error(colorText('Example: --bench zai-code-anth:glm-4.6', 'yellow'));
      process.exit(1);
    }

    // Load all available providers
    const config = await loadConfig();

    // Find the provider by id or name (case-insensitive)
    const wantedProvider = providerSpec.toLowerCase();
    const provider = config.providers.find((p) =>
      p.id?.toLowerCase() === wantedProvider ||
      p.name?.toLowerCase() === wantedProvider
    );

    if (!provider) {
      console.error(colorText(`Error: Provider '${providerSpec}' not found`, 'red'));
      console.error(colorText('Available providers:', 'yellow'));
      config.providers.forEach((p) => {
        console.error(colorText(` - ${p.id || p.name}`, 'cyan'));
      });
      process.exit(1);
    }

    // A provider entry without a models array is treated as having no models
    // so the user gets the "model not found" message instead of a TypeError.
    const providerModels = provider.models ?? [];

    // A model matches on any of (case-insensitive):
    //   1. full ID, e.g. "zai-code-anth_glm-4.6"
    //   2. ID without the provider prefix, e.g. "glm-4.6"
    //   3. display name, e.g. "GLM-4.6-anth"
    const wantedModel = modelName.toLowerCase();
    const model = providerModels.find((m) => {
      const idLower = m.id?.toLowerCase() ?? '';
      const nameLower = m.name?.toLowerCase() ?? '';
      return (
        idLower === wantedModel ||
        stripProviderPrefix(idLower) === wantedModel ||
        nameLower === wantedModel
      );
    });

    if (!model) {
      console.error(colorText(`Error: Model '${modelName}' not found in provider '${provider.name}'`, 'red'));
      console.error(colorText('Available models:', 'yellow'));
      providerModels.forEach((m) => {
        // Show both name and ID (without provider prefix) for clarity
        console.error(colorText(` - ${m.name} (id: ${stripProviderPrefix(m.id)})`, 'cyan'));
      });
      process.exit(1);
    }

    // An API key given on the command line wins over the configured one
    const finalApiKey = apiKey || provider.apiKey;

    if (!finalApiKey) {
      console.error(colorText(`Error: No API key found for provider '${provider.name}'`, 'red'));
      console.error(colorText('Please provide --api-key flag or configure the provider first', 'yellow'));
      process.exit(1);
    }

    // Model object in the shape the benchmark helper is called with
    const modelConfig = {
      ...model,
      providerName: provider.name,
      providerType: provider.type,
      providerId: provider.id,
      providerConfig: {
        ...provider,
        apiKey: finalApiKey,
        baseUrl: provider.baseUrl || ''
      },
      selected: true
    };

    // Run benchmark silently and get results
    let result;
    if (useAiSdk) {
      // TODO: Implement AI SDK silent benchmark
      console.error(colorText('AI SDK headless mode not yet implemented', 'red'));
      process.exit(1);
    } else {
      result = await benchmarkSingleModelRest(modelConfig);
    }

    // Output JSON to stdout
    const jsonOutput = {
      provider: provider.name,
      providerId: provider.id,
      model: model.name,
      modelId: model.id,
      method: useAiSdk ? 'ai-sdk' : 'rest-api',
      success: result.success,
      totalTime: result.totalTime,
      totalTimeSeconds: result.totalTime / 1000,
      timeToFirstToken: result.timeToFirstToken,
      timeToFirstTokenSeconds: result.timeToFirstToken / 1000,
      tokensPerSecond: result.tokensPerSecond,
      outputTokens: result.tokenCount,
      promptTokens: result.promptTokens,
      totalTokens: result.totalTokens,
      is_estimated: !!(result.usedEstimateForOutput || result.usedEstimateForInput),
      error: result.error || null
    };

    // --formatted selects 2-space indentation; otherwise a single line
    console.log(JSON.stringify(jsonOutput, null, cliArgs.formatted ? 2 : 0));
    process.exit(result.success ? 0 : 1);
  } catch (error) {
    console.error(colorText('Error: ' + error.message, 'red'));
    if (debugMode) {
      console.error(error.stack);
    }
    process.exit(1);
  }
}
|
|
2305
|
+
|
|
2005
2306
|
// Start the CLI
|
|
2006
|
-
if (
|
|
2007
|
-
|
|
2008
|
-
|
|
2009
|
-
|
|
2010
|
-
|
|
2011
|
-
|
|
2012
|
-
|
|
2013
|
-
|
|
2014
|
-
|
|
2015
|
-
|
|
2307
|
+
// Entry-point guard: the CLI only starts when this file is the program being run.
// NOTE(review): this file uses ES module syntax (import/export); plain Node ESM
// does not define `require` or `module`, so confirm the build/runtime used for
// the published binary provides them.
if (require.main === module) {
  // --help / -h: print usage and stop before anything else runs.
  if (cliArgs.help) {
    showHelp();
    process.exit(0);
  }

  if (!cliArgs.bench) {
    // Interactive mode: open the main menu whether or not the
    // recent-models cleanup succeeded.
    const startInteractive = () => {
      showMainMenu();
    };
    cleanupRecentModelsFromConfig().then(startInteractive).catch(startInteractive);
  } else {
    // Headless mode: --bench <provider:model> was supplied.
    runHeadlessBenchmark(cliArgs.bench, cliArgs.apiKey, cliArgs.useAiSdk);
  }
}
|
|
2017
2326
|
|
|
2018
2327
|
export { showMainMenu, listProviders, selectModelsCircular, runStreamingBenchmark, loadConfig, saveConfig };
|