ai-speedometer 1.2.4 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/cli.js +398 -97
- package/dist/ai-speedometer +58 -74
- package/package.json +4 -7
package/cli.js
CHANGED
|
@@ -6,7 +6,6 @@ import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
|
|
|
6
6
|
import { createAnthropic } from '@ai-sdk/anthropic';
|
|
7
7
|
import { streamText } from 'ai'; // Changed from streamText to generateText
|
|
8
8
|
import { testPrompt } from './test-prompt.js';
|
|
9
|
-
import { LLMBenchmark } from './benchmark-rest.js';
|
|
10
9
|
import { getAllProviders, searchProviders, getModelsForProvider } from './models-dev.js';
|
|
11
10
|
import {
|
|
12
11
|
getAllAvailableProviders,
|
|
@@ -29,8 +28,59 @@ import {
|
|
|
29
28
|
import 'dotenv/config';
|
|
30
29
|
import Table from 'cli-table3';
|
|
31
30
|
|
|
32
|
-
//
|
|
33
|
-
|
|
31
|
+
// Parse command line arguments
|
|
32
|
+
/**
 * Parse the command line flags understood by ai-speedometer.
 *
 * Recognised switches: `--debug`, `--ai-sdk`, `--help` / `-h`.
 * Recognised value flags: `--bench <spec>` and `--api-key <key>`, also accepted
 * in the inline form `--bench=<spec>` / `--api-key=<key>`.
 * Unknown tokens are ignored.
 *
 * @param {string[]} [argv=process.argv.slice(2)] - Raw argument tokens, without
 *   the interpreter and script path.
 * @returns {{debug: boolean, bench: (string|null), apiKey: (string|null),
 *   useAiSdk: boolean, help: boolean}} Parsed options; value flags that were
 *   not given (or were given without a value) stay `null`.
 */
function parseCliArgs(argv = process.argv.slice(2)) {
  // Maps (rather than plain objects) so tokens such as "constructor" can never
  // match an inherited property by accident.
  const switchFlags = new Map([
    ['--debug', 'debug'],
    ['--ai-sdk', 'useAiSdk'],
    ['--help', 'help'],
    ['-h', 'help'],
  ]);
  const valueFlags = new Map([
    ['--bench', 'bench'],
    ['--api-key', 'apiKey'],
  ]);

  // "--bench=foo" -> "--bench"; tokens without "=" are returned unchanged.
  const nameOf = (token) => {
    const equalsIndex = token.indexOf('=');
    return equalsIndex === -1 ? token : token.slice(0, equalsIndex);
  };
  const looksLikeFlag = (token) => switchFlags.has(token) || valueFlags.has(nameOf(token));

  const parsed = {
    debug: false,
    bench: null,
    apiKey: null,
    useAiSdk: false,
    help: false,
  };

  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];

    if (switchFlags.has(arg)) {
      parsed[switchFlags.get(arg)] = true;
      continue;
    }

    const name = nameOf(arg);
    if (!valueFlags.has(name)) {
      continue;
    }
    const key = valueFlags.get(name);

    if (name !== arg) {
      // Inline form: everything after the first "=" is the value.
      parsed[key] = arg.slice(name.length + 1);
      continue;
    }

    // Separate form: only consume the next token when it is a real value.
    // Otherwise "--bench --debug" would swallow "--debug" as the bench spec.
    const next = argv[i + 1];
    if (next !== undefined && !looksLikeFlag(next)) {
      parsed[key] = next;
      i++;
    }
  }

  return parsed;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Print the command line usage summary to stdout.
 *
 * Output consists of a title, a "Usage" section, an "Options" section and an
 * "Examples" section, each followed by an empty line. Colouring is delegated
 * to `colorText`.
 */
function showHelp() {
  // Small printers so each row below reads as data rather than plumbing.
  const blank = () => console.log('');
  const heading = (title) => console.log(colorText(title, 'yellow'));
  const entry = (prefix, note) => console.log(`${prefix}${colorText(note, 'dim')}`);

  console.log(colorText('ai-speedometer - Benchmark AI models', 'cyan'));
  blank();

  heading('Usage:');
  entry(' ai-speedometer ', '# Interactive mode');
  entry(' ai-speedometer --bench <provider:model> ', '# Headless benchmark');
  blank();

  heading('Options:');
  entry(' --bench <provider:model> ', 'Run benchmark in headless mode');
  entry(' --api-key <key> ', 'Override API key (optional)');
  entry(' --ai-sdk ', 'Use AI SDK instead of REST API');
  entry(' --debug ', 'Enable debug logging');
  entry(' --help, -h ', 'Show this help message');
  blank();

  heading('Examples:');
  console.log(' ai-speedometer --bench openai:gpt-4');
  console.log(' ai-speedometer --bench anthropic:claude-3-opus --api-key "sk-..."');
  console.log(' ai-speedometer --bench openai:gpt-4 --ai-sdk');
  blank();
}
|
|
81
|
+
|
|
82
|
+
const cliArgs = parseCliArgs();
|
|
83
|
+
const debugMode = cliArgs.debug;
|
|
34
84
|
let logFile = null;
|
|
35
85
|
|
|
36
86
|
function log(message) {
|
|
@@ -125,7 +175,6 @@ function clearScreen() {
|
|
|
125
175
|
function showHeader() {
|
|
126
176
|
console.log(colorText('Ai-speedometer', 'cyan'));
|
|
127
177
|
console.log(colorText('=============================', 'cyan'));
|
|
128
|
-
console.log(colorText('Note: opencode uses ai-sdk', 'dim'));
|
|
129
178
|
console.log('');
|
|
130
179
|
}
|
|
131
180
|
|
|
@@ -302,7 +351,6 @@ async function selectModelsCircular() {
|
|
|
302
351
|
// Add header
|
|
303
352
|
screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
|
|
304
353
|
screenContent += colorText('=============================', 'cyan') + '\n';
|
|
305
|
-
screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
|
|
306
354
|
screenContent += '\n';
|
|
307
355
|
|
|
308
356
|
screenContent += colorText('Select Models for Benchmark', 'magenta') + '\n';
|
|
@@ -805,8 +853,9 @@ async function displayColorfulResults(results, method = 'AI SDK', models = []) {
|
|
|
805
853
|
console.log(colorText('COMPREHENSIVE PERFORMANCE SUMMARY', 'yellow'));
|
|
806
854
|
|
|
807
855
|
// Add note about method differences
|
|
808
|
-
console.log(colorText('Note: ', 'cyan') + colorText('
|
|
809
|
-
console.log(colorText(' ', 'cyan') + colorText('
|
|
856
|
+
console.log(colorText('Note: ', 'cyan') + colorText('REST API with streaming now supports TTFT measurement. AI SDK also supports streaming.', 'dim'));
|
|
857
|
+
console.log(colorText(' ', 'cyan') + colorText('For thinking models, TTFT will be higher as thinking tokens are processed before output tokens.', 'dim'));
|
|
858
|
+
console.log(colorText(' ', 'cyan') + colorText('[est] markers indicate token counts were estimated (API did not provide usage metadata).', 'dim'));
|
|
810
859
|
console.log('');
|
|
811
860
|
|
|
812
861
|
const table = new Table({
|
|
@@ -833,15 +882,19 @@ async function displayColorfulResults(results, method = 'AI SDK', models = []) {
|
|
|
833
882
|
|
|
834
883
|
// Add data rows (already ranked by sort order)
|
|
835
884
|
sortedResults.forEach((result) => {
|
|
885
|
+
const outputTokenDisplay = result.tokenCount.toString() + (result.usedEstimateForOutput ? ' [est]' : '');
|
|
886
|
+
const promptTokenDisplay = result.promptTokens.toString() + (result.usedEstimateForInput ? ' [est]' : '');
|
|
887
|
+
const totalTokenDisplay = result.totalTokens.toString() + ((result.usedEstimateForInput || result.usedEstimateForOutput) ? ' [est]' : '');
|
|
888
|
+
|
|
836
889
|
table.push([
|
|
837
890
|
colorText(result.model, 'white'),
|
|
838
891
|
colorText(result.provider, 'white'),
|
|
839
892
|
colorText((result.totalTime / 1000).toFixed(2), 'green'),
|
|
840
893
|
colorText((result.timeToFirstToken / 1000).toFixed(2), 'yellow'),
|
|
841
894
|
colorText(result.tokensPerSecond.toFixed(1), 'magenta'),
|
|
842
|
-
colorText(result.
|
|
843
|
-
colorText(result.
|
|
844
|
-
colorText(result.
|
|
895
|
+
colorText(outputTokenDisplay, result.usedEstimateForOutput ? 'yellow' : 'blue'),
|
|
896
|
+
colorText(promptTokenDisplay, result.usedEstimateForInput ? 'yellow' : 'blue'),
|
|
897
|
+
colorText(totalTokenDisplay, (result.usedEstimateForInput || result.usedEstimateForOutput) ? 'yellow' : 'bright')
|
|
845
898
|
]);
|
|
846
899
|
});
|
|
847
900
|
|
|
@@ -1007,7 +1060,6 @@ async function addVerifiedProvider() {
|
|
|
1007
1060
|
// Add header
|
|
1008
1061
|
screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
|
|
1009
1062
|
screenContent += colorText('=============================', 'cyan') + '\n';
|
|
1010
|
-
screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
|
|
1011
1063
|
screenContent += '\n';
|
|
1012
1064
|
|
|
1013
1065
|
screenContent += colorText('Add Verified Provider', 'magenta') + '\n';
|
|
@@ -1223,7 +1275,6 @@ async function addCustomProviderCLI() {
|
|
|
1223
1275
|
// Add header
|
|
1224
1276
|
screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
|
|
1225
1277
|
screenContent += colorText('=============================', 'cyan') + '\n';
|
|
1226
|
-
screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
|
|
1227
1278
|
screenContent += '\n';
|
|
1228
1279
|
|
|
1229
1280
|
screenContent += colorText('Add Custom Provider', 'magenta') + '\n';
|
|
@@ -1486,7 +1537,6 @@ async function addCustomModelsMenu() {
|
|
|
1486
1537
|
// Add header
|
|
1487
1538
|
screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
|
|
1488
1539
|
screenContent += colorText('=============================', 'cyan') + '\n';
|
|
1489
|
-
screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
|
|
1490
1540
|
screenContent += '\n';
|
|
1491
1541
|
|
|
1492
1542
|
screenContent += colorText('Add Custom Models', 'magenta') + '\n';
|
|
@@ -1553,7 +1603,6 @@ async function addModelsToExistingProvider() {
|
|
|
1553
1603
|
// Add header
|
|
1554
1604
|
screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
|
|
1555
1605
|
screenContent += colorText('=============================', 'cyan') + '\n';
|
|
1556
|
-
screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
|
|
1557
1606
|
screenContent += '\n';
|
|
1558
1607
|
|
|
1559
1608
|
screenContent += colorText('Add Models to Existing Provider', 'magenta') + '\n';
|
|
@@ -1665,23 +1714,9 @@ async function addModelsToExistingProvider() {
|
|
|
1665
1714
|
await question(colorText('\nPress Enter to continue...', 'yellow'));
|
|
1666
1715
|
}
|
|
1667
1716
|
|
|
1668
|
-
//
|
|
1669
|
-
async function
|
|
1670
|
-
|
|
1671
|
-
console.log(colorText('No models selected for benchmarking.', 'red'));
|
|
1672
|
-
return;
|
|
1673
|
-
}
|
|
1674
|
-
|
|
1675
|
-
clearScreen();
|
|
1676
|
-
showHeader();
|
|
1677
|
-
console.log(colorText('Running REST API Benchmark...', 'green'));
|
|
1678
|
-
console.log(colorText(`Running ${models.length} models in parallel...`, 'cyan'));
|
|
1679
|
-
console.log(colorText('Note: This uses direct REST API calls instead of AI SDK', 'dim'));
|
|
1680
|
-
console.log('');
|
|
1681
|
-
|
|
1682
|
-
// Create a function to benchmark a single model using REST API
|
|
1683
|
-
const benchmarkModelRest = async (model) => {
|
|
1684
|
-
console.log(colorText(`Testing ${model.name} (${model.providerName}) via REST API...`, 'yellow'));
|
|
1717
|
+
// Silent benchmark helper (returns raw result without UI)
|
|
1718
|
+
async function benchmarkSingleModelRest(model) {
|
|
1719
|
+
|
|
1685
1720
|
|
|
1686
1721
|
try {
|
|
1687
1722
|
// Validate required configuration
|
|
@@ -1693,14 +1728,24 @@ async function runRestApiBenchmark(models) {
|
|
|
1693
1728
|
throw new Error(`Missing base URL for provider ${model.providerName}`);
|
|
1694
1729
|
}
|
|
1695
1730
|
|
|
1731
|
+
// Extract the actual model ID for API calls (moved before usage)
|
|
1732
|
+
let actualModelId = model.name;
|
|
1733
|
+
if (model.id && model.id.includes('_')) {
|
|
1734
|
+
actualModelId = model.id.split('_')[1];
|
|
1735
|
+
}
|
|
1736
|
+
actualModelId = actualModelId.trim();
|
|
1737
|
+
|
|
1696
1738
|
const startTime = Date.now();
|
|
1739
|
+
let firstTokenTime = null;
|
|
1740
|
+
let streamedText = '';
|
|
1741
|
+
let tokenCount = 0;
|
|
1697
1742
|
|
|
1698
1743
|
// Use correct endpoint based on provider type
|
|
1699
1744
|
let endpoint;
|
|
1700
1745
|
if (model.providerType === 'anthropic') {
|
|
1701
1746
|
endpoint = '/messages';
|
|
1702
1747
|
} else if (model.providerType === 'google') {
|
|
1703
|
-
endpoint = '/models/' + actualModelId + ':
|
|
1748
|
+
endpoint = '/models/' + actualModelId + ':streamGenerateContent';
|
|
1704
1749
|
} else {
|
|
1705
1750
|
endpoint = '/chat/completions';
|
|
1706
1751
|
}
|
|
@@ -1709,17 +1754,7 @@ async function runRestApiBenchmark(models) {
|
|
|
1709
1754
|
const baseUrl = model.providerConfig.baseUrl.replace(/\/$/, '');
|
|
1710
1755
|
const url = `${baseUrl}${endpoint}`;
|
|
1711
1756
|
|
|
1712
|
-
|
|
1713
|
-
let actualModelId = model.name;
|
|
1714
|
-
if (model.id && model.id.includes('_')) {
|
|
1715
|
-
// For models with provider prefix, extract the actual model ID
|
|
1716
|
-
actualModelId = model.id.split('_')[1];
|
|
1717
|
-
console.log(colorText(` Using extracted model ID: ${actualModelId}`, 'cyan'));
|
|
1718
|
-
}
|
|
1719
|
-
|
|
1720
|
-
// Trim any trailing spaces from model names
|
|
1721
|
-
actualModelId = actualModelId.trim();
|
|
1722
|
-
console.log(colorText(` Using final model ID: "${actualModelId}"`, 'cyan'));
|
|
1757
|
+
|
|
1723
1758
|
|
|
1724
1759
|
const headers = {
|
|
1725
1760
|
'Content-Type': 'application/json',
|
|
@@ -1731,7 +1766,6 @@ async function runRestApiBenchmark(models) {
|
|
|
1731
1766
|
headers['x-api-key'] = model.providerConfig.apiKey;
|
|
1732
1767
|
headers['anthropic-version'] = '2023-06-01';
|
|
1733
1768
|
} else if (model.providerType === 'google') {
|
|
1734
|
-
// Google uses different auth
|
|
1735
1769
|
delete headers['Authorization'];
|
|
1736
1770
|
headers['x-goog-api-key'] = model.providerConfig.apiKey;
|
|
1737
1771
|
}
|
|
@@ -1742,14 +1776,15 @@ async function runRestApiBenchmark(models) {
|
|
|
1742
1776
|
{ role: 'user', content: testPrompt }
|
|
1743
1777
|
],
|
|
1744
1778
|
max_tokens: 500,
|
|
1745
|
-
temperature: 0.7
|
|
1779
|
+
temperature: 0.7,
|
|
1780
|
+
stream: true
|
|
1746
1781
|
};
|
|
1747
1782
|
|
|
1748
1783
|
// Adjust for provider-specific formats
|
|
1749
1784
|
if (model.providerType === 'anthropic') {
|
|
1750
1785
|
body.max_tokens = 500;
|
|
1786
|
+
body.stream = true;
|
|
1751
1787
|
} else if (model.providerType === 'google') {
|
|
1752
|
-
// Google format is slightly different
|
|
1753
1788
|
body.contents = [{ parts: [{ text: testPrompt }] }];
|
|
1754
1789
|
body.generationConfig = {
|
|
1755
1790
|
maxOutputTokens: 500,
|
|
@@ -1757,10 +1792,10 @@ async function runRestApiBenchmark(models) {
|
|
|
1757
1792
|
};
|
|
1758
1793
|
delete body.messages;
|
|
1759
1794
|
delete body.max_tokens;
|
|
1795
|
+
delete body.stream;
|
|
1760
1796
|
}
|
|
1761
1797
|
|
|
1762
|
-
|
|
1763
|
-
console.log(colorText(` Using model: ${actualModelId}`, 'cyan'));
|
|
1798
|
+
|
|
1764
1799
|
|
|
1765
1800
|
const response = await fetch(url, {
|
|
1766
1801
|
method: 'POST',
|
|
@@ -1768,56 +1803,152 @@ async function runRestApiBenchmark(models) {
|
|
|
1768
1803
|
body: JSON.stringify(body)
|
|
1769
1804
|
});
|
|
1770
1805
|
|
|
1771
|
-
|
|
1806
|
+
|
|
1772
1807
|
|
|
1773
1808
|
if (!response.ok) {
|
|
1774
1809
|
const errorText = await response.text();
|
|
1775
|
-
console.log(colorText(` Error: ${errorText.slice(0, 200)}...`, 'red'));
|
|
1776
1810
|
throw new Error(`API request failed: ${response.status} ${response.statusText}`);
|
|
1777
1811
|
}
|
|
1778
1812
|
|
|
1779
|
-
|
|
1813
|
+
// Process streaming response
|
|
1814
|
+
const reader = response.body.getReader();
|
|
1815
|
+
const decoder = new TextDecoder();
|
|
1816
|
+
let buffer = '';
|
|
1817
|
+
let inputTokens = 0;
|
|
1818
|
+
let outputTokens = 0;
|
|
1819
|
+
let isFirstChunk = true;
|
|
1820
|
+
|
|
1821
|
+
while (true) {
|
|
1822
|
+
const { done, value } = await reader.read();
|
|
1823
|
+
if (done) break;
|
|
1824
|
+
|
|
1825
|
+
// Capture TTFT on first chunk arrival (network level)
|
|
1826
|
+
if (isFirstChunk && !firstTokenTime) {
|
|
1827
|
+
firstTokenTime = Date.now();
|
|
1828
|
+
isFirstChunk = false;
|
|
1829
|
+
// Show live TTFT result
|
|
1830
|
+
const ttftSeconds = ((firstTokenTime - startTime) / 1000).toFixed(2);
|
|
1831
|
+
console.log(colorText(`TTFT received at ${ttftSeconds}s for ${model.name}`, 'green'));
|
|
1832
|
+
}
|
|
1833
|
+
|
|
1834
|
+
buffer += decoder.decode(value, { stream: true });
|
|
1835
|
+
const lines = buffer.split('\n');
|
|
1836
|
+
buffer = lines.pop() || '';
|
|
1837
|
+
|
|
1838
|
+
for (const line of lines) {
|
|
1839
|
+
const trimmedLine = line.trim();
|
|
1840
|
+
if (!trimmedLine) continue;
|
|
1841
|
+
|
|
1842
|
+
try {
|
|
1843
|
+
if (model.providerType === 'anthropic') {
|
|
1844
|
+
// Anthropic uses newline-delimited JSON with event types
|
|
1845
|
+
if (trimmedLine.startsWith('data: ')) {
|
|
1846
|
+
const jsonStr = trimmedLine.slice(6);
|
|
1847
|
+
if (jsonStr === '[DONE]') break;
|
|
1848
|
+
|
|
1849
|
+
const chunk = JSON.parse(jsonStr);
|
|
1850
|
+
|
|
1851
|
+
if (chunk.type === 'content_block_delta' && chunk.delta?.text) {
|
|
1852
|
+
streamedText += chunk.delta.text;
|
|
1853
|
+
} else if (chunk.type === 'message_start' && chunk.message?.usage) {
|
|
1854
|
+
inputTokens = chunk.message.usage.input_tokens || 0;
|
|
1855
|
+
} else if (chunk.type === 'message_delta') {
|
|
1856
|
+
// Capture output tokens from message_delta
|
|
1857
|
+
if (chunk.usage?.output_tokens) {
|
|
1858
|
+
outputTokens = chunk.usage.output_tokens;
|
|
1859
|
+
}
|
|
1860
|
+
// Some implementations put input_tokens here too
|
|
1861
|
+
if (chunk.usage?.input_tokens && !inputTokens) {
|
|
1862
|
+
inputTokens = chunk.usage.input_tokens;
|
|
1863
|
+
}
|
|
1864
|
+
}
|
|
1865
|
+
} else if (trimmedLine.startsWith('event: ')) {
|
|
1866
|
+
// Skip event lines (Anthropic SSE format uses separate event and data lines)
|
|
1867
|
+
continue;
|
|
1868
|
+
} else {
|
|
1869
|
+
// Try parsing as raw JSON (some Anthropic-compatible APIs don't use SSE format)
|
|
1870
|
+
const chunk = JSON.parse(trimmedLine);
|
|
1871
|
+
|
|
1872
|
+
if (chunk.type === 'content_block_delta' && chunk.delta?.text) {
|
|
1873
|
+
streamedText += chunk.delta.text;
|
|
1874
|
+
} else if (chunk.type === 'message_start' && chunk.message?.usage) {
|
|
1875
|
+
inputTokens = chunk.message.usage.input_tokens || 0;
|
|
1876
|
+
} else if (chunk.type === 'message_delta') {
|
|
1877
|
+
if (chunk.usage?.output_tokens) {
|
|
1878
|
+
outputTokens = chunk.usage.output_tokens;
|
|
1879
|
+
}
|
|
1880
|
+
if (chunk.usage?.input_tokens && !inputTokens) {
|
|
1881
|
+
inputTokens = chunk.usage.input_tokens;
|
|
1882
|
+
}
|
|
1883
|
+
}
|
|
1884
|
+
}
|
|
1885
|
+
} else if (model.providerType === 'google') {
|
|
1886
|
+
// Google streaming format
|
|
1887
|
+
const chunk = JSON.parse(trimmedLine);
|
|
1888
|
+
if (chunk.candidates?.[0]?.content?.parts?.[0]?.text) {
|
|
1889
|
+
const text = chunk.candidates[0].content.parts[0].text;
|
|
1890
|
+
streamedText += text;
|
|
1891
|
+
}
|
|
1892
|
+
if (chunk.usageMetadata?.promptTokenCount) {
|
|
1893
|
+
inputTokens = chunk.usageMetadata.promptTokenCount;
|
|
1894
|
+
}
|
|
1895
|
+
if (chunk.usageMetadata?.candidatesTokenCount) {
|
|
1896
|
+
outputTokens = chunk.usageMetadata.candidatesTokenCount;
|
|
1897
|
+
}
|
|
1898
|
+
} else {
|
|
1899
|
+
// OpenAI-compatible SSE format
|
|
1900
|
+
if (trimmedLine.startsWith('data: ')) {
|
|
1901
|
+
const jsonStr = trimmedLine.slice(6);
|
|
1902
|
+
if (jsonStr === '[DONE]') break;
|
|
1903
|
+
|
|
1904
|
+
const chunk = JSON.parse(jsonStr);
|
|
1905
|
+
|
|
1906
|
+
if (chunk.choices?.[0]?.delta?.content) {
|
|
1907
|
+
streamedText += chunk.choices[0].delta.content;
|
|
1908
|
+
}
|
|
1909
|
+
|
|
1910
|
+
if (chunk.usage?.prompt_tokens) {
|
|
1911
|
+
inputTokens = chunk.usage.prompt_tokens;
|
|
1912
|
+
}
|
|
1913
|
+
if (chunk.usage?.completion_tokens) {
|
|
1914
|
+
outputTokens = chunk.usage.completion_tokens;
|
|
1915
|
+
}
|
|
1916
|
+
}
|
|
1917
|
+
}
|
|
1918
|
+
} catch (parseError) {
|
|
1919
|
+
// Skip invalid JSON lines
|
|
1920
|
+
continue;
|
|
1921
|
+
}
|
|
1922
|
+
}
|
|
1923
|
+
}
|
|
1924
|
+
|
|
1780
1925
|
const endTime = Date.now();
|
|
1781
1926
|
const totalTime = endTime - startTime;
|
|
1927
|
+
const timeToFirstToken = firstTokenTime ? firstTokenTime - startTime : totalTime;
|
|
1782
1928
|
|
|
1783
|
-
// Calculate
|
|
1784
|
-
|
|
1785
|
-
|
|
1786
|
-
|
|
1787
|
-
|
|
1788
|
-
|
|
1789
|
-
} else if (model.providerType === 'google') {
|
|
1790
|
-
inputTokens = data.usageMetadata?.promptTokenCount || Math.round(testPrompt.length / 4);
|
|
1791
|
-
outputTokens = data.usageMetadata?.candidatesTokenCount || Math.round(data.candidates?.[0]?.content?.parts?.[0]?.text?.length / 4 || 0);
|
|
1792
|
-
} else {
|
|
1793
|
-
inputTokens = data.usage?.prompt_tokens || Math.round(testPrompt.length / 4);
|
|
1794
|
-
outputTokens = data.usage?.completion_tokens || Math.round(data.choices?.[0]?.message?.content?.length / 4 || 0);
|
|
1795
|
-
}
|
|
1796
|
-
|
|
1797
|
-
const totalTokens = inputTokens + outputTokens;
|
|
1929
|
+
// Calculate token counts - use provider's count if available, otherwise estimate
|
|
1930
|
+
const usedEstimateForOutput = !outputTokens;
|
|
1931
|
+
const usedEstimateForInput = !inputTokens;
|
|
1932
|
+
const finalOutputTokens = outputTokens || Math.round(streamedText.length / 4);
|
|
1933
|
+
const finalInputTokens = inputTokens || Math.round(testPrompt.length / 4);
|
|
1934
|
+
const totalTokens = finalInputTokens + finalOutputTokens;
|
|
1798
1935
|
const tokensPerSecond = totalTime > 0 ? (totalTokens / totalTime) * 1000 : 0;
|
|
1799
|
-
|
|
1800
|
-
console.log(colorText('Completed!', 'green'));
|
|
1801
|
-
console.log(colorText(` Total Time: ${(totalTime / 1000).toFixed(2)}s`, 'cyan'));
|
|
1802
|
-
console.log(colorText(` Tokens/Sec: ${tokensPerSecond.toFixed(1)}`, 'cyan'));
|
|
1803
|
-
console.log(colorText(` Input Tokens: ${inputTokens}`, 'cyan'));
|
|
1804
|
-
console.log(colorText(` Output Tokens: ${outputTokens}`, 'cyan'));
|
|
1805
|
-
console.log(colorText(` Total Tokens: ${totalTokens}`, 'cyan'));
|
|
1806
1936
|
|
|
1807
1937
|
return {
|
|
1808
1938
|
model: model.name,
|
|
1809
1939
|
provider: model.providerName,
|
|
1810
1940
|
totalTime: totalTime,
|
|
1811
|
-
timeToFirstToken:
|
|
1812
|
-
tokenCount:
|
|
1941
|
+
timeToFirstToken: timeToFirstToken,
|
|
1942
|
+
tokenCount: finalOutputTokens,
|
|
1813
1943
|
tokensPerSecond: tokensPerSecond,
|
|
1814
|
-
promptTokens:
|
|
1944
|
+
promptTokens: finalInputTokens,
|
|
1815
1945
|
totalTokens: totalTokens,
|
|
1946
|
+
usedEstimateForOutput: usedEstimateForOutput,
|
|
1947
|
+
usedEstimateForInput: usedEstimateForInput,
|
|
1816
1948
|
success: true
|
|
1817
1949
|
};
|
|
1818
1950
|
|
|
1819
1951
|
} catch (error) {
|
|
1820
|
-
console.log(colorText('Failed: ', 'red') + error.message);
|
|
1821
1952
|
return {
|
|
1822
1953
|
model: model.name,
|
|
1823
1954
|
provider: model.providerName,
|
|
@@ -1831,17 +1962,49 @@ async function runRestApiBenchmark(models) {
|
|
|
1831
1962
|
error: error.message
|
|
1832
1963
|
};
|
|
1833
1964
|
}
|
|
1834
|
-
|
|
1965
|
+
}
|
|
1966
|
+
|
|
1967
|
+
// REST API benchmark function using direct API calls (with UI)
|
|
1968
|
+
async function runRestApiBenchmark(models) {
|
|
1969
|
+
if (models.length === 0) {
|
|
1970
|
+
console.log(colorText('No models selected for benchmarking.', 'red'));
|
|
1971
|
+
return;
|
|
1972
|
+
}
|
|
1835
1973
|
|
|
1836
|
-
|
|
1974
|
+
clearScreen();
|
|
1975
|
+
showHeader();
|
|
1976
|
+
console.log(colorText('Running REST API Benchmark with Streaming...', 'green'));
|
|
1977
|
+
console.log(colorText(`Running ${models.length} models in parallel...`, 'cyan'));
|
|
1978
|
+
console.log(colorText('Note: This uses direct REST API calls with streaming support', 'dim'));
|
|
1979
|
+
console.log('');
|
|
1980
|
+
|
|
1981
|
+
// Run all benchmarks in parallel with UI feedback
|
|
1837
1982
|
console.log(colorText('Starting parallel REST API benchmark execution...', 'cyan'));
|
|
1838
|
-
|
|
1983
|
+
|
|
1984
|
+
// Start all benchmarks in parallel
|
|
1985
|
+
const promises = models.map(model => {
|
|
1986
|
+
console.log(colorText(`Testing ${model.name} (${model.providerName}) via REST API with streaming...`, 'yellow'));
|
|
1987
|
+
return benchmarkSingleModelRest(model);
|
|
1988
|
+
});
|
|
1989
|
+
|
|
1839
1990
|
const results = await Promise.all(promises);
|
|
1840
1991
|
|
|
1992
|
+
// Show individual results after all complete
|
|
1993
|
+
results.forEach((result, index) => {
|
|
1994
|
+
if (result.success) {
|
|
1995
|
+
console.log(colorText(`✓ ${result.model} (${result.provider}) completed!`, 'green'));
|
|
1996
|
+
console.log(colorText(` Total Time: ${(result.totalTime / 1000).toFixed(2)}s`, 'cyan'));
|
|
1997
|
+
console.log(colorText(` TTFT: ${(result.timeToFirstToken / 1000).toFixed(2)}s`, 'cyan'));
|
|
1998
|
+
console.log(colorText(` Tokens/Sec: ${result.tokensPerSecond.toFixed(1)}`, 'cyan'));
|
|
1999
|
+
} else {
|
|
2000
|
+
console.log(colorText(`✗ ${result.model} (${result.provider}) failed: `, 'red') + result.error);
|
|
2001
|
+
}
|
|
2002
|
+
});
|
|
2003
|
+
|
|
1841
2004
|
console.log('');
|
|
1842
2005
|
console.log(colorText('All REST API benchmarks completed!', 'green'));
|
|
1843
2006
|
|
|
1844
|
-
await displayColorfulResults(results, 'REST API', models);
|
|
2007
|
+
await displayColorfulResults(results, 'REST API (Streaming)', models);
|
|
1845
2008
|
|
|
1846
2009
|
// Add successful models to recent models list
|
|
1847
2010
|
const successfulModels = results
|
|
@@ -1868,16 +2031,16 @@ async function runRestApiBenchmark(models) {
|
|
|
1868
2031
|
async function showMainMenu() {
|
|
1869
2032
|
const menuOptions = [
|
|
1870
2033
|
{ id: 1, text: 'Set Model', action: () => showModelMenu() },
|
|
1871
|
-
{ id: 2, text: 'Run Benchmark (
|
|
2034
|
+
{ id: 2, text: 'Run Benchmark (REST API)', action: async () => {
|
|
1872
2035
|
const selectedModels = await selectModelsCircular();
|
|
1873
2036
|
if (selectedModels.length > 0) {
|
|
1874
|
-
await
|
|
2037
|
+
await runRestApiBenchmark(selectedModels);
|
|
1875
2038
|
}
|
|
1876
2039
|
}},
|
|
1877
|
-
{ id: 3, text: 'Run Benchmark (
|
|
2040
|
+
{ id: 3, text: 'Run Benchmark (AI SDK - Legacy)', action: async () => {
|
|
1878
2041
|
const selectedModels = await selectModelsCircular();
|
|
1879
2042
|
if (selectedModels.length > 0) {
|
|
1880
|
-
await
|
|
2043
|
+
await runStreamingBenchmark(selectedModels);
|
|
1881
2044
|
}
|
|
1882
2045
|
}},
|
|
1883
2046
|
{ id: 4, text: 'Exit', action: () => {
|
|
@@ -1896,7 +2059,6 @@ async function showMainMenu() {
|
|
|
1896
2059
|
// Add header
|
|
1897
2060
|
screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
|
|
1898
2061
|
screenContent += colorText('=============================', 'cyan') + '\n';
|
|
1899
|
-
screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
|
|
1900
2062
|
screenContent += '\n';
|
|
1901
2063
|
|
|
1902
2064
|
screenContent += colorText('Main Menu:', 'cyan') + '\n';
|
|
@@ -1953,7 +2115,6 @@ async function showModelMenu() {
|
|
|
1953
2115
|
// Add header
|
|
1954
2116
|
screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
|
|
1955
2117
|
screenContent += colorText('=============================', 'cyan') + '\n';
|
|
1956
|
-
screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
|
|
1957
2118
|
screenContent += '\n';
|
|
1958
2119
|
|
|
1959
2120
|
screenContent += colorText('Model Management:', 'cyan') + '\n';
|
|
@@ -2002,17 +2163,157 @@ process.on('SIGINT', () => {
|
|
|
2002
2163
|
process.exit(0);
|
|
2003
2164
|
});
|
|
2004
2165
|
|
|
2166
|
+
// Headless benchmark mode
|
|
2167
|
+
/**
 * Run a single benchmark without the interactive UI and print the outcome as
 * one JSON document on stdout.
 *
 * The provider is looked up (case-insensitively, by id or name) in the
 * configuration returned by `loadConfig()`; the model is matched against its
 * full id, its id without the "provider_" prefix, or its name. Diagnostics go
 * to stderr. The process is terminated via `process.exit`: code 0 when the
 * benchmark result reports success, code 1 for any failure or invalid input.
 *
 * @param {string} benchSpec - Target in `provider:model` form. Only the first
 *   colon separates provider from model, so model names may contain colons.
 * @param {?string} apiKey - API key override; falls back to the provider's
 *   configured `apiKey` when empty.
 * @param {boolean} useAiSdk - Request the AI SDK code path (not implemented for
 *   headless mode; exits with an error).
 * @returns {Promise<void>}
 */
async function runHeadlessBenchmark(benchSpec, apiKey, useAiSdk) {
  // Drops the leading "provider_" segment from a model id, keeping any further
  // underscores that belong to the model name itself.
  const stripProviderPrefix = (id) => (
    id?.includes('_') ? id.split('_').slice(1).join('_') : id
  );

  try {
    // Split on the FIRST colon only; `split(':')` with two-slot destructuring
    // would silently truncate model names such as "llama3:8b".
    const separatorIndex = benchSpec.indexOf(':');
    const providerSpec = separatorIndex === -1 ? '' : benchSpec.slice(0, separatorIndex);
    const modelName = separatorIndex === -1 ? '' : benchSpec.slice(separatorIndex + 1);

    if (!providerSpec || !modelName) {
      console.error(colorText('Error: Invalid --bench format. Use: provider:model', 'red'));
      console.error(colorText('Example: --bench zai-code-anth:glm-4.6', 'yellow'));
      process.exit(1);
    }

    // Load all available providers
    const config = await loadConfig();

    // Find the provider (case-insensitive search)
    const wantedProvider = providerSpec.toLowerCase();
    const provider = config.providers.find((p) =>
      p.id?.toLowerCase() === wantedProvider ||
      p.name?.toLowerCase() === wantedProvider
    );

    if (!provider) {
      console.error(colorText(`Error: Provider '${providerSpec}' not found`, 'red'));
      console.error(colorText('Available providers:', 'yellow'));
      config.providers.forEach((p) => {
        console.error(colorText(` - ${p.id || p.name}`, 'cyan'));
      });
      process.exit(1);
    }

    // A provider entry without a models list is treated as having no models,
    // so the user gets the "model not found" message instead of a TypeError.
    const providerModels = provider.models ?? [];

    // Model IDs are prefixed with the provider name (e.g. "zai-code-anth_glm-4.6"),
    // so accept the full id, the id without that prefix, or the display name.
    const wantedModel = modelName.toLowerCase();
    const model = providerModels.find((m) => {
      const idLower = m.id?.toLowerCase() ?? '';
      const nameLower = m.name?.toLowerCase() ?? '';
      return (
        idLower === wantedModel ||
        stripProviderPrefix(idLower) === wantedModel ||
        nameLower === wantedModel
      );
    });

    if (!model) {
      console.error(colorText(`Error: Model '${modelName}' not found in provider '${provider.name}'`, 'red'));
      console.error(colorText('Available models:', 'yellow'));
      providerModels.forEach((m) => {
        // Show both name and ID (without provider prefix) for clarity
        console.error(colorText(` - ${m.name} (id: ${stripProviderPrefix(m.id)})`, 'cyan'));
      });
      process.exit(1);
    }

    // If API key provided via flag, use it; otherwise use existing config.
    // `||` is deliberate: an empty-string override should fall back too.
    const finalApiKey = apiKey || provider.apiKey;

    if (!finalApiKey) {
      console.error(colorText(`Error: No API key found for provider '${provider.name}'`, 'red'));
      console.error(colorText('Please provide --api-key flag or configure the provider first', 'yellow'));
      process.exit(1);
    }

    // Create model object with all required config
    const modelConfig = {
      ...model,
      providerName: provider.name,
      providerType: provider.type,
      providerId: provider.id,
      providerConfig: {
        ...provider,
        apiKey: finalApiKey,
        baseUrl: provider.baseUrl || ''
      },
      selected: true
    };

    let result;
    if (useAiSdk) {
      // TODO: Implement AI SDK silent benchmark
      console.error(colorText('AI SDK headless mode not yet implemented', 'red'));
      process.exit(1);
    } else {
      // The REST helper reports a live TTFT line through console.log. In
      // headless mode stdout must contain only the JSON document, so route
      // console.log to stderr for the duration of the benchmark.
      const originalLog = console.log;
      console.log = (...args) => console.error(...args);
      try {
        result = await benchmarkSingleModelRest(modelConfig);
      } finally {
        console.log = originalLog;
      }
    }

    // Output JSON to stdout
    const jsonOutput = {
      provider: provider.name,
      providerId: provider.id,
      model: model.name,
      modelId: model.id,
      method: useAiSdk ? 'ai-sdk' : 'rest-api',
      success: result.success,
      totalTime: result.totalTime,
      totalTimeSeconds: result.totalTime / 1000,
      timeToFirstToken: result.timeToFirstToken,
      timeToFirstTokenSeconds: result.timeToFirstToken / 1000,
      tokensPerSecond: result.tokensPerSecond,
      outputTokens: result.tokenCount,
      promptTokens: result.promptTokens,
      totalTokens: result.totalTokens,
      error: result.error || null
    };

    console.log(JSON.stringify(jsonOutput, null, 2));
    process.exit(result.success ? 0 : 1);
  } catch (error) {
    console.error(colorText('Error: ' + error.message, 'red'));
    if (debugMode) {
      console.error(error.stack);
    }
    process.exit(1);
  }
}
|
|
2297
|
+
|
|
2005
2298
|
// Start the CLI
|
|
2006
|
-
if (
|
|
2007
|
-
|
|
2008
|
-
|
|
2009
|
-
|
|
2010
|
-
|
|
2011
|
-
|
|
2012
|
-
|
|
2013
|
-
|
|
2014
|
-
|
|
2015
|
-
|
|
2299
|
+
if (require.main === module) {
|
|
2300
|
+
// Check if help flag
|
|
2301
|
+
if (cliArgs.help) {
|
|
2302
|
+
showHelp();
|
|
2303
|
+
process.exit(0);
|
|
2304
|
+
}
|
|
2305
|
+
|
|
2306
|
+
// Check if headless benchmark mode
|
|
2307
|
+
if (cliArgs.bench) {
|
|
2308
|
+
runHeadlessBenchmark(cliArgs.bench, cliArgs.apiKey, cliArgs.useAiSdk);
|
|
2309
|
+
} else {
|
|
2310
|
+
// Interactive mode
|
|
2311
|
+
cleanupRecentModelsFromConfig().then(() => {
|
|
2312
|
+
showMainMenu();
|
|
2313
|
+
}).catch(() => {
|
|
2314
|
+
showMainMenu();
|
|
2315
|
+
});
|
|
2316
|
+
}
|
|
2016
2317
|
}
|
|
2017
2318
|
|
|
2018
2319
|
export { showMainMenu, listProviders, selectModelsCircular, runStreamingBenchmark, loadConfig, saveConfig };
|