ai-speedometer 1.2.4 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli.js CHANGED
@@ -6,7 +6,6 @@ import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
6
6
  import { createAnthropic } from '@ai-sdk/anthropic';
7
7
  import { streamText } from 'ai'; // Changed from streamText to generateText
8
8
  import { testPrompt } from './test-prompt.js';
9
- import { LLMBenchmark } from './benchmark-rest.js';
10
9
  import { getAllProviders, searchProviders, getModelsForProvider } from './models-dev.js';
11
10
  import {
12
11
  getAllAvailableProviders,
@@ -29,8 +28,59 @@ import {
29
28
  import 'dotenv/config';
30
29
  import Table from 'cli-table3';
31
30
 
32
- // Check for debug flag
33
- const debugMode = process.argv.includes('--debug');
31
+ // Parse command line arguments
32
/**
 * Parse the command-line flags understood by ai-speedometer.
 *
 * Recognised flags:
 *   --debug                      enable debug logging
 *   --bench <provider:model>     headless benchmark target (also --bench=<spec>)
 *   --api-key <key>              API key override (also --api-key=<key>)
 *   --ai-sdk                     use the AI SDK instead of the REST API
 *   --help, -h                   show help
 * Unrecognised arguments are ignored.
 *
 * A value flag with no value (end of argv, or directly followed by another
 * recognised flag) leaves its field at `null` and does not swallow the
 * following flag.
 *
 * @param {string[]} [argv=process.argv.slice(2)] - Arguments to parse.
 * @returns {{debug: boolean, bench: (string|null), apiKey: (string|null), useAiSdk: boolean, help: boolean}}
 */
function parseCliArgs(argv = process.argv.slice(2)) {
  const parsed = {
    debug: false,
    bench: null,
    apiKey: null,
    useAiSdk: false,
    help: false
  };

  // Boolean switches and value-taking flags, mapped to the field they set.
  const switchFlags = new Map([
    ['--debug', 'debug'],
    ['--ai-sdk', 'useAiSdk'],
    ['--help', 'help'],
    ['-h', 'help']
  ]);
  const valueFlags = new Map([
    ['--bench', 'bench'],
    ['--api-key', 'apiKey']
  ]);

  // True when `token` is itself one of our flags (including the `--flag=value` form).
  const isKnownFlag = (token) =>
    switchFlags.has(token) ||
    valueFlags.has(token) ||
    [...valueFlags.keys()].some((flag) => token.startsWith(`${flag}=`));

  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];

    if (switchFlags.has(arg)) {
      parsed[switchFlags.get(arg)] = true;
      continue;
    }

    const eqIndex = arg.indexOf('=');
    const flag = eqIndex === -1 ? arg : arg.slice(0, eqIndex);
    if (!valueFlags.has(flag)) {
      continue;
    }
    const field = valueFlags.get(flag);

    if (eqIndex !== -1) {
      // Inline form: --bench=provider:model
      parsed[field] = arg.slice(eqIndex + 1) || null;
      continue;
    }

    // Separate form: only consume the next token when it is a real value,
    // so `--bench --debug` does not turn "--debug" into the bench spec.
    const next = argv[i + 1];
    if (next !== undefined && !isKnownFlag(next)) {
      parsed[field] = next;
      i += 1;
    }
  }

  return parsed;
}
60
+
61
/**
 * Print the CLI usage text (title, usage forms, options and examples) to stdout.
 * Headings and the trailing notes are coloured through `colorText`.
 */
function showHelp() {
  // Small printers so each help line reads as data rather than string plumbing.
  const blank = () => console.log('');
  const heading = (title, color = 'yellow') => console.log(colorText(title, color));
  const entry = (lead, note) => console.log(lead + colorText(note, 'dim'));
  const plain = (text) => console.log(text);

  heading('ai-speedometer - Benchmark AI models', 'cyan');
  blank();

  heading('Usage:');
  entry(' ai-speedometer ', '# Interactive mode');
  entry(' ai-speedometer --bench <provider:model> ', '# Headless benchmark');
  blank();

  heading('Options:');
  entry(' --bench <provider:model> ', 'Run benchmark in headless mode');
  entry(' --api-key <key> ', 'Override API key (optional)');
  entry(' --ai-sdk ', 'Use AI SDK instead of REST API');
  entry(' --debug ', 'Enable debug logging');
  entry(' --help, -h ', 'Show this help message');
  blank();

  heading('Examples:');
  plain(' ai-speedometer --bench openai:gpt-4');
  plain(' ai-speedometer --bench anthropic:claude-3-opus --api-key "sk-..."');
  plain(' ai-speedometer --bench openai:gpt-4 --ai-sdk');
  blank();
}
81
+
82
+ const cliArgs = parseCliArgs();
83
+ const debugMode = cliArgs.debug;
34
84
  let logFile = null;
35
85
 
36
86
  function log(message) {
@@ -125,7 +175,6 @@ function clearScreen() {
125
175
  function showHeader() {
126
176
  console.log(colorText('Ai-speedometer', 'cyan'));
127
177
  console.log(colorText('=============================', 'cyan'));
128
- console.log(colorText('Note: opencode uses ai-sdk', 'dim'));
129
178
  console.log('');
130
179
  }
131
180
 
@@ -302,7 +351,6 @@ async function selectModelsCircular() {
302
351
  // Add header
303
352
  screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
304
353
  screenContent += colorText('=============================', 'cyan') + '\n';
305
- screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
306
354
  screenContent += '\n';
307
355
 
308
356
  screenContent += colorText('Select Models for Benchmark', 'magenta') + '\n';
@@ -805,8 +853,9 @@ async function displayColorfulResults(results, method = 'AI SDK', models = []) {
805
853
  console.log(colorText('COMPREHENSIVE PERFORMANCE SUMMARY', 'yellow'));
806
854
 
807
855
  // Add note about method differences
808
- console.log(colorText('Note: ', 'cyan') + colorText('Benchmark over REST API doesn\'t utilize streaming, so TTFT is 0. AI SDK utilizes streaming, but', 'dim'));
809
- console.log(colorText(' ', 'cyan') + colorText('if the model is a thinking model, TTFT will be much higher because thinking tokens are not counted as first token.', 'dim'));
856
+ console.log(colorText('Note: ', 'cyan') + colorText('REST API with streaming now supports TTFT measurement. AI SDK also supports streaming.', 'dim'));
857
+ console.log(colorText(' ', 'cyan') + colorText('For thinking models, TTFT will be higher as thinking tokens are processed before output tokens.', 'dim'));
858
+ console.log(colorText(' ', 'cyan') + colorText('[est] markers indicate token counts were estimated (API did not provide usage metadata).', 'dim'));
810
859
  console.log('');
811
860
 
812
861
  const table = new Table({
@@ -833,15 +882,19 @@ async function displayColorfulResults(results, method = 'AI SDK', models = []) {
833
882
 
834
883
  // Add data rows (already ranked by sort order)
835
884
  sortedResults.forEach((result) => {
885
+ const outputTokenDisplay = result.tokenCount.toString() + (result.usedEstimateForOutput ? ' [est]' : '');
886
+ const promptTokenDisplay = result.promptTokens.toString() + (result.usedEstimateForInput ? ' [est]' : '');
887
+ const totalTokenDisplay = result.totalTokens.toString() + ((result.usedEstimateForInput || result.usedEstimateForOutput) ? ' [est]' : '');
888
+
836
889
  table.push([
837
890
  colorText(result.model, 'white'),
838
891
  colorText(result.provider, 'white'),
839
892
  colorText((result.totalTime / 1000).toFixed(2), 'green'),
840
893
  colorText((result.timeToFirstToken / 1000).toFixed(2), 'yellow'),
841
894
  colorText(result.tokensPerSecond.toFixed(1), 'magenta'),
842
- colorText(result.tokenCount.toString(), 'blue'),
843
- colorText(result.promptTokens.toString(), 'blue'),
844
- colorText(result.totalTokens.toString(), 'bright')
895
+ colorText(outputTokenDisplay, result.usedEstimateForOutput ? 'yellow' : 'blue'),
896
+ colorText(promptTokenDisplay, result.usedEstimateForInput ? 'yellow' : 'blue'),
897
+ colorText(totalTokenDisplay, (result.usedEstimateForInput || result.usedEstimateForOutput) ? 'yellow' : 'bright')
845
898
  ]);
846
899
  });
847
900
 
@@ -1007,7 +1060,6 @@ async function addVerifiedProvider() {
1007
1060
  // Add header
1008
1061
  screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
1009
1062
  screenContent += colorText('=============================', 'cyan') + '\n';
1010
- screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
1011
1063
  screenContent += '\n';
1012
1064
 
1013
1065
  screenContent += colorText('Add Verified Provider', 'magenta') + '\n';
@@ -1223,7 +1275,6 @@ async function addCustomProviderCLI() {
1223
1275
  // Add header
1224
1276
  screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
1225
1277
  screenContent += colorText('=============================', 'cyan') + '\n';
1226
- screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
1227
1278
  screenContent += '\n';
1228
1279
 
1229
1280
  screenContent += colorText('Add Custom Provider', 'magenta') + '\n';
@@ -1486,7 +1537,6 @@ async function addCustomModelsMenu() {
1486
1537
  // Add header
1487
1538
  screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
1488
1539
  screenContent += colorText('=============================', 'cyan') + '\n';
1489
- screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
1490
1540
  screenContent += '\n';
1491
1541
 
1492
1542
  screenContent += colorText('Add Custom Models', 'magenta') + '\n';
@@ -1553,7 +1603,6 @@ async function addModelsToExistingProvider() {
1553
1603
  // Add header
1554
1604
  screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
1555
1605
  screenContent += colorText('=============================', 'cyan') + '\n';
1556
- screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
1557
1606
  screenContent += '\n';
1558
1607
 
1559
1608
  screenContent += colorText('Add Models to Existing Provider', 'magenta') + '\n';
@@ -1665,23 +1714,9 @@ async function addModelsToExistingProvider() {
1665
1714
  await question(colorText('\nPress Enter to continue...', 'yellow'));
1666
1715
  }
1667
1716
 
1668
- // REST API benchmark function using direct API calls
1669
- async function runRestApiBenchmark(models) {
1670
- if (models.length === 0) {
1671
- console.log(colorText('No models selected for benchmarking.', 'red'));
1672
- return;
1673
- }
1674
-
1675
- clearScreen();
1676
- showHeader();
1677
- console.log(colorText('Running REST API Benchmark...', 'green'));
1678
- console.log(colorText(`Running ${models.length} models in parallel...`, 'cyan'));
1679
- console.log(colorText('Note: This uses direct REST API calls instead of AI SDK', 'dim'));
1680
- console.log('');
1681
-
1682
- // Create a function to benchmark a single model using REST API
1683
- const benchmarkModelRest = async (model) => {
1684
- console.log(colorText(`Testing ${model.name} (${model.providerName}) via REST API...`, 'yellow'));
1717
+ // Silent benchmark helper (returns raw result without UI)
1718
+ async function benchmarkSingleModelRest(model) {
1719
+
1685
1720
 
1686
1721
  try {
1687
1722
  // Validate required configuration
@@ -1693,14 +1728,24 @@ async function runRestApiBenchmark(models) {
1693
1728
  throw new Error(`Missing base URL for provider ${model.providerName}`);
1694
1729
  }
1695
1730
 
1731
+ // Extract the actual model ID for API calls (moved before usage)
1732
+ let actualModelId = model.name;
1733
+ if (model.id && model.id.includes('_')) {
1734
+ actualModelId = model.id.split('_')[1];
1735
+ }
1736
+ actualModelId = actualModelId.trim();
1737
+
1696
1738
  const startTime = Date.now();
1739
+ let firstTokenTime = null;
1740
+ let streamedText = '';
1741
+ let tokenCount = 0;
1697
1742
 
1698
1743
  // Use correct endpoint based on provider type
1699
1744
  let endpoint;
1700
1745
  if (model.providerType === 'anthropic') {
1701
1746
  endpoint = '/messages';
1702
1747
  } else if (model.providerType === 'google') {
1703
- endpoint = '/models/' + actualModelId + ':generateContent';
1748
+ endpoint = '/models/' + actualModelId + ':streamGenerateContent';
1704
1749
  } else {
1705
1750
  endpoint = '/chat/completions';
1706
1751
  }
@@ -1709,17 +1754,7 @@ async function runRestApiBenchmark(models) {
1709
1754
  const baseUrl = model.providerConfig.baseUrl.replace(/\/$/, '');
1710
1755
  const url = `${baseUrl}${endpoint}`;
1711
1756
 
1712
- // Extract the actual model ID for API calls
1713
- let actualModelId = model.name;
1714
- if (model.id && model.id.includes('_')) {
1715
- // For models with provider prefix, extract the actual model ID
1716
- actualModelId = model.id.split('_')[1];
1717
- console.log(colorText(` Using extracted model ID: ${actualModelId}`, 'cyan'));
1718
- }
1719
-
1720
- // Trim any trailing spaces from model names
1721
- actualModelId = actualModelId.trim();
1722
- console.log(colorText(` Using final model ID: "${actualModelId}"`, 'cyan'));
1757
+
1723
1758
 
1724
1759
  const headers = {
1725
1760
  'Content-Type': 'application/json',
@@ -1731,7 +1766,6 @@ async function runRestApiBenchmark(models) {
1731
1766
  headers['x-api-key'] = model.providerConfig.apiKey;
1732
1767
  headers['anthropic-version'] = '2023-06-01';
1733
1768
  } else if (model.providerType === 'google') {
1734
- // Google uses different auth
1735
1769
  delete headers['Authorization'];
1736
1770
  headers['x-goog-api-key'] = model.providerConfig.apiKey;
1737
1771
  }
@@ -1742,14 +1776,15 @@ async function runRestApiBenchmark(models) {
1742
1776
  { role: 'user', content: testPrompt }
1743
1777
  ],
1744
1778
  max_tokens: 500,
1745
- temperature: 0.7
1779
+ temperature: 0.7,
1780
+ stream: true
1746
1781
  };
1747
1782
 
1748
1783
  // Adjust for provider-specific formats
1749
1784
  if (model.providerType === 'anthropic') {
1750
1785
  body.max_tokens = 500;
1786
+ body.stream = true;
1751
1787
  } else if (model.providerType === 'google') {
1752
- // Google format is slightly different
1753
1788
  body.contents = [{ parts: [{ text: testPrompt }] }];
1754
1789
  body.generationConfig = {
1755
1790
  maxOutputTokens: 500,
@@ -1757,10 +1792,10 @@ async function runRestApiBenchmark(models) {
1757
1792
  };
1758
1793
  delete body.messages;
1759
1794
  delete body.max_tokens;
1795
+ delete body.stream;
1760
1796
  }
1761
1797
 
1762
- console.log(colorText(` Making request to: ${url}`, 'cyan'));
1763
- console.log(colorText(` Using model: ${actualModelId}`, 'cyan'));
1798
+
1764
1799
 
1765
1800
  const response = await fetch(url, {
1766
1801
  method: 'POST',
@@ -1768,56 +1803,152 @@ async function runRestApiBenchmark(models) {
1768
1803
  body: JSON.stringify(body)
1769
1804
  });
1770
1805
 
1771
- console.log(colorText(` Response status: ${response.status}`, 'cyan'));
1806
+
1772
1807
 
1773
1808
  if (!response.ok) {
1774
1809
  const errorText = await response.text();
1775
- console.log(colorText(` Error: ${errorText.slice(0, 200)}...`, 'red'));
1776
1810
  throw new Error(`API request failed: ${response.status} ${response.statusText}`);
1777
1811
  }
1778
1812
 
1779
- const data = await response.json();
1813
+ // Process streaming response
1814
+ const reader = response.body.getReader();
1815
+ const decoder = new TextDecoder();
1816
+ let buffer = '';
1817
+ let inputTokens = 0;
1818
+ let outputTokens = 0;
1819
+ let isFirstChunk = true;
1820
+
1821
+ while (true) {
1822
+ const { done, value } = await reader.read();
1823
+ if (done) break;
1824
+
1825
+ // Capture TTFT on first chunk arrival (network level)
1826
+ if (isFirstChunk && !firstTokenTime) {
1827
+ firstTokenTime = Date.now();
1828
+ isFirstChunk = false;
1829
+ // Show live TTFT result
1830
+ const ttftSeconds = ((firstTokenTime - startTime) / 1000).toFixed(2);
1831
+ console.log(colorText(`TTFT received at ${ttftSeconds}s for ${model.name}`, 'green'));
1832
+ }
1833
+
1834
+ buffer += decoder.decode(value, { stream: true });
1835
+ const lines = buffer.split('\n');
1836
+ buffer = lines.pop() || '';
1837
+
1838
+ for (const line of lines) {
1839
+ const trimmedLine = line.trim();
1840
+ if (!trimmedLine) continue;
1841
+
1842
+ try {
1843
+ if (model.providerType === 'anthropic') {
1844
+ // Anthropic uses newline-delimited JSON with event types
1845
+ if (trimmedLine.startsWith('data: ')) {
1846
+ const jsonStr = trimmedLine.slice(6);
1847
+ if (jsonStr === '[DONE]') break;
1848
+
1849
+ const chunk = JSON.parse(jsonStr);
1850
+
1851
+ if (chunk.type === 'content_block_delta' && chunk.delta?.text) {
1852
+ streamedText += chunk.delta.text;
1853
+ } else if (chunk.type === 'message_start' && chunk.message?.usage) {
1854
+ inputTokens = chunk.message.usage.input_tokens || 0;
1855
+ } else if (chunk.type === 'message_delta') {
1856
+ // Capture output tokens from message_delta
1857
+ if (chunk.usage?.output_tokens) {
1858
+ outputTokens = chunk.usage.output_tokens;
1859
+ }
1860
+ // Some implementations put input_tokens here too
1861
+ if (chunk.usage?.input_tokens && !inputTokens) {
1862
+ inputTokens = chunk.usage.input_tokens;
1863
+ }
1864
+ }
1865
+ } else if (trimmedLine.startsWith('event: ')) {
1866
+ // Skip event lines (Anthropic SSE format uses separate event and data lines)
1867
+ continue;
1868
+ } else {
1869
+ // Try parsing as raw JSON (some Anthropic-compatible APIs don't use SSE format)
1870
+ const chunk = JSON.parse(trimmedLine);
1871
+
1872
+ if (chunk.type === 'content_block_delta' && chunk.delta?.text) {
1873
+ streamedText += chunk.delta.text;
1874
+ } else if (chunk.type === 'message_start' && chunk.message?.usage) {
1875
+ inputTokens = chunk.message.usage.input_tokens || 0;
1876
+ } else if (chunk.type === 'message_delta') {
1877
+ if (chunk.usage?.output_tokens) {
1878
+ outputTokens = chunk.usage.output_tokens;
1879
+ }
1880
+ if (chunk.usage?.input_tokens && !inputTokens) {
1881
+ inputTokens = chunk.usage.input_tokens;
1882
+ }
1883
+ }
1884
+ }
1885
+ } else if (model.providerType === 'google') {
1886
+ // Google streaming format
1887
+ const chunk = JSON.parse(trimmedLine);
1888
+ if (chunk.candidates?.[0]?.content?.parts?.[0]?.text) {
1889
+ const text = chunk.candidates[0].content.parts[0].text;
1890
+ streamedText += text;
1891
+ }
1892
+ if (chunk.usageMetadata?.promptTokenCount) {
1893
+ inputTokens = chunk.usageMetadata.promptTokenCount;
1894
+ }
1895
+ if (chunk.usageMetadata?.candidatesTokenCount) {
1896
+ outputTokens = chunk.usageMetadata.candidatesTokenCount;
1897
+ }
1898
+ } else {
1899
+ // OpenAI-compatible SSE format
1900
+ if (trimmedLine.startsWith('data: ')) {
1901
+ const jsonStr = trimmedLine.slice(6);
1902
+ if (jsonStr === '[DONE]') break;
1903
+
1904
+ const chunk = JSON.parse(jsonStr);
1905
+
1906
+ if (chunk.choices?.[0]?.delta?.content) {
1907
+ streamedText += chunk.choices[0].delta.content;
1908
+ }
1909
+
1910
+ if (chunk.usage?.prompt_tokens) {
1911
+ inputTokens = chunk.usage.prompt_tokens;
1912
+ }
1913
+ if (chunk.usage?.completion_tokens) {
1914
+ outputTokens = chunk.usage.completion_tokens;
1915
+ }
1916
+ }
1917
+ }
1918
+ } catch (parseError) {
1919
+ // Skip invalid JSON lines
1920
+ continue;
1921
+ }
1922
+ }
1923
+ }
1924
+
1780
1925
  const endTime = Date.now();
1781
1926
  const totalTime = endTime - startTime;
1927
+ const timeToFirstToken = firstTokenTime ? firstTokenTime - startTime : totalTime;
1782
1928
 
1783
- // Calculate tokens based on provider type
1784
- let inputTokens, outputTokens;
1785
-
1786
- if (model.providerType === 'anthropic') {
1787
- inputTokens = data.usage?.input_tokens || Math.round(testPrompt.length / 4);
1788
- outputTokens = data.usage?.output_tokens || Math.round(data.content?.[0]?.text?.length / 4 || 0);
1789
- } else if (model.providerType === 'google') {
1790
- inputTokens = data.usageMetadata?.promptTokenCount || Math.round(testPrompt.length / 4);
1791
- outputTokens = data.usageMetadata?.candidatesTokenCount || Math.round(data.candidates?.[0]?.content?.parts?.[0]?.text?.length / 4 || 0);
1792
- } else {
1793
- inputTokens = data.usage?.prompt_tokens || Math.round(testPrompt.length / 4);
1794
- outputTokens = data.usage?.completion_tokens || Math.round(data.choices?.[0]?.message?.content?.length / 4 || 0);
1795
- }
1796
-
1797
- const totalTokens = inputTokens + outputTokens;
1929
+ // Calculate token counts - use provider's count if available, otherwise estimate
1930
+ const usedEstimateForOutput = !outputTokens;
1931
+ const usedEstimateForInput = !inputTokens;
1932
+ const finalOutputTokens = outputTokens || Math.round(streamedText.length / 4);
1933
+ const finalInputTokens = inputTokens || Math.round(testPrompt.length / 4);
1934
+ const totalTokens = finalInputTokens + finalOutputTokens;
1798
1935
  const tokensPerSecond = totalTime > 0 ? (totalTokens / totalTime) * 1000 : 0;
1799
-
1800
- console.log(colorText('Completed!', 'green'));
1801
- console.log(colorText(` Total Time: ${(totalTime / 1000).toFixed(2)}s`, 'cyan'));
1802
- console.log(colorText(` Tokens/Sec: ${tokensPerSecond.toFixed(1)}`, 'cyan'));
1803
- console.log(colorText(` Input Tokens: ${inputTokens}`, 'cyan'));
1804
- console.log(colorText(` Output Tokens: ${outputTokens}`, 'cyan'));
1805
- console.log(colorText(` Total Tokens: ${totalTokens}`, 'cyan'));
1806
1936
 
1807
1937
  return {
1808
1938
  model: model.name,
1809
1939
  provider: model.providerName,
1810
1940
  totalTime: totalTime,
1811
- timeToFirstToken: 0, // REST API doesn't track TTFT
1812
- tokenCount: outputTokens,
1941
+ timeToFirstToken: timeToFirstToken,
1942
+ tokenCount: finalOutputTokens,
1813
1943
  tokensPerSecond: tokensPerSecond,
1814
- promptTokens: inputTokens,
1944
+ promptTokens: finalInputTokens,
1815
1945
  totalTokens: totalTokens,
1946
+ usedEstimateForOutput: usedEstimateForOutput,
1947
+ usedEstimateForInput: usedEstimateForInput,
1816
1948
  success: true
1817
1949
  };
1818
1950
 
1819
1951
  } catch (error) {
1820
- console.log(colorText('Failed: ', 'red') + error.message);
1821
1952
  return {
1822
1953
  model: model.name,
1823
1954
  provider: model.providerName,
@@ -1831,17 +1962,49 @@ async function runRestApiBenchmark(models) {
1831
1962
  error: error.message
1832
1963
  };
1833
1964
  }
1834
- };
1965
+ }
1966
+
1967
+ // REST API benchmark function using direct API calls (with UI)
1968
+ async function runRestApiBenchmark(models) {
1969
+ if (models.length === 0) {
1970
+ console.log(colorText('No models selected for benchmarking.', 'red'));
1971
+ return;
1972
+ }
1835
1973
 
1836
- // Run all benchmarks in parallel
1974
+ clearScreen();
1975
+ showHeader();
1976
+ console.log(colorText('Running REST API Benchmark with Streaming...', 'green'));
1977
+ console.log(colorText(`Running ${models.length} models in parallel...`, 'cyan'));
1978
+ console.log(colorText('Note: This uses direct REST API calls with streaming support', 'dim'));
1979
+ console.log('');
1980
+
1981
+ // Run all benchmarks in parallel with UI feedback
1837
1982
  console.log(colorText('Starting parallel REST API benchmark execution...', 'cyan'));
1838
- const promises = models.map(model => benchmarkModelRest(model));
1983
+
1984
+ // Start all benchmarks in parallel
1985
+ const promises = models.map(model => {
1986
+ console.log(colorText(`Testing ${model.name} (${model.providerName}) via REST API with streaming...`, 'yellow'));
1987
+ return benchmarkSingleModelRest(model);
1988
+ });
1989
+
1839
1990
  const results = await Promise.all(promises);
1840
1991
 
1992
+ // Show individual results after all complete
1993
+ results.forEach((result, index) => {
1994
+ if (result.success) {
1995
+ console.log(colorText(`✓ ${result.model} (${result.provider}) completed!`, 'green'));
1996
+ console.log(colorText(` Total Time: ${(result.totalTime / 1000).toFixed(2)}s`, 'cyan'));
1997
+ console.log(colorText(` TTFT: ${(result.timeToFirstToken / 1000).toFixed(2)}s`, 'cyan'));
1998
+ console.log(colorText(` Tokens/Sec: ${result.tokensPerSecond.toFixed(1)}`, 'cyan'));
1999
+ } else {
2000
+ console.log(colorText(`✗ ${result.model} (${result.provider}) failed: `, 'red') + result.error);
2001
+ }
2002
+ });
2003
+
1841
2004
  console.log('');
1842
2005
  console.log(colorText('All REST API benchmarks completed!', 'green'));
1843
2006
 
1844
- await displayColorfulResults(results, 'REST API', models);
2007
+ await displayColorfulResults(results, 'REST API (Streaming)', models);
1845
2008
 
1846
2009
  // Add successful models to recent models list
1847
2010
  const successfulModels = results
@@ -1868,16 +2031,16 @@ async function runRestApiBenchmark(models) {
1868
2031
  async function showMainMenu() {
1869
2032
  const menuOptions = [
1870
2033
  { id: 1, text: 'Set Model', action: () => showModelMenu() },
1871
- { id: 2, text: 'Run Benchmark (AI SDK)', action: async () => {
2034
+ { id: 2, text: 'Run Benchmark (REST API)', action: async () => {
1872
2035
  const selectedModels = await selectModelsCircular();
1873
2036
  if (selectedModels.length > 0) {
1874
- await runStreamingBenchmark(selectedModels);
2037
+ await runRestApiBenchmark(selectedModels);
1875
2038
  }
1876
2039
  }},
1877
- { id: 3, text: 'Run Benchmark (REST API)', action: async () => {
2040
+ { id: 3, text: 'Run Benchmark (AI SDK - Legacy)', action: async () => {
1878
2041
  const selectedModels = await selectModelsCircular();
1879
2042
  if (selectedModels.length > 0) {
1880
- await runRestApiBenchmark(selectedModels);
2043
+ await runStreamingBenchmark(selectedModels);
1881
2044
  }
1882
2045
  }},
1883
2046
  { id: 4, text: 'Exit', action: () => {
@@ -1896,7 +2059,6 @@ async function showMainMenu() {
1896
2059
  // Add header
1897
2060
  screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
1898
2061
  screenContent += colorText('=============================', 'cyan') + '\n';
1899
- screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
1900
2062
  screenContent += '\n';
1901
2063
 
1902
2064
  screenContent += colorText('Main Menu:', 'cyan') + '\n';
@@ -1953,7 +2115,6 @@ async function showModelMenu() {
1953
2115
  // Add header
1954
2116
  screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
1955
2117
  screenContent += colorText('=============================', 'cyan') + '\n';
1956
- screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
1957
2118
  screenContent += '\n';
1958
2119
 
1959
2120
  screenContent += colorText('Model Management:', 'cyan') + '\n';
@@ -2002,17 +2163,157 @@ process.on('SIGINT', () => {
2002
2163
  process.exit(0);
2003
2164
  });
2004
2165
 
2166
+ // Headless benchmark mode
2167
/**
 * Run one benchmark without any interactive UI and print the result as JSON.
 *
 * Resolves the provider and model named by `benchSpec` against the loaded
 * config, runs the REST benchmark for that model, writes a JSON document to
 * stdout and then terminates the process: exit code 0 when the benchmark
 * succeeded, 1 on any failure (bad spec, unknown provider/model, missing API
 * key, unimplemented AI SDK mode, failed benchmark, or an unexpected error).
 *
 * @param {string} benchSpec - Target in `provider:model` form. Only the FIRST
 *   colon separates provider from model, so model ids that contain colons
 *   (e.g. `ollama:llama3:8b`) are passed through intact.
 * @param {string|null} apiKey - API key override; when falsy the provider's
 *   configured key is used.
 * @param {boolean} useAiSdk - Request the AI SDK path (not implemented yet;
 *   reports an error and exits with code 1).
 * @returns {Promise<void>}
 */
async function runHeadlessBenchmark(benchSpec, apiKey, useAiSdk) {
  // Model ids are stored as "<provider>_<model>"; drop everything up to the first "_".
  const stripProviderPrefix = (id) =>
    id && id.includes('_') ? id.split('_').slice(1).join('_') : id;

  try {
    // Split on the first colon only: the model part may legitimately contain ':'.
    const spec = String(benchSpec ?? '');
    const separatorIndex = spec.indexOf(':');
    const providerSpec = separatorIndex === -1 ? spec : spec.slice(0, separatorIndex);
    const modelName = separatorIndex === -1 ? '' : spec.slice(separatorIndex + 1);

    if (!providerSpec || !modelName) {
      console.error(colorText('Error: Invalid --bench format. Use: provider:model', 'red'));
      console.error(colorText('Example: --bench zai-code-anth:glm-4.6', 'yellow'));
      process.exit(1);
      return;
    }

    // Load all available providers (tolerate a config without a providers list).
    const config = await loadConfig();
    const providers = config?.providers ?? [];

    // Find the provider by id or name, case-insensitively.
    const providerKey = providerSpec.toLowerCase();
    const provider = providers.find((p) =>
      p.id?.toLowerCase() === providerKey ||
      p.name?.toLowerCase() === providerKey
    );

    if (!provider) {
      console.error(colorText(`Error: Provider '${providerSpec}' not found`, 'red'));
      console.error(colorText('Available providers:', 'yellow'));
      providers.forEach((p) => {
        console.error(colorText(` - ${p.id || p.name}`, 'cyan'));
      });
      process.exit(1);
      return;
    }

    // Find the model. A candidate matches on any of:
    //   1. its full id            ("zai-code-anth_glm-4.6")
    //   2. its id without prefix  ("glm-4.6")
    //   3. its display name       ("GLM-4.6-anth")
    const models = provider.models ?? [];
    const searchLower = modelName.toLowerCase();
    const model = models.find((m) => {
      const modelIdLower = m.id?.toLowerCase() || '';
      const modelNameLower = m.name?.toLowerCase() || '';
      return (
        modelIdLower === searchLower ||
        stripProviderPrefix(modelIdLower) === searchLower ||
        modelNameLower === searchLower
      );
    });

    if (!model) {
      console.error(colorText(`Error: Model '${modelName}' not found in provider '${provider.name}'`, 'red'));
      console.error(colorText('Available models:', 'yellow'));
      models.forEach((m) => {
        // Show both name and id (without provider prefix) for clarity.
        console.error(colorText(` - ${m.name} (id: ${stripProviderPrefix(m.id)})`, 'cyan'));
      });
      process.exit(1);
      return;
    }

    // A key passed via --api-key wins over the configured one.
    const finalApiKey = apiKey || provider.apiKey;

    if (!finalApiKey) {
      console.error(colorText(`Error: No API key found for provider '${provider.name}'`, 'red'));
      console.error(colorText('Please provide --api-key flag or configure the provider first', 'yellow'));
      process.exit(1);
      return;
    }

    if (useAiSdk) {
      // TODO: Implement AI SDK silent benchmark
      console.error(colorText('AI SDK headless mode not yet implemented', 'red'));
      process.exit(1);
      return;
    }

    // Model object in the shape benchmarkSingleModelRest reads.
    const modelConfig = {
      ...model,
      providerName: provider.name,
      providerType: provider.type,
      providerId: provider.id,
      providerConfig: {
        ...provider,
        apiKey: finalApiKey,
        baseUrl: provider.baseUrl || ''
      },
      selected: true
    };

    const result = await benchmarkSingleModelRest(modelConfig);

    // Machine-readable result on stdout; diagnostics above all go to stderr.
    const jsonOutput = {
      provider: provider.name,
      providerId: provider.id,
      model: model.name,
      modelId: model.id,
      method: 'rest-api',
      success: result.success,
      totalTime: result.totalTime,
      totalTimeSeconds: result.totalTime / 1000,
      timeToFirstToken: result.timeToFirstToken,
      timeToFirstTokenSeconds: result.timeToFirstToken / 1000,
      tokensPerSecond: result.tokensPerSecond,
      outputTokens: result.tokenCount,
      promptTokens: result.promptTokens,
      totalTokens: result.totalTokens,
      // Mirror the "[est]" markers of the interactive table so consumers can
      // tell provider-reported counts from length-based estimates.
      usedEstimateForOutput: result.usedEstimateForOutput ?? false,
      usedEstimateForInput: result.usedEstimateForInput ?? false,
      error: result.error || null
    };

    console.log(JSON.stringify(jsonOutput, null, 2));
    process.exit(result.success ? 0 : 1);
  } catch (error) {
    console.error(colorText('Error: ' + error.message, 'red'));
    if (debugMode) {
      console.error(error.stack);
    }
    process.exit(1);
  }
}
2297
+
2005
2298
  // Start the CLI
2006
- if (import.meta.url === `file://${process.argv[1]}` ||
2007
- process.argv.length === 2 ||
2008
- (process.argv.length === 3 && process.argv[2] === '--debug')) {
2009
-
2010
- // Clean up recent models from main config and migrate to cache on startup
2011
- cleanupRecentModelsFromConfig().then(() => {
2012
- showMainMenu();
2013
- }).catch(() => {
2014
- showMainMenu();
2015
- });
2299
/**
 * Report whether this file is the program's entry point.
 *
 * `require` and `module` do not exist in an ES module, so referencing them
 * unguarded throws a ReferenceError. They are used only when present (for
 * example in a CommonJS bundle); otherwise this module's URL is compared with
 * the script path Node was started with, resolving symlinks first because an
 * installed bin entry is usually a symlink to the real file.
 *
 * @returns {Promise<boolean>}
 */
async function isMainModule() {
  if (typeof require !== 'undefined' && typeof module !== 'undefined') {
    return require.main === module;
  }

  const entryPath = process.argv[1];
  if (!entryPath) {
    return false;
  }

  // Loaded lazily so this block does not depend on the file's import list.
  const { pathToFileURL } = await import('node:url');
  const { realpathSync } = await import('node:fs');
  try {
    return import.meta.url === pathToFileURL(realpathSync(entryPath)).href;
  } catch {
    // Entry path could not be resolved; treat as "imported, not executed".
    return false;
  }
}

/**
 * CLI entry point: show help, run a headless benchmark, or open the
 * interactive menu, depending on the parsed command-line arguments.
 *
 * @returns {Promise<void>}
 */
async function startCli() {
  if (!(await isMainModule())) {
    return;
  }

  // Check if help flag
  if (cliArgs.help) {
    showHelp();
    process.exit(0);
  }

  // Check if headless benchmark mode
  if (cliArgs.bench) {
    await runHeadlessBenchmark(cliArgs.bench, cliArgs.apiKey, cliArgs.useAiSdk);
    return;
  }

  // Interactive mode: the cleanup is best-effort, the menu opens either way.
  try {
    await cleanupRecentModelsFromConfig();
  } catch {
    // Ignored on purpose: a failed cleanup must not block the menu.
  }
  showMainMenu();
}

startCli().catch((error) => {
  console.error(error);
  process.exit(1);
});
2017
2318
 
2018
2319
  export { showMainMenu, listProviders, selectModelsCircular, runStreamingBenchmark, loadConfig, saveConfig };