ai-speedometer 1.2.5 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli.js CHANGED
@@ -6,7 +6,6 @@ import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
6
6
  import { createAnthropic } from '@ai-sdk/anthropic';
7
7
  import { streamText } from 'ai'; // Changed from streamText to generateText
8
8
  import { testPrompt } from './test-prompt.js';
9
- import { LLMBenchmark } from './benchmark-rest.js';
10
9
  import { getAllProviders, searchProviders, getModelsForProvider } from './models-dev.js';
11
10
  import {
12
11
  getAllAvailableProviders,
@@ -29,8 +28,64 @@ import {
29
28
  import 'dotenv/config';
30
29
  import Table from 'cli-table3';
31
30
 
32
- // Check for debug flag
33
- const debugMode = process.argv.includes('--debug');
31
+ // Parse command line arguments
32
/**
 * Parse the command line flags understood by ai-speedometer.
 *
 * Recognised flags: `--debug`, `--bench <provider:model>`, `--api-key <key>`,
 * `--ai-sdk`, `--formatted`, `--help` / `-h`. The two value flags also accept
 * the `--flag=value` spelling. Unrecognised arguments are ignored.
 *
 * A value flag never swallows another known flag: `--api-key --formatted`
 * leaves `apiKey` as `null` and still turns `formatted` on. A value flag with
 * nothing after it also stays `null` (never `undefined`).
 *
 * @param {string[]} [argv] - Arguments to parse. Defaults to
 *   `process.argv.slice(2)`.
 * @returns {{debug: boolean, bench: (string|null), apiKey: (string|null),
 *   useAiSdk: boolean, formatted: boolean, help: boolean}} Parsed options.
 */
function parseCliArgs(argv = process.argv.slice(2)) {
  const booleanFlags = new Map([
    ['--debug', 'debug'],
    ['--ai-sdk', 'useAiSdk'],
    ['--formatted', 'formatted'],
    ['--help', 'help'],
    ['-h', 'help'],
  ]);
  const valueFlags = new Map([
    ['--bench', 'bench'],
    ['--api-key', 'apiKey'],
  ]);
  const isKnownFlag = (token) => booleanFlags.has(token) || valueFlags.has(token);

  const parsed = {
    debug: false,
    bench: null,
    apiKey: null,
    useAiSdk: false,
    formatted: false,
    help: false,
  };

  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];

    if (booleanFlags.has(arg)) {
      parsed[booleanFlags.get(arg)] = true;
      continue;
    }

    if (valueFlags.has(arg)) {
      const next = argv[i + 1];
      // Only consume the following token when it really is a value; otherwise
      // leave it in place so it is parsed as the flag it is.
      if (next !== undefined && !isKnownFlag(next)) {
        parsed[valueFlags.get(arg)] = next;
        i++;
      }
      continue;
    }

    // `--flag=value` spelling for the value flags.
    const equalsIndex = arg.indexOf('=');
    if (equalsIndex > 0) {
      const flagName = arg.slice(0, equalsIndex);
      if (valueFlags.has(flagName)) {
        const value = arg.slice(equalsIndex + 1);
        parsed[valueFlags.get(flagName)] = value === '' ? null : value;
      }
    }
  }

  return parsed;
}
63
+
64
/**
 * Print the usage summary (usage lines, options, examples) to stdout.
 *
 * Section titles are coloured 'yellow', the banner 'cyan' and the trailing
 * descriptions 'dim' via colorText.
 */
function showHelp() {
  const blank = () => console.log('');
  const heading = (title) => console.log(colorText(title, 'yellow'));
  // Each row is [plain left-hand text, dimmed description].
  const annotated = (rows) => {
    for (const [left, note] of rows) {
      console.log(left + colorText(note, 'dim'));
    }
  };

  console.log(colorText('ai-speedometer - Benchmark AI models', 'cyan'));
  blank();

  heading('Usage:');
  annotated([
    [' ai-speedometer ', '# Interactive mode'],
    [' ai-speedometer --bench <provider:model> ', '# Headless benchmark'],
  ]);
  blank();

  heading('Options:');
  annotated([
    [' --bench <provider:model> ', 'Run benchmark in headless mode'],
    [' --api-key <key> ', 'Override API key (optional)'],
    [' --ai-sdk ', 'Use AI SDK instead of REST API'],
    [' --formatted ', 'Format JSON output for human readability'],
    [' --debug ', 'Enable debug logging'],
    [' --help, -h ', 'Show this help message'],
  ]);
  blank();

  heading('Examples:');
  const examples = [
    ' ai-speedometer --bench openai:gpt-4',
    ' ai-speedometer --bench anthropic:claude-3-opus --api-key "sk-..."',
    ' ai-speedometer --bench openai:gpt-4 --ai-sdk',
    ' ai-speedometer --bench openai:gpt-4 --formatted',
  ];
  for (const example of examples) {
    console.log(example);
  }
  blank();
}
86
+
87
// Parse argv once at module load; debugMode mirrors the --debug flag.
const cliArgs = parseCliArgs();
const { debug: debugMode } = cliArgs;
34
89
  let logFile = null;
35
90
 
36
91
  function log(message) {
@@ -125,7 +180,6 @@ function clearScreen() {
125
180
  function showHeader() {
126
181
  console.log(colorText('Ai-speedometer', 'cyan'));
127
182
  console.log(colorText('=============================', 'cyan'));
128
- console.log(colorText('Note: opencode uses ai-sdk', 'dim'));
129
183
  console.log('');
130
184
  }
131
185
 
@@ -302,7 +356,6 @@ async function selectModelsCircular() {
302
356
  // Add header
303
357
  screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
304
358
  screenContent += colorText('=============================', 'cyan') + '\n';
305
- screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
306
359
  screenContent += '\n';
307
360
 
308
361
  screenContent += colorText('Select Models for Benchmark', 'magenta') + '\n';
@@ -805,8 +858,9 @@ async function displayColorfulResults(results, method = 'AI SDK', models = []) {
805
858
  console.log(colorText('COMPREHENSIVE PERFORMANCE SUMMARY', 'yellow'));
806
859
 
807
860
  // Add note about method differences
808
- console.log(colorText('Note: ', 'cyan') + colorText('Benchmark over REST API doesn\'t utilize streaming, so TTFT is 0. AI SDK utilizes streaming, but', 'dim'));
809
- console.log(colorText(' ', 'cyan') + colorText('if the model is a thinking model, TTFT will be much higher because thinking tokens are not counted as first token.', 'dim'));
861
+ console.log(colorText('Note: ', 'cyan') + colorText('REST API with streaming now supports TTFT measurement. AI SDK also supports streaming.', 'dim'));
862
+ console.log(colorText(' ', 'cyan') + colorText('For thinking models, TTFT will be higher as thinking tokens are processed before output tokens.', 'dim'));
863
+ console.log(colorText(' ', 'cyan') + colorText('[est] markers indicate token counts were estimated (API did not provide usage metadata).', 'dim'));
810
864
  console.log('');
811
865
 
812
866
  const table = new Table({
@@ -833,15 +887,19 @@ async function displayColorfulResults(results, method = 'AI SDK', models = []) {
833
887
 
834
888
  // Add data rows (already ranked by sort order)
835
889
  sortedResults.forEach((result) => {
890
+ const outputTokenDisplay = result.tokenCount.toString() + (result.usedEstimateForOutput ? ' [est]' : '');
891
+ const promptTokenDisplay = result.promptTokens.toString() + (result.usedEstimateForInput ? ' [est]' : '');
892
+ const totalTokenDisplay = result.totalTokens.toString() + ((result.usedEstimateForInput || result.usedEstimateForOutput) ? ' [est]' : '');
893
+
836
894
  table.push([
837
895
  colorText(result.model, 'white'),
838
896
  colorText(result.provider, 'white'),
839
897
  colorText((result.totalTime / 1000).toFixed(2), 'green'),
840
898
  colorText((result.timeToFirstToken / 1000).toFixed(2), 'yellow'),
841
899
  colorText(result.tokensPerSecond.toFixed(1), 'magenta'),
842
- colorText(result.tokenCount.toString(), 'blue'),
843
- colorText(result.promptTokens.toString(), 'blue'),
844
- colorText(result.totalTokens.toString(), 'bright')
900
+ colorText(outputTokenDisplay, result.usedEstimateForOutput ? 'yellow' : 'blue'),
901
+ colorText(promptTokenDisplay, result.usedEstimateForInput ? 'yellow' : 'blue'),
902
+ colorText(totalTokenDisplay, (result.usedEstimateForInput || result.usedEstimateForOutput) ? 'yellow' : 'bright')
845
903
  ]);
846
904
  });
847
905
 
@@ -1007,7 +1065,6 @@ async function addVerifiedProvider() {
1007
1065
  // Add header
1008
1066
  screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
1009
1067
  screenContent += colorText('=============================', 'cyan') + '\n';
1010
- screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
1011
1068
  screenContent += '\n';
1012
1069
 
1013
1070
  screenContent += colorText('Add Verified Provider', 'magenta') + '\n';
@@ -1223,7 +1280,6 @@ async function addCustomProviderCLI() {
1223
1280
  // Add header
1224
1281
  screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
1225
1282
  screenContent += colorText('=============================', 'cyan') + '\n';
1226
- screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
1227
1283
  screenContent += '\n';
1228
1284
 
1229
1285
  screenContent += colorText('Add Custom Provider', 'magenta') + '\n';
@@ -1486,7 +1542,6 @@ async function addCustomModelsMenu() {
1486
1542
  // Add header
1487
1543
  screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
1488
1544
  screenContent += colorText('=============================', 'cyan') + '\n';
1489
- screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
1490
1545
  screenContent += '\n';
1491
1546
 
1492
1547
  screenContent += colorText('Add Custom Models', 'magenta') + '\n';
@@ -1553,7 +1608,6 @@ async function addModelsToExistingProvider() {
1553
1608
  // Add header
1554
1609
  screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
1555
1610
  screenContent += colorText('=============================', 'cyan') + '\n';
1556
- screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
1557
1611
  screenContent += '\n';
1558
1612
 
1559
1613
  screenContent += colorText('Add Models to Existing Provider', 'magenta') + '\n';
@@ -1665,23 +1719,9 @@ async function addModelsToExistingProvider() {
1665
1719
  await question(colorText('\nPress Enter to continue...', 'yellow'));
1666
1720
  }
1667
1721
 
1668
- // REST API benchmark function using direct API calls
1669
- async function runRestApiBenchmark(models) {
1670
- if (models.length === 0) {
1671
- console.log(colorText('No models selected for benchmarking.', 'red'));
1672
- return;
1673
- }
1674
-
1675
- clearScreen();
1676
- showHeader();
1677
- console.log(colorText('Running REST API Benchmark...', 'green'));
1678
- console.log(colorText(`Running ${models.length} models in parallel...`, 'cyan'));
1679
- console.log(colorText('Note: This uses direct REST API calls instead of AI SDK', 'dim'));
1680
- console.log('');
1681
-
1682
- // Create a function to benchmark a single model using REST API
1683
- const benchmarkModelRest = async (model) => {
1684
- console.log(colorText(`Testing ${model.name} (${model.providerName}) via REST API...`, 'yellow'));
1722
+ // Silent benchmark helper (returns raw result without UI)
1723
+ async function benchmarkSingleModelRest(model) {
1724
+
1685
1725
 
1686
1726
  try {
1687
1727
  // Validate required configuration
@@ -1693,14 +1733,24 @@ async function runRestApiBenchmark(models) {
1693
1733
  throw new Error(`Missing base URL for provider ${model.providerName}`);
1694
1734
  }
1695
1735
 
1736
+ // Extract the actual model ID for API calls (moved before usage)
1737
+ let actualModelId = model.name;
1738
+ if (model.id && model.id.includes('_')) {
1739
+ actualModelId = model.id.split('_')[1];
1740
+ }
1741
+ actualModelId = actualModelId.trim();
1742
+
1696
1743
  const startTime = Date.now();
1744
+ let firstTokenTime = null;
1745
+ let streamedText = '';
1746
+ let tokenCount = 0;
1697
1747
 
1698
1748
  // Use correct endpoint based on provider type
1699
1749
  let endpoint;
1700
1750
  if (model.providerType === 'anthropic') {
1701
1751
  endpoint = '/messages';
1702
1752
  } else if (model.providerType === 'google') {
1703
- endpoint = '/models/' + actualModelId + ':generateContent';
1753
+ endpoint = '/models/' + actualModelId + ':streamGenerateContent';
1704
1754
  } else {
1705
1755
  endpoint = '/chat/completions';
1706
1756
  }
@@ -1709,17 +1759,7 @@ async function runRestApiBenchmark(models) {
1709
1759
  const baseUrl = model.providerConfig.baseUrl.replace(/\/$/, '');
1710
1760
  const url = `${baseUrl}${endpoint}`;
1711
1761
 
1712
- // Extract the actual model ID for API calls
1713
- let actualModelId = model.name;
1714
- if (model.id && model.id.includes('_')) {
1715
- // For models with provider prefix, extract the actual model ID
1716
- actualModelId = model.id.split('_')[1];
1717
- console.log(colorText(` Using extracted model ID: ${actualModelId}`, 'cyan'));
1718
- }
1719
-
1720
- // Trim any trailing spaces from model names
1721
- actualModelId = actualModelId.trim();
1722
- console.log(colorText(` Using final model ID: "${actualModelId}"`, 'cyan'));
1762
+
1723
1763
 
1724
1764
  const headers = {
1725
1765
  'Content-Type': 'application/json',
@@ -1731,7 +1771,6 @@ async function runRestApiBenchmark(models) {
1731
1771
  headers['x-api-key'] = model.providerConfig.apiKey;
1732
1772
  headers['anthropic-version'] = '2023-06-01';
1733
1773
  } else if (model.providerType === 'google') {
1734
- // Google uses different auth
1735
1774
  delete headers['Authorization'];
1736
1775
  headers['x-goog-api-key'] = model.providerConfig.apiKey;
1737
1776
  }
@@ -1742,14 +1781,15 @@ async function runRestApiBenchmark(models) {
1742
1781
  { role: 'user', content: testPrompt }
1743
1782
  ],
1744
1783
  max_tokens: 500,
1745
- temperature: 0.7
1784
+ temperature: 0.7,
1785
+ stream: true
1746
1786
  };
1747
1787
 
1748
1788
  // Adjust for provider-specific formats
1749
1789
  if (model.providerType === 'anthropic') {
1750
1790
  body.max_tokens = 500;
1791
+ body.stream = true;
1751
1792
  } else if (model.providerType === 'google') {
1752
- // Google format is slightly different
1753
1793
  body.contents = [{ parts: [{ text: testPrompt }] }];
1754
1794
  body.generationConfig = {
1755
1795
  maxOutputTokens: 500,
@@ -1757,10 +1797,10 @@ async function runRestApiBenchmark(models) {
1757
1797
  };
1758
1798
  delete body.messages;
1759
1799
  delete body.max_tokens;
1800
+ delete body.stream;
1760
1801
  }
1761
1802
 
1762
- console.log(colorText(` Making request to: ${url}`, 'cyan'));
1763
- console.log(colorText(` Using model: ${actualModelId}`, 'cyan'));
1803
+
1764
1804
 
1765
1805
  const response = await fetch(url, {
1766
1806
  method: 'POST',
@@ -1768,56 +1808,154 @@ async function runRestApiBenchmark(models) {
1768
1808
  body: JSON.stringify(body)
1769
1809
  });
1770
1810
 
1771
- console.log(colorText(` Response status: ${response.status}`, 'cyan'));
1811
+
1772
1812
 
1773
1813
  if (!response.ok) {
1774
1814
  const errorText = await response.text();
1775
- console.log(colorText(` Error: ${errorText.slice(0, 200)}...`, 'red'));
1776
1815
  throw new Error(`API request failed: ${response.status} ${response.statusText}`);
1777
1816
  }
1778
1817
 
1779
- const data = await response.json();
1818
+ // Process streaming response
1819
+ const reader = response.body.getReader();
1820
+ const decoder = new TextDecoder();
1821
+ let buffer = '';
1822
+ let inputTokens = 0;
1823
+ let outputTokens = 0;
1824
+ let isFirstChunk = true;
1825
+
1826
+ while (true) {
1827
+ const { done, value } = await reader.read();
1828
+ if (done) break;
1829
+
1830
+ // Capture TTFT on first chunk arrival (network level)
1831
+ if (isFirstChunk && !firstTokenTime) {
1832
+ firstTokenTime = Date.now();
1833
+ isFirstChunk = false;
1834
+ // Show live TTFT result (only in interactive mode, not headless)
1835
+ const ttftSeconds = ((firstTokenTime - startTime) / 1000).toFixed(2);
1836
+ if (!cliArgs.bench) {
1837
+ console.log(colorText(`TTFT received at ${ttftSeconds}s for ${model.name}`, 'green'));
1838
+ }
1839
+ }
1840
+
1841
+ buffer += decoder.decode(value, { stream: true });
1842
+ const lines = buffer.split('\n');
1843
+ buffer = lines.pop() || '';
1844
+
1845
+ for (const line of lines) {
1846
+ const trimmedLine = line.trim();
1847
+ if (!trimmedLine) continue;
1848
+
1849
+ try {
1850
+ if (model.providerType === 'anthropic') {
1851
+ // Anthropic uses newline-delimited JSON with event types
1852
+ if (trimmedLine.startsWith('data: ')) {
1853
+ const jsonStr = trimmedLine.slice(6);
1854
+ if (jsonStr === '[DONE]') break;
1855
+
1856
+ const chunk = JSON.parse(jsonStr);
1857
+
1858
+ if (chunk.type === 'content_block_delta' && chunk.delta?.text) {
1859
+ streamedText += chunk.delta.text;
1860
+ } else if (chunk.type === 'message_start' && chunk.message?.usage) {
1861
+ inputTokens = chunk.message.usage.input_tokens || 0;
1862
+ } else if (chunk.type === 'message_delta') {
1863
+ // Capture output tokens from message_delta
1864
+ if (chunk.usage?.output_tokens) {
1865
+ outputTokens = chunk.usage.output_tokens;
1866
+ }
1867
+ // Some implementations put input_tokens here too
1868
+ if (chunk.usage?.input_tokens && !inputTokens) {
1869
+ inputTokens = chunk.usage.input_tokens;
1870
+ }
1871
+ }
1872
+ } else if (trimmedLine.startsWith('event: ')) {
1873
+ // Skip event lines (Anthropic SSE format uses separate event and data lines)
1874
+ continue;
1875
+ } else {
1876
+ // Try parsing as raw JSON (some Anthropic-compatible APIs don't use SSE format)
1877
+ const chunk = JSON.parse(trimmedLine);
1878
+
1879
+ if (chunk.type === 'content_block_delta' && chunk.delta?.text) {
1880
+ streamedText += chunk.delta.text;
1881
+ } else if (chunk.type === 'message_start' && chunk.message?.usage) {
1882
+ inputTokens = chunk.message.usage.input_tokens || 0;
1883
+ } else if (chunk.type === 'message_delta') {
1884
+ if (chunk.usage?.output_tokens) {
1885
+ outputTokens = chunk.usage.output_tokens;
1886
+ }
1887
+ if (chunk.usage?.input_tokens && !inputTokens) {
1888
+ inputTokens = chunk.usage.input_tokens;
1889
+ }
1890
+ }
1891
+ }
1892
+ } else if (model.providerType === 'google') {
1893
+ // Google streaming format
1894
+ const chunk = JSON.parse(trimmedLine);
1895
+ if (chunk.candidates?.[0]?.content?.parts?.[0]?.text) {
1896
+ const text = chunk.candidates[0].content.parts[0].text;
1897
+ streamedText += text;
1898
+ }
1899
+ if (chunk.usageMetadata?.promptTokenCount) {
1900
+ inputTokens = chunk.usageMetadata.promptTokenCount;
1901
+ }
1902
+ if (chunk.usageMetadata?.candidatesTokenCount) {
1903
+ outputTokens = chunk.usageMetadata.candidatesTokenCount;
1904
+ }
1905
+ } else {
1906
+ // OpenAI-compatible SSE format
1907
+ if (trimmedLine.startsWith('data: ')) {
1908
+ const jsonStr = trimmedLine.slice(6);
1909
+ if (jsonStr === '[DONE]') break;
1910
+
1911
+ const chunk = JSON.parse(jsonStr);
1912
+
1913
+ if (chunk.choices?.[0]?.delta?.content) {
1914
+ streamedText += chunk.choices[0].delta.content;
1915
+ }
1916
+
1917
+ if (chunk.usage?.prompt_tokens) {
1918
+ inputTokens = chunk.usage.prompt_tokens;
1919
+ }
1920
+ if (chunk.usage?.completion_tokens) {
1921
+ outputTokens = chunk.usage.completion_tokens;
1922
+ }
1923
+ }
1924
+ }
1925
+ } catch (parseError) {
1926
+ // Skip invalid JSON lines
1927
+ continue;
1928
+ }
1929
+ }
1930
+ }
1931
+
1780
1932
  const endTime = Date.now();
1781
1933
  const totalTime = endTime - startTime;
1934
+ const timeToFirstToken = firstTokenTime ? firstTokenTime - startTime : totalTime;
1782
1935
 
1783
- // Calculate tokens based on provider type
1784
- let inputTokens, outputTokens;
1785
-
1786
- if (model.providerType === 'anthropic') {
1787
- inputTokens = data.usage?.input_tokens || Math.round(testPrompt.length / 4);
1788
- outputTokens = data.usage?.output_tokens || Math.round(data.content?.[0]?.text?.length / 4 || 0);
1789
- } else if (model.providerType === 'google') {
1790
- inputTokens = data.usageMetadata?.promptTokenCount || Math.round(testPrompt.length / 4);
1791
- outputTokens = data.usageMetadata?.candidatesTokenCount || Math.round(data.candidates?.[0]?.content?.parts?.[0]?.text?.length / 4 || 0);
1792
- } else {
1793
- inputTokens = data.usage?.prompt_tokens || Math.round(testPrompt.length / 4);
1794
- outputTokens = data.usage?.completion_tokens || Math.round(data.choices?.[0]?.message?.content?.length / 4 || 0);
1795
- }
1796
-
1797
- const totalTokens = inputTokens + outputTokens;
1936
+ // Calculate token counts - use provider's count if available, otherwise estimate
1937
+ const usedEstimateForOutput = !outputTokens;
1938
+ const usedEstimateForInput = !inputTokens;
1939
+ const finalOutputTokens = outputTokens || Math.round(streamedText.length / 4);
1940
+ const finalInputTokens = inputTokens || Math.round(testPrompt.length / 4);
1941
+ const totalTokens = finalInputTokens + finalOutputTokens;
1798
1942
  const tokensPerSecond = totalTime > 0 ? (totalTokens / totalTime) * 1000 : 0;
1799
-
1800
- console.log(colorText('Completed!', 'green'));
1801
- console.log(colorText(` Total Time: ${(totalTime / 1000).toFixed(2)}s`, 'cyan'));
1802
- console.log(colorText(` Tokens/Sec: ${tokensPerSecond.toFixed(1)}`, 'cyan'));
1803
- console.log(colorText(` Input Tokens: ${inputTokens}`, 'cyan'));
1804
- console.log(colorText(` Output Tokens: ${outputTokens}`, 'cyan'));
1805
- console.log(colorText(` Total Tokens: ${totalTokens}`, 'cyan'));
1806
1943
 
1807
1944
  return {
1808
1945
  model: model.name,
1809
1946
  provider: model.providerName,
1810
1947
  totalTime: totalTime,
1811
- timeToFirstToken: 0, // REST API doesn't track TTFT
1812
- tokenCount: outputTokens,
1948
+ timeToFirstToken: timeToFirstToken,
1949
+ tokenCount: finalOutputTokens,
1813
1950
  tokensPerSecond: tokensPerSecond,
1814
- promptTokens: inputTokens,
1951
+ promptTokens: finalInputTokens,
1815
1952
  totalTokens: totalTokens,
1953
+ usedEstimateForOutput: usedEstimateForOutput,
1954
+ usedEstimateForInput: usedEstimateForInput,
1816
1955
  success: true
1817
1956
  };
1818
1957
 
1819
1958
  } catch (error) {
1820
- console.log(colorText('Failed: ', 'red') + error.message);
1821
1959
  return {
1822
1960
  model: model.name,
1823
1961
  provider: model.providerName,
@@ -1831,17 +1969,49 @@ async function runRestApiBenchmark(models) {
1831
1969
  error: error.message
1832
1970
  };
1833
1971
  }
1834
- };
1972
+ }
1973
+
1974
+ // REST API benchmark function using direct API calls (with UI)
1975
+ async function runRestApiBenchmark(models) {
1976
+ if (models.length === 0) {
1977
+ console.log(colorText('No models selected for benchmarking.', 'red'));
1978
+ return;
1979
+ }
1835
1980
 
1836
- // Run all benchmarks in parallel
1981
+ clearScreen();
1982
+ showHeader();
1983
+ console.log(colorText('Running REST API Benchmark with Streaming...', 'green'));
1984
+ console.log(colorText(`Running ${models.length} models in parallel...`, 'cyan'));
1985
+ console.log(colorText('Note: This uses direct REST API calls with streaming support', 'dim'));
1986
+ console.log('');
1987
+
1988
+ // Run all benchmarks in parallel with UI feedback
1837
1989
  console.log(colorText('Starting parallel REST API benchmark execution...', 'cyan'));
1838
- const promises = models.map(model => benchmarkModelRest(model));
1990
+
1991
+ // Start all benchmarks in parallel
1992
+ const promises = models.map(model => {
1993
+ console.log(colorText(`Testing ${model.name} (${model.providerName}) via REST API with streaming...`, 'yellow'));
1994
+ return benchmarkSingleModelRest(model);
1995
+ });
1996
+
1839
1997
  const results = await Promise.all(promises);
1840
1998
 
1999
+ // Show individual results after all complete
2000
+ results.forEach((result, index) => {
2001
+ if (result.success) {
2002
+ console.log(colorText(`✓ ${result.model} (${result.provider}) completed!`, 'green'));
2003
+ console.log(colorText(` Total Time: ${(result.totalTime / 1000).toFixed(2)}s`, 'cyan'));
2004
+ console.log(colorText(` TTFT: ${(result.timeToFirstToken / 1000).toFixed(2)}s`, 'cyan'));
2005
+ console.log(colorText(` Tokens/Sec: ${result.tokensPerSecond.toFixed(1)}`, 'cyan'));
2006
+ } else {
2007
+ console.log(colorText(`✗ ${result.model} (${result.provider}) failed: `, 'red') + result.error);
2008
+ }
2009
+ });
2010
+
1841
2011
  console.log('');
1842
2012
  console.log(colorText('All REST API benchmarks completed!', 'green'));
1843
2013
 
1844
- await displayColorfulResults(results, 'REST API', models);
2014
+ await displayColorfulResults(results, 'REST API (Streaming)', models);
1845
2015
 
1846
2016
  // Add successful models to recent models list
1847
2017
  const successfulModels = results
@@ -1868,16 +2038,16 @@ async function runRestApiBenchmark(models) {
1868
2038
  async function showMainMenu() {
1869
2039
  const menuOptions = [
1870
2040
  { id: 1, text: 'Set Model', action: () => showModelMenu() },
1871
- { id: 2, text: 'Run Benchmark (AI SDK)', action: async () => {
2041
+ { id: 2, text: 'Run Benchmark (REST API)', action: async () => {
1872
2042
  const selectedModels = await selectModelsCircular();
1873
2043
  if (selectedModels.length > 0) {
1874
- await runStreamingBenchmark(selectedModels);
2044
+ await runRestApiBenchmark(selectedModels);
1875
2045
  }
1876
2046
  }},
1877
- { id: 3, text: 'Run Benchmark (REST API)', action: async () => {
2047
+ { id: 3, text: 'Run Benchmark (AI SDK - Legacy)', action: async () => {
1878
2048
  const selectedModels = await selectModelsCircular();
1879
2049
  if (selectedModels.length > 0) {
1880
- await runRestApiBenchmark(selectedModels);
2050
+ await runStreamingBenchmark(selectedModels);
1881
2051
  }
1882
2052
  }},
1883
2053
  { id: 4, text: 'Exit', action: () => {
@@ -1896,7 +2066,6 @@ async function showMainMenu() {
1896
2066
  // Add header
1897
2067
  screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
1898
2068
  screenContent += colorText('=============================', 'cyan') + '\n';
1899
- screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
1900
2069
  screenContent += '\n';
1901
2070
 
1902
2071
  screenContent += colorText('Main Menu:', 'cyan') + '\n';
@@ -1953,7 +2122,6 @@ async function showModelMenu() {
1953
2122
  // Add header
1954
2123
  screenContent += colorText('Ai-speedometer', 'cyan') + '\n';
1955
2124
  screenContent += colorText('=============================', 'cyan') + '\n';
1956
- screenContent += colorText('Note: opencode uses ai-sdk', 'dim') + '\n';
1957
2125
  screenContent += '\n';
1958
2126
 
1959
2127
  screenContent += colorText('Model Management:', 'cyan') + '\n';
@@ -2002,17 +2170,158 @@ process.on('SIGINT', () => {
2002
2170
  process.exit(0);
2003
2171
  });
2004
2172
 
2173
+ // Headless benchmark mode
2174
/**
 * Run a single benchmark without any interactive UI and print the result as
 * one JSON document on stdout.
 *
 * The provider and model are looked up in the configuration returned by
 * loadConfig(). Diagnostics are written to stderr so stdout stays
 * machine-readable. The function always terminates the process: exit code 0
 * when the benchmark result reports success, 1 for a failed benchmark, an
 * unknown provider/model, a missing API key, or any thrown error.
 *
 * @param {string} benchSpec - `<provider>:<model>`. Only the FIRST ':' is the
 *   separator, so model identifiers that themselves contain ':' are kept
 *   intact. The provider is matched case-insensitively against id or name;
 *   the model against its full id, its id without the `<provider>_` prefix,
 *   or its name.
 * @param {string|null} apiKey - Overrides the provider's configured API key
 *   when truthy.
 * @param {boolean} useAiSdk - Requests the AI SDK path, which is not
 *   implemented in headless mode (prints an error and exits 1).
 * @returns {Promise<void>}
 */
async function runHeadlessBenchmark(benchSpec, apiKey, useAiSdk) {
  // Stored model IDs carry a "<provider>_" prefix (e.g. "zai-code-anth_glm-4.6").
  const stripProviderPrefix = (id) =>
    (id.includes('_') ? id.split('_').slice(1).join('_') : id);

  try {
    // Split on the first ':' only; destructuring `split(':')` would silently
    // discard everything after a second colon in the model part.
    const spec = String(benchSpec ?? '');
    const separatorIndex = spec.indexOf(':');
    const providerSpec = separatorIndex === -1 ? '' : spec.slice(0, separatorIndex).trim();
    const modelName = separatorIndex === -1 ? '' : spec.slice(separatorIndex + 1).trim();

    if (!providerSpec || !modelName) {
      console.error(colorText('Error: Invalid --bench format. Use: provider:model', 'red'));
      console.error(colorText('Example: --bench zai-code-anth:glm-4.6', 'yellow'));
      process.exit(1);
    }

    const config = await loadConfig();
    // Tolerate a config without a providers list instead of crashing with a TypeError.
    const providers = config?.providers ?? [];

    // Find the provider (case-insensitive, by id or name)
    const providerSearch = providerSpec.toLowerCase();
    const provider = providers.find((p) =>
      p.id?.toLowerCase() === providerSearch ||
      p.name?.toLowerCase() === providerSearch
    );

    if (!provider) {
      console.error(colorText(`Error: Provider '${providerSpec}' not found`, 'red'));
      console.error(colorText('Available providers:', 'yellow'));
      providers.forEach((p) => {
        console.error(colorText(` - ${p.id || p.name}`, 'cyan'));
      });
      process.exit(1);
    }

    // Find the model. Accepted spellings:
    //   1. full ID:                 "zai-code-anth_glm-4.6"
    //   2. ID without the prefix:   "glm-4.6"
    //   3. display name:            "GLM-4.6-anth"
    const providerModels = provider.models ?? [];
    const modelSearch = modelName.toLowerCase();
    const model = providerModels.find((m) => {
      const modelIdLower = m.id?.toLowerCase() || '';
      const modelNameLower = m.name?.toLowerCase() || '';
      return (
        modelIdLower === modelSearch ||
        stripProviderPrefix(modelIdLower) === modelSearch ||
        modelNameLower === modelSearch
      );
    });

    if (!model) {
      console.error(colorText(`Error: Model '${modelName}' not found in provider '${provider.name}'`, 'red'));
      console.error(colorText('Available models:', 'yellow'));
      providerModels.forEach((m) => {
        // Show both name and ID (without provider prefix) for clarity
        const idWithoutPrefix = m.id?.includes('_')
          ? m.id.split('_').slice(1).join('_')
          : m.id;
        console.error(colorText(` - ${m.name} (id: ${idWithoutPrefix})`, 'cyan'));
      });
      process.exit(1);
    }

    // The --api-key flag wins over the configured key.
    const finalApiKey = apiKey || provider.apiKey;

    if (!finalApiKey) {
      console.error(colorText(`Error: No API key found for provider '${provider.name}'`, 'red'));
      console.error(colorText('Please provide --api-key flag or configure the provider first', 'yellow'));
      process.exit(1);
    }

    // Model object in the shape the benchmark helper reads
    // (providerName / providerType / providerConfig.{apiKey, baseUrl}).
    const modelConfig = {
      ...model,
      providerName: provider.name,
      providerType: provider.type,
      providerId: provider.id,
      providerConfig: {
        ...provider,
        apiKey: finalApiKey,
        baseUrl: provider.baseUrl || ''
      },
      selected: true
    };

    if (useAiSdk) {
      // TODO: Implement AI SDK silent benchmark
      console.error(colorText('AI SDK headless mode not yet implemented', 'red'));
      process.exit(1);
    }

    const result = await benchmarkSingleModelRest(modelConfig);

    // Output JSON to stdout
    const jsonOutput = {
      provider: provider.name,
      providerId: provider.id,
      model: model.name,
      modelId: model.id,
      method: useAiSdk ? 'ai-sdk' : 'rest-api',
      success: result.success,
      totalTime: result.totalTime,
      totalTimeSeconds: result.totalTime / 1000,
      timeToFirstToken: result.timeToFirstToken,
      timeToFirstTokenSeconds: result.timeToFirstToken / 1000,
      tokensPerSecond: result.tokensPerSecond,
      outputTokens: result.tokenCount,
      promptTokens: result.promptTokens,
      totalTokens: result.totalTokens,
      is_estimated: !!(result.usedEstimateForOutput || result.usedEstimateForInput),
      error: result.error || null
    };

    console.log(JSON.stringify(jsonOutput, null, cliArgs.formatted ? 2 : 0));
    process.exit(result.success ? 0 : 1);
  } catch (error) {
    console.error(colorText('Error: ' + error.message, 'red'));
    if (debugMode) {
      console.error(error.stack);
    }
    process.exit(1);
  }
}
2305
+
2005
2306
  // Start the CLI
2006
- if (import.meta.url === `file://${process.argv[1]}` ||
2007
- process.argv.length === 2 ||
2008
- (process.argv.length === 3 && process.argv[2] === '--debug')) {
2009
-
2010
- // Clean up recent models from main config and migrate to cache on startup
2011
- cleanupRecentModelsFromConfig().then(() => {
2012
- showMainMenu();
2013
- }).catch(() => {
2014
- showMainMenu();
2015
- });
2307
/**
 * Report whether this file is the script Node was launched with (as opposed
 * to being imported by another module).
 *
 * This file is an ES module, where `require` and `module` do not exist, so a
 * bare `require.main === module` throws a ReferenceError at load time. The
 * CommonJS check is therefore only used when both bindings are present
 * (e.g. if this file is ever bundled to CommonJS — unverified from here);
 * otherwise the module URL is compared with the real path of argv[1].
 * realpath is needed because an npm-installed bin is normally a symlink.
 *
 * @returns {Promise<boolean>}
 */
async function isMainModule() {
  if (typeof require !== 'undefined' && typeof module !== 'undefined') {
    return require.main === module;
  }

  const entryPath = process.argv[1];
  if (!entryPath) {
    return false;
  }

  try {
    const { realpathSync } = await import('node:fs');
    const { pathToFileURL } = await import('node:url');
    return import.meta.url === pathToFileURL(realpathSync(entryPath)).href;
  } catch {
    // argv[1] does not point at a readable file: not launched as a script.
    return false;
  }
}

/**
 * CLI entry point: show help, run the headless benchmark, or open the
 * interactive menu, depending on the parsed command line flags. Does nothing
 * when the file is merely imported.
 *
 * @returns {Promise<void>}
 */
async function startCli() {
  if (!(await isMainModule())) {
    return;
  }

  // Check if help flag
  if (cliArgs.help) {
    showHelp();
    process.exit(0);
  }

  // Check if headless benchmark mode
  if (cliArgs.bench) {
    runHeadlessBenchmark(cliArgs.bench, cliArgs.apiKey, cliArgs.useAiSdk);
  } else {
    // Interactive mode: the cleanup is best-effort, the menu opens either way.
    cleanupRecentModelsFromConfig().then(() => {
      showMainMenu();
    }).catch(() => {
      showMainMenu();
    });
  }
}

startCli().catch((error) => {
  console.error('Error: ' + (error?.message ?? error));
  process.exit(1);
});
2017
2326
 
2018
2327
  export { showMainMenu, listProviders, selectModelsCircular, runStreamingBenchmark, loadConfig, saveConfig };