ai-speedometer 2.1.5 → 2.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/ai-speedometer +148 -32
  2. package/package.json +1 -1
@@ -1712,7 +1712,7 @@ var exports_benchmark = {};
1712
1712
  __export(exports_benchmark, {
1713
1713
  benchmarkSingleModelRest: () => benchmarkSingleModelRest
1714
1714
  });
1715
- async function benchmarkSingleModelRest(model) {
1715
+ async function benchmarkSingleModelRest(model, logger) {
1716
1716
  try {
1717
1717
  if (!model.providerConfig || !model.providerConfig.apiKey) {
1718
1718
  throw new Error(`Missing API key for provider ${model.providerName}`);
@@ -1729,6 +1729,7 @@ async function benchmarkSingleModelRest(model) {
1729
1729
  actualModelId = model.name;
1730
1730
  }
1731
1731
  actualModelId = actualModelId.trim();
1732
+ await logger?.logHeader(model.name, model.providerName, model.providerConfig.apiKey);
1732
1733
  const startTime = Date.now();
1733
1734
  let firstTokenTime = null;
1734
1735
  let streamedText = "";
@@ -1762,7 +1763,8 @@ async function benchmarkSingleModelRest(model) {
1762
1763
  messages: [{ role: "user", content: TEST_PROMPT }],
1763
1764
  max_tokens: 500,
1764
1765
  temperature: 0.7,
1765
- stream: true
1766
+ stream: true,
1767
+ stream_options: { include_usage: true }
1766
1768
  };
1767
1769
  if (model.providerType === "google") {
1768
1770
  body["contents"] = [{ parts: [{ text: TEST_PROMPT }] }];
@@ -1770,6 +1772,9 @@ async function benchmarkSingleModelRest(model) {
1770
1772
  delete body["messages"];
1771
1773
  delete body["max_tokens"];
1772
1774
  delete body["stream"];
1775
+ delete body["stream_options"];
1776
+ } else if (model.providerType === "anthropic") {
1777
+ delete body["stream_options"];
1773
1778
  }
1774
1779
  const response = await fetch(url, {
1775
1780
  method: "POST",
@@ -1797,21 +1802,21 @@ async function benchmarkSingleModelRest(model) {
1797
1802
  const reader = response.body.getReader();
1798
1803
  const decoder = new TextDecoder;
1799
1804
  let buffer = "";
1800
- let isFirstChunk = true;
1805
+ let firstParsedTokenTime = null;
1801
1806
  while (true) {
1802
1807
  const { done, value } = await reader.read();
1803
1808
  if (done)
1804
1809
  break;
1805
- if (isFirstChunk && !firstTokenTime) {
1810
+ if (!firstTokenTime)
1806
1811
  firstTokenTime = Date.now();
1807
- isFirstChunk = false;
1808
- }
1809
1812
  buffer += decoder.decode(value, { stream: true });
1810
1813
  const lines = buffer.split(`
1811
1814
  `);
1812
1815
  buffer = lines.pop() || "";
1813
1816
  for (const line of lines) {
1814
1817
  const trimmedLine = line.trim();
1818
+ if (trimmedLine)
1819
+ await logger?.logRaw(trimmedLine);
1815
1820
  if (!trimmedLine)
1816
1821
  continue;
1817
1822
  try {
@@ -1819,10 +1824,12 @@ async function benchmarkSingleModelRest(model) {
1819
1824
  if (trimmedLine.startsWith("data: ")) {
1820
1825
  const jsonStr = trimmedLine.slice(6);
1821
1826
  if (jsonStr === "[DONE]")
1822
- break;
1827
+ continue;
1823
1828
  const chunk = JSON.parse(jsonStr);
1824
1829
  const chunkTyped = chunk;
1825
1830
  if (chunkTyped.type === "content_block_delta" && chunkTyped.delta?.text) {
1831
+ if (!firstParsedTokenTime)
1832
+ firstParsedTokenTime = Date.now();
1826
1833
  streamedText += chunkTyped.delta.text;
1827
1834
  } else if (chunkTyped.type === "message_start" && chunkTyped.message?.usage) {
1828
1835
  inputTokens = chunkTyped.message.usage.input_tokens || 0;
@@ -1837,6 +1844,8 @@ async function benchmarkSingleModelRest(model) {
1837
1844
  } else {
1838
1845
  const chunk = JSON.parse(trimmedLine);
1839
1846
  if (chunk.type === "content_block_delta" && chunk.delta?.text) {
1847
+ if (!firstParsedTokenTime)
1848
+ firstParsedTokenTime = Date.now();
1840
1849
  streamedText += chunk.delta.text;
1841
1850
  } else if (chunk.type === "message_start" && chunk.message?.usage) {
1842
1851
  inputTokens = chunk.message.usage.input_tokens || 0;
@@ -1850,6 +1859,8 @@ async function benchmarkSingleModelRest(model) {
1850
1859
  } else if (model.providerType === "google") {
1851
1860
  const chunk = JSON.parse(trimmedLine);
1852
1861
  if (chunk.candidates?.[0]?.content?.parts?.[0]?.text) {
1862
+ if (!firstParsedTokenTime)
1863
+ firstParsedTokenTime = Date.now();
1853
1864
  streamedText += chunk.candidates[0].content.parts[0].text;
1854
1865
  }
1855
1866
  if (chunk.usageMetadata?.promptTokenCount)
@@ -1860,12 +1871,17 @@ async function benchmarkSingleModelRest(model) {
1860
1871
  if (trimmedLine.startsWith("data: ")) {
1861
1872
  const jsonStr = trimmedLine.slice(6);
1862
1873
  if (jsonStr === "[DONE]")
1863
- break;
1874
+ continue;
1864
1875
  const chunk = JSON.parse(jsonStr);
1865
- if (chunk.choices?.[0]?.delta?.content)
1876
+ if (chunk.choices?.[0]?.delta?.content) {
1877
+ if (!firstParsedTokenTime)
1878
+ firstParsedTokenTime = Date.now();
1866
1879
  streamedText += chunk.choices[0].delta.content;
1867
- else if (chunk.choices?.[0]?.delta?.reasoning)
1880
+ } else if (chunk.choices?.[0]?.delta?.reasoning) {
1881
+ if (!firstParsedTokenTime)
1882
+ firstParsedTokenTime = Date.now();
1868
1883
  streamedText += chunk.choices[0].delta.reasoning;
1884
+ }
1869
1885
  if (chunk.usage?.prompt_tokens)
1870
1886
  inputTokens = chunk.usage.prompt_tokens;
1871
1887
  if (chunk.usage?.completion_tokens)
@@ -1877,15 +1893,18 @@ async function benchmarkSingleModelRest(model) {
1877
1893
  }
1878
1894
  }
1879
1895
  }
1896
+ await logger?.flush();
1880
1897
  const endTime = Date.now();
1881
1898
  const totalTime = endTime - startTime;
1882
- const timeToFirstToken = firstTokenTime ? firstTokenTime - startTime : totalTime;
1899
+ const effectiveFirstToken = firstParsedTokenTime ?? firstTokenTime;
1900
+ const timeToFirstToken = effectiveFirstToken ? effectiveFirstToken - startTime : totalTime;
1901
+ const generationTime = totalTime - timeToFirstToken;
1883
1902
  const usedEstimateForOutput = !outputTokens;
1884
1903
  const usedEstimateForInput = !inputTokens;
1885
1904
  const finalOutputTokens = outputTokens || Math.round(streamedText.length / 4);
1886
1905
  const finalInputTokens = inputTokens || Math.round(TEST_PROMPT.length / 4);
1887
1906
  const totalTokens = finalInputTokens + finalOutputTokens;
1888
- const tokensPerSecond = totalTime > 0 ? finalOutputTokens / totalTime * 1000 : 0;
1907
+ const tokensPerSecond = generationTime > 0 ? finalOutputTokens / generationTime * 1000 : 0;
1889
1908
  return {
1890
1909
  model: model.name,
1891
1910
  provider: model.providerName,
@@ -1900,6 +1919,7 @@ async function benchmarkSingleModelRest(model) {
1900
1919
  success: true
1901
1920
  };
1902
1921
  } catch (error) {
1922
+ await logger?.flush();
1903
1923
  return {
1904
1924
  model: model.name,
1905
1925
  provider: model.providerName,
@@ -2081,6 +2101,66 @@ var init_headless = __esm(() => {
2081
2101
  init_benchmark();
2082
2102
  });
2083
2103
 
2104
+ // ../core/src/logger.ts
2105
+ var exports_logger = {};
2106
+ __export(exports_logger, {
2107
+ getLogPath: () => getLogPath,
2108
+ createRunId: () => createRunId,
2109
+ createBenchLogger: () => createBenchLogger
2110
+ });
2111
+ import { mkdir, appendFile } from "fs/promises";
2112
+ import { homedir as homedir4 } from "os";
2113
+ import { join } from "path";
2114
+ function generateRunId() {
2115
+ const now = new Date;
2116
+ const date = now.toISOString().slice(0, 10);
2117
+ const time = now.toTimeString().slice(0, 8).replace(/:/g, "");
2118
+ const rand = Math.random().toString(16).slice(2, 6);
2119
+ return `${date}_${time}_${rand}`;
2120
+ }
2121
+ function redactSecrets(line, apiKey) {
2122
+ if (!apiKey)
2123
+ return line;
2124
+ return line.split(apiKey).join("[REDACTED]");
2125
+ }
2126
+ function createRunId() {
2127
+ return generateRunId();
2128
+ }
2129
+ function getLogPath(runId) {
2130
+ return join(homedir4(), ".local", "share", "ai-speedometer", "logs", `${runId}.log`);
2131
+ }
2132
+ async function createBenchLogger(runId) {
2133
+ const logPath = getLogPath(runId);
2134
+ const logDir = join(homedir4(), ".local", "share", "ai-speedometer", "logs");
2135
+ await mkdir(logDir, { recursive: true });
2136
+ let currentApiKey = "";
2137
+ let buffer = "";
2138
+ return {
2139
+ logPath,
2140
+ runId,
2141
+ logHeader: async (modelName, providerName, apiKey = "") => {
2142
+ currentApiKey = apiKey;
2143
+ const ts = new Date().toISOString();
2144
+ buffer = `
2145
+ === ${modelName} | ${providerName} | ${ts} ===
2146
+ `;
2147
+ },
2148
+ logRaw: async (line) => {
2149
+ const safe = redactSecrets(line, currentApiKey);
2150
+ buffer += safe + `
2151
+ `;
2152
+ },
2153
+ flush: async () => {
2154
+ buffer += `
2155
+ ` + "=".repeat(60) + `
2156
+ `;
2157
+ await appendFile(logPath, buffer, "utf8");
2158
+ buffer = "";
2159
+ }
2160
+ };
2161
+ }
2162
+ var init_logger = () => {};
2163
+
2084
2164
  // src/tui/context/AppContext.tsx
2085
2165
  import { createContext, useContext, useReducer, useEffect } from "react";
2086
2166
  import { jsxDEV } from "@opentui/react/jsx-dev-runtime";
@@ -2117,12 +2197,23 @@ function appReducer(state, action) {
2117
2197
  };
2118
2198
  case "BENCH_RESET":
2119
2199
  return { ...state, benchResults: [], selectedModels: [] };
2200
+ case "SET_LOG_INFO":
2201
+ return { ...state, logMode: action.logMode, logPath: action.logPath, runId: action.runId };
2120
2202
  default:
2121
2203
  return state;
2122
2204
  }
2123
2205
  }
2124
- function AppProvider({ children }) {
2206
+ function AppProvider({ children, logMode = false }) {
2125
2207
  const [state, dispatch] = useReducer(appReducer, initialState);
2208
+ useEffect(() => {
2209
+ if (logMode) {
2210
+ Promise.resolve().then(() => (init_logger(), exports_logger)).then(({ createRunId: createRunId2, getLogPath: getLogPath2 }) => {
2211
+ const runId = createRunId2();
2212
+ const logPath = getLogPath2(runId);
2213
+ dispatch({ type: "SET_LOG_INFO", logMode: true, logPath, runId });
2214
+ });
2215
+ }
2216
+ }, [logMode]);
2126
2217
  useEffect(() => {
2127
2218
  let cancelled = false;
2128
2219
  async function loadConfig2() {
@@ -2165,7 +2256,10 @@ var init_AppContext = __esm(() => {
2165
2256
  config: null,
2166
2257
  selectedModels: [],
2167
2258
  benchResults: [],
2168
- isLoadingConfig: true
2259
+ isLoadingConfig: true,
2260
+ logMode: false,
2261
+ logPath: null,
2262
+ runId: null
2169
2263
  };
2170
2264
  AppContext = createContext(null);
2171
2265
  });
@@ -2175,7 +2269,7 @@ var package_default;
2175
2269
  var init_package = __esm(() => {
2176
2270
  package_default = {
2177
2271
  name: "ai-speedometer",
2178
- version: "2.1.5",
2272
+ version: "2.1.7",
2179
2273
  description: "A comprehensive CLI tool for benchmarking AI models across multiple providers with parallel execution and professional metrics",
2180
2274
  bin: {
2181
2275
  "ai-speedometer": "dist/ai-speedometer",
@@ -2672,15 +2766,15 @@ function ModelSelectScreen() {
2672
2766
  }
2673
2767
  return;
2674
2768
  }
2675
- if (!searchQuery && (key.sequence === "A" || key.sequence === "a")) {
2769
+ if (!searchQuery && key.sequence === "A") {
2676
2770
  setSelected(new Set(orderedModels.map((m) => m.key)));
2677
2771
  return;
2678
2772
  }
2679
- if (!searchQuery && (key.sequence === "N" || key.sequence === "n")) {
2773
+ if (!searchQuery && key.sequence === "N") {
2680
2774
  setSelected(new Set);
2681
2775
  return;
2682
2776
  }
2683
- if (!searchQuery && recentCount > 0 && (key.sequence === "R" || key.sequence === "r")) {
2777
+ if (!searchQuery && recentCount > 0 && key.sequence === "R") {
2684
2778
  launchBench(orderedModels.slice(0, recentCount));
2685
2779
  return;
2686
2780
  }
@@ -3001,9 +3095,12 @@ function BenchmarkScreen() {
3001
3095
  setModelStates((prev) => prev.map((s) => ({ ...s, status: "running", startedAt: Date.now() })));
3002
3096
  async function runAll() {
3003
3097
  const { benchmarkSingleModelRest: benchmarkSingleModelRest2 } = await Promise.resolve().then(() => (init_benchmark(), exports_benchmark));
3098
+ const logEnabled = state.logMode && !!state.runId;
3099
+ const { createBenchLogger: createBenchLogger2 } = logEnabled ? await Promise.resolve().then(() => (init_logger(), exports_logger)) : { createBenchLogger: null };
3004
3100
  const promises = models.map(async (model) => {
3101
+ const logger = logEnabled && createBenchLogger2 ? await createBenchLogger2(state.runId) : undefined;
3005
3102
  try {
3006
- const result = await benchmarkSingleModelRest2(model);
3103
+ const result = await benchmarkSingleModelRest2(model, logger);
3007
3104
  if (!result.success) {
3008
3105
  const errMsg = result.error ?? "Request failed";
3009
3106
  setModelStates((prev) => prev.map((s) => s.model.id === model.id && s.model.providerId === model.providerId ? { ...s, status: "error", error: errMsg } : s));
@@ -3424,10 +3521,22 @@ function BenchmarkScreen() {
3424
3521
  navigate("main-menu");
3425
3522
  }
3426
3523
  });
3427
- const statusLine = allDone ? /* @__PURE__ */ jsxDEV10("text", {
3428
- fg: "#9ece6a",
3429
- children: "All done! [Enter]/[q] return [\u2191\u2193/PgUp/PgDn/wheel] scroll"
3430
- }, undefined, false, undefined, this) : /* @__PURE__ */ jsxDEV10("box", {
3524
+ const statusLine = allDone ? /* @__PURE__ */ jsxDEV10("box", {
3525
+ flexDirection: "row",
3526
+ children: [
3527
+ /* @__PURE__ */ jsxDEV10("text", {
3528
+ fg: "#9ece6a",
3529
+ children: "All done! [Enter]/[Q] return [\u2191\u2193/PgUp/PgDn/wheel] scroll"
3530
+ }, undefined, false, undefined, this),
3531
+ state.logMode && state.logPath && /* @__PURE__ */ jsxDEV10("text", {
3532
+ fg: "#565f89",
3533
+ children: [
3534
+ " log: ",
3535
+ state.logPath
3536
+ ]
3537
+ }, undefined, true, undefined, this)
3538
+ ]
3539
+ }, undefined, true, undefined, this) : /* @__PURE__ */ jsxDEV10("box", {
3431
3540
  flexDirection: "row",
3432
3541
  children: [
3433
3542
  running.length > 0 && /* @__PURE__ */ jsxDEV10("text", {
@@ -5028,23 +5137,23 @@ function getHints(screen, benchResults) {
5028
5137
  case "main-menu":
5029
5138
  return ["[\u2191\u2193] navigate", "[Enter] select", "[Ctrl+C] quit"];
5030
5139
  case "model-menu":
5031
- return ["[\u2191\u2193] navigate", "[Enter] select", "[q] back"];
5140
+ return ["[\u2191\u2193] navigate", "[Enter] select", "[Q] back"];
5032
5141
  case "model-select":
5033
5142
  return ["[\u2191\u2193] navigate", "[Tab] select", "[Enter] run", "[A] all", "[N] none", "[R] recent", "[Esc] back"];
5034
5143
  case "benchmark": {
5035
5144
  const allDone = benchResults.length > 0 && benchResults.every((r) => r.status === "done" || r.status === "error");
5036
- return allDone ? ["[Enter] back to menu", "[q] back to menu"] : ["Benchmark in progress..."];
5145
+ return allDone ? ["[Enter] back to menu", "[Q] back to menu"] : ["Benchmark in progress..."];
5037
5146
  }
5038
5147
  case "list-providers":
5039
- return ["[\u2191\u2193] scroll", "[q] back"];
5148
+ return ["[\u2191\u2193] scroll", "[Q] back"];
5040
5149
  case "add-verified":
5041
- return ["[\u2191\u2193] navigate", "[Enter] select", "[q] back"];
5150
+ return ["[\u2191\u2193] navigate", "[Enter] select", "[Q] back"];
5042
5151
  case "add-custom":
5043
5152
  return ["[\u2191\u2193] navigate", "[Enter] confirm", "[Esc] back"];
5044
5153
  case "add-models":
5045
5154
  return ["[\u2191\u2193] navigate", "[Enter] add / finish", "[Esc] back"];
5046
5155
  default:
5047
- return ["[q] back"];
5156
+ return ["[Q] back"];
5048
5157
  }
5049
5158
  }
5050
5159
  function ActiveScreen() {
@@ -5096,8 +5205,9 @@ function Shell() {
5096
5205
  ]
5097
5206
  }, undefined, true, undefined, this);
5098
5207
  }
5099
- function App() {
5208
+ function App({ logMode = false }) {
5100
5209
  return /* @__PURE__ */ jsxDEV15(AppProvider, {
5210
+ logMode,
5101
5211
  children: /* @__PURE__ */ jsxDEV15(Shell, {}, undefined, false, undefined, this)
5102
5212
  }, undefined, false, undefined, this);
5103
5213
  }
@@ -5123,7 +5233,7 @@ __export(exports_tui, {
5123
5233
  import { createCliRenderer } from "@opentui/core";
5124
5234
  import { createRoot } from "@opentui/react";
5125
5235
  import { jsxDEV as jsxDEV16 } from "@opentui/react/jsx-dev-runtime";
5126
- async function startTui() {
5236
+ async function startTui(logMode = false) {
5127
5237
  const renderer = await createCliRenderer({
5128
5238
  exitOnCtrlC: false
5129
5239
  });
@@ -5137,7 +5247,9 @@ async function startTui() {
5137
5247
  renderer.destroy();
5138
5248
  process.exit(0);
5139
5249
  });
5140
- createRoot(renderer).render(/* @__PURE__ */ jsxDEV16(App, {}, undefined, false, undefined, this));
5250
+ createRoot(renderer).render(/* @__PURE__ */ jsxDEV16(App, {
5251
+ logMode
5252
+ }, undefined, false, undefined, this));
5141
5253
  }
5142
5254
  var ENABLE_BRACKETED_PASTE = "\x1B[?2004h", DISABLE_BRACKETED_PASTE = "\x1B[?2004l";
5143
5255
  var init_tui = __esm(() => {
@@ -5149,6 +5261,7 @@ function parseCliArgs() {
5149
5261
  const args = process.argv.slice(2);
5150
5262
  const parsed = {
5151
5263
  debug: false,
5264
+ log: false,
5152
5265
  bench: null,
5153
5266
  benchCustom: null,
5154
5267
  apiKey: null,
@@ -5161,6 +5274,8 @@ function parseCliArgs() {
5161
5274
  const arg = args[i];
5162
5275
  if (arg === "--debug")
5163
5276
  parsed.debug = true;
5277
+ else if (arg === "--log")
5278
+ parsed.log = true;
5164
5279
  else if (arg === "--bench")
5165
5280
  parsed.bench = args[++i] ?? null;
5166
5281
  else if (arg === "--bench-custom")
@@ -5193,6 +5308,7 @@ function showHelp() {
5193
5308
  console.log(" --api-key <key> API key for custom provider");
5194
5309
  console.log(" --endpoint-format <format> Endpoint format (default: chat/completions)");
5195
5310
  console.log(" --formatted Format JSON output for human readability");
5311
+ console.log(" --log Log raw SSE streams to ~/.local/share/ai-speedometer/logs/");
5196
5312
  console.log(" --debug Enable debug logging");
5197
5313
  console.log(" --help, -h Show this help message");
5198
5314
  console.log("");
@@ -5210,5 +5326,5 @@ if (cliArgs.help) {
5210
5326
  await runHeadlessBenchmark2(cliArgs);
5211
5327
  } else {
5212
5328
  const { startTui: startTui2 } = await Promise.resolve().then(() => (init_tui(), exports_tui));
5213
- await startTui2();
5329
+ await startTui2(cliArgs.log);
5214
5330
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-speedometer",
3
- "version": "2.1.5",
3
+ "version": "2.1.7",
4
4
  "description": "A comprehensive CLI tool for benchmarking AI models across multiple providers with parallel execution and professional metrics",
5
5
  "bin": {
6
6
  "ai-speedometer": "dist/ai-speedometer",