ai-speedometer-headless 2.1.5 → 2.1.7

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -542,7 +542,7 @@ var getAllAvailableProviders = async (includeAllProviders = false) => {
542
542
  var TEST_PROMPT = `make a 300 word story`;
543
543
 
544
544
  // ../core/src/benchmark.ts
545
- async function benchmarkSingleModelRest(model) {
545
+ async function benchmarkSingleModelRest(model, logger) {
546
546
  try {
547
547
  if (!model.providerConfig || !model.providerConfig.apiKey) {
548
548
  throw new Error(`Missing API key for provider ${model.providerName}`);
@@ -559,6 +559,7 @@ async function benchmarkSingleModelRest(model) {
559
559
  actualModelId = model.name;
560
560
  }
561
561
  actualModelId = actualModelId.trim();
562
+ await logger?.logHeader(model.name, model.providerName, model.providerConfig.apiKey);
562
563
  const startTime = Date.now();
563
564
  let firstTokenTime = null;
564
565
  let streamedText = "";
@@ -592,7 +593,8 @@ async function benchmarkSingleModelRest(model) {
592
593
  messages: [{ role: "user", content: TEST_PROMPT }],
593
594
  max_tokens: 500,
594
595
  temperature: 0.7,
595
- stream: true
596
+ stream: true,
597
+ stream_options: { include_usage: true }
596
598
  };
597
599
  if (model.providerType === "google") {
598
600
  body["contents"] = [{ parts: [{ text: TEST_PROMPT }] }];
@@ -600,6 +602,9 @@ async function benchmarkSingleModelRest(model) {
600
602
  delete body["messages"];
601
603
  delete body["max_tokens"];
602
604
  delete body["stream"];
605
+ delete body["stream_options"];
606
+ } else if (model.providerType === "anthropic") {
607
+ delete body["stream_options"];
603
608
  }
604
609
  const response = await fetch(url, {
605
610
  method: "POST",
@@ -627,21 +632,21 @@ async function benchmarkSingleModelRest(model) {
627
632
  const reader = response.body.getReader();
628
633
  const decoder = new TextDecoder;
629
634
  let buffer = "";
630
- let isFirstChunk = true;
635
+ let firstParsedTokenTime = null;
631
636
  while (true) {
632
637
  const { done, value } = await reader.read();
633
638
  if (done)
634
639
  break;
635
- if (isFirstChunk && !firstTokenTime) {
640
+ if (!firstTokenTime)
636
641
  firstTokenTime = Date.now();
637
- isFirstChunk = false;
638
- }
639
642
  buffer += decoder.decode(value, { stream: true });
640
643
  const lines = buffer.split(`
641
644
  `);
642
645
  buffer = lines.pop() || "";
643
646
  for (const line of lines) {
644
647
  const trimmedLine = line.trim();
648
+ if (trimmedLine)
649
+ await logger?.logRaw(trimmedLine);
645
650
  if (!trimmedLine)
646
651
  continue;
647
652
  try {
@@ -649,10 +654,12 @@ async function benchmarkSingleModelRest(model) {
649
654
  if (trimmedLine.startsWith("data: ")) {
650
655
  const jsonStr = trimmedLine.slice(6);
651
656
  if (jsonStr === "[DONE]")
652
- break;
657
+ continue;
653
658
  const chunk = JSON.parse(jsonStr);
654
659
  const chunkTyped = chunk;
655
660
  if (chunkTyped.type === "content_block_delta" && chunkTyped.delta?.text) {
661
+ if (!firstParsedTokenTime)
662
+ firstParsedTokenTime = Date.now();
656
663
  streamedText += chunkTyped.delta.text;
657
664
  } else if (chunkTyped.type === "message_start" && chunkTyped.message?.usage) {
658
665
  inputTokens = chunkTyped.message.usage.input_tokens || 0;
@@ -667,6 +674,8 @@ async function benchmarkSingleModelRest(model) {
667
674
  } else {
668
675
  const chunk = JSON.parse(trimmedLine);
669
676
  if (chunk.type === "content_block_delta" && chunk.delta?.text) {
677
+ if (!firstParsedTokenTime)
678
+ firstParsedTokenTime = Date.now();
670
679
  streamedText += chunk.delta.text;
671
680
  } else if (chunk.type === "message_start" && chunk.message?.usage) {
672
681
  inputTokens = chunk.message.usage.input_tokens || 0;
@@ -680,6 +689,8 @@ async function benchmarkSingleModelRest(model) {
680
689
  } else if (model.providerType === "google") {
681
690
  const chunk = JSON.parse(trimmedLine);
682
691
  if (chunk.candidates?.[0]?.content?.parts?.[0]?.text) {
692
+ if (!firstParsedTokenTime)
693
+ firstParsedTokenTime = Date.now();
683
694
  streamedText += chunk.candidates[0].content.parts[0].text;
684
695
  }
685
696
  if (chunk.usageMetadata?.promptTokenCount)
@@ -690,12 +701,17 @@ async function benchmarkSingleModelRest(model) {
690
701
  if (trimmedLine.startsWith("data: ")) {
691
702
  const jsonStr = trimmedLine.slice(6);
692
703
  if (jsonStr === "[DONE]")
693
- break;
704
+ continue;
694
705
  const chunk = JSON.parse(jsonStr);
695
- if (chunk.choices?.[0]?.delta?.content)
706
+ if (chunk.choices?.[0]?.delta?.content) {
707
+ if (!firstParsedTokenTime)
708
+ firstParsedTokenTime = Date.now();
696
709
  streamedText += chunk.choices[0].delta.content;
697
- else if (chunk.choices?.[0]?.delta?.reasoning)
710
+ } else if (chunk.choices?.[0]?.delta?.reasoning) {
711
+ if (!firstParsedTokenTime)
712
+ firstParsedTokenTime = Date.now();
698
713
  streamedText += chunk.choices[0].delta.reasoning;
714
+ }
699
715
  if (chunk.usage?.prompt_tokens)
700
716
  inputTokens = chunk.usage.prompt_tokens;
701
717
  if (chunk.usage?.completion_tokens)
@@ -707,15 +723,18 @@ async function benchmarkSingleModelRest(model) {
707
723
  }
708
724
  }
709
725
  }
726
+ await logger?.flush();
710
727
  const endTime = Date.now();
711
728
  const totalTime = endTime - startTime;
712
- const timeToFirstToken = firstTokenTime ? firstTokenTime - startTime : totalTime;
729
+ const effectiveFirstToken = firstParsedTokenTime ?? firstTokenTime;
730
+ const timeToFirstToken = effectiveFirstToken ? effectiveFirstToken - startTime : totalTime;
731
+ const generationTime = totalTime - timeToFirstToken;
713
732
  const usedEstimateForOutput = !outputTokens;
714
733
  const usedEstimateForInput = !inputTokens;
715
734
  const finalOutputTokens = outputTokens || Math.round(streamedText.length / 4);
716
735
  const finalInputTokens = inputTokens || Math.round(TEST_PROMPT.length / 4);
717
736
  const totalTokens = finalInputTokens + finalOutputTokens;
718
- const tokensPerSecond = totalTime > 0 ? finalOutputTokens / totalTime * 1000 : 0;
737
+ const tokensPerSecond = generationTime > 0 ? finalOutputTokens / generationTime * 1000 : 0;
719
738
  return {
720
739
  model: model.name,
721
740
  provider: model.providerName,
@@ -730,6 +749,7 @@ async function benchmarkSingleModelRest(model) {
730
749
  success: true
731
750
  };
732
751
  } catch (error) {
752
+ await logger?.flush();
733
753
  return {
734
754
  model: model.name,
735
755
  provider: model.providerName,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-speedometer-headless",
3
- "version": "2.1.5",
3
+ "version": "2.1.7",
4
4
  "description": "Headless CLI for benchmarking AI models — runs on Node.js and Bun, no TUI dependencies",
5
5
  "bin": {
6
6
  "ai-speedometer-headless": "dist/ai-speedometer-headless"