npm - ollama-bench - Versions diffs - 1.0.4 → 1.1.1 - Mend

ollama-bench 1.0.4 → 1.1.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/README.md CHANGED Viewed

@@ -1,57 +1,46 @@
 # Ollama-bench
-A command-line tool to benchmark and compare the performance of Ollama language models. Measures tokens per second and total processing time.
-## Setup
-### 1. Install Ollama
-Choose your platform:
-- **Windows:** [Download Installer](https://ollama.com/download/OllamaSetup.exe)
-- **macOS:** [Download App](https://ollama.com/download/Ollama-darwin.zip)
-- **Linux:** Run:
-  ```bash
-  curl -fsSL https://ollama.com/install.sh | sh
-  ```
-- **Docker:** Pull and run:
-  ```bash
-  docker pull ollama/ollama
-  docker run -d -v ollama:/root/.ollama -p 11434:11434 ollama/ollama
-  ```
-### 2. Start Ollama Server
-Before running any benchmarks, make sure the Ollama server is running:
-```bash
-# On Linux/macOS terminal or Windows PowerShell
-ollama serve
-```
+Minimal CLI tool to benchmark Ollama models with detailed phase analysis. Zero runtime dependencies.
-For Windows users, you can also run Ollama from the system tray after installation.
+## Features
-### 3. Install Benchmark Tool
-Install globally:
-```bash
-npm install -g ollama-bench
-```
-Or run directly with npx:
-```bash
-npx ollama-bench <model1> [model2] [model3]
-```
+- Phase-by-phase performance breakdown
+- Precise timing measurements
+- Works with npm, pnpm, yarn, and bun
+## Quick Start
-## Usage
 ```bash
-# Using global installation
-ollama-bench smollm:135m qwen2.5:0.5b
+# Run directly (no installation)
+npx ollama-bench qwen2.5:0.5b llama3.2:1b
-# Using npx (no installation required)
-npx ollama-bench smollm:135m qwen2.5:0.5b
+# Or with other package managers
+bunx ollama-bench qwen2.5:0.5b
+pnpm dlx ollama-bench qwen2.5:0.5b
 ```
-## Troubleshooting
+## Prerequisites
+1. **Install Ollama** - [ollama.com/download](https://ollama.com/download)
+2. **Start Ollama server** - Run `ollama serve`
+## Benchmark Phases
+Each benchmark measures three distinct phases:
+**Phase 1: Model Loading** (Loading weights into memory)
+- Time to load model from disk into RAM
+- Hardware-dependent, very consistent
+**Phase 2: Prompt Processing** (Encoding input)
+- Time to encode and process your input prompt
+- Fast, scales with prompt length
+**Phase 3: Response Generation** (Creating output)
+- Time to generate the actual response
+- Most important metric for user-facing performance
+- Varies with content complexity
-If you encounter errors, check:
-1. Is the Ollama server running? (`ollama serve`)
-2. Can you access `http://localhost:11434`?
-3. Do you have enough RAM for your chosen models?
 ## Available Models

package/bun.lock ADDED Viewed

@@ -0,0 +1,27 @@
+{
+  "lockfileVersion": 1,
+  "configVersion": 1,
+  "workspaces": {
+    "": {
+      "name": "ollama-bench",
+      "dependencies": {
+        "ollama": "latest",
+      },
+      "devDependencies": {
+        "@types/node": "^20.19.25",
+        "typescript": "^5.9.3",
+      },
+    },
+  },
+  "packages": {
+    "@types/node": ["@types/node@20.19.25", "", { "dependencies": { "undici-types": "~6.21.0" } }, "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ=="],
+    "ollama": ["ollama@0.6.3", "", { "dependencies": { "whatwg-fetch": "^3.6.20" } }, "sha512-KEWEhIqE5wtfzEIZbDCLH51VFZ6Z3ZSa6sIOg/E/tBV8S51flyqBOXi+bRxlOYKDf8i327zG9eSTb8IJxvm3Zg=="],
+    "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],
+    "undici-types": ["undici-types@6.21.0", "", {}, "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ=="],
+    "whatwg-fetch": ["whatwg-fetch@3.6.20", "", {}, "sha512-EqhiFU6daOA8kpjOWTL0olhVOF3i7OrFzSYiGsEMB8GcXS+RrzauAERX65xMeNWVqxA6HXH2m69Z9LaKKdisfg=="],
+  }
+}

package/dist/index.js CHANGED Viewed

@@ -12,18 +12,6 @@ const colors = {
     magenta: '\x1b[35m',
     blue: '\x1b[34m',
 };
-/**
- * Object containing emoji characters for various status indicators.
- */
-const emojis = {
-    rocket: '🚀',
-    check: '✅',
-    error: '❌',
-    hourglass: '⏳',
-    star: '⭐',
-    trophy: '🏆',
-    gear: '⚙️',
-};
 /**
  * Applies color to the given text.
  * @param text - The text to colorize.
@@ -40,14 +28,14 @@ function colorize(text, color) {
  * @returns An interval ID for the animation.
  */
 function createLoadingAnimation(operation, model) {
-    const frames = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
+    const frames = ['|', '/', '-', '\\'];
     let i = 0;
     let dots = 0;
     return setInterval(() => {
         const frame = frames[i];
         const dotString = '.'.repeat(dots);
         const operationText = colorize(`${operation} ${model}${dotString}`, 'blue');
-        process.stdout.write(`\r${frame} ${emojis.gear} ${operationText}`.padEnd(50));
+        process.stdout.write(`\r${frame} ${operationText}`.padEnd(50));
         i = (i + 1) % frames.length;
         dots = (dots + 1) % 4;
     }, 100);
@@ -57,7 +45,7 @@ function createLoadingAnimation(operation, model) {
  * @param model - The name of the model to pull.
  */
 async function pullModel(model) {
-    console.log(colorize(`${emojis.rocket} Initiating pull for ${model}...`, 'yellow'));
+    console.log(colorize(`Initiating pull for ${model}...`, 'yellow'));
     const loadingAnimation = createLoadingAnimation('Pulling', model);
     try {
         const start = performance.now();
@@ -67,14 +55,14 @@ async function pullModel(model) {
                 clearInterval(loadingAnimation);
                 const end = performance.now();
                 const duration = (end - start) / 1000;
-                console.log(`\r${colorize(`${emojis.check} Successfully pulled ${model} in ${duration.toFixed(2)} seconds`, 'green')}     `);
+                console.log(`\r${colorize(`Successfully pulled ${model} in ${duration.toFixed(2)} seconds`, 'green')}     `);
                 return;
             }
         }
     }
     catch (error) {
         clearInterval(loadingAnimation);
-        console.log(`\r${colorize(`${emojis.error} Error pulling ${model}: ${error.message}`, 'red')}     `);
+        console.log(`\r${colorize(`Error pulling ${model}: ${error.message}`, 'red')}     `);
     }
 }
 /**
@@ -84,8 +72,9 @@ async function pullModel(model) {
  */
 async function benchmarkModel(model) {
     const prompt = "Explain the theory of relativity in simple terms.";
-    console.log(colorize(`${emojis.hourglass} Initiating benchmark for ${model}...`, 'cyan'));
-    const loadingAnimation = createLoadingAnimation('Benchmarking', model);
+    console.log(colorize(`\nBenchmarking ${model}`, 'cyan'));
+    console.log(colorize('─'.repeat(50), 'cyan'));
+    const loadingAnimation = createLoadingAnimation('Running benchmark', model);
     try {
         const response = await ollama.generate({
             model,
@@ -93,19 +82,57 @@ async function benchmarkModel(model) {
             stream: false,
         });
         clearInterval(loadingAnimation);
-        const totalDuration = response.total_duration / 1e9; // Convert nanoseconds to seconds
-        const tokensPerSecond = response.eval_count / (response.eval_duration / 1e9);
-        console.log(`\r${colorize(`${emojis.star} Benchmark results for ${model}:`, 'cyan')}     `);
-        console.log(colorize(`  Total time: ${totalDuration.toFixed(2)} seconds`, 'yellow'));
-        console.log(colorize(`  Tokens generated: ${response.eval_count}`, 'yellow'));
-        console.log(colorize(`  Tokens per second: ${tokensPerSecond.toFixed(2)}`, 'yellow'));
+        process.stdout.write('\r' + ' '.repeat(50) + '\r');
+        // Calculate phase timings
+        const loadTime = response.load_duration / 1e9;
+        const promptEvalTime = response.prompt_eval_duration / 1e9;
+        const generationTime = response.eval_duration / 1e9;
+        const totalTime = response.total_duration / 1e9;
+        const tokensPerSecond = response.eval_count / generationTime;
+        // Calculate percentages
+        const loadPercent = (loadTime / totalTime * 100).toFixed(1);
+        const promptPercent = (promptEvalTime / totalTime * 100).toFixed(1);
+        const genPercent = (generationTime / totalTime * 100).toFixed(1);
+        // Display phases
+        console.log(colorize('Phase 1: Model Loading (Loading weights into memory)', 'yellow'));
+        console.log(colorize(`  Time: ${loadTime.toFixed(2)}s (${loadPercent}% of total)`, 'yellow'));
+        console.log();
+        console.log(colorize('Phase 2: Prompt Processing (Encoding input)', 'yellow'));
+        console.log(colorize(`  Tokens: ${response.prompt_eval_count}`, 'yellow'));
+        console.log(colorize(`  Time: ${promptEvalTime.toFixed(2)}s (${promptPercent}% of total)`, 'yellow'));
+        console.log(colorize(`  Speed: ${(response.prompt_eval_count / promptEvalTime).toFixed(2)} tokens/s`, 'yellow'));
+        console.log();
+        console.log(colorize('Phase 3: Response Generation (Creating output)', 'yellow'));
+        console.log(colorize(`  Tokens: ${response.eval_count}`, 'yellow'));
+        console.log(colorize(`  Time: ${generationTime.toFixed(2)}s (${genPercent}% of total)`, 'yellow'));
+        console.log(colorize(`  Speed: ${tokensPerSecond.toFixed(2)} tokens/s`, 'yellow'));
         console.log();
-        return { model, tokensPerSecond };
+        console.log(colorize('Summary', 'green'));
+        console.log(colorize(`  Total time: ${totalTime.toFixed(2)}s`, 'green'));
+        console.log(colorize(`  Generation speed: ${tokensPerSecond.toFixed(2)} tokens/s`, 'green'));
+        console.log();
+        return {
+            model,
+            tokensPerSecond,
+            loadTime,
+            promptEvalTime,
+            generationTime,
+            totalTime
+        };
     }
     catch (error) {
         clearInterval(loadingAnimation);
-        console.log(`\r${colorize(`${emojis.error} Error benchmarking ${model}: ${error.message}`, 'red')}     `);
-        return { model, tokensPerSecond: 0 };
+        process.stdout.write('\r' + ' '.repeat(50) + '\r');
+        console.log(colorize(`Error benchmarking ${model}: ${error.message}`, 'red'));
+        console.log();
+        return {
+            model,
+            tokensPerSecond: 0,
+            loadTime: 0,
+            promptEvalTime: 0,
+            generationTime: 0,
+            totalTime: 0
+        };
     }
 }
 /**
@@ -114,17 +141,20 @@ async function benchmarkModel(model) {
 export async function main() {
     const models = process.argv.slice(2);
     if (models.length === 0) {
-        console.log(colorize(`${emojis.error} Error: No models provided. Please specify at least one model.`, 'red'));
+        console.log(colorize(`Error: No models provided. Please specify at least one model.`, 'red'));
         process.exit(1);
     }
-    console.log(colorize(`${emojis.rocket} Ollama Benchmark Script`, 'cyan'));
-    console.log(colorize("=======================", 'cyan'));
+    console.log(colorize(`Ollama Benchmark Script`, 'cyan'));
+    console.log(colorize('═'.repeat(50), 'cyan'));
     // Pull models
+    console.log(colorize('\nPhase: Model Preparation', 'cyan'));
+    console.log(colorize('─'.repeat(50), 'cyan'));
     for (const model of models) {
         await pullModel(model);
     }
-    console.log();
     // Benchmark models
+    console.log(colorize('\nPhase: Performance Testing', 'cyan'));
+    console.log(colorize('─'.repeat(50), 'cyan'));
     const results = [];
     for (const model of models) {
         const result = await benchmarkModel(model);
@@ -132,8 +162,11 @@ export async function main() {
     }
     // Find the best performing model
     const bestModel = results.reduce((best, current) => current.tokensPerSecond > best.tokensPerSecond ? current : best);
-    console.log(colorize(`${emojis.trophy} Best performing model:`, 'magenta'));
-    console.log(colorize(`  ${bestModel.model} with ${bestModel.tokensPerSecond.toFixed(2)} tokens/second`, 'magenta'));
+    console.log(colorize('Final Results', 'magenta'));
+    console.log(colorize('═'.repeat(50), 'magenta'));
+    console.log(colorize(`Best performing model: ${bestModel.model}`, 'magenta'));
+    console.log(colorize(`Generation speed: ${bestModel.tokensPerSecond.toFixed(2)} tokens/s`, 'magenta'));
+    console.log(colorize(`Total time: ${bestModel.totalTime.toFixed(2)}s`, 'magenta'));
 }
 if (import.meta.url === import.meta.resolve(process.argv[1])) {
     main().catch(error => {

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
     "name": "ollama-bench",
-    "version": "1.0.4",
-    "description": "A CLI tool to benchmark Ollama models performance",
+    "version": "1.1.1",
+    "description": "Minimal CLI tool to benchmark Ollama models with detailed phase analysis. Zero runtime dependencies.",
     "main": "dist/index.js",
     "type": "module",
     "bin": {
@@ -12,7 +12,7 @@
         "start": "node dist/index.js",
         "dev": "tsc && node dist/index.js"
     },
-    "keywords": ["ollama", "benchmark", "ai", "models", "cli"],
+    "keywords": ["ollama", "benchmark", "ai", "models", "cli", "performance", "llm", "testing"],
     "author": "dalist1",
     "license": "MIT",
     "repository": {
@@ -27,8 +27,8 @@
         "ollama": "latest"
     },
     "devDependencies": {
-        "@types/node": "^20.17.5",
-        "typescript": "^5.6.3"
+        "@types/node": "^20.19.25",
+        "typescript": "^5.9.3"
     },
     "engines": {
         "node": ">=14.0.0"

package/src/index.ts CHANGED Viewed

@@ -7,11 +7,6 @@ import ollama from 'ollama';
  */
 type Color = 'reset' | 'green' | 'yellow' | 'red' | 'cyan' | 'magenta' | 'blue';
-/**
- * Represents the available emoji keys.
- */
-type Emoji = 'rocket' | 'check' | 'error' | 'hourglass' | 'star' | 'trophy' | 'gear';
 /**
  * Object containing ANSI color codes for text coloring.
  */
@@ -25,19 +20,6 @@ const colors: Record<Color, string> = {
   blue: '\x1b[34m',
 };
-/**
- * Object containing emoji characters for various status indicators.
- */
-const emojis: Record<Emoji, string> = {
-  rocket: '🚀',
-  check: '✅',
-  error: '❌',
-  hourglass: '⏳',
-  star: '⭐',
-  trophy: '🏆',
-  gear: '⚙️',
-};
 /**
  * Applies color to the given text.
  * @param text - The text to colorize.
@@ -55,14 +37,14 @@ function colorize(text: string, color: Color): string {
  * @returns An interval ID for the animation.
  */
 function createLoadingAnimation(operation: string, model: string): NodeJS.Timeout {
-  const frames: string[] = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
+  const frames: string[] = ['|', '/', '-', '\\'];
   let i = 0;
   let dots = 0;
   return setInterval(() => {
     const frame = frames[i];
     const dotString = '.'.repeat(dots);
     const operationText = colorize(`${operation} ${model}${dotString}`, 'blue');
-    process.stdout.write(`\r${frame} ${emojis.gear} ${operationText}`.padEnd(50));
+    process.stdout.write(`\r${frame} ${operationText}`.padEnd(50));
     i = (i + 1) % frames.length;
     dots = (dots + 1) % 4;
   }, 100);
@@ -73,7 +55,7 @@ function createLoadingAnimation(operation: string, model: string): NodeJS.Timeou
  * @param model - The name of the model to pull.
  */
 async function pullModel(model: string): Promise<void> {
-  console.log(colorize(`${emojis.rocket} Initiating pull for ${model}...`, 'yellow'));
+  console.log(colorize(`Initiating pull for ${model}...`, 'yellow'));
   const loadingAnimation = createLoadingAnimation('Pulling', model);
   try {
     const start = performance.now();
@@ -83,13 +65,13 @@ async function pullModel(model: string): Promise<void> {
         clearInterval(loadingAnimation);
         const end = performance.now();
         const duration = (end - start) / 1000;
-        console.log(`\r${colorize(`${emojis.check} Successfully pulled ${model} in ${duration.toFixed(2)} seconds`, 'green')}     `);
+        console.log(`\r${colorize(`Successfully pulled ${model} in ${duration.toFixed(2)} seconds`, 'green')}     `);
         return;
       }
     }
   } catch (error) {
     clearInterval(loadingAnimation);
-    console.log(`\r${colorize(`${emojis.error} Error pulling ${model}: ${(error as Error).message}`, 'red')}     `);
+    console.log(`\r${colorize(`Error pulling ${model}: ${(error as Error).message}`, 'red')}     `);
   }
 }
@@ -99,6 +81,10 @@ async function pullModel(model: string): Promise<void> {
 interface BenchmarkResult {
   model: string;
   tokensPerSecond: number;
+  loadTime: number;
+  promptEvalTime: number;
+  generationTime: number;
+  totalTime: number;
 }
 /**
@@ -108,8 +94,10 @@ interface BenchmarkResult {
  */
 async function benchmarkModel(model: string): Promise<BenchmarkResult> {
   const prompt = "Explain the theory of relativity in simple terms.";
-  console.log(colorize(`${emojis.hourglass} Initiating benchmark for ${model}...`, 'cyan'));
-  const loadingAnimation = createLoadingAnimation('Benchmarking', model);
+  console.log(colorize(`\nBenchmarking ${model}`, 'cyan'));
+  console.log(colorize('─'.repeat(50), 'cyan'));
+  const loadingAnimation = createLoadingAnimation('Running benchmark', model);
   try {
     const response = await ollama.generate({
@@ -119,20 +107,63 @@ async function benchmarkModel(model: string): Promise<BenchmarkResult> {
     });
     clearInterval(loadingAnimation);
-    const totalDuration = response.total_duration / 1e9; // Convert nanoseconds to seconds
-    const tokensPerSecond = response.eval_count / (response.eval_duration / 1e9);
+    process.stdout.write('\r' + ' '.repeat(50) + '\r');
+    // Calculate phase timings
+    const loadTime = response.load_duration / 1e9;
+    const promptEvalTime = response.prompt_eval_duration / 1e9;
+    const generationTime = response.eval_duration / 1e9;
+    const totalTime = response.total_duration / 1e9;
+    const tokensPerSecond = response.eval_count / generationTime;
+    // Calculate percentages
+    const loadPercent = (loadTime / totalTime * 100).toFixed(1);
+    const promptPercent = (promptEvalTime / totalTime * 100).toFixed(1);
+    const genPercent = (generationTime / totalTime * 100).toFixed(1);
+    // Display phases
+    console.log(colorize('Phase 1: Model Loading (Loading weights into memory)', 'yellow'));
+    console.log(colorize(`  Time: ${loadTime.toFixed(2)}s (${loadPercent}% of total)`, 'yellow'));
+    console.log();
-    console.log(`\r${colorize(`${emojis.star} Benchmark results for ${model}:`, 'cyan')}     `);
-    console.log(colorize(`  Total time: ${totalDuration.toFixed(2)} seconds`, 'yellow'));
-    console.log(colorize(`  Tokens generated: ${response.eval_count}`, 'yellow'));
-    console.log(colorize(`  Tokens per second: ${tokensPerSecond.toFixed(2)}`, 'yellow'));
+    console.log(colorize('Phase 2: Prompt Processing (Encoding input)', 'yellow'));
+    console.log(colorize(`  Tokens: ${response.prompt_eval_count}`, 'yellow'));
+    console.log(colorize(`  Time: ${promptEvalTime.toFixed(2)}s (${promptPercent}% of total)`, 'yellow'));
+    console.log(colorize(`  Speed: ${(response.prompt_eval_count / promptEvalTime).toFixed(2)} tokens/s`, 'yellow'));
     console.log();
-    return { model, tokensPerSecond };
+    console.log(colorize('Phase 3: Response Generation (Creating output)', 'yellow'));
+    console.log(colorize(`  Tokens: ${response.eval_count}`, 'yellow'));
+    console.log(colorize(`  Time: ${generationTime.toFixed(2)}s (${genPercent}% of total)`, 'yellow'));
+    console.log(colorize(`  Speed: ${tokensPerSecond.toFixed(2)} tokens/s`, 'yellow'));
+    console.log();
+    console.log(colorize('Summary', 'green'));
+    console.log(colorize(`  Total time: ${totalTime.toFixed(2)}s`, 'green'));
+    console.log(colorize(`  Generation speed: ${tokensPerSecond.toFixed(2)} tokens/s`, 'green'));
+    console.log();
+    return {
+      model,
+      tokensPerSecond,
+      loadTime,
+      promptEvalTime,
+      generationTime,
+      totalTime
+    };
   } catch (error) {
     clearInterval(loadingAnimation);
-    console.log(`\r${colorize(`${emojis.error} Error benchmarking ${model}: ${(error as Error).message}`, 'red')}     `);
-    return { model, tokensPerSecond: 0 };
+    process.stdout.write('\r' + ' '.repeat(50) + '\r');
+    console.log(colorize(`Error benchmarking ${model}: ${(error as Error).message}`, 'red'));
+    console.log();
+    return {
+      model,
+      tokensPerSecond: 0,
+      loadTime: 0,
+      promptEvalTime: 0,
+      generationTime: 0,
+      totalTime: 0
+    };
   }
 }
@@ -143,21 +174,23 @@ export async function main(): Promise<void> {
   const models = process.argv.slice(2);
   if (models.length === 0) {
-    console.log(colorize(`${emojis.error} Error: No models provided. Please specify at least one model.`, 'red'));
+    console.log(colorize(`Error: No models provided. Please specify at least one model.`, 'red'));
     process.exit(1);
   }
-  console.log(colorize(`${emojis.rocket} Ollama Benchmark Script`, 'cyan'));
-  console.log(colorize("=======================", 'cyan'));
+  console.log(colorize(`Ollama Benchmark Script`, 'cyan'));
+  console.log(colorize('═'.repeat(50), 'cyan'));
   // Pull models
+  console.log(colorize('\nPhase: Model Preparation', 'cyan'));
+  console.log(colorize('─'.repeat(50), 'cyan'));
   for (const model of models) {
     await pullModel(model);
   }
-  console.log();
   // Benchmark models
+  console.log(colorize('\nPhase: Performance Testing', 'cyan'));
+  console.log(colorize('─'.repeat(50), 'cyan'));
   const results: BenchmarkResult[] = [];
   for (const model of models) {
     const result = await benchmarkModel(model);
@@ -165,12 +198,15 @@ export async function main(): Promise<void> {
   }
   // Find the best performing model
-  const bestModel = results.reduce((best, current) =>
+  const bestModel = results.reduce((best, current) =>
     current.tokensPerSecond > best.tokensPerSecond ? current : best
   );
-  console.log(colorize(`${emojis.trophy} Best performing model:`, 'magenta'));
-  console.log(colorize(`  ${bestModel.model} with ${bestModel.tokensPerSecond.toFixed(2)} tokens/second`, 'magenta'));
+  console.log(colorize('Final Results', 'magenta'));
+  console.log(colorize('═'.repeat(50), 'magenta'));
+  console.log(colorize(`Best performing model: ${bestModel.model}`, 'magenta'));
+  console.log(colorize(`Generation speed: ${bestModel.tokensPerSecond.toFixed(2)} tokens/s`, 'magenta'));
+  console.log(colorize(`Total time: ${bestModel.totalTime.toFixed(2)}s`, 'magenta'));
 }
 if (import.meta.url === import.meta.resolve(process.argv[1])) {