ollama-bench 1.0.4 → 1.1.1

This diff shows the changes between two publicly released versions of the package, as they appear in the public registry they were published to. It is provided for informational purposes only.
package/README.md CHANGED
@@ -1,57 +1,46 @@
1
1
  # Ollama-bench
2
- A command-line tool to benchmark and compare the performance of Ollama language models. Measures tokens per second and total processing time.
3
-
4
- ## Setup
5
-
6
- ### 1. Install Ollama
7
- Choose your platform:
8
- - **Windows:** [Download Installer](https://ollama.com/download/OllamaSetup.exe)
9
- - **macOS:** [Download App](https://ollama.com/download/Ollama-darwin.zip)
10
- - **Linux:** Run:
11
- ```bash
12
- curl -fsSL https://ollama.com/install.sh | sh
13
- ```
14
- - **Docker:** Pull and run:
15
- ```bash
16
- docker pull ollama/ollama
17
- docker run -d -v ollama:/root/.ollama -p 11434:11434 ollama/ollama
18
- ```
19
-
20
- ### 2. Start Ollama Server
21
- Before running any benchmarks, make sure the Ollama server is running:
22
2
 
23
- ```bash
24
- # On Linux/macOS terminal or Windows PowerShell
25
- ollama serve
26
- ```
3
+ Minimal CLI tool to benchmark Ollama models with detailed phase analysis. Its only runtime dependency is the `ollama` npm package.
27
4
 
28
- For Windows users, you can also run Ollama from the system tray after installation.
5
+ ## Features
29
6
 
30
- ### 3. Install Benchmark Tool
31
- Install globally:
32
- ```bash
33
- npm install -g ollama-bench
34
- ```
35
- Or run directly with npx:
36
- ```bash
37
- npx ollama-bench <model1> [model2] [model3]
38
- ```
7
+ - Phase-by-phase performance breakdown
8
+ - Precise timing measurements
9
+ - Works with npm, pnpm, yarn, and bun
10
+
11
+ ## Quick Start
39
12
 
40
- ## Usage
41
13
  ```bash
42
- # Using global installation
43
- ollama-bench smollm:135m qwen2.5:0.5b
14
+ # Run directly (no installation)
15
+ npx ollama-bench qwen2.5:0.5b llama3.2:1b
44
16
 
45
- # Using npx (no installation required)
46
- npx ollama-bench smollm:135m qwen2.5:0.5b
17
+ # Or with other package managers
18
+ bunx ollama-bench qwen2.5:0.5b
19
+ pnpm dlx ollama-bench qwen2.5:0.5b
47
20
  ```
48
21
 
49
- ## Troubleshooting
22
+ ## Prerequisites
23
+
24
+ 1. **Install Ollama** - [ollama.com/download](https://ollama.com/download)
25
+ 2. **Start Ollama server** - Run `ollama serve`
26
+
27
+ ## Benchmark Phases
28
+
29
+ Each benchmark measures three distinct phases:
30
+
31
+ **Phase 1: Model Loading** (Loading weights into memory)
32
+ - Time to load model from disk into RAM
33
+ - Hardware-dependent, very consistent
34
+
35
+ **Phase 2: Prompt Processing** (Encoding input)
36
+ - Time to encode and process your input prompt
37
+ - Fast, scales with prompt length
38
+
39
+ **Phase 3: Response Generation** (Creating output)
40
+ - Time to generate the actual response
41
+ - Most important metric for user-facing performance
42
+ - Varies with content complexity
50
43
 
51
- If you encounter errors, check:
52
- 1. Is the Ollama server running? (`ollama serve`)
53
- 2. Can you access `http://localhost:11434`?
54
- 3. Do you have enough RAM for your chosen models?
55
44
 
56
45
  ## Available Models
57
46
 
package/bun.lock ADDED
@@ -0,0 +1,27 @@
1
+ {
2
+ "lockfileVersion": 1,
3
+ "configVersion": 1,
4
+ "workspaces": {
5
+ "": {
6
+ "name": "ollama-bench",
7
+ "dependencies": {
8
+ "ollama": "latest",
9
+ },
10
+ "devDependencies": {
11
+ "@types/node": "^20.19.25",
12
+ "typescript": "^5.9.3",
13
+ },
14
+ },
15
+ },
16
+ "packages": {
17
+ "@types/node": ["@types/node@20.19.25", "", { "dependencies": { "undici-types": "~6.21.0" } }, "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ=="],
18
+
19
+ "ollama": ["ollama@0.6.3", "", { "dependencies": { "whatwg-fetch": "^3.6.20" } }, "sha512-KEWEhIqE5wtfzEIZbDCLH51VFZ6Z3ZSa6sIOg/E/tBV8S51flyqBOXi+bRxlOYKDf8i327zG9eSTb8IJxvm3Zg=="],
20
+
21
+ "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],
22
+
23
+ "undici-types": ["undici-types@6.21.0", "", {}, "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ=="],
24
+
25
+ "whatwg-fetch": ["whatwg-fetch@3.6.20", "", {}, "sha512-EqhiFU6daOA8kpjOWTL0olhVOF3i7OrFzSYiGsEMB8GcXS+RrzauAERX65xMeNWVqxA6HXH2m69Z9LaKKdisfg=="],
26
+ }
27
+ }
package/dist/index.js CHANGED
@@ -12,18 +12,6 @@ const colors = {
12
12
  magenta: '\x1b[35m',
13
13
  blue: '\x1b[34m',
14
14
  };
15
- /**
16
- * Object containing emoji characters for various status indicators.
17
- */
18
- const emojis = {
19
- rocket: '🚀',
20
- check: '✅',
21
- error: '❌',
22
- hourglass: '⏳',
23
- star: '⭐',
24
- trophy: '🏆',
25
- gear: '⚙️',
26
- };
27
15
  /**
28
16
  * Applies color to the given text.
29
17
  * @param text - The text to colorize.
@@ -40,14 +28,14 @@ function colorize(text, color) {
40
28
  * @returns An interval ID for the animation.
41
29
  */
42
30
  function createLoadingAnimation(operation, model) {
43
- const frames = ['', '', '', '', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
31
+ const frames = ['|', '/', '-', '\\'];
44
32
  let i = 0;
45
33
  let dots = 0;
46
34
  return setInterval(() => {
47
35
  const frame = frames[i];
48
36
  const dotString = '.'.repeat(dots);
49
37
  const operationText = colorize(`${operation} ${model}${dotString}`, 'blue');
50
- process.stdout.write(`\r${frame} ${emojis.gear} ${operationText}`.padEnd(50));
38
+ process.stdout.write(`\r${frame} ${operationText}`.padEnd(50));
51
39
  i = (i + 1) % frames.length;
52
40
  dots = (dots + 1) % 4;
53
41
  }, 100);
@@ -57,7 +45,7 @@ function createLoadingAnimation(operation, model) {
57
45
  * @param model - The name of the model to pull.
58
46
  */
59
47
  async function pullModel(model) {
60
- console.log(colorize(`${emojis.rocket} Initiating pull for ${model}...`, 'yellow'));
48
+ console.log(colorize(`Initiating pull for ${model}...`, 'yellow'));
61
49
  const loadingAnimation = createLoadingAnimation('Pulling', model);
62
50
  try {
63
51
  const start = performance.now();
@@ -67,14 +55,14 @@ async function pullModel(model) {
67
55
  clearInterval(loadingAnimation);
68
56
  const end = performance.now();
69
57
  const duration = (end - start) / 1000;
70
- console.log(`\r${colorize(`${emojis.check} Successfully pulled ${model} in ${duration.toFixed(2)} seconds`, 'green')} `);
58
+ console.log(`\r${colorize(`Successfully pulled ${model} in ${duration.toFixed(2)} seconds`, 'green')} `);
71
59
  return;
72
60
  }
73
61
  }
74
62
  }
75
63
  catch (error) {
76
64
  clearInterval(loadingAnimation);
77
- console.log(`\r${colorize(`${emojis.error} Error pulling ${model}: ${error.message}`, 'red')} `);
65
+ console.log(`\r${colorize(`Error pulling ${model}: ${error.message}`, 'red')} `);
78
66
  }
79
67
  }
80
68
  /**
@@ -84,8 +72,9 @@ async function pullModel(model) {
84
72
  */
85
73
  async function benchmarkModel(model) {
86
74
  const prompt = "Explain the theory of relativity in simple terms.";
87
- console.log(colorize(`${emojis.hourglass} Initiating benchmark for ${model}...`, 'cyan'));
88
- const loadingAnimation = createLoadingAnimation('Benchmarking', model);
75
+ console.log(colorize(`\nBenchmarking ${model}`, 'cyan'));
76
+ console.log(colorize(''.repeat(50), 'cyan'));
77
+ const loadingAnimation = createLoadingAnimation('Running benchmark', model);
89
78
  try {
90
79
  const response = await ollama.generate({
91
80
  model,
@@ -93,19 +82,57 @@ async function benchmarkModel(model) {
93
82
  stream: false,
94
83
  });
95
84
  clearInterval(loadingAnimation);
96
- const totalDuration = response.total_duration / 1e9; // Convert nanoseconds to seconds
97
- const tokensPerSecond = response.eval_count / (response.eval_duration / 1e9);
98
- console.log(`\r${colorize(`${emojis.star} Benchmark results for ${model}:`, 'cyan')} `);
99
- console.log(colorize(` Total time: ${totalDuration.toFixed(2)} seconds`, 'yellow'));
100
- console.log(colorize(` Tokens generated: ${response.eval_count}`, 'yellow'));
101
- console.log(colorize(` Tokens per second: ${tokensPerSecond.toFixed(2)}`, 'yellow'));
85
+ process.stdout.write('\r' + ' '.repeat(50) + '\r');
86
+ // Calculate phase timings
87
+ const loadTime = response.load_duration / 1e9;
88
+ const promptEvalTime = response.prompt_eval_duration / 1e9;
89
+ const generationTime = response.eval_duration / 1e9;
90
+ const totalTime = response.total_duration / 1e9;
91
+ const tokensPerSecond = response.eval_count / generationTime;
92
+ // Calculate percentages
93
+ const loadPercent = (loadTime / totalTime * 100).toFixed(1);
94
+ const promptPercent = (promptEvalTime / totalTime * 100).toFixed(1);
95
+ const genPercent = (generationTime / totalTime * 100).toFixed(1);
96
+ // Display phases
97
+ console.log(colorize('Phase 1: Model Loading (Loading weights into memory)', 'yellow'));
98
+ console.log(colorize(` Time: ${loadTime.toFixed(2)}s (${loadPercent}% of total)`, 'yellow'));
99
+ console.log();
100
+ console.log(colorize('Phase 2: Prompt Processing (Encoding input)', 'yellow'));
101
+ console.log(colorize(` Tokens: ${response.prompt_eval_count}`, 'yellow'));
102
+ console.log(colorize(` Time: ${promptEvalTime.toFixed(2)}s (${promptPercent}% of total)`, 'yellow'));
103
+ console.log(colorize(` Speed: ${(response.prompt_eval_count / promptEvalTime).toFixed(2)} tokens/s`, 'yellow'));
104
+ console.log();
105
+ console.log(colorize('Phase 3: Response Generation (Creating output)', 'yellow'));
106
+ console.log(colorize(` Tokens: ${response.eval_count}`, 'yellow'));
107
+ console.log(colorize(` Time: ${generationTime.toFixed(2)}s (${genPercent}% of total)`, 'yellow'));
108
+ console.log(colorize(` Speed: ${tokensPerSecond.toFixed(2)} tokens/s`, 'yellow'));
102
109
  console.log();
103
- return { model, tokensPerSecond };
110
+ console.log(colorize('Summary', 'green'));
111
+ console.log(colorize(` Total time: ${totalTime.toFixed(2)}s`, 'green'));
112
+ console.log(colorize(` Generation speed: ${tokensPerSecond.toFixed(2)} tokens/s`, 'green'));
113
+ console.log();
114
+ return {
115
+ model,
116
+ tokensPerSecond,
117
+ loadTime,
118
+ promptEvalTime,
119
+ generationTime,
120
+ totalTime
121
+ };
104
122
  }
105
123
  catch (error) {
106
124
  clearInterval(loadingAnimation);
107
- console.log(`\r${colorize(`${emojis.error} Error benchmarking ${model}: ${error.message}`, 'red')} `);
108
- return { model, tokensPerSecond: 0 };
125
+ process.stdout.write('\r' + ' '.repeat(50) + '\r');
126
+ console.log(colorize(`Error benchmarking ${model}: ${error.message}`, 'red'));
127
+ console.log();
128
+ return {
129
+ model,
130
+ tokensPerSecond: 0,
131
+ loadTime: 0,
132
+ promptEvalTime: 0,
133
+ generationTime: 0,
134
+ totalTime: 0
135
+ };
109
136
  }
110
137
  }
111
138
  /**
@@ -114,17 +141,20 @@ async function benchmarkModel(model) {
114
141
  export async function main() {
115
142
  const models = process.argv.slice(2);
116
143
  if (models.length === 0) {
117
- console.log(colorize(`${emojis.error} Error: No models provided. Please specify at least one model.`, 'red'));
144
+ console.log(colorize(`Error: No models provided. Please specify at least one model.`, 'red'));
118
145
  process.exit(1);
119
146
  }
120
- console.log(colorize(`${emojis.rocket} Ollama Benchmark Script`, 'cyan'));
121
- console.log(colorize("=======================", 'cyan'));
147
+ console.log(colorize(`Ollama Benchmark Script`, 'cyan'));
148
+ console.log(colorize('═'.repeat(50), 'cyan'));
122
149
  // Pull models
150
+ console.log(colorize('\nPhase: Model Preparation', 'cyan'));
151
+ console.log(colorize('─'.repeat(50), 'cyan'));
123
152
  for (const model of models) {
124
153
  await pullModel(model);
125
154
  }
126
- console.log();
127
155
  // Benchmark models
156
+ console.log(colorize('\nPhase: Performance Testing', 'cyan'));
157
+ console.log(colorize('─'.repeat(50), 'cyan'));
128
158
  const results = [];
129
159
  for (const model of models) {
130
160
  const result = await benchmarkModel(model);
@@ -132,8 +162,11 @@ export async function main() {
132
162
  }
133
163
  // Find the best performing model
134
164
  const bestModel = results.reduce((best, current) => current.tokensPerSecond > best.tokensPerSecond ? current : best);
135
- console.log(colorize(`${emojis.trophy} Best performing model:`, 'magenta'));
136
- console.log(colorize(` ${bestModel.model} with ${bestModel.tokensPerSecond.toFixed(2)} tokens/second`, 'magenta'));
165
+ console.log(colorize('Final Results', 'magenta'));
166
+ console.log(colorize('═'.repeat(50), 'magenta'));
167
+ console.log(colorize(`Best performing model: ${bestModel.model}`, 'magenta'));
168
+ console.log(colorize(`Generation speed: ${bestModel.tokensPerSecond.toFixed(2)} tokens/s`, 'magenta'));
169
+ console.log(colorize(`Total time: ${bestModel.totalTime.toFixed(2)}s`, 'magenta'));
137
170
  }
138
171
  if (import.meta.url === import.meta.resolve(process.argv[1])) {
139
172
  main().catch(error => {
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "ollama-bench",
3
- "version": "1.0.4",
4
- "description": "A CLI tool to benchmark Ollama models performance",
3
+ "version": "1.1.1",
4
+ "description": "Minimal CLI tool to benchmark Ollama models with detailed phase analysis. Zero runtime dependencies.",
5
5
  "main": "dist/index.js",
6
6
  "type": "module",
7
7
  "bin": {
@@ -12,7 +12,7 @@
12
12
  "start": "node dist/index.js",
13
13
  "dev": "tsc && node dist/index.js"
14
14
  },
15
- "keywords": ["ollama", "benchmark", "ai", "models", "cli"],
15
+ "keywords": ["ollama", "benchmark", "ai", "models", "cli", "performance", "llm", "testing"],
16
16
  "author": "dalist1",
17
17
  "license": "MIT",
18
18
  "repository": {
@@ -27,8 +27,8 @@
27
27
  "ollama": "latest"
28
28
  },
29
29
  "devDependencies": {
30
- "@types/node": "^20.17.5",
31
- "typescript": "^5.6.3"
30
+ "@types/node": "^20.19.25",
31
+ "typescript": "^5.9.3"
32
32
  },
33
33
  "engines": {
34
34
  "node": ">=14.0.0"
package/src/index.ts CHANGED
@@ -7,11 +7,6 @@ import ollama from 'ollama';
7
7
  */
8
8
  type Color = 'reset' | 'green' | 'yellow' | 'red' | 'cyan' | 'magenta' | 'blue';
9
9
 
10
- /**
11
- * Represents the available emoji keys.
12
- */
13
- type Emoji = 'rocket' | 'check' | 'error' | 'hourglass' | 'star' | 'trophy' | 'gear';
14
-
15
10
  /**
16
11
  * Object containing ANSI color codes for text coloring.
17
12
  */
@@ -25,19 +20,6 @@ const colors: Record<Color, string> = {
25
20
  blue: '\x1b[34m',
26
21
  };
27
22
 
28
- /**
29
- * Object containing emoji characters for various status indicators.
30
- */
31
- const emojis: Record<Emoji, string> = {
32
- rocket: '🚀',
33
- check: '✅',
34
- error: '❌',
35
- hourglass: '⏳',
36
- star: '⭐',
37
- trophy: '🏆',
38
- gear: '⚙️',
39
- };
40
-
41
23
  /**
42
24
  * Applies color to the given text.
43
25
  * @param text - The text to colorize.
@@ -55,14 +37,14 @@ function colorize(text: string, color: Color): string {
55
37
  * @returns An interval ID for the animation.
56
38
  */
57
39
  function createLoadingAnimation(operation: string, model: string): NodeJS.Timeout {
58
- const frames: string[] = ['', '', '', '', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
40
+ const frames: string[] = ['|', '/', '-', '\\'];
59
41
  let i = 0;
60
42
  let dots = 0;
61
43
  return setInterval(() => {
62
44
  const frame = frames[i];
63
45
  const dotString = '.'.repeat(dots);
64
46
  const operationText = colorize(`${operation} ${model}${dotString}`, 'blue');
65
- process.stdout.write(`\r${frame} ${emojis.gear} ${operationText}`.padEnd(50));
47
+ process.stdout.write(`\r${frame} ${operationText}`.padEnd(50));
66
48
  i = (i + 1) % frames.length;
67
49
  dots = (dots + 1) % 4;
68
50
  }, 100);
@@ -73,7 +55,7 @@ function createLoadingAnimation(operation: string, model: string): NodeJS.Timeou
73
55
  * @param model - The name of the model to pull.
74
56
  */
75
57
  async function pullModel(model: string): Promise<void> {
76
- console.log(colorize(`${emojis.rocket} Initiating pull for ${model}...`, 'yellow'));
58
+ console.log(colorize(`Initiating pull for ${model}...`, 'yellow'));
77
59
  const loadingAnimation = createLoadingAnimation('Pulling', model);
78
60
  try {
79
61
  const start = performance.now();
@@ -83,13 +65,13 @@ async function pullModel(model: string): Promise<void> {
83
65
  clearInterval(loadingAnimation);
84
66
  const end = performance.now();
85
67
  const duration = (end - start) / 1000;
86
- console.log(`\r${colorize(`${emojis.check} Successfully pulled ${model} in ${duration.toFixed(2)} seconds`, 'green')} `);
68
+ console.log(`\r${colorize(`Successfully pulled ${model} in ${duration.toFixed(2)} seconds`, 'green')} `);
87
69
  return;
88
70
  }
89
71
  }
90
72
  } catch (error) {
91
73
  clearInterval(loadingAnimation);
92
- console.log(`\r${colorize(`${emojis.error} Error pulling ${model}: ${(error as Error).message}`, 'red')} `);
74
+ console.log(`\r${colorize(`Error pulling ${model}: ${(error as Error).message}`, 'red')} `);
93
75
  }
94
76
  }
95
77
 
@@ -99,6 +81,10 @@ async function pullModel(model: string): Promise<void> {
99
81
  interface BenchmarkResult {
100
82
  model: string;
101
83
  tokensPerSecond: number;
84
+ loadTime: number;
85
+ promptEvalTime: number;
86
+ generationTime: number;
87
+ totalTime: number;
102
88
  }
103
89
 
104
90
  /**
@@ -108,8 +94,10 @@ interface BenchmarkResult {
108
94
  */
109
95
  async function benchmarkModel(model: string): Promise<BenchmarkResult> {
110
96
  const prompt = "Explain the theory of relativity in simple terms.";
111
- console.log(colorize(`${emojis.hourglass} Initiating benchmark for ${model}...`, 'cyan'));
112
- const loadingAnimation = createLoadingAnimation('Benchmarking', model);
97
+ console.log(colorize(`\nBenchmarking ${model}`, 'cyan'));
98
+ console.log(colorize(''.repeat(50), 'cyan'));
99
+
100
+ const loadingAnimation = createLoadingAnimation('Running benchmark', model);
113
101
 
114
102
  try {
115
103
  const response = await ollama.generate({
@@ -119,20 +107,63 @@ async function benchmarkModel(model: string): Promise<BenchmarkResult> {
119
107
  });
120
108
 
121
109
  clearInterval(loadingAnimation);
122
- const totalDuration = response.total_duration / 1e9; // Convert nanoseconds to seconds
123
- const tokensPerSecond = response.eval_count / (response.eval_duration / 1e9);
110
+ process.stdout.write('\r' + ' '.repeat(50) + '\r');
111
+
112
+ // Calculate phase timings
113
+ const loadTime = response.load_duration / 1e9;
114
+ const promptEvalTime = response.prompt_eval_duration / 1e9;
115
+ const generationTime = response.eval_duration / 1e9;
116
+ const totalTime = response.total_duration / 1e9;
117
+ const tokensPerSecond = response.eval_count / generationTime;
118
+
119
+ // Calculate percentages
120
+ const loadPercent = (loadTime / totalTime * 100).toFixed(1);
121
+ const promptPercent = (promptEvalTime / totalTime * 100).toFixed(1);
122
+ const genPercent = (generationTime / totalTime * 100).toFixed(1);
123
+
124
+ // Display phases
125
+ console.log(colorize('Phase 1: Model Loading (Loading weights into memory)', 'yellow'));
126
+ console.log(colorize(` Time: ${loadTime.toFixed(2)}s (${loadPercent}% of total)`, 'yellow'));
127
+ console.log();
124
128
 
125
- console.log(`\r${colorize(`${emojis.star} Benchmark results for ${model}:`, 'cyan')} `);
126
- console.log(colorize(` Total time: ${totalDuration.toFixed(2)} seconds`, 'yellow'));
127
- console.log(colorize(` Tokens generated: ${response.eval_count}`, 'yellow'));
128
- console.log(colorize(` Tokens per second: ${tokensPerSecond.toFixed(2)}`, 'yellow'));
129
+ console.log(colorize('Phase 2: Prompt Processing (Encoding input)', 'yellow'));
130
+ console.log(colorize(` Tokens: ${response.prompt_eval_count}`, 'yellow'));
131
+ console.log(colorize(` Time: ${promptEvalTime.toFixed(2)}s (${promptPercent}% of total)`, 'yellow'));
132
+ console.log(colorize(` Speed: ${(response.prompt_eval_count / promptEvalTime).toFixed(2)} tokens/s`, 'yellow'));
129
133
  console.log();
130
134
 
131
- return { model, tokensPerSecond };
135
+ console.log(colorize('Phase 3: Response Generation (Creating output)', 'yellow'));
136
+ console.log(colorize(` Tokens: ${response.eval_count}`, 'yellow'));
137
+ console.log(colorize(` Time: ${generationTime.toFixed(2)}s (${genPercent}% of total)`, 'yellow'));
138
+ console.log(colorize(` Speed: ${tokensPerSecond.toFixed(2)} tokens/s`, 'yellow'));
139
+ console.log();
140
+
141
+ console.log(colorize('Summary', 'green'));
142
+ console.log(colorize(` Total time: ${totalTime.toFixed(2)}s`, 'green'));
143
+ console.log(colorize(` Generation speed: ${tokensPerSecond.toFixed(2)} tokens/s`, 'green'));
144
+ console.log();
145
+
146
+ return {
147
+ model,
148
+ tokensPerSecond,
149
+ loadTime,
150
+ promptEvalTime,
151
+ generationTime,
152
+ totalTime
153
+ };
132
154
  } catch (error) {
133
155
  clearInterval(loadingAnimation);
134
- console.log(`\r${colorize(`${emojis.error} Error benchmarking ${model}: ${(error as Error).message}`, 'red')} `);
135
- return { model, tokensPerSecond: 0 };
156
+ process.stdout.write('\r' + ' '.repeat(50) + '\r');
157
+ console.log(colorize(`Error benchmarking ${model}: ${(error as Error).message}`, 'red'));
158
+ console.log();
159
+ return {
160
+ model,
161
+ tokensPerSecond: 0,
162
+ loadTime: 0,
163
+ promptEvalTime: 0,
164
+ generationTime: 0,
165
+ totalTime: 0
166
+ };
136
167
  }
137
168
  }
138
169
 
@@ -143,21 +174,23 @@ export async function main(): Promise<void> {
143
174
  const models = process.argv.slice(2);
144
175
 
145
176
  if (models.length === 0) {
146
- console.log(colorize(`${emojis.error} Error: No models provided. Please specify at least one model.`, 'red'));
177
+ console.log(colorize(`Error: No models provided. Please specify at least one model.`, 'red'));
147
178
  process.exit(1);
148
179
  }
149
180
 
150
- console.log(colorize(`${emojis.rocket} Ollama Benchmark Script`, 'cyan'));
151
- console.log(colorize("=======================", 'cyan'));
181
+ console.log(colorize(`Ollama Benchmark Script`, 'cyan'));
182
+ console.log(colorize('═'.repeat(50), 'cyan'));
152
183
 
153
184
  // Pull models
185
+ console.log(colorize('\nPhase: Model Preparation', 'cyan'));
186
+ console.log(colorize('─'.repeat(50), 'cyan'));
154
187
  for (const model of models) {
155
188
  await pullModel(model);
156
189
  }
157
190
 
158
- console.log();
159
-
160
191
  // Benchmark models
192
+ console.log(colorize('\nPhase: Performance Testing', 'cyan'));
193
+ console.log(colorize('─'.repeat(50), 'cyan'));
161
194
  const results: BenchmarkResult[] = [];
162
195
  for (const model of models) {
163
196
  const result = await benchmarkModel(model);
@@ -165,12 +198,15 @@ export async function main(): Promise<void> {
165
198
  }
166
199
 
167
200
  // Find the best performing model
168
- const bestModel = results.reduce((best, current) =>
201
+ const bestModel = results.reduce((best, current) =>
169
202
  current.tokensPerSecond > best.tokensPerSecond ? current : best
170
203
  );
171
204
 
172
- console.log(colorize(`${emojis.trophy} Best performing model:`, 'magenta'));
173
- console.log(colorize(` ${bestModel.model} with ${bestModel.tokensPerSecond.toFixed(2)} tokens/second`, 'magenta'));
205
+ console.log(colorize('Final Results', 'magenta'));
206
+ console.log(colorize('═'.repeat(50), 'magenta'));
207
+ console.log(colorize(`Best performing model: ${bestModel.model}`, 'magenta'));
208
+ console.log(colorize(`Generation speed: ${bestModel.tokensPerSecond.toFixed(2)} tokens/s`, 'magenta'));
209
+ console.log(colorize(`Total time: ${bestModel.totalTime.toFixed(2)}s`, 'magenta'));
174
210
  }
175
211
 
176
212
  if (import.meta.url === import.meta.resolve(process.argv[1])) {