ollama-bench 1.0.4 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -44
- package/bun.lock +27 -0
- package/dist/index.js +67 -34
- package/package.json +5 -5
- package/src/index.ts +78 -42
package/README.md
CHANGED
|
@@ -1,57 +1,46 @@
|
|
|
1
1
|
# Ollama-bench
|
|
2
|
-
A command-line tool to benchmark and compare the performance of Ollama language models. Measures tokens per second and total processing time.
|
|
3
|
-
|
|
4
|
-
## Setup
|
|
5
|
-
|
|
6
|
-
### 1. Install Ollama
|
|
7
|
-
Choose your platform:
|
|
8
|
-
- **Windows:** [Download Installer](https://ollama.com/download/OllamaSetup.exe)
|
|
9
|
-
- **macOS:** [Download App](https://ollama.com/download/Ollama-darwin.zip)
|
|
10
|
-
- **Linux:** Run:
|
|
11
|
-
```bash
|
|
12
|
-
curl -fsSL https://ollama.com/install.sh | sh
|
|
13
|
-
```
|
|
14
|
-
- **Docker:** Pull and run:
|
|
15
|
-
```bash
|
|
16
|
-
docker pull ollama/ollama
|
|
17
|
-
docker run -d -v ollama:/root/.ollama -p 11434:11434 ollama/ollama
|
|
18
|
-
```
|
|
19
|
-
|
|
20
|
-
### 2. Start Ollama Server
|
|
21
|
-
Before running any benchmarks, make sure the Ollama server is running:
|
|
22
2
|
|
|
23
|
-
|
|
24
|
-
# On Linux/macOS terminal or Windows PowerShell
|
|
25
|
-
ollama serve
|
|
26
|
-
```
|
|
3
|
+
Minimal CLI tool to benchmark Ollama models with detailed phase analysis. Zero runtime dependencies.
|
|
27
4
|
|
|
28
|
-
|
|
5
|
+
## Features
|
|
29
6
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
Or run directly with npx:
|
|
36
|
-
```bash
|
|
37
|
-
npx ollama-bench <model1> [model2] [model3]
|
|
38
|
-
```
|
|
7
|
+
- Phase-by-phase performance breakdown
|
|
8
|
+
- Precise timing measurements
|
|
9
|
+
- Works with npm, pnpm, yarn, and bun
|
|
10
|
+
|
|
11
|
+
## Quick Start
|
|
39
12
|
|
|
40
|
-
## Usage
|
|
41
13
|
```bash
|
|
42
|
-
#
|
|
43
|
-
ollama-bench
|
|
14
|
+
# Run directly (no installation)
|
|
15
|
+
npx ollama-bench qwen2.5:0.5b llama3.2:1b
|
|
44
16
|
|
|
45
|
-
#
|
|
46
|
-
|
|
17
|
+
# Or with other package managers
|
|
18
|
+
bunx ollama-bench qwen2.5:0.5b
|
|
19
|
+
pnpm dlx ollama-bench qwen2.5:0.5b
|
|
47
20
|
```
|
|
48
21
|
|
|
49
|
-
##
|
|
22
|
+
## Prerequisites
|
|
23
|
+
|
|
24
|
+
1. **Install Ollama** - [ollama.com/download](https://ollama.com/download)
|
|
25
|
+
2. **Start Ollama server** - Run `ollama serve`
|
|
26
|
+
|
|
27
|
+
## Benchmark Phases
|
|
28
|
+
|
|
29
|
+
Each benchmark measures three distinct phases:
|
|
30
|
+
|
|
31
|
+
**Phase 1: Model Loading** (Loading weights into memory)
|
|
32
|
+
- Time to load model from disk into RAM
|
|
33
|
+
- Hardware-dependent, very consistent
|
|
34
|
+
|
|
35
|
+
**Phase 2: Prompt Processing** (Encoding input)
|
|
36
|
+
- Time to encode and process your input prompt
|
|
37
|
+
- Fast, scales with prompt length
|
|
38
|
+
|
|
39
|
+
**Phase 3: Response Generation** (Creating output)
|
|
40
|
+
- Time to generate the actual response
|
|
41
|
+
- Most important metric for user-facing performance
|
|
42
|
+
- Varies with content complexity
|
|
50
43
|
|
|
51
|
-
If you encounter errors, check:
|
|
52
|
-
1. Is the Ollama server running? (`ollama serve`)
|
|
53
|
-
2. Can you access `http://localhost:11434`?
|
|
54
|
-
3. Do you have enough RAM for your chosen models?
|
|
55
44
|
|
|
56
45
|
## Available Models
|
|
57
46
|
|
package/bun.lock
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
{
|
|
2
|
+
"lockfileVersion": 1,
|
|
3
|
+
"configVersion": 1,
|
|
4
|
+
"workspaces": {
|
|
5
|
+
"": {
|
|
6
|
+
"name": "ollama-bench",
|
|
7
|
+
"dependencies": {
|
|
8
|
+
"ollama": "latest",
|
|
9
|
+
},
|
|
10
|
+
"devDependencies": {
|
|
11
|
+
"@types/node": "^20.19.25",
|
|
12
|
+
"typescript": "^5.9.3",
|
|
13
|
+
},
|
|
14
|
+
},
|
|
15
|
+
},
|
|
16
|
+
"packages": {
|
|
17
|
+
"@types/node": ["@types/node@20.19.25", "", { "dependencies": { "undici-types": "~6.21.0" } }, "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ=="],
|
|
18
|
+
|
|
19
|
+
"ollama": ["ollama@0.6.3", "", { "dependencies": { "whatwg-fetch": "^3.6.20" } }, "sha512-KEWEhIqE5wtfzEIZbDCLH51VFZ6Z3ZSa6sIOg/E/tBV8S51flyqBOXi+bRxlOYKDf8i327zG9eSTb8IJxvm3Zg=="],
|
|
20
|
+
|
|
21
|
+
"typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],
|
|
22
|
+
|
|
23
|
+
"undici-types": ["undici-types@6.21.0", "", {}, "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ=="],
|
|
24
|
+
|
|
25
|
+
"whatwg-fetch": ["whatwg-fetch@3.6.20", "", {}, "sha512-EqhiFU6daOA8kpjOWTL0olhVOF3i7OrFzSYiGsEMB8GcXS+RrzauAERX65xMeNWVqxA6HXH2m69Z9LaKKdisfg=="],
|
|
26
|
+
}
|
|
27
|
+
}
|
package/dist/index.js
CHANGED
|
@@ -12,18 +12,6 @@ const colors = {
|
|
|
12
12
|
magenta: '\x1b[35m',
|
|
13
13
|
blue: '\x1b[34m',
|
|
14
14
|
};
|
|
15
|
-
/**
|
|
16
|
-
* Object containing emoji characters for various status indicators.
|
|
17
|
-
*/
|
|
18
|
-
const emojis = {
|
|
19
|
-
rocket: '🚀',
|
|
20
|
-
check: '✅',
|
|
21
|
-
error: '❌',
|
|
22
|
-
hourglass: '⏳',
|
|
23
|
-
star: '⭐',
|
|
24
|
-
trophy: '🏆',
|
|
25
|
-
gear: '⚙️',
|
|
26
|
-
};
|
|
27
15
|
/**
|
|
28
16
|
* Applies color to the given text.
|
|
29
17
|
* @param text - The text to colorize.
|
|
@@ -40,14 +28,14 @@ function colorize(text, color) {
|
|
|
40
28
|
* @returns An interval ID for the animation.
|
|
41
29
|
*/
|
|
42
30
|
function createLoadingAnimation(operation, model) {
|
|
43
|
-
const frames = ['
|
|
31
|
+
const frames = ['|', '/', '-', '\\'];
|
|
44
32
|
let i = 0;
|
|
45
33
|
let dots = 0;
|
|
46
34
|
return setInterval(() => {
|
|
47
35
|
const frame = frames[i];
|
|
48
36
|
const dotString = '.'.repeat(dots);
|
|
49
37
|
const operationText = colorize(`${operation} ${model}${dotString}`, 'blue');
|
|
50
|
-
process.stdout.write(`\r${frame} ${
|
|
38
|
+
process.stdout.write(`\r${frame} ${operationText}`.padEnd(50));
|
|
51
39
|
i = (i + 1) % frames.length;
|
|
52
40
|
dots = (dots + 1) % 4;
|
|
53
41
|
}, 100);
|
|
@@ -57,7 +45,7 @@ function createLoadingAnimation(operation, model) {
|
|
|
57
45
|
* @param model - The name of the model to pull.
|
|
58
46
|
*/
|
|
59
47
|
async function pullModel(model) {
|
|
60
|
-
console.log(colorize(
|
|
48
|
+
console.log(colorize(`Initiating pull for ${model}...`, 'yellow'));
|
|
61
49
|
const loadingAnimation = createLoadingAnimation('Pulling', model);
|
|
62
50
|
try {
|
|
63
51
|
const start = performance.now();
|
|
@@ -67,14 +55,14 @@ async function pullModel(model) {
|
|
|
67
55
|
clearInterval(loadingAnimation);
|
|
68
56
|
const end = performance.now();
|
|
69
57
|
const duration = (end - start) / 1000;
|
|
70
|
-
console.log(`\r${colorize(
|
|
58
|
+
console.log(`\r${colorize(`Successfully pulled ${model} in ${duration.toFixed(2)} seconds`, 'green')} `);
|
|
71
59
|
return;
|
|
72
60
|
}
|
|
73
61
|
}
|
|
74
62
|
}
|
|
75
63
|
catch (error) {
|
|
76
64
|
clearInterval(loadingAnimation);
|
|
77
|
-
console.log(`\r${colorize(
|
|
65
|
+
console.log(`\r${colorize(`Error pulling ${model}: ${error.message}`, 'red')} `);
|
|
78
66
|
}
|
|
79
67
|
}
|
|
80
68
|
/**
|
|
@@ -84,8 +72,9 @@ async function pullModel(model) {
|
|
|
84
72
|
*/
|
|
85
73
|
async function benchmarkModel(model) {
|
|
86
74
|
const prompt = "Explain the theory of relativity in simple terms.";
|
|
87
|
-
console.log(colorize(
|
|
88
|
-
|
|
75
|
+
console.log(colorize(`\nBenchmarking ${model}`, 'cyan'));
|
|
76
|
+
console.log(colorize('─'.repeat(50), 'cyan'));
|
|
77
|
+
const loadingAnimation = createLoadingAnimation('Running benchmark', model);
|
|
89
78
|
try {
|
|
90
79
|
const response = await ollama.generate({
|
|
91
80
|
model,
|
|
@@ -93,19 +82,57 @@ async function benchmarkModel(model) {
|
|
|
93
82
|
stream: false,
|
|
94
83
|
});
|
|
95
84
|
clearInterval(loadingAnimation);
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
85
|
+
process.stdout.write('\r' + ' '.repeat(50) + '\r');
|
|
86
|
+
// Calculate phase timings
|
|
87
|
+
const loadTime = response.load_duration / 1e9;
|
|
88
|
+
const promptEvalTime = response.prompt_eval_duration / 1e9;
|
|
89
|
+
const generationTime = response.eval_duration / 1e9;
|
|
90
|
+
const totalTime = response.total_duration / 1e9;
|
|
91
|
+
const tokensPerSecond = response.eval_count / generationTime;
|
|
92
|
+
// Calculate percentages
|
|
93
|
+
const loadPercent = (loadTime / totalTime * 100).toFixed(1);
|
|
94
|
+
const promptPercent = (promptEvalTime / totalTime * 100).toFixed(1);
|
|
95
|
+
const genPercent = (generationTime / totalTime * 100).toFixed(1);
|
|
96
|
+
// Display phases
|
|
97
|
+
console.log(colorize('Phase 1: Model Loading (Loading weights into memory)', 'yellow'));
|
|
98
|
+
console.log(colorize(` Time: ${loadTime.toFixed(2)}s (${loadPercent}% of total)`, 'yellow'));
|
|
99
|
+
console.log();
|
|
100
|
+
console.log(colorize('Phase 2: Prompt Processing (Encoding input)', 'yellow'));
|
|
101
|
+
console.log(colorize(` Tokens: ${response.prompt_eval_count}`, 'yellow'));
|
|
102
|
+
console.log(colorize(` Time: ${promptEvalTime.toFixed(2)}s (${promptPercent}% of total)`, 'yellow'));
|
|
103
|
+
console.log(colorize(` Speed: ${(response.prompt_eval_count / promptEvalTime).toFixed(2)} tokens/s`, 'yellow'));
|
|
104
|
+
console.log();
|
|
105
|
+
console.log(colorize('Phase 3: Response Generation (Creating output)', 'yellow'));
|
|
106
|
+
console.log(colorize(` Tokens: ${response.eval_count}`, 'yellow'));
|
|
107
|
+
console.log(colorize(` Time: ${generationTime.toFixed(2)}s (${genPercent}% of total)`, 'yellow'));
|
|
108
|
+
console.log(colorize(` Speed: ${tokensPerSecond.toFixed(2)} tokens/s`, 'yellow'));
|
|
102
109
|
console.log();
|
|
103
|
-
|
|
110
|
+
console.log(colorize('Summary', 'green'));
|
|
111
|
+
console.log(colorize(` Total time: ${totalTime.toFixed(2)}s`, 'green'));
|
|
112
|
+
console.log(colorize(` Generation speed: ${tokensPerSecond.toFixed(2)} tokens/s`, 'green'));
|
|
113
|
+
console.log();
|
|
114
|
+
return {
|
|
115
|
+
model,
|
|
116
|
+
tokensPerSecond,
|
|
117
|
+
loadTime,
|
|
118
|
+
promptEvalTime,
|
|
119
|
+
generationTime,
|
|
120
|
+
totalTime
|
|
121
|
+
};
|
|
104
122
|
}
|
|
105
123
|
catch (error) {
|
|
106
124
|
clearInterval(loadingAnimation);
|
|
107
|
-
|
|
108
|
-
|
|
125
|
+
process.stdout.write('\r' + ' '.repeat(50) + '\r');
|
|
126
|
+
console.log(colorize(`Error benchmarking ${model}: ${error.message}`, 'red'));
|
|
127
|
+
console.log();
|
|
128
|
+
return {
|
|
129
|
+
model,
|
|
130
|
+
tokensPerSecond: 0,
|
|
131
|
+
loadTime: 0,
|
|
132
|
+
promptEvalTime: 0,
|
|
133
|
+
generationTime: 0,
|
|
134
|
+
totalTime: 0
|
|
135
|
+
};
|
|
109
136
|
}
|
|
110
137
|
}
|
|
111
138
|
/**
|
|
@@ -114,17 +141,20 @@ async function benchmarkModel(model) {
|
|
|
114
141
|
export async function main() {
|
|
115
142
|
const models = process.argv.slice(2);
|
|
116
143
|
if (models.length === 0) {
|
|
117
|
-
console.log(colorize(
|
|
144
|
+
console.log(colorize(`Error: No models provided. Please specify at least one model.`, 'red'));
|
|
118
145
|
process.exit(1);
|
|
119
146
|
}
|
|
120
|
-
console.log(colorize(
|
|
121
|
-
console.log(colorize(
|
|
147
|
+
console.log(colorize(`Ollama Benchmark Script`, 'cyan'));
|
|
148
|
+
console.log(colorize('═'.repeat(50), 'cyan'));
|
|
122
149
|
// Pull models
|
|
150
|
+
console.log(colorize('\nPhase: Model Preparation', 'cyan'));
|
|
151
|
+
console.log(colorize('─'.repeat(50), 'cyan'));
|
|
123
152
|
for (const model of models) {
|
|
124
153
|
await pullModel(model);
|
|
125
154
|
}
|
|
126
|
-
console.log();
|
|
127
155
|
// Benchmark models
|
|
156
|
+
console.log(colorize('\nPhase: Performance Testing', 'cyan'));
|
|
157
|
+
console.log(colorize('─'.repeat(50), 'cyan'));
|
|
128
158
|
const results = [];
|
|
129
159
|
for (const model of models) {
|
|
130
160
|
const result = await benchmarkModel(model);
|
|
@@ -132,8 +162,11 @@ export async function main() {
|
|
|
132
162
|
}
|
|
133
163
|
// Find the best performing model
|
|
134
164
|
const bestModel = results.reduce((best, current) => current.tokensPerSecond > best.tokensPerSecond ? current : best);
|
|
135
|
-
console.log(colorize(
|
|
136
|
-
console.log(colorize(
|
|
165
|
+
console.log(colorize('Final Results', 'magenta'));
|
|
166
|
+
console.log(colorize('═'.repeat(50), 'magenta'));
|
|
167
|
+
console.log(colorize(`Best performing model: ${bestModel.model}`, 'magenta'));
|
|
168
|
+
console.log(colorize(`Generation speed: ${bestModel.tokensPerSecond.toFixed(2)} tokens/s`, 'magenta'));
|
|
169
|
+
console.log(colorize(`Total time: ${bestModel.totalTime.toFixed(2)}s`, 'magenta'));
|
|
137
170
|
}
|
|
138
171
|
if (import.meta.url === import.meta.resolve(process.argv[1])) {
|
|
139
172
|
main().catch(error => {
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ollama-bench",
|
|
3
|
-
"version": "1.0.4",
|
|
4
|
-
"description": "
|
|
3
|
+
"version": "1.1.1",
|
|
4
|
+
"description": "Minimal CLI tool to benchmark Ollama models with detailed phase analysis. Zero runtime dependencies.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"type": "module",
|
|
7
7
|
"bin": {
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
"start": "node dist/index.js",
|
|
13
13
|
"dev": "tsc && node dist/index.js"
|
|
14
14
|
},
|
|
15
|
-
"keywords": ["ollama", "benchmark", "ai", "models", "cli"],
|
|
15
|
+
"keywords": ["ollama", "benchmark", "ai", "models", "cli", "performance", "llm", "testing"],
|
|
16
16
|
"author": "dalist1",
|
|
17
17
|
"license": "MIT",
|
|
18
18
|
"repository": {
|
|
@@ -27,8 +27,8 @@
|
|
|
27
27
|
"ollama": "latest"
|
|
28
28
|
},
|
|
29
29
|
"devDependencies": {
|
|
30
|
-
"@types/node": "^20.
|
|
31
|
-
"typescript": "^5.
|
|
30
|
+
"@types/node": "^20.19.25",
|
|
31
|
+
"typescript": "^5.9.3"
|
|
32
32
|
},
|
|
33
33
|
"engines": {
|
|
34
34
|
"node": ">=14.0.0"
|
package/src/index.ts
CHANGED
|
@@ -7,11 +7,6 @@ import ollama from 'ollama';
|
|
|
7
7
|
*/
|
|
8
8
|
type Color = 'reset' | 'green' | 'yellow' | 'red' | 'cyan' | 'magenta' | 'blue';
|
|
9
9
|
|
|
10
|
-
/**
|
|
11
|
-
* Represents the available emoji keys.
|
|
12
|
-
*/
|
|
13
|
-
type Emoji = 'rocket' | 'check' | 'error' | 'hourglass' | 'star' | 'trophy' | 'gear';
|
|
14
|
-
|
|
15
10
|
/**
|
|
16
11
|
* Object containing ANSI color codes for text coloring.
|
|
17
12
|
*/
|
|
@@ -25,19 +20,6 @@ const colors: Record<Color, string> = {
|
|
|
25
20
|
blue: '\x1b[34m',
|
|
26
21
|
};
|
|
27
22
|
|
|
28
|
-
/**
|
|
29
|
-
* Object containing emoji characters for various status indicators.
|
|
30
|
-
*/
|
|
31
|
-
const emojis: Record<Emoji, string> = {
|
|
32
|
-
rocket: '🚀',
|
|
33
|
-
check: '✅',
|
|
34
|
-
error: '❌',
|
|
35
|
-
hourglass: '⏳',
|
|
36
|
-
star: '⭐',
|
|
37
|
-
trophy: '🏆',
|
|
38
|
-
gear: '⚙️',
|
|
39
|
-
};
|
|
40
|
-
|
|
41
23
|
/**
|
|
42
24
|
* Applies color to the given text.
|
|
43
25
|
* @param text - The text to colorize.
|
|
@@ -55,14 +37,14 @@ function colorize(text: string, color: Color): string {
|
|
|
55
37
|
* @returns An interval ID for the animation.
|
|
56
38
|
*/
|
|
57
39
|
function createLoadingAnimation(operation: string, model: string): NodeJS.Timeout {
|
|
58
|
-
const frames: string[] = ['
|
|
40
|
+
const frames: string[] = ['|', '/', '-', '\\'];
|
|
59
41
|
let i = 0;
|
|
60
42
|
let dots = 0;
|
|
61
43
|
return setInterval(() => {
|
|
62
44
|
const frame = frames[i];
|
|
63
45
|
const dotString = '.'.repeat(dots);
|
|
64
46
|
const operationText = colorize(`${operation} ${model}${dotString}`, 'blue');
|
|
65
|
-
process.stdout.write(`\r${frame} ${
|
|
47
|
+
process.stdout.write(`\r${frame} ${operationText}`.padEnd(50));
|
|
66
48
|
i = (i + 1) % frames.length;
|
|
67
49
|
dots = (dots + 1) % 4;
|
|
68
50
|
}, 100);
|
|
@@ -73,7 +55,7 @@ function createLoadingAnimation(operation: string, model: string): NodeJS.Timeou
|
|
|
73
55
|
* @param model - The name of the model to pull.
|
|
74
56
|
*/
|
|
75
57
|
async function pullModel(model: string): Promise<void> {
|
|
76
|
-
console.log(colorize(
|
|
58
|
+
console.log(colorize(`Initiating pull for ${model}...`, 'yellow'));
|
|
77
59
|
const loadingAnimation = createLoadingAnimation('Pulling', model);
|
|
78
60
|
try {
|
|
79
61
|
const start = performance.now();
|
|
@@ -83,13 +65,13 @@ async function pullModel(model: string): Promise<void> {
|
|
|
83
65
|
clearInterval(loadingAnimation);
|
|
84
66
|
const end = performance.now();
|
|
85
67
|
const duration = (end - start) / 1000;
|
|
86
|
-
console.log(`\r${colorize(
|
|
68
|
+
console.log(`\r${colorize(`Successfully pulled ${model} in ${duration.toFixed(2)} seconds`, 'green')} `);
|
|
87
69
|
return;
|
|
88
70
|
}
|
|
89
71
|
}
|
|
90
72
|
} catch (error) {
|
|
91
73
|
clearInterval(loadingAnimation);
|
|
92
|
-
console.log(`\r${colorize(
|
|
74
|
+
console.log(`\r${colorize(`Error pulling ${model}: ${(error as Error).message}`, 'red')} `);
|
|
93
75
|
}
|
|
94
76
|
}
|
|
95
77
|
|
|
@@ -99,6 +81,10 @@ async function pullModel(model: string): Promise<void> {
|
|
|
99
81
|
interface BenchmarkResult {
|
|
100
82
|
model: string;
|
|
101
83
|
tokensPerSecond: number;
|
|
84
|
+
loadTime: number;
|
|
85
|
+
promptEvalTime: number;
|
|
86
|
+
generationTime: number;
|
|
87
|
+
totalTime: number;
|
|
102
88
|
}
|
|
103
89
|
|
|
104
90
|
/**
|
|
@@ -108,8 +94,10 @@ interface BenchmarkResult {
|
|
|
108
94
|
*/
|
|
109
95
|
async function benchmarkModel(model: string): Promise<BenchmarkResult> {
|
|
110
96
|
const prompt = "Explain the theory of relativity in simple terms.";
|
|
111
|
-
console.log(colorize(
|
|
112
|
-
|
|
97
|
+
console.log(colorize(`\nBenchmarking ${model}`, 'cyan'));
|
|
98
|
+
console.log(colorize('─'.repeat(50), 'cyan'));
|
|
99
|
+
|
|
100
|
+
const loadingAnimation = createLoadingAnimation('Running benchmark', model);
|
|
113
101
|
|
|
114
102
|
try {
|
|
115
103
|
const response = await ollama.generate({
|
|
@@ -119,20 +107,63 @@ async function benchmarkModel(model: string): Promise<BenchmarkResult> {
|
|
|
119
107
|
});
|
|
120
108
|
|
|
121
109
|
clearInterval(loadingAnimation);
|
|
122
|
-
|
|
123
|
-
|
|
110
|
+
process.stdout.write('\r' + ' '.repeat(50) + '\r');
|
|
111
|
+
|
|
112
|
+
// Calculate phase timings
|
|
113
|
+
const loadTime = response.load_duration / 1e9;
|
|
114
|
+
const promptEvalTime = response.prompt_eval_duration / 1e9;
|
|
115
|
+
const generationTime = response.eval_duration / 1e9;
|
|
116
|
+
const totalTime = response.total_duration / 1e9;
|
|
117
|
+
const tokensPerSecond = response.eval_count / generationTime;
|
|
118
|
+
|
|
119
|
+
// Calculate percentages
|
|
120
|
+
const loadPercent = (loadTime / totalTime * 100).toFixed(1);
|
|
121
|
+
const promptPercent = (promptEvalTime / totalTime * 100).toFixed(1);
|
|
122
|
+
const genPercent = (generationTime / totalTime * 100).toFixed(1);
|
|
123
|
+
|
|
124
|
+
// Display phases
|
|
125
|
+
console.log(colorize('Phase 1: Model Loading (Loading weights into memory)', 'yellow'));
|
|
126
|
+
console.log(colorize(` Time: ${loadTime.toFixed(2)}s (${loadPercent}% of total)`, 'yellow'));
|
|
127
|
+
console.log();
|
|
124
128
|
|
|
125
|
-
console.log(
|
|
126
|
-
console.log(colorize(`
|
|
127
|
-
console.log(colorize(`
|
|
128
|
-
console.log(colorize(`
|
|
129
|
+
console.log(colorize('Phase 2: Prompt Processing (Encoding input)', 'yellow'));
|
|
130
|
+
console.log(colorize(` Tokens: ${response.prompt_eval_count}`, 'yellow'));
|
|
131
|
+
console.log(colorize(` Time: ${promptEvalTime.toFixed(2)}s (${promptPercent}% of total)`, 'yellow'));
|
|
132
|
+
console.log(colorize(` Speed: ${(response.prompt_eval_count / promptEvalTime).toFixed(2)} tokens/s`, 'yellow'));
|
|
129
133
|
console.log();
|
|
130
134
|
|
|
131
|
-
|
|
135
|
+
console.log(colorize('Phase 3: Response Generation (Creating output)', 'yellow'));
|
|
136
|
+
console.log(colorize(` Tokens: ${response.eval_count}`, 'yellow'));
|
|
137
|
+
console.log(colorize(` Time: ${generationTime.toFixed(2)}s (${genPercent}% of total)`, 'yellow'));
|
|
138
|
+
console.log(colorize(` Speed: ${tokensPerSecond.toFixed(2)} tokens/s`, 'yellow'));
|
|
139
|
+
console.log();
|
|
140
|
+
|
|
141
|
+
console.log(colorize('Summary', 'green'));
|
|
142
|
+
console.log(colorize(` Total time: ${totalTime.toFixed(2)}s`, 'green'));
|
|
143
|
+
console.log(colorize(` Generation speed: ${tokensPerSecond.toFixed(2)} tokens/s`, 'green'));
|
|
144
|
+
console.log();
|
|
145
|
+
|
|
146
|
+
return {
|
|
147
|
+
model,
|
|
148
|
+
tokensPerSecond,
|
|
149
|
+
loadTime,
|
|
150
|
+
promptEvalTime,
|
|
151
|
+
generationTime,
|
|
152
|
+
totalTime
|
|
153
|
+
};
|
|
132
154
|
} catch (error) {
|
|
133
155
|
clearInterval(loadingAnimation);
|
|
134
|
-
|
|
135
|
-
|
|
156
|
+
process.stdout.write('\r' + ' '.repeat(50) + '\r');
|
|
157
|
+
console.log(colorize(`Error benchmarking ${model}: ${(error as Error).message}`, 'red'));
|
|
158
|
+
console.log();
|
|
159
|
+
return {
|
|
160
|
+
model,
|
|
161
|
+
tokensPerSecond: 0,
|
|
162
|
+
loadTime: 0,
|
|
163
|
+
promptEvalTime: 0,
|
|
164
|
+
generationTime: 0,
|
|
165
|
+
totalTime: 0
|
|
166
|
+
};
|
|
136
167
|
}
|
|
137
168
|
}
|
|
138
169
|
|
|
@@ -143,21 +174,23 @@ export async function main(): Promise<void> {
|
|
|
143
174
|
const models = process.argv.slice(2);
|
|
144
175
|
|
|
145
176
|
if (models.length === 0) {
|
|
146
|
-
console.log(colorize(
|
|
177
|
+
console.log(colorize(`Error: No models provided. Please specify at least one model.`, 'red'));
|
|
147
178
|
process.exit(1);
|
|
148
179
|
}
|
|
149
180
|
|
|
150
|
-
console.log(colorize(
|
|
151
|
-
console.log(colorize(
|
|
181
|
+
console.log(colorize(`Ollama Benchmark Script`, 'cyan'));
|
|
182
|
+
console.log(colorize('═'.repeat(50), 'cyan'));
|
|
152
183
|
|
|
153
184
|
// Pull models
|
|
185
|
+
console.log(colorize('\nPhase: Model Preparation', 'cyan'));
|
|
186
|
+
console.log(colorize('─'.repeat(50), 'cyan'));
|
|
154
187
|
for (const model of models) {
|
|
155
188
|
await pullModel(model);
|
|
156
189
|
}
|
|
157
190
|
|
|
158
|
-
console.log();
|
|
159
|
-
|
|
160
191
|
// Benchmark models
|
|
192
|
+
console.log(colorize('\nPhase: Performance Testing', 'cyan'));
|
|
193
|
+
console.log(colorize('─'.repeat(50), 'cyan'));
|
|
161
194
|
const results: BenchmarkResult[] = [];
|
|
162
195
|
for (const model of models) {
|
|
163
196
|
const result = await benchmarkModel(model);
|
|
@@ -165,12 +198,15 @@ export async function main(): Promise<void> {
|
|
|
165
198
|
}
|
|
166
199
|
|
|
167
200
|
// Find the best performing model
|
|
168
|
-
const bestModel = results.reduce((best, current) =>
|
|
201
|
+
const bestModel = results.reduce((best, current) =>
|
|
169
202
|
current.tokensPerSecond > best.tokensPerSecond ? current : best
|
|
170
203
|
);
|
|
171
204
|
|
|
172
|
-
console.log(colorize(
|
|
173
|
-
console.log(colorize(
|
|
205
|
+
console.log(colorize('Final Results', 'magenta'));
|
|
206
|
+
console.log(colorize('═'.repeat(50), 'magenta'));
|
|
207
|
+
console.log(colorize(`Best performing model: ${bestModel.model}`, 'magenta'));
|
|
208
|
+
console.log(colorize(`Generation speed: ${bestModel.tokensPerSecond.toFixed(2)} tokens/s`, 'magenta'));
|
|
209
|
+
console.log(colorize(`Total time: ${bestModel.totalTime.toFixed(2)}s`, 'magenta'));
|
|
174
210
|
}
|
|
175
211
|
|
|
176
212
|
if (import.meta.url === import.meta.resolve(process.argv[1])) {
|