@whatcanirun/cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,301 @@
1
+ import { createHash } from 'crypto';
2
+ import { existsSync, readdirSync, readFileSync, statSync } from 'fs';
3
+ import { homedir } from 'os';
4
+ import { basename, extname, join, resolve } from 'path';
5
+
6
+ // -----------------------------------------------------------------------------
7
+ // Types
8
+ // -----------------------------------------------------------------------------
9
+
10
/**
 * Metadata collected about a single model artifact — a local file, a local
 * directory of shards, or a HuggingFace repo reference.
 */
export interface ModelInfo {
  /** Human-readable name: the file basename, or the repo-name part of an HF repo ID. */
  display_name: string;
  /** The model reference exactly as given by the user (path or repo ID). */
  path: string;
  /** Detected format: 'gguf' | 'safetensors' | 'bin' | 'pytorch' | 'mlx' | 'unknown'. */
  format: string;
  /** Quantization tag inferred from the name (e.g. 'q4_k_m', '4bit'), or null. */
  quant: string | null;
  /** SHA-256 of the artifact (largest shard for directories); '' when unavailable. */
  artifact_sha256: string;
  /** HuggingFace repo ID when the model was referenced by repo; absent otherwise. */
  source?: string;
  /** Size in bytes: file size, or sum of .safetensors shards for a directory. */
  file_size_bytes?: number;
  /** Formatted parameter count (e.g. '7.0B') when config.json declares num_parameters. */
  parameters?: string;
  /** Architecture from config.json: model_type, falling back to architectures[0]. */
  architecture?: string;
}
21
+
22
+ // -----------------------------------------------------------------------------
23
+ // Constants
24
+ // -----------------------------------------------------------------------------
25
+
26
+ const QUANT_PATTERNS = [
27
+ /\b(q2_k)\b/i,
28
+ /\b(q3_k_[sml])\b/i,
29
+ /\b(q4_0)\b/i,
30
+ /\b(q4_1)\b/i,
31
+ /\b(q4_k_[sml])\b/i,
32
+ /\b(q4_k_xl)\b/i,
33
+ /\b(q5_0)\b/i,
34
+ /\b(q5_1)\b/i,
35
+ /\b(q5_k_[sml])\b/i,
36
+ /\b(q6_k)\b/i,
37
+ /\b(q8_0)\b/i,
38
+ /\b(fp16)\b/i,
39
+ /\b(fp32)\b/i,
40
+ /\b(f16)\b/i,
41
+ /\b(f32)\b/i,
42
+ /\b(awq)\b/i,
43
+ /\b(gptq)\b/i,
44
+ /\b(bnb)\b/i,
45
+ ];
46
+
47
+ const MLX_BIT_PATTERNS = [/(\d+)[\s-]*bit/i];
48
+
49
+ // -----------------------------------------------------------------------------
50
+ // Functions
51
+ // -----------------------------------------------------------------------------
52
+
53
+ export function inferQuant(name: string): string | null {
54
+ // Try GGUF-style quant patterns first
55
+ for (const pattern of QUANT_PATTERNS) {
56
+ const match = name.match(pattern);
57
+ if (match) return match[1]!.toLowerCase();
58
+ }
59
+ // Try MLX-style bit patterns (e.g. "4bit", "8bit")
60
+ for (const pattern of MLX_BIT_PATTERNS) {
61
+ const match = name.match(pattern);
62
+ if (match) return `${match[1]}bit`;
63
+ }
64
+ return null;
65
+ }
66
+
67
+ export function inferFormat(modelPath: string): string {
68
+ const ext = extname(modelPath).toLowerCase();
69
+ if (ext === '.gguf') return 'gguf';
70
+ if (ext === '.safetensors') return 'safetensors';
71
+ if (ext === '.bin') return 'bin';
72
+ if (ext === '.pt' || ext === '.pth') return 'pytorch';
73
+
74
+ // Check if it's an mlx directory
75
+ const configPath = resolve(modelPath, 'config.json');
76
+ if (existsSync(configPath)) {
77
+ try {
78
+ const config = JSON.parse(readFileSync(configPath, 'utf-8'));
79
+ if (config.model_type) return 'mlx';
80
+ } catch (e: unknown) {
81
+ console.warn(
82
+ `Warning: could not parse ${configPath}: ${e instanceof Error ? e.message : String(e)}`
83
+ );
84
+ }
85
+ }
86
+
87
+ return 'unknown';
88
+ }
89
+
90
+ // -----------------------------------------------------------------------------
91
+ // Helpers
92
+ // -----------------------------------------------------------------------------
93
+
94
+ /**
95
+ * Check if a string looks like a HuggingFace repo ID (e.g. "mlx-community/Qwen3.5-0.8B-4bit").
96
+ */
97
+ export function isHuggingFaceRepoId(ref: string): boolean {
98
+ return /^[\w.-]+\/[\w.-]+$/.test(ref) && !ref.startsWith('/') && !ref.startsWith('.');
99
+ }
100
+
101
+ /**
102
+ * Find the HF cache directory for a given repo ID.
103
+ * Returns the latest snapshot path, or null if not cached.
104
+ */
105
+ export function findHfCachePath(repoId: string): string | null {
106
+ const [org, name] = repoId.split('/');
107
+ const cacheDir = join(homedir(), '.cache', 'huggingface', 'hub', `models--${org}--${name}`);
108
+ const snapshotsDir = join(cacheDir, 'snapshots');
109
+
110
+ if (!existsSync(snapshotsDir)) return null;
111
+
112
+ const snapshots = readdirSync(snapshotsDir).filter(
113
+ (d) => !d.startsWith('.') && statSync(join(snapshotsDir, d)).isDirectory()
114
+ );
115
+
116
+ if (snapshots.length === 0) return null;
117
+
118
+ // Return the most recently modified snapshot
119
+ return snapshots
120
+ .map((d) => ({ name: d, mtime: statSync(join(snapshotsDir, d)).mtimeMs }))
121
+ .sort((a, b) => b.mtime - a.mtime)
122
+ .map((d) => join(snapshotsDir, d.name))[0]!;
123
+ }
124
+
125
+ export async function resolveModel(modelRef: string): Promise<string> {
126
+ // Direct file path or directory (mlx model dir or gguf file)
127
+ const resolved = resolve(modelRef);
128
+ if (existsSync(resolved)) return resolved;
129
+
130
+ // HuggingFace repo ID — return as-is (mlx_lm handles download)
131
+ if (isHuggingFaceRepoId(modelRef)) return modelRef;
132
+
133
+ // Try alias
134
+ const aliases = await loadModelAliases();
135
+ const aliasPath = aliases[modelRef];
136
+ if (aliasPath) {
137
+ const aliasResolved = resolve(aliasPath);
138
+ if (existsSync(aliasResolved)) return aliasResolved;
139
+ throw new Error(`Model alias '${modelRef}' points to '${aliasPath}' which does not exist`);
140
+ }
141
+
142
+ throw new Error(
143
+ `Model not found: '${modelRef}'. Provide a file path, HuggingFace repo ID, or alias from ~/.config/whatcanirun/models.toml`
144
+ );
145
+ }
146
+
147
+ async function loadModelAliases(): Promise<Record<string, string>> {
148
+ const configPath = resolve(homedir(), '.config', 'whatcanirun', 'models.toml');
149
+ if (!existsSync(configPath)) return {};
150
+
151
+ try {
152
+ const content = await Bun.file(configPath).text();
153
+ const { parse } = await import('smol-toml');
154
+ const config = parse(content);
155
+ return (config.models as Record<string, string>) || {};
156
+ } catch (e: unknown) {
157
+ console.warn(
158
+ `Warning: could not parse ~/.config/whatcanirun/models.toml: ${e instanceof Error ? e.message : String(e)}`
159
+ );
160
+ return {};
161
+ }
162
+ }
163
+
164
+ export async function computeSha256(filePath: string): Promise<string> {
165
+ const file = Bun.file(filePath);
166
+ const hasher = createHash('sha256');
167
+ const stream = file.stream();
168
+
169
+ for await (const chunk of stream) {
170
+ hasher.update(chunk);
171
+ }
172
+
173
+ return hasher.digest('hex');
174
+ }
175
+
176
+ /**
177
+ * Compute SHA256 for a directory of safetensors shards.
178
+ * Hashes only the largest shard as a practical proxy.
179
+ */
180
+ async function computeDirSha256(dirPath: string): Promise<string> {
181
+ const files = readdirSync(dirPath).filter((f) => f.endsWith('.safetensors'));
182
+ if (files.length === 0) {
183
+ // Fall back to config.json
184
+ const configPath = join(dirPath, 'config.json');
185
+ if (existsSync(configPath)) return computeSha256(configPath);
186
+ return '';
187
+ }
188
+
189
+ // Hash the largest shard
190
+ const largest = files
191
+ .map((f) => ({ name: f, size: statSync(join(dirPath, f)).size }))
192
+ .sort((a, b) => b.size - a.size)[0]!;
193
+
194
+ return computeSha256(join(dirPath, largest.name));
195
+ }
196
+
197
+ /**
198
+ * Sum file sizes for all safetensors shards in a directory.
199
+ */
200
+ function sumShardSizes(dirPath: string): number {
201
+ return readdirSync(dirPath)
202
+ .filter((f) => f.endsWith('.safetensors'))
203
+ .reduce((sum, f) => sum + statSync(join(dirPath, f)).size, 0);
204
+ }
205
+
206
+ export async function inspectModel(modelRef: string): Promise<ModelInfo> {
207
+ const isHfRepo = isHuggingFaceRepoId(modelRef);
208
+ const name = isHfRepo ? modelRef.split('/')[1]! : basename(modelRef);
209
+
210
+ let format: string;
211
+ let quant: string | null;
212
+ let sha256 = '';
213
+ let fileSizeBytes: number | undefined;
214
+ let parameters: string | undefined;
215
+ let architecture: string | undefined;
216
+ let source: string | undefined;
217
+
218
+ if (isHfRepo) {
219
+ format = 'mlx';
220
+ quant = inferQuant(modelRef);
221
+ source = modelRef;
222
+
223
+ // Try to get metadata from HF cache
224
+ const cachePath = findHfCachePath(modelRef);
225
+ if (cachePath) {
226
+ sha256 = await computeDirSha256(cachePath);
227
+ fileSizeBytes = sumShardSizes(cachePath);
228
+
229
+ try {
230
+ const configPath = join(cachePath, 'config.json');
231
+ if (existsSync(configPath)) {
232
+ const config = JSON.parse(readFileSync(configPath, 'utf-8'));
233
+ architecture = config.model_type || config.architectures?.[0];
234
+ if (config.num_parameters) {
235
+ parameters = formatParamCount(config.num_parameters);
236
+ }
237
+ }
238
+ } catch (e: unknown) {
239
+ console.warn(
240
+ `Warning: could not read model config: ${e instanceof Error ? e.message : String(e)}`
241
+ );
242
+ }
243
+ }
244
+ } else {
245
+ const resolved = resolve(modelRef);
246
+ format = inferFormat(resolved);
247
+ quant = inferQuant(name);
248
+
249
+ try {
250
+ const stat = statSync(resolved);
251
+ if (stat.isFile()) {
252
+ sha256 = await computeSha256(resolved);
253
+ fileSizeBytes = stat.size;
254
+ } else if (stat.isDirectory()) {
255
+ sha256 = await computeDirSha256(resolved);
256
+ fileSizeBytes = sumShardSizes(resolved);
257
+ }
258
+ } catch (e: unknown) {
259
+ console.warn(
260
+ `Warning: could not compute model hash/size: ${e instanceof Error ? e.message : String(e)}`
261
+ );
262
+ }
263
+
264
+ // Try to read architecture and parameters from config.json
265
+ try {
266
+ const stat = statSync(resolved);
267
+ const configPath = stat.isDirectory()
268
+ ? resolve(resolved, 'config.json')
269
+ : resolve(resolved, '..', 'config.json');
270
+ if (existsSync(configPath)) {
271
+ const config = JSON.parse(readFileSync(configPath, 'utf-8'));
272
+ architecture = config.model_type || config.architectures?.[0];
273
+ if (config.num_parameters) {
274
+ parameters = formatParamCount(config.num_parameters);
275
+ }
276
+ }
277
+ } catch (e: unknown) {
278
+ console.warn(
279
+ `Warning: could not read model config: ${e instanceof Error ? e.message : String(e)}`
280
+ );
281
+ }
282
+ }
283
+
284
+ return {
285
+ display_name: name,
286
+ path: modelRef,
287
+ format,
288
+ quant,
289
+ artifact_sha256: sha256,
290
+ source,
291
+ file_size_bytes: fileSizeBytes,
292
+ parameters,
293
+ architecture,
294
+ };
295
+ }
296
+
297
+ function formatParamCount(n: number): string {
298
+ if (n >= 1e9) return `${(n / 1e9).toFixed(1)}B`;
299
+ if (n >= 1e6) return `${(n / 1e6).toFixed(0)}M`;
300
+ return `${n}`;
301
+ }
@@ -0,0 +1,187 @@
1
+ import type { BenchOpts, BenchResult, BenchTrial, RuntimeAdapter, RuntimeInfo } from './types.ts';
2
+
3
+ // -----------------------------------------------------------------------------
4
+ // Types
5
+ // -----------------------------------------------------------------------------
6
+
7
/**
 * One result row from `llama-bench -o json`. Only the fields this adapter
 * consumes are typed explicitly; everything else flows through the index
 * signature.
 */
interface LlamaBenchEntry {
  /** Git commit the llama.cpp binary was built from. */
  build_commit: string;
  /** Sequential build number of the binary. */
  build_number: number;
  /** Path of the model file that was benchmarked. */
  model_filename: string;
  /** Model type label reported by llama-bench. */
  model_type: string;
  /** Model size in bytes as reported by llama-bench. */
  model_size: number;
  /** Parameter count as reported by llama-bench. */
  model_n_params: number;
  /** Prompt tokens processed in this row (>0 for prompt rows, 0 for gen rows). */
  n_prompt: number;
  /** Generated tokens in this row (>0 for generation rows, 0 for prompt rows). */
  n_gen: number;
  /** Mean tokens/second across repetitions. */
  avg_ts: number;
  /** Standard deviation of tokens/second across repetitions. */
  stddev_ts: number;
  /** Per-repetition tokens/second samples. */
  samples_ts: number[];
  [key: string]: unknown;
}
21
+
22
+ // -----------------------------------------------------------------------------
23
+ // Adapter
24
+ // -----------------------------------------------------------------------------
25
+
26
+ export class LlamaCppAdapter implements RuntimeAdapter {
27
+ name = 'llama.cpp';
28
+
29
+ async detect(): Promise<RuntimeInfo | null> {
30
+ // llama-cli --version gives clean output like "version: 8240 (d088d5b74)"
31
+ for (const bin of ['llama-cli', 'llama-completion', 'llama-cpp', 'main']) {
32
+ try {
33
+ const proc = Bun.spawn([bin, '--version'], {
34
+ stdout: 'pipe',
35
+ stderr: 'pipe',
36
+ });
37
+ const stdout = (await new Response(proc.stdout).text()).trim();
38
+ const stderr = (await new Response(proc.stderr).text()).trim();
39
+ const code = await proc.exited;
40
+ if (code !== 0) continue;
41
+
42
+ const output = stdout || stderr;
43
+ const versionMatch = output.match(/version:\s*(\d+)\s*\((\w+)\)/);
44
+ if (versionMatch) {
45
+ return {
46
+ name: this.name,
47
+ version: `b${versionMatch[1]}`,
48
+ build_flags: 'metal',
49
+ };
50
+ }
51
+
52
+ const fallbackMatch = output.match(/version:\s*(\S+)|llama\.cpp\s+(\S+)|build:\s*(\d+)/i);
53
+ const version =
54
+ fallbackMatch?.[1] || fallbackMatch?.[2] || fallbackMatch?.[3] || output.slice(0, 50);
55
+ return { name: this.name, version };
56
+ } catch (e: unknown) {
57
+ if (e instanceof Error && 'code' in e && (e as NodeJS.ErrnoException).code === 'ENOENT') {
58
+ continue;
59
+ }
60
+ console.warn(
61
+ `Warning: failed to run ${bin}: ${e instanceof Error ? e.message : String(e)}`
62
+ );
63
+ continue;
64
+ }
65
+ }
66
+ return null;
67
+ }
68
+
69
+ async benchmark(opts: BenchOpts): Promise<BenchResult> {
70
+ const args = [
71
+ '-m',
72
+ opts.model,
73
+ '-p',
74
+ String(opts.promptTokens),
75
+ '-n',
76
+ String(opts.genTokens),
77
+ '-r',
78
+ String(opts.numTrials),
79
+ '-o',
80
+ 'json',
81
+ ];
82
+
83
+ const proc = Bun.spawn(['llama-bench', ...args], {
84
+ stdout: 'pipe',
85
+ stderr: 'pipe',
86
+ });
87
+
88
+ // Stream both stdout and stderr concurrently to avoid pipe buffer deadlock.
89
+ const stdoutChunks: string[] = [];
90
+ const stderrChunks: string[] = [];
91
+ let trialsSeen = 0;
92
+ const totalTrials = opts.numTrials * 2;
93
+
94
+ const streamStdout = (async () => {
95
+ const decoder = new TextDecoder();
96
+ for await (const chunk of proc.stdout) {
97
+ stdoutChunks.push(decoder.decode(chunk, { stream: true }));
98
+ }
99
+ })();
100
+
101
+ const streamStderr = (async () => {
102
+ let buffer = '';
103
+ const decoder = new TextDecoder();
104
+ for await (const chunk of proc.stderr) {
105
+ const text = decoder.decode(chunk, { stream: true });
106
+ stderrChunks.push(text);
107
+ buffer += text;
108
+
109
+ const lines = buffer.split('\n');
110
+ buffer = lines.pop()!;
111
+ for (const line of lines) {
112
+ if (/^\s*\|/.test(line) && /\d/.test(line)) {
113
+ trialsSeen++;
114
+ const fields = line.split('|').filter((f) => f.trim());
115
+ const tpsField = fields[fields.length - 1]?.trim();
116
+ const tps =
117
+ tpsField && /^[\d.]+$/.test(tpsField)
118
+ ? ` — ${parseFloat(tpsField).toFixed(1)} tok/s`
119
+ : '';
120
+ opts.onProgress?.(`Trial ${trialsSeen}/${totalTrials}${tps}`);
121
+ }
122
+ }
123
+ }
124
+ if (buffer) stderrChunks.push(buffer);
125
+ })();
126
+
127
+ await Promise.all([streamStdout, streamStderr]);
128
+ const stdout = stdoutChunks.join('');
129
+ const stderr = stderrChunks.join('');
130
+ const code = await proc.exited;
131
+
132
+ if (code !== 0) {
133
+ const errMsg = stderr.trim() || stdout.trim() || `exit code ${code}`;
134
+ throw new Error(`llama-bench failed: ${errMsg}`);
135
+ }
136
+
137
+ return this.parseOutput(stdout, opts.promptTokens, opts.genTokens);
138
+ }
139
+
140
+ /**
141
+ * Parse llama-bench -o json output.
142
+ * Returns an array with two entries: one for prompt (n_prompt>0, n_gen==0)
143
+ * and one for generation (n_gen>0, n_prompt==0).
144
+ */
145
+ private parseOutput(stdout: string, promptTokens: number, genTokens: number): BenchResult {
146
+ let entries: LlamaBenchEntry[];
147
+ try {
148
+ entries = JSON.parse(stdout);
149
+ } catch {
150
+ throw new Error(
151
+ `Could not parse llama-bench JSON output. Raw output:\n${stdout}\nPlease file an issue.`
152
+ );
153
+ }
154
+
155
+ const promptEntry = entries.find((e) => e.n_prompt > 0 && e.n_gen === 0);
156
+ const genEntry = entries.find((e) => e.n_gen > 0 && e.n_prompt === 0);
157
+
158
+ if (!promptEntry || !genEntry) {
159
+ throw new Error(
160
+ `Expected both prompt and generation entries from llama-bench. Got ${entries.length} entries.`
161
+ );
162
+ }
163
+
164
+ // Build per-trial data from samples_ts arrays
165
+ const numTrials = Math.min(promptEntry.samples_ts.length, genEntry.samples_ts.length);
166
+ const trials: BenchTrial[] = [];
167
+
168
+ for (let i = 0; i < numTrials; i++) {
169
+ trials.push({
170
+ promptTps: promptEntry.samples_ts[i]!,
171
+ generationTps: genEntry.samples_ts[i]!,
172
+ peakMemoryGb: 0, // llama-bench doesn't report memory
173
+ });
174
+ }
175
+
176
+ return {
177
+ promptTokens,
178
+ completionTokens: genTokens,
179
+ trials,
180
+ averages: {
181
+ promptTps: promptEntry.avg_ts,
182
+ generationTps: genEntry.avg_ts,
183
+ peakMemoryGb: 0,
184
+ },
185
+ };
186
+ }
187
+ }
@@ -0,0 +1,190 @@
1
+ import type { BenchOpts, BenchResult, BenchTrial, RuntimeAdapter, RuntimeInfo } from './types.ts';
2
+
3
+ // -----------------------------------------------------------------------------
4
+ // Adapter
5
+ // -----------------------------------------------------------------------------
6
+
7
+ export class MlxAdapter implements RuntimeAdapter {
8
+ name = 'mlx_lm';
9
+
10
+ private useCli = false;
11
+
12
+ async detect(): Promise<RuntimeInfo | null> {
13
+ // Try the standalone CLI first (e.g. Homebrew install).
14
+ try {
15
+ const proc = Bun.spawn(['mlx_lm', '--version'], {
16
+ stdout: 'pipe',
17
+ stderr: 'ignore',
18
+ });
19
+ const version = (await new Response(proc.stdout).text()).trim();
20
+ const code = await proc.exited;
21
+ if (code === 0 && version) {
22
+ this.useCli = true;
23
+ return { name: this.name, version };
24
+ }
25
+ } catch (e: unknown) {
26
+ if (!(e instanceof Error && 'code' in e && (e as NodeJS.ErrnoException).code === 'ENOENT')) {
27
+ console.warn(
28
+ `Warning: mlx_lm CLI found but failed: ${e instanceof Error ? e.message : String(e)}`
29
+ );
30
+ }
31
+ }
32
+
33
+ // Fall back to Python module.
34
+ try {
35
+ const proc = Bun.spawn(['python3', '-c', 'import mlx_lm; print(mlx_lm.__version__)'], {
36
+ stdout: 'pipe',
37
+ stderr: 'ignore',
38
+ });
39
+ const version = (await new Response(proc.stdout).text()).trim();
40
+ const code = await proc.exited;
41
+ if (code !== 0 || !version) return null;
42
+ return { name: this.name, version };
43
+ } catch {
44
+ return null;
45
+ }
46
+ }
47
+
48
+ async benchmark(opts: BenchOpts): Promise<BenchResult> {
49
+ const benchArgs = [
50
+ '--model',
51
+ opts.model,
52
+ '--prompt-tokens',
53
+ String(opts.promptTokens),
54
+ '--generation-tokens',
55
+ String(opts.genTokens),
56
+ '--num-trials',
57
+ String(opts.numTrials),
58
+ ];
59
+
60
+ const cmd = this.useCli
61
+ ? ['mlx_lm', 'benchmark', ...benchArgs]
62
+ : ['python3', '-m', 'mlx_lm.benchmark', ...benchArgs];
63
+
64
+ const proc = Bun.spawn(cmd, {
65
+ stdout: 'pipe',
66
+ stderr: 'pipe',
67
+ env: { ...process.env, PYTHONUNBUFFERED: '1' },
68
+ });
69
+
70
+ // Stream both stdout and stderr concurrently for progress reporting.
71
+ const stdoutChunks: string[] = [];
72
+ const stderrChunks: string[] = [];
73
+
74
+ const streamStdout = (async () => {
75
+ let buffer = '';
76
+ const decoder = new TextDecoder();
77
+ for await (const chunk of proc.stdout) {
78
+ const text = decoder.decode(chunk, { stream: true });
79
+ stdoutChunks.push(text);
80
+ buffer += text;
81
+
82
+ const lines = buffer.split('\n');
83
+ buffer = lines.pop()!;
84
+ for (const line of lines) {
85
+ if (/warmup/i.test(line)) {
86
+ opts.onProgress?.('Warming up...');
87
+ } else {
88
+ const trialMatch = line.match(/^\s*Trial\s+(\d+):/);
89
+ if (trialMatch) {
90
+ const tpsMatch = line.match(/generation_tps=([\d.]+)/);
91
+ const tps = tpsMatch ? ` — ${parseFloat(tpsMatch[1]!).toFixed(1)} tok/s` : '';
92
+ opts.onProgress?.(`Trial ${trialMatch[1]}/${opts.numTrials}${tps}`);
93
+ }
94
+ }
95
+ }
96
+ }
97
+ if (buffer) stdoutChunks.push('');
98
+ })();
99
+
100
+ const streamStderr = (async () => {
101
+ const decoder = new TextDecoder();
102
+ for await (const chunk of proc.stderr) {
103
+ const text = decoder.decode(chunk, { stream: true });
104
+ stderrChunks.push(text);
105
+
106
+ // HF download progress uses \r for progress bars.
107
+ const segments = text.split(/[\r\n]/);
108
+ for (const seg of segments) {
109
+ if (/Fetching|Downloading|downloading/i.test(seg)) {
110
+ // Extract percentage if present (e.g. "Downloading: 45%").
111
+ const pctMatch = seg.match(/(\d+)%/);
112
+ if (pctMatch) {
113
+ opts.onProgress?.(`Downloading model... ${pctMatch[1]}%`);
114
+ } else {
115
+ opts.onProgress?.('Downloading model...');
116
+ }
117
+ }
118
+ }
119
+ }
120
+ })();
121
+
122
+ await Promise.all([streamStdout, streamStderr]);
123
+ const code = await proc.exited;
124
+
125
+ const stdout = stdoutChunks.join('');
126
+ const stderr = stderrChunks.join('');
127
+
128
+ if (code !== 0) {
129
+ const errMsg = stderr.trim() || stdout.trim() || `exit code ${code}`;
130
+ throw new Error(`mlx_lm.benchmark failed: ${errMsg}`);
131
+ }
132
+
133
+ return this.parseOutput(stdout, opts.promptTokens, opts.genTokens);
134
+ }
135
+
136
+ /**
137
+ * Parse mlx_lm.benchmark stdout. Expected format:
138
+ * Running warmup..
139
+ * Timing with prompt_tokens=64, generation_tokens=32, batch_size=1.
140
+ * Trial 1: prompt_tps=1334.858, generation_tps=282.768, peak_memory=0.429
141
+ * Trial 2: prompt_tps=1259.967, generation_tps=252.029, peak_memory=0.429
142
+ * Averages: prompt_tps=1297.412, generation_tps=267.399, peak_memory=0.429
143
+ */
144
+ private parseOutput(stdout: string, promptTokens: number, genTokens: number): BenchResult {
145
+ const lines = stdout.split('\n');
146
+ const trials: BenchTrial[] = [];
147
+ let averages: BenchResult['averages'] | null = null;
148
+
149
+ const metricsPattern = /prompt_tps=([\d.]+),\s*generation_tps=([\d.]+),\s*peak_memory=([\d.]+)/;
150
+
151
+ for (const line of lines) {
152
+ const match = line.match(metricsPattern);
153
+ if (!match) continue;
154
+
155
+ const parsed = {
156
+ promptTps: parseFloat(match[1]!),
157
+ generationTps: parseFloat(match[2]!),
158
+ peakMemoryGb: parseFloat(match[3]!),
159
+ };
160
+
161
+ if (line.startsWith('Averages:')) {
162
+ averages = parsed;
163
+ } else if (/^\s*Trial\s+\d+:/.test(line)) {
164
+ trials.push(parsed);
165
+ }
166
+ }
167
+
168
+ if (trials.length === 0) {
169
+ throw new Error(
170
+ `Could not parse benchmark output. Raw output:\n${stdout}\nPlease file an issue.`
171
+ );
172
+ }
173
+
174
+ // If no averages line, compute from trials
175
+ if (!averages) {
176
+ averages = {
177
+ promptTps: trials.reduce((s, t) => s + t.promptTps, 0) / trials.length,
178
+ generationTps: trials.reduce((s, t) => s + t.generationTps, 0) / trials.length,
179
+ peakMemoryGb: Math.max(...trials.map((t) => t.peakMemoryGb)),
180
+ };
181
+ }
182
+
183
+ return {
184
+ promptTokens,
185
+ completionTokens: genTokens,
186
+ trials,
187
+ averages,
188
+ };
189
+ }
190
+ }
@@ -0,0 +1,29 @@
1
+ import { LlamaCppAdapter } from './llamacpp.ts';
2
+ import { MlxAdapter } from './mlx.ts';
3
+ import type { RuntimeAdapter } from './types.ts';
4
+
5
+ // -----------------------------------------------------------------------------
6
+ // Registry
7
+ // -----------------------------------------------------------------------------
8
+
9
+ const RUNTIMES: Record<string, () => RuntimeAdapter> = {
10
+ mlx_lm: () => new MlxAdapter(),
11
+ 'llama.cpp': () => new LlamaCppAdapter(),
12
+ };
13
+
14
+ // -----------------------------------------------------------------------------
15
+ // Functions
16
+ // -----------------------------------------------------------------------------
17
+
18
+ export function resolveRuntime(name: string): RuntimeAdapter {
19
+ const factory = RUNTIMES[name];
20
+ if (!factory) {
21
+ const valid = Object.keys(RUNTIMES).join(', ');
22
+ throw new Error(`Unknown runtime '${name}'. Supported: ${valid}`);
23
+ }
24
+ return factory();
25
+ }
26
+
27
+ export function listRuntimes(): string[] {
28
+ return Object.keys(RUNTIMES);
29
+ }