@whatcanirun/cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,301 @@
1
+ import { createHash } from 'crypto';
2
+ import { existsSync, readdirSync, readFileSync, statSync } from 'fs';
3
+ import { homedir } from 'os';
4
+ import { basename, extname, join, resolve } from 'path';
5
+
6
+ // -----------------------------------------------------------------------------
7
+ // Types
8
+ // -----------------------------------------------------------------------------
9
+
10
/**
 * Metadata collected about a single model artifact — a local file, a local
 * directory of shards, or a HuggingFace repo reference.
 */
export interface ModelInfo {
  /** Human-readable name: the file basename, or the repo-name part of an HF repo ID. */
  display_name: string;
  /** The model reference exactly as given by the user (path or repo ID). */
  path: string;
  /** Detected format: 'gguf' | 'safetensors' | 'bin' | 'pytorch' | 'mlx' | 'unknown'. */
  format: string;
  /** Quantization tag inferred from the name (e.g. 'q4_k_m', '4bit'), or null. */
  quant: string | null;
  /** SHA-256 of the artifact (largest shard for directories); '' when unavailable. */
  artifact_sha256: string;
  /** HuggingFace repo ID when the model was referenced by repo; absent otherwise. */
  source?: string;
  /** Size in bytes: file size, or sum of .safetensors shards for a directory. */
  file_size_bytes?: number;
  /** Formatted parameter count (e.g. '7.0B') when config.json declares num_parameters. */
  parameters?: string;
  /** Architecture from config.json: model_type, falling back to architectures[0]. */
  architecture?: string;
}
21
+
22
+ // -----------------------------------------------------------------------------
23
+ // Constants
24
+ // -----------------------------------------------------------------------------
25
+
26
+ const QUANT_PATTERNS = [
27
+ /\b(q2_k)\b/i,
28
+ /\b(q3_k_[sml])\b/i,
29
+ /\b(q4_0)\b/i,
30
+ /\b(q4_1)\b/i,
31
+ /\b(q4_k_[sml])\b/i,
32
+ /\b(q4_k_xl)\b/i,
33
+ /\b(q5_0)\b/i,
34
+ /\b(q5_1)\b/i,
35
+ /\b(q5_k_[sml])\b/i,
36
+ /\b(q6_k)\b/i,
37
+ /\b(q8_0)\b/i,
38
+ /\b(fp16)\b/i,
39
+ /\b(fp32)\b/i,
40
+ /\b(f16)\b/i,
41
+ /\b(f32)\b/i,
42
+ /\b(awq)\b/i,
43
+ /\b(gptq)\b/i,
44
+ /\b(bnb)\b/i,
45
+ ];
46
+
47
+ const MLX_BIT_PATTERNS = [/(\d+)[\s-]*bit/i];
48
+
49
+ // -----------------------------------------------------------------------------
50
+ // Functions
51
+ // -----------------------------------------------------------------------------
52
+
53
+ export function inferQuant(name: string): string | null {
54
+ // Try GGUF-style quant patterns first
55
+ for (const pattern of QUANT_PATTERNS) {
56
+ const match = name.match(pattern);
57
+ if (match) return match[1]!.toLowerCase();
58
+ }
59
+ // Try MLX-style bit patterns (e.g. "4bit", "8bit")
60
+ for (const pattern of MLX_BIT_PATTERNS) {
61
+ const match = name.match(pattern);
62
+ if (match) return `${match[1]}bit`;
63
+ }
64
+ return null;
65
+ }
66
+
67
+ export function inferFormat(modelPath: string): string {
68
+ const ext = extname(modelPath).toLowerCase();
69
+ if (ext === '.gguf') return 'gguf';
70
+ if (ext === '.safetensors') return 'safetensors';
71
+ if (ext === '.bin') return 'bin';
72
+ if (ext === '.pt' || ext === '.pth') return 'pytorch';
73
+
74
+ // Check if it's an mlx directory
75
+ const configPath = resolve(modelPath, 'config.json');
76
+ if (existsSync(configPath)) {
77
+ try {
78
+ const config = JSON.parse(readFileSync(configPath, 'utf-8'));
79
+ if (config.model_type) return 'mlx';
80
+ } catch (e: unknown) {
81
+ console.warn(
82
+ `Warning: could not parse ${configPath}: ${e instanceof Error ? e.message : String(e)}`
83
+ );
84
+ }
85
+ }
86
+
87
+ return 'unknown';
88
+ }
89
+
90
+ // -----------------------------------------------------------------------------
91
+ // Helpers
92
+ // -----------------------------------------------------------------------------
93
+
94
+ /**
95
+ * Check if a string looks like a HuggingFace repo ID (e.g. "mlx-community/Qwen3.5-0.8B-4bit").
96
+ */
97
+ export function isHuggingFaceRepoId(ref: string): boolean {
98
+ return /^[\w.-]+\/[\w.-]+$/.test(ref) && !ref.startsWith('/') && !ref.startsWith('.');
99
+ }
100
+
101
+ /**
102
+ * Find the HF cache directory for a given repo ID.
103
+ * Returns the latest snapshot path, or null if not cached.
104
+ */
105
+ export function findHfCachePath(repoId: string): string | null {
106
+ const [org, name] = repoId.split('/');
107
+ const cacheDir = join(homedir(), '.cache', 'huggingface', 'hub', `models--${org}--${name}`);
108
+ const snapshotsDir = join(cacheDir, 'snapshots');
109
+
110
+ if (!existsSync(snapshotsDir)) return null;
111
+
112
+ const snapshots = readdirSync(snapshotsDir).filter(
113
+ (d) => !d.startsWith('.') && statSync(join(snapshotsDir, d)).isDirectory()
114
+ );
115
+
116
+ if (snapshots.length === 0) return null;
117
+
118
+ // Return the most recently modified snapshot
119
+ return snapshots
120
+ .map((d) => ({ name: d, mtime: statSync(join(snapshotsDir, d)).mtimeMs }))
121
+ .sort((a, b) => b.mtime - a.mtime)
122
+ .map((d) => join(snapshotsDir, d.name))[0]!;
123
+ }
124
+
125
+ export async function resolveModel(modelRef: string): Promise<string> {
126
+ // Direct file path or directory (mlx model dir or gguf file)
127
+ const resolved = resolve(modelRef);
128
+ if (existsSync(resolved)) return resolved;
129
+
130
+ // HuggingFace repo ID — return as-is (mlx_lm handles download)
131
+ if (isHuggingFaceRepoId(modelRef)) return modelRef;
132
+
133
+ // Try alias
134
+ const aliases = await loadModelAliases();
135
+ const aliasPath = aliases[modelRef];
136
+ if (aliasPath) {
137
+ const aliasResolved = resolve(aliasPath);
138
+ if (existsSync(aliasResolved)) return aliasResolved;
139
+ throw new Error(`Model alias '${modelRef}' points to '${aliasPath}' which does not exist`);
140
+ }
141
+
142
+ throw new Error(
143
+ `Model not found: '${modelRef}'. Provide a file path, HuggingFace repo ID, or alias from ~/.config/whatcanirun/models.toml`
144
+ );
145
+ }
146
+
147
+ async function loadModelAliases(): Promise<Record<string, string>> {
148
+ const configPath = resolve(homedir(), '.config', 'whatcanirun', 'models.toml');
149
+ if (!existsSync(configPath)) return {};
150
+
151
+ try {
152
+ const content = await Bun.file(configPath).text();
153
+ const { parse } = await import('smol-toml');
154
+ const config = parse(content);
155
+ return (config.models as Record<string, string>) || {};
156
+ } catch (e: unknown) {
157
+ console.warn(
158
+ `Warning: could not parse ~/.config/whatcanirun/models.toml: ${e instanceof Error ? e.message : String(e)}`
159
+ );
160
+ return {};
161
+ }
162
+ }
163
+
164
+ export async function computeSha256(filePath: string): Promise<string> {
165
+ const file = Bun.file(filePath);
166
+ const hasher = createHash('sha256');
167
+ const stream = file.stream();
168
+
169
+ for await (const chunk of stream) {
170
+ hasher.update(chunk);
171
+ }
172
+
173
+ return hasher.digest('hex');
174
+ }
175
+
176
+ /**
177
+ * Compute SHA256 for a directory of safetensors shards.
178
+ * Hashes only the largest shard as a practical proxy.
179
+ */
180
+ async function computeDirSha256(dirPath: string): Promise<string> {
181
+ const files = readdirSync(dirPath).filter((f) => f.endsWith('.safetensors'));
182
+ if (files.length === 0) {
183
+ // Fall back to config.json
184
+ const configPath = join(dirPath, 'config.json');
185
+ if (existsSync(configPath)) return computeSha256(configPath);
186
+ return '';
187
+ }
188
+
189
+ // Hash the largest shard
190
+ const largest = files
191
+ .map((f) => ({ name: f, size: statSync(join(dirPath, f)).size }))
192
+ .sort((a, b) => b.size - a.size)[0]!;
193
+
194
+ return computeSha256(join(dirPath, largest.name));
195
+ }
196
+
197
+ /**
198
+ * Sum file sizes for all safetensors shards in a directory.
199
+ */
200
+ function sumShardSizes(dirPath: string): number {
201
+ return readdirSync(dirPath)
202
+ .filter((f) => f.endsWith('.safetensors'))
203
+ .reduce((sum, f) => sum + statSync(join(dirPath, f)).size, 0);
204
+ }
205
+
206
+ export async function inspectModel(modelRef: string): Promise<ModelInfo> {
207
+ const isHfRepo = isHuggingFaceRepoId(modelRef);
208
+ const name = isHfRepo ? modelRef.split('/')[1]! : basename(modelRef);
209
+
210
+ let format: string;
211
+ let quant: string | null;
212
+ let sha256 = '';
213
+ let fileSizeBytes: number | undefined;
214
+ let parameters: string | undefined;
215
+ let architecture: string | undefined;
216
+ let source: string | undefined;
217
+
218
+ if (isHfRepo) {
219
+ format = 'mlx';
220
+ quant = inferQuant(modelRef);
221
+ source = modelRef;
222
+
223
+ // Try to get metadata from HF cache
224
+ const cachePath = findHfCachePath(modelRef);
225
+ if (cachePath) {
226
+ sha256 = await computeDirSha256(cachePath);
227
+ fileSizeBytes = sumShardSizes(cachePath);
228
+
229
+ try {
230
+ const configPath = join(cachePath, 'config.json');
231
+ if (existsSync(configPath)) {
232
+ const config = JSON.parse(readFileSync(configPath, 'utf-8'));
233
+ architecture = config.model_type || config.architectures?.[0];
234
+ if (config.num_parameters) {
235
+ parameters = formatParamCount(config.num_parameters);
236
+ }
237
+ }
238
+ } catch (e: unknown) {
239
+ console.warn(
240
+ `Warning: could not read model config: ${e instanceof Error ? e.message : String(e)}`
241
+ );
242
+ }
243
+ }
244
+ } else {
245
+ const resolved = resolve(modelRef);
246
+ format = inferFormat(resolved);
247
+ quant = inferQuant(name);
248
+
249
+ try {
250
+ const stat = statSync(resolved);
251
+ if (stat.isFile()) {
252
+ sha256 = await computeSha256(resolved);
253
+ fileSizeBytes = stat.size;
254
+ } else if (stat.isDirectory()) {
255
+ sha256 = await computeDirSha256(resolved);
256
+ fileSizeBytes = sumShardSizes(resolved);
257
+ }
258
+ } catch (e: unknown) {
259
+ console.warn(
260
+ `Warning: could not compute model hash/size: ${e instanceof Error ? e.message : String(e)}`
261
+ );
262
+ }
263
+
264
+ // Try to read architecture and parameters from config.json
265
+ try {
266
+ const stat = statSync(resolved);
267
+ const configPath = stat.isDirectory()
268
+ ? resolve(resolved, 'config.json')
269
+ : resolve(resolved, '..', 'config.json');
270
+ if (existsSync(configPath)) {
271
+ const config = JSON.parse(readFileSync(configPath, 'utf-8'));
272
+ architecture = config.model_type || config.architectures?.[0];
273
+ if (config.num_parameters) {
274
+ parameters = formatParamCount(config.num_parameters);
275
+ }
276
+ }
277
+ } catch (e: unknown) {
278
+ console.warn(
279
+ `Warning: could not read model config: ${e instanceof Error ? e.message : String(e)}`
280
+ );
281
+ }
282
+ }
283
+
284
+ return {
285
+ display_name: name,
286
+ path: modelRef,
287
+ format,
288
+ quant,
289
+ artifact_sha256: sha256,
290
+ source,
291
+ file_size_bytes: fileSizeBytes,
292
+ parameters,
293
+ architecture,
294
+ };
295
+ }
296
+
297
+ function formatParamCount(n: number): string {
298
+ if (n >= 1e9) return `${(n / 1e9).toFixed(1)}B`;
299
+ if (n >= 1e6) return `${(n / 1e6).toFixed(0)}M`;
300
+ return `${n}`;
301
+ }
@@ -0,0 +1,187 @@
1
+ import type { BenchOpts, BenchResult, BenchTrial, RuntimeAdapter, RuntimeInfo } from './types.ts';
2
+
3
+ // -----------------------------------------------------------------------------
4
+ // Types
5
+ // -----------------------------------------------------------------------------
6
+
7
/**
 * One result row from `llama-bench -o json`. Only the fields this adapter
 * consumes are typed explicitly; everything else flows through the index
 * signature.
 */
interface LlamaBenchEntry {
  /** Git commit the llama.cpp binary was built from. */
  build_commit: string;
  /** Sequential build number of the binary. */
  build_number: number;
  /** Path of the model file that was benchmarked. */
  model_filename: string;
  /** Model type label reported by llama-bench. */
  model_type: string;
  /** Model size in bytes as reported by llama-bench. */
  model_size: number;
  /** Parameter count as reported by llama-bench. */
  model_n_params: number;
  /** Prompt tokens processed in this row (>0 for prompt rows, 0 for gen rows). */
  n_prompt: number;
  /** Generated tokens in this row (>0 for generation rows, 0 for prompt rows). */
  n_gen: number;
  /** Mean tokens/second across repetitions. */
  avg_ts: number;
  /** Standard deviation of tokens/second across repetitions. */
  stddev_ts: number;
  /** Per-repetition tokens/second samples. */
  samples_ts: number[];
  [key: string]: unknown;
}
21
+
22
+ // -----------------------------------------------------------------------------
23
+ // Adapter
24
+ // -----------------------------------------------------------------------------
25
+
26
+ export class LlamaCppAdapter implements RuntimeAdapter {
27
+ name = 'llama.cpp';
28
+
29
+ async detect(): Promise<RuntimeInfo | null> {
30
+ // llama-cli --version gives clean output like "version: 8240 (d088d5b74)"
31
+ for (const bin of ['llama-cli', 'llama-completion', 'llama-cpp', 'main']) {
32
+ try {
33
+ const proc = Bun.spawn([bin, '--version'], {
34
+ stdout: 'pipe',
35
+ stderr: 'pipe',
36
+ });
37
+ const stdout = (await new Response(proc.stdout).text()).trim();
38
+ const stderr = (await new Response(proc.stderr).text()).trim();
39
+ const code = await proc.exited;
40
+ if (code !== 0) continue;
41
+
42
+ const output = stdout || stderr;
43
+ const versionMatch = output.match(/version:\s*(\d+)\s*\((\w+)\)/);
44
+ if (versionMatch) {
45
+ return {
46
+ name: this.name,
47
+ version: `b${versionMatch[1]}`,
48
+ build_flags: 'metal',
49
+ };
50
+ }
51
+
52
+ const fallbackMatch = output.match(/version:\s*(\S+)|llama\.cpp\s+(\S+)|build:\s*(\d+)/i);
53
+ const version =
54
+ fallbackMatch?.[1] || fallbackMatch?.[2] || fallbackMatch?.[3] || output.slice(0, 50);
55
+ return { name: this.name, version };
56
+ } catch (e: unknown) {
57
+ if (e instanceof Error && 'code' in e && (e as NodeJS.ErrnoException).code === 'ENOENT') {
58
+ continue;
59
+ }
60
+ console.warn(
61
+ `Warning: failed to run ${bin}: ${e instanceof Error ? e.message : String(e)}`
62
+ );
63
+ continue;
64
+ }
65
+ }
66
+ return null;
67
+ }
68
+
69
+ async benchmark(opts: BenchOpts): Promise<BenchResult> {
70
+ const args = [
71
+ '-m',
72
+ opts.model,
73
+ '-p',
74
+ String(opts.promptTokens),
75
+ '-n',
76
+ String(opts.genTokens),
77
+ '-r',
78
+ String(opts.numTrials),
79
+ '-o',
80
+ 'json',
81
+ ];
82
+
83
+ const proc = Bun.spawn(['llama-bench', ...args], {
84
+ stdout: 'pipe',
85
+ stderr: 'pipe',
86
+ });
87
+
88
+ // Stream both stdout and stderr concurrently to avoid pipe buffer deadlock.
89
+ const stdoutChunks: string[] = [];
90
+ const stderrChunks: string[] = [];
91
+ let trialsSeen = 0;
92
+ const totalTrials = opts.numTrials * 2;
93
+
94
+ const streamStdout = (async () => {
95
+ const decoder = new TextDecoder();
96
+ for await (const chunk of proc.stdout) {
97
+ stdoutChunks.push(decoder.decode(chunk, { stream: true }));
98
+ }
99
+ })();
100
+
101
+ const streamStderr = (async () => {
102
+ let buffer = '';
103
+ const decoder = new TextDecoder();
104
+ for await (const chunk of proc.stderr) {
105
+ const text = decoder.decode(chunk, { stream: true });
106
+ stderrChunks.push(text);
107
+ buffer += text;
108
+
109
+ const lines = buffer.split('\n');
110
+ buffer = lines.pop()!;
111
+ for (const line of lines) {
112
+ if (/^\s*\|/.test(line) && /\d/.test(line)) {
113
+ trialsSeen++;
114
+ const fields = line.split('|').filter((f) => f.trim());
115
+ const tpsField = fields[fields.length - 1]?.trim();
116
+ const tps =
117
+ tpsField && /^[\d.]+$/.test(tpsField)
118
+ ? ` — ${parseFloat(tpsField).toFixed(1)} tok/s`
119
+ : '';
120
+ opts.onProgress?.(`Trial ${trialsSeen}/${totalTrials}${tps}`);
121
+ }
122
+ }
123
+ }
124
+ if (buffer) stderrChunks.push(buffer);
125
+ })();
126
+
127
+ await Promise.all([streamStdout, streamStderr]);
128
+ const stdout = stdoutChunks.join('');
129
+ const stderr = stderrChunks.join('');
130
+ const code = await proc.exited;
131
+
132
+ if (code !== 0) {
133
+ const errMsg = stderr.trim() || stdout.trim() || `exit code ${code}`;
134
+ throw new Error(`llama-bench failed: ${errMsg}`);
135
+ }
136
+
137
+ return this.parseOutput(stdout, opts.promptTokens, opts.genTokens);
138
+ }
139
+
140
+ /**
141
+ * Parse llama-bench -o json output.
142
+ * Returns an array with two entries: one for prompt (n_prompt>0, n_gen==0)
143
+ * and one for generation (n_gen>0, n_prompt==0).
144
+ */
145
+ private parseOutput(stdout: string, promptTokens: number, genTokens: number): BenchResult {
146
+ let entries: LlamaBenchEntry[];
147
+ try {
148
+ entries = JSON.parse(stdout);
149
+ } catch {
150
+ throw new Error(
151
+ `Could not parse llama-bench JSON output. Raw output:\n${stdout}\nPlease file an issue.`
152
+ );
153
+ }
154
+
155
+ const promptEntry = entries.find((e) => e.n_prompt > 0 && e.n_gen === 0);
156
+ const genEntry = entries.find((e) => e.n_gen > 0 && e.n_prompt === 0);
157
+
158
+ if (!promptEntry || !genEntry) {
159
+ throw new Error(
160
+ `Expected both prompt and generation entries from llama-bench. Got ${entries.length} entries.`
161
+ );
162
+ }
163
+
164
+ // Build per-trial data from samples_ts arrays
165
+ const numTrials = Math.min(promptEntry.samples_ts.length, genEntry.samples_ts.length);
166
+ const trials: BenchTrial[] = [];
167
+
168
+ for (let i = 0; i < numTrials; i++) {
169
+ trials.push({
170
+ promptTps: promptEntry.samples_ts[i]!,
171
+ generationTps: genEntry.samples_ts[i]!,
172
+ peakMemoryGb: 0, // llama-bench doesn't report memory
173
+ });
174
+ }
175
+
176
+ return {
177
+ promptTokens,
178
+ completionTokens: genTokens,
179
+ trials,
180
+ averages: {
181
+ promptTps: promptEntry.avg_ts,
182
+ generationTps: genEntry.avg_ts,
183
+ peakMemoryGb: 0,
184
+ },
185
+ };
186
+ }
187
+ }
@@ -0,0 +1,190 @@
1
+ import type { BenchOpts, BenchResult, BenchTrial, RuntimeAdapter, RuntimeInfo } from './types.ts';
2
+
3
+ // -----------------------------------------------------------------------------
4
+ // Adapter
5
+ // -----------------------------------------------------------------------------
6
+
7
+ export class MlxAdapter implements RuntimeAdapter {
8
+ name = 'mlx_lm';
9
+
10
+ private useCli = false;
11
+
12
+ async detect(): Promise<RuntimeInfo | null> {
13
+ // Try the standalone CLI first (e.g. Homebrew install).
14
+ try {
15
+ const proc = Bun.spawn(['mlx_lm', '--version'], {
16
+ stdout: 'pipe',
17
+ stderr: 'ignore',
18
+ });
19
+ const version = (await new Response(proc.stdout).text()).trim();
20
+ const code = await proc.exited;
21
+ if (code === 0 && version) {
22
+ this.useCli = true;
23
+ return { name: this.name, version };
24
+ }
25
+ } catch (e: unknown) {
26
+ if (!(e instanceof Error && 'code' in e && (e as NodeJS.ErrnoException).code === 'ENOENT')) {
27
+ console.warn(
28
+ `Warning: mlx_lm CLI found but failed: ${e instanceof Error ? e.message : String(e)}`
29
+ );
30
+ }
31
+ }
32
+
33
+ // Fall back to Python module.
34
+ try {
35
+ const proc = Bun.spawn(['python3', '-c', 'import mlx_lm; print(mlx_lm.__version__)'], {
36
+ stdout: 'pipe',
37
+ stderr: 'ignore',
38
+ });
39
+ const version = (await new Response(proc.stdout).text()).trim();
40
+ const code = await proc.exited;
41
+ if (code !== 0 || !version) return null;
42
+ return { name: this.name, version };
43
+ } catch {
44
+ return null;
45
+ }
46
+ }
47
+
48
+ async benchmark(opts: BenchOpts): Promise<BenchResult> {
49
+ const benchArgs = [
50
+ '--model',
51
+ opts.model,
52
+ '--prompt-tokens',
53
+ String(opts.promptTokens),
54
+ '--generation-tokens',
55
+ String(opts.genTokens),
56
+ '--num-trials',
57
+ String(opts.numTrials),
58
+ ];
59
+
60
+ const cmd = this.useCli
61
+ ? ['mlx_lm', 'benchmark', ...benchArgs]
62
+ : ['python3', '-m', 'mlx_lm.benchmark', ...benchArgs];
63
+
64
+ const proc = Bun.spawn(cmd, {
65
+ stdout: 'pipe',
66
+ stderr: 'pipe',
67
+ env: { ...process.env, PYTHONUNBUFFERED: '1' },
68
+ });
69
+
70
+ // Stream both stdout and stderr concurrently for progress reporting.
71
+ const stdoutChunks: string[] = [];
72
+ const stderrChunks: string[] = [];
73
+
74
+ const streamStdout = (async () => {
75
+ let buffer = '';
76
+ const decoder = new TextDecoder();
77
+ for await (const chunk of proc.stdout) {
78
+ const text = decoder.decode(chunk, { stream: true });
79
+ stdoutChunks.push(text);
80
+ buffer += text;
81
+
82
+ const lines = buffer.split('\n');
83
+ buffer = lines.pop()!;
84
+ for (const line of lines) {
85
+ if (/warmup/i.test(line)) {
86
+ opts.onProgress?.('Warming up...');
87
+ } else {
88
+ const trialMatch = line.match(/^\s*Trial\s+(\d+):/);
89
+ if (trialMatch) {
90
+ const tpsMatch = line.match(/generation_tps=([\d.]+)/);
91
+ const tps = tpsMatch ? ` — ${parseFloat(tpsMatch[1]!).toFixed(1)} tok/s` : '';
92
+ opts.onProgress?.(`Trial ${trialMatch[1]}/${opts.numTrials}${tps}`);
93
+ }
94
+ }
95
+ }
96
+ }
97
+ if (buffer) stdoutChunks.push('');
98
+ })();
99
+
100
+ const streamStderr = (async () => {
101
+ const decoder = new TextDecoder();
102
+ for await (const chunk of proc.stderr) {
103
+ const text = decoder.decode(chunk, { stream: true });
104
+ stderrChunks.push(text);
105
+
106
+ // HF download progress uses \r for progress bars.
107
+ const segments = text.split(/[\r\n]/);
108
+ for (const seg of segments) {
109
+ if (/Fetching|Downloading|downloading/i.test(seg)) {
110
+ // Extract percentage if present (e.g. "Downloading: 45%").
111
+ const pctMatch = seg.match(/(\d+)%/);
112
+ if (pctMatch) {
113
+ opts.onProgress?.(`Downloading model... ${pctMatch[1]}%`);
114
+ } else {
115
+ opts.onProgress?.('Downloading model...');
116
+ }
117
+ }
118
+ }
119
+ }
120
+ })();
121
+
122
+ await Promise.all([streamStdout, streamStderr]);
123
+ const code = await proc.exited;
124
+
125
+ const stdout = stdoutChunks.join('');
126
+ const stderr = stderrChunks.join('');
127
+
128
+ if (code !== 0) {
129
+ const errMsg = stderr.trim() || stdout.trim() || `exit code ${code}`;
130
+ throw new Error(`mlx_lm.benchmark failed: ${errMsg}`);
131
+ }
132
+
133
+ return this.parseOutput(stdout, opts.promptTokens, opts.genTokens);
134
+ }
135
+
136
+ /**
137
+ * Parse mlx_lm.benchmark stdout. Expected format:
138
+ * Running warmup..
139
+ * Timing with prompt_tokens=64, generation_tokens=32, batch_size=1.
140
+ * Trial 1: prompt_tps=1334.858, generation_tps=282.768, peak_memory=0.429
141
+ * Trial 2: prompt_tps=1259.967, generation_tps=252.029, peak_memory=0.429
142
+ * Averages: prompt_tps=1297.412, generation_tps=267.399, peak_memory=0.429
143
+ */
144
+ private parseOutput(stdout: string, promptTokens: number, genTokens: number): BenchResult {
145
+ const lines = stdout.split('\n');
146
+ const trials: BenchTrial[] = [];
147
+ let averages: BenchResult['averages'] | null = null;
148
+
149
+ const metricsPattern = /prompt_tps=([\d.]+),\s*generation_tps=([\d.]+),\s*peak_memory=([\d.]+)/;
150
+
151
+ for (const line of lines) {
152
+ const match = line.match(metricsPattern);
153
+ if (!match) continue;
154
+
155
+ const parsed = {
156
+ promptTps: parseFloat(match[1]!),
157
+ generationTps: parseFloat(match[2]!),
158
+ peakMemoryGb: parseFloat(match[3]!),
159
+ };
160
+
161
+ if (line.startsWith('Averages:')) {
162
+ averages = parsed;
163
+ } else if (/^\s*Trial\s+\d+:/.test(line)) {
164
+ trials.push(parsed);
165
+ }
166
+ }
167
+
168
+ if (trials.length === 0) {
169
+ throw new Error(
170
+ `Could not parse benchmark output. Raw output:\n${stdout}\nPlease file an issue.`
171
+ );
172
+ }
173
+
174
+ // If no averages line, compute from trials
175
+ if (!averages) {
176
+ averages = {
177
+ promptTps: trials.reduce((s, t) => s + t.promptTps, 0) / trials.length,
178
+ generationTps: trials.reduce((s, t) => s + t.generationTps, 0) / trials.length,
179
+ peakMemoryGb: Math.max(...trials.map((t) => t.peakMemoryGb)),
180
+ };
181
+ }
182
+
183
+ return {
184
+ promptTokens,
185
+ completionTokens: genTokens,
186
+ trials,
187
+ averages,
188
+ };
189
+ }
190
+ }
@@ -0,0 +1,29 @@
1
+ import { LlamaCppAdapter } from './llamacpp.ts';
2
+ import { MlxAdapter } from './mlx.ts';
3
+ import type { RuntimeAdapter } from './types.ts';
4
+
5
+ // -----------------------------------------------------------------------------
6
+ // Registry
7
+ // -----------------------------------------------------------------------------
8
+
9
+ const RUNTIMES: Record<string, () => RuntimeAdapter> = {
10
+ mlx_lm: () => new MlxAdapter(),
11
+ 'llama.cpp': () => new LlamaCppAdapter(),
12
+ };
13
+
14
+ // -----------------------------------------------------------------------------
15
+ // Functions
16
+ // -----------------------------------------------------------------------------
17
+
18
+ export function resolveRuntime(name: string): RuntimeAdapter {
19
+ const factory = RUNTIMES[name];
20
+ if (!factory) {
21
+ const valid = Object.keys(RUNTIMES).join(', ');
22
+ throw new Error(`Unknown runtime '${name}'. Supported: ${valid}`);
23
+ }
24
+ return factory();
25
+ }
26
+
27
+ export function listRuntimes(): string[] {
28
+ return Object.keys(RUNTIMES);
29
+ }