npm - offgrid-ai - Versions diffs - 0.17.0 → 0.18.0 - Mend

offgrid-ai 0.17.0 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/package.json +1 -1
package/resources/recommendations.json +8 -8
package/src/commands/main.mjs +1 -4
package/src/commands/models.mjs +199 -15
package/src/commands/onboard.mjs +6 -106
package/src/commands/run.mjs +1 -0
package/src/commands/status.mjs +1 -0
package/src/commands/stop.mjs +1 -0
package/src/discovery-shared.mjs +2 -3
package/src/download.mjs +221 -0
package/src/harness-pi.mjs +2 -3
package/src/huggingface.mjs +72 -72
package/src/managed.mjs +1 -6
package/src/model-presenters.mjs +1 -23
package/src/model-summary.mjs +2 -2
package/src/omlx-runtime.mjs +29 -4
package/src/process.mjs +3 -5
package/src/profiles.mjs +1 -1
package/src/runtime.mjs +2 -2
package/src/ui.mjs +2 -0
package/resources/hf-download.py +0 -79
package/src/backend-installers.mjs +0 -42

package/src/download.mjs ADDED Viewed

@@ -0,0 +1,221 @@
+// Model download flow — HuggingFace downloads with quant picker and RAM fit.
+// Used by onboarding (no models found) and the model picker (↓ Download a model).
+import { hasHfCli, parseHfRef, resolveHfDownload, downloadModel, listGgufFiles, getHfModelInfo, isMlxRepo } from "./huggingface.mjs";
+import { detectHardware, installedRamGB, getFreeDiskBytes } from "./hardware.mjs";
+import { allFittingModels } from "./recommendations.mjs";
+import { parseModelName } from "./model-name.mjs";
+import { HF_HUB_DIR } from "./config.mjs";
+import { offerOmlxRestart } from "./omlx-runtime.mjs";
+import { homedir } from "node:os";
+import { join } from "node:path";
+import { pc, formatBytes, renderCard, renderRows } from "./ui.mjs";
+const GB = 1024 ** 3;
+/**
+ * Interactive model download flow.
+ * @param {object} prompt - createPrompt() instance
+ * @returns {Promise<boolean>} true if a model was downloaded
+ */
+export async function downloadFlow(prompt) {
+  console.log("");
+  const method = await prompt.choice("Download a model", [
+    { value: "manual", label: "Enter a HuggingFace repo ID" },
+    { value: "recommended", label: "Recommended for your machine" },
+  ], "manual");
+  if (!method) return false;
+  let repo, filename;
+  if (method === "recommended") {
+    const hardware = detectHardware();
+    const models = allFittingModels(hardware);
+    if (models.length === 0) {
+      console.log(pc.yellow("No recommended models fit your hardware."));
+      console.log(pc.dim("You can still enter a repo ID manually."));
+      return false;
+    }
+    const choices = models.map((m) => ({
+      value: m,
+      label: `${pc.bold(m.label)}  ${pc.dim(`(${m.minRamGb} GB RAM min)`)}`,
+    }));
+    const selected = await prompt.choice("Select a model", choices, choices[0].value);
+    if (!selected) return false;
+    // Determine available formats (ignore empty strings)
+    const hasGguf = Boolean(selected.gguf);
+    const hasMlx = Boolean(selected.mlx && selected.mlx.trim());
+    let format;
+    if (hasGguf && hasMlx) {
+      // Both available — let the user choose
+      const formatChoices = [
+        { value: "gguf", label: `GGUF (llama.cpp) — ${selected.gguf.split("/").pop()}` },
+        { value: "mlx", label: `MLX (oMLX) — ${selected.mlx}` },
+      ];
+      // Default to MLX on Apple Silicon, GGUF elsewhere
+      const defaultFormat = (hardware.platform === "darwin" && hardware.arch === "arm64") ? "mlx" : "gguf";
+      format = await prompt.choice("Download format", formatChoices, defaultFormat);
+      if (!format) return false;
+    } else if (hasGguf) {
+      format = "gguf";
+    } else if (hasMlx) {
+      format = "mlx";
+    } else {
+      console.log(pc.yellow("No download path available for this model."));
+      return false;
+    }
+    if (format === "gguf") {
+      const ref = parseHfRef(selected.gguf);
+      repo = ref.repo;
+      filename = ref.filename;
+    } else {
+      repo = selected.mlx;
+      filename = undefined;
+    }
+  } else {
+    console.log(pc.dim("  Browse models at huggingface.co/models"));
+    const input = await prompt.text("HuggingFace repo ID (e.g. unsloth/gemma-4-E2B-it-GGUF)", "");
+    if (!input || !input.trim()) return false;
+    const ref = parseHfRef(input.trim());
+    repo = ref.repo;
+    filename = ref.filename;
+  }
+  // For GGUF repos without a specific file, show quant picker
+  if (!filename) {
+    let ggufFiles;
+    try {
+      ggufFiles = await listGgufFiles(repo);
+    } catch (err) {
+      console.log(pc.red(`Could not fetch repo info: ${err.message}`));
+      return false;
+    }
+    if (ggufFiles.length > 0) {
+      filename = await pickGgufQuant(prompt, repo, ggufFiles);
+      if (!filename) return false;
+    } else {
+      // No GGUF files — check if it's an MLX repo via HF metadata
+      let modelInfo;
+      try {
+        modelInfo = await getHfModelInfo(repo);
+      } catch {
+        console.log(pc.red(`Could not fetch repo info for ${repo}. Check the repo ID and try again.`));
+        return false;
+      }
+      if (!isMlxRepo(modelInfo)) {
+        console.log(pc.yellow(`This repo is not a GGUF or MLX model (library: ${modelInfo.library_name ?? "unknown"}).`));
+        console.log(pc.dim("For llama.cpp: look for a repo ending in -GGUF (e.g. org/model-name-GGUF)"));
+        console.log(pc.dim("For oMLX: look for a repo in mlx-community/ (e.g. mlx-community/model-name-4bit)"));
+        return false;
+      }
+      // It's MLX — download everything
+    }
+  }
+  // Check for huggingface_hub
+  if (!(await hasHfCli())) {
+    console.log(pc.yellow("HuggingFace CLI is required to download models."));
+    console.log(pc.dim("Install it: pip3 install huggingface_hub"));
+    return false;
+  }
+  // Resolve download plan
+  const ref = filename ? `${repo}/${filename}` : repo;
+  let plan;
+  try {
+    plan = await resolveHfDownload(ref);
+  } catch (err) {
+    console.log(pc.red(`Could not resolve download: ${err.message}`));
+    return false;
+  }
+  // Check disk space
+  const freeBytes = getFreeDiskBytes(HF_HUB_DIR);
+  if (plan.totalSizeBytes > 0 && freeBytes < plan.totalSizeBytes * 1.1) {
+    console.log(pc.red(`Not enough disk space: need ~${formatBytes(plan.totalSizeBytes)}, only ${formatBytes(freeBytes)} free.`));
+    return false;
+  }
+  console.log(pc.dim(`\nDownloading ${repo}${filename ? `/${filename}` : ""} (${formatBytes(plan.totalSizeBytes)})`));
+  if (plan.format === "mlx") {
+    const modelParts = repo.split("/").filter(Boolean);
+    const localDir = join(homedir(), ".omlx", "models", ...modelParts);
+    console.log(pc.dim(`Location: ${localDir}\n`));
+  } else {
+    console.log(pc.dim(`Location: HF cache (${HF_HUB_DIR})\n`));
+  }
+  try {
+    if (plan.format === "mlx") {
+      // Download directly to ~/.omlx/models/<org>/<model> — oMLX scans this dir
+      const modelParts = repo.split("/").filter(Boolean);
+      const localDir = join(homedir(), ".omlx", "models", ...modelParts);
+      await downloadModel(plan, { localDir });
+      console.log(pc.green("\n✓ Download complete."));
+      await offerOmlxRestart(prompt, "to load the new model");
+    } else {
+      await downloadModel(plan);
+      console.log(pc.green("\n✓ Download complete. Run offgrid-ai again to see the model in the picker."));
+    }
+    return true;
+  } catch (err) {
+    console.log(pc.red("\nDownload failed: " + err.message));
+    return false;
+  }
+}
+// ── Quant picker with RAM fit indicators ───────────────────────────────────
+async function pickGgufQuant(prompt, repo, ggufFiles) {
+  const hardware = detectHardware();
+  const totalRam = hardware.totalRamBytes;
+  const availableRam = totalRam - 4 * GB; // leave 4GB for OS
+  // Sort by size descending (highest quality first)
+  const sorted = [...ggufFiles].sort((a, b) => b.sizeBytes - a.sizeBytes);
+  // Find recommended: largest file that fits comfortably
+  const recommended = sorted.find((f) => f.sizeBytes + 2 * GB <= availableRam);
+  console.log("");
+  console.log(renderCard("Select quantization", renderRows([
+    ["Your RAM", `${installedRamGB()} GB`],
+    ["Available", `~${formatBytes(availableRam)} (after OS)`],
+    ["Rule", "Lower quant = smaller/faster · Higher = better quality"],
+  ]), { formatBorder: pc.cyan }));
+  console.log("");
+  const choices = sorted.map((file) => {
+    const sizeBytes = file.sizeBytes;
+    const parsed = parseModelName(file.path, "huggingface");
+    const quant = parsed.quant ?? file.path.replace(/\.gguf$/i, "");
+    let indicator, fitLabel;
+    if (sizeBytes > availableRam) {
+      indicator = pc.red("✗");
+      fitLabel = pc.red("won't fit");
+    } else if (sizeBytes + 2 * GB > availableRam) {
+      indicator = pc.yellow("⚠");
+      fitLabel = pc.yellow("tight");
+    } else {
+      indicator = pc.green("✓");
+      fitLabel = pc.green("fits");
+    }
+    const isRecommended = recommended && file.path === recommended.path;
+    const hint = isRecommended ? "recommended" : undefined;
+    return {
+      value: file.path,
+      label: `${indicator}  ${quant.padEnd(12)} ${formatBytes(sizeBytes).padEnd(10)} ${fitLabel}`,
+      ...(hint ? { hint } : {}),
+    };
+  });
+  const defaultValue = recommended?.path;
+  return await prompt.choice("Quantization", choices, defaultValue);
+}

package/src/harness-pi.mjs CHANGED Viewed

@@ -1,5 +1,6 @@
 import { existsSync } from "node:fs";
-import { spawn } from "node:child_process";
+import { execFile, spawn } from "node:child_process";
+import { promisify } from "node:util";
 import { PI_CONFIG } from "./config.mjs";
 import { loadProfiles } from "./profiles.mjs";
 import { readJson, writeJson } from "./json.mjs";
@@ -69,8 +70,6 @@ export async function launchPi(profile) {
 export async function hasPi() {
   try {
-    const { execFile } = await import("node:child_process");
-    const { promisify } = await import("node:util");
     await promisify(execFile)("which", ["pi"]);
     return true;
   } catch {

package/src/huggingface.mjs CHANGED Viewed

@@ -1,23 +1,19 @@
 // HuggingFace model download helpers.
-// Uses the Python huggingface_hub package (the standard, maintained downloader)
-// to download models into the standard HF cache directory.
-// Downloads go to ~/.cache/huggingface/hub, NOT a custom offgrid-ai folder.
+// Uses the `hf` CLI (from huggingface_hub) for actual downloads.
+// The interactive model/quant selection happens in download.mjs; here we
+// just hand off to the CLI and let it handle progress bars, resumption, etc.
-import { execFile } from "node:child_process";
+import { spawn, execFile } from "node:child_process";
 import { promisify } from "node:util";
-import { join, dirname } from "node:path";
 import { mkdir } from "node:fs/promises";
-import { fileURLToPath } from "node:url";
 import { HF_HUB_DIR } from "./config.mjs";
 const execFileAsync = promisify(execFile);
-const HF_DOWNLOAD_SCRIPT = join(dirname(fileURLToPath(import.meta.url)), "..", "resources", "hf-download.py");
-/** Check whether python3 + huggingface_hub is available. */
-export async function hasHuggingfaceHub() {
+/** Check whether the `hf` CLI is available. */
+export async function hasHfCli() {
   try {
-    const { stdout } = await execFileAsync("python3", ["-c", "import huggingface_hub; print(huggingface_hub.__version__)"]);
+    const { stdout } = await execFileAsync("hf", ["--version"]);
     return Boolean(stdout.trim());
   } catch {
     return false;
@@ -96,6 +92,33 @@ async function getHfTree(repo, { branch = "main", fetchImpl = globalThis.fetch }
   return await response.json();
 }
+/**
+ * List all GGUF files in a HuggingFace repo with their sizes.
+ * Entries come from getHfTree(); only items of type "file" whose path ends in
+ * ".gguf" (case-sensitive match) are kept.
+ * @param {string} repo - repo ID, passed through to getHfTree
+ * @param {object} [options]
+ * @param {function} [options.fetchImpl] - fetch implementation forwarded to getHfTree (defaults to globalThis.fetch)
+ * @returns {Promise<Array<{ path: string, sizeBytes: number }>>} files sorted by size, smallest first
+ */
+export async function listGgufFiles(repo, { fetchImpl = globalThis.fetch } = {}) {
+  const tree = await getHfTree(repo, { fetchImpl });
+  return tree
+    .filter((f) => f.type === "file" && f.path.endsWith(".gguf"))
+    .map((f) => ({
+      path: f.path,
+      sizeBytes: f.lfs?.size ?? f.size ?? 0, // prefer the LFS size, then the plain size, else 0
+    }))
+    .sort((a, b) => a.sizeBytes - b.sizeBytes);
+}
+/**
+ * Fetch model metadata from the HF API.
+ * Requests https://huggingface.co/api/models/<repo> with a 10 second timeout.
+ * @param {string} repo - repo ID; interpolated into the URL as-is
+ * @param {object} [options]
+ * @param {function} [options.fetchImpl] - fetch implementation (defaults to globalThis.fetch)
+ * @returns {Promise<object>} the parsed JSON response body
+ * @throws {Error} when the response status is not OK
+ */
+export async function getHfModelInfo(repo, { fetchImpl = globalThis.fetch } = {}) {
+  const url = `https://huggingface.co/api/models/${repo}`;
+  const response = await fetchImpl(url, { signal: AbortSignal.timeout(10000) });
+  if (!response.ok) throw new Error(`HuggingFace API error: HTTP ${response.status} for ${repo}`);
+  return await response.json();
+}
+/**
+ * Check if a repo is MLX-formatted based on its HF metadata.
+ * True when library_name is exactly "mlx", or when tags is an array that
+ * contains "mlx".
+ * @param {object} modelInfo - metadata object, e.g. from getHfModelInfo
+ * @returns {boolean}
+ */
+export function isMlxRepo(modelInfo) {
+  if (modelInfo.library_name === "mlx") return true;
+  if (Array.isArray(modelInfo.tags) && modelInfo.tags.includes("mlx")) return true;
+  return false;
+}
 /** Resolve a user-provided HF reference into a download plan. */
 export async function resolveHfDownload(input, { fetchImpl = globalThis.fetch } = {}) {
   const { repo, filename } = parseHfRef(input);
@@ -136,74 +159,51 @@ export async function resolveHfDownload(input, { fetchImpl = globalThis.fetch }
 }
 /**
- * Download a resolved model into the HF hub cache.
+ * Download a resolved model using the `hf` CLI.
+ * GGUF: downloads single file to HF cache (offgrid-ai scanner finds it there).
+ * MLX: downloads full repo to a local directory (oMLX scans ~/.omlx/models).
+ * Progress bars are handled natively by the CLI (stdio inherited).
  * @param {object} model - from resolveHfDownload
- * @param {object} options
- * @param {function} options.onProgress - ({ downloadedBytes, totalBytes, percentage, file }) => void
+ * @param {object} [options]
+ * @param {string} [options.localDir] - for MLX: target directory
  * @returns {Promise<{ localDir: string, format: string }>}
+ * @throws {Error} if the `hf` process cannot be spawned or exits with a non-zero code
  */
-export async function downloadToHfCache(model, options = {}) {
-  await mkdir(HF_HUB_DIR, { recursive: true });
+export async function downloadModel(model, options = {}) {
+  const args = ["download", model.repo]; // base command: hf download <repo>
+  let localDir;
-  const script = HF_DOWNLOAD_SCRIPT;
-  const args = ["--repo", model.repo, "--cache-dir", HF_HUB_DIR];
   if (model.format === "gguf") {
-    args.push("--file", model.files[0].filename);
+    // Single file to HF cache — scanner finds it there
+    await mkdir(HF_HUB_DIR, { recursive: true });
+    args.push(model.files[0].filename, "--cache-dir", HF_HUB_DIR);
+    localDir = HF_HUB_DIR;
+  } else if (options.localDir) {
+    // Full repo to a flat local directory (oMLX)
+    await mkdir(options.localDir, { recursive: true });
+    args.push(
+      "--local-dir", options.localDir,
+      "--exclude", "*.md",
+      "--exclude", ".gitattributes",
+      "--exclude", "LICENSE",
+      "--exclude", ".gitignore",
+    );
+    localDir = options.localDir;
+  } else {
+    // Fallback: full repo to HF cache
+    await mkdir(HF_HUB_DIR, { recursive: true });
+    args.push("--cache-dir", HF_HUB_DIR);
+    localDir = HF_HUB_DIR;
   }
-  const onProgress = options.onProgress ?? (() => {});
-  return new Promise((resolve, reject) => {
-    const child = execFile("python3", [script, ...args], { env: process.env });
-    let stdoutBuf = "";
-    let downloadedBytes = 0;
-    let currentFile = null;
-    // huggingface_hub streams NDJSON progress events to stdout, one per line.
-    // Buffer and split on complete newlines so an event split across chunk
-    // boundaries isn't silently dropped.
-    const handleLine = (line) => {
-      if (!line) return;
-      try {
-        const event = JSON.parse(line);
-        if (event.type === "progress") {
-          downloadedBytes = event.downloadedBytes ?? downloadedBytes;
-          currentFile = event.file ?? currentFile;
-          onProgress({
-            downloadedBytes,
-            totalBytes: model.totalSizeBytes,
-            percentage: Math.min(100, Math.round((downloadedBytes / model.totalSizeBytes) * 100)),
-            file: currentFile,
-          });
-        } else if (event.type === "complete") {
-          resolve({ localDir: event.localDir, format: model.format });
-        } else if (event.type === "error") {
-          reject(new Error(event.message));
-        }
-      } catch {
-        // Ignore non-JSON output (progress bars, etc.)
-      }
-    };
-    child.stdout?.on("data", (chunk) => {
-      stdoutBuf += String(chunk);
-      let nl;
-      while ((nl = stdoutBuf.indexOf("\n")) !== -1) {
-        handleLine(stdoutBuf.slice(0, nl));
-        stdoutBuf = stdoutBuf.slice(nl + 1);
-      }
-    });
-    child.stderr?.on("data", () => {
-      // huggingface_hub prints progress bars to stderr; ignore.
-    });
+  const exitCode = await new Promise((resolve, reject) => {
+    const child = spawn("hf", args, { stdio: "inherit", env: process.env });
     child.on("error", reject);
-    child.on("exit", (code) => {
-      // Flush any final line that lacked a trailing newline.
-      if (stdoutBuf.trim()) handleLine(stdoutBuf.trim());
-      if (code !== 0) reject(new Error(`Download failed with exit code ${code}`));
-    });
+    child.on("exit", resolve); // resolves with the exit code (first "exit" argument)
   });
+  // NOTE(review): Node reports a null exit code when the child is killed by a
+  // signal, so that case also throws here, with the message "code null".
+  if (exitCode !== 0) {
+    throw new Error(`hf download exited with code ${exitCode}`);
+  }
+  return { localDir, format: model.format };
 }

package/src/managed.mjs CHANGED Viewed

@@ -1,8 +1,7 @@
 import { existsSync } from "node:fs";
 import { BACKENDS } from "./backends.mjs";
-import { hasOmlx } from "./omlx-runtime.mjs";
-export const MANAGED_BACKEND_IDS = ["omlx"];
+const MANAGED_BACKEND_IDS = ["omlx"];
 export async function scanManagedModels() {
   const results = [];
@@ -21,7 +20,3 @@ export async function scanManagedModels() {
 // Reports whether an LM Studio app bundle exists at the fixed path /Applications/LM Studio.app.
 export function hasLmStudioInstalled() {
   return existsSync("/Applications/LM Studio.app");
 }
-export async function hasOmlxInstalled() {
-  return await hasOmlx();
-}

package/src/model-presenters.mjs CHANGED Viewed

@@ -1,12 +1,12 @@
 import { existsSync, statSync } from "node:fs";
 import { basename, dirname, join } from "node:path";
+import { stripVTControlCharacters } from "node:util";
 import { backendFor } from "./backends.mjs";
 import { computeServerCommand, buildStartScript, isProfileRunning } from "./process.mjs";
 import { profileDir } from "./profiles.mjs";
 import { pc, formatBytes, renderSectionRows } from "./ui.mjs";
 import { capabilitySummary, ggufDetailParts, isProfileFileMissing, profileDetailParts } from "./model-summary.mjs";
 import { itemKey } from "./model-catalog.mjs";
-import { DATA_DIR } from "./config.mjs";
 const OPTION_SEPARATOR = "  ";
 const OPTION_STATUS_WIDTH = 12;
@@ -15,8 +15,6 @@ const OPTION_SOURCE_WIDTH = 14;
 const OPTION_QUANT_WIDTH = 10;
 const OPTION_CTX_WIDTH = 5;
-const { stripVTControlCharacters } = await import("node:util");
 function optionPad(text, color, width) {
   const visible = stripVTControlCharacters(String(text)).length;
   const padding = Math.max(1, width - visible);
@@ -197,26 +195,6 @@ export function inferBackendId(item) {
   return "llama-cpp";
 }
-export function printWorkspaceHeader(normalized, runningProfilesNow, modelMissingIds = new Set()) {
-  const profiles = normalized.profiles;
-  const isRunning = (p) => runningProfilesNow.some((r) => r.id === p.id);
-  const isMissing = (p) => isProfileFileMissing(p) || modelMissingIds.has(p.id);
-  const readyCount = profiles.filter((p) => !isMissing(p) && !isRunning(p)).length;
-  const runningCount = runningProfilesNow.length;
-  const missingCount = profiles.filter(isMissing).length;
-  const setupCount = normalized.newModels.length + normalized.managedItems.length;
-  const countParts = [];
-  if (runningCount > 0) countParts.push(pc.green(`${runningCount} running`));
-  if (readyCount > 0) countParts.push(pc.blue(`${readyCount} model${readyCount === 1 ? "" : "s"} ready`));
-  if (missingCount > 0) countParts.push(pc.red(`${missingCount} model${missingCount === 1 ? "" : "s"} missing`));
-  if (setupCount > 0) countParts.push(pc.yellow(`${setupCount} model${setupCount === 1 ? "" : "s"} need${setupCount === 1 ? "s" : ""} setup`));
-  console.log(`   ${countParts.join(pc.dim(" · "))}`);
-  console.log(pc.dim(`   Profiles: ${DATA_DIR}`));
-  console.log(pc.dim("   ─────────────────────────────────────────────────────────"));
-}
 export async function printProfileDetails(profile) {
   const backend = backendFor(profile.backend);
   const isManaged = backend.type === "managed-server";

package/src/model-summary.mjs CHANGED Viewed

@@ -23,7 +23,7 @@ export function capabilitySummary(caps) {
   return parts.length > 0 ? parts.join(" · ") : "standard GGUF";
 }
-export function profileMtpLabel(profile, drafters, { detailed = false } = {}) {
+function profileMtpLabel(profile, drafters, { detailed = false } = {}) {
   if (profile.drafterPath) {
     return detailed ? pc.green(`MTP enabled (drafter: ${basename(profile.drafterPath)})`) : pc.green("MTP enabled");
   }
@@ -34,7 +34,7 @@ export function profileMtpLabel(profile, drafters, { detailed = false } = {}) {
   return null;
 }
-export function ggufMtpLabel(model, drafter) {
+function ggufMtpLabel(model, drafter) {
   const caps = detectCapabilities(model.path, model.mmprojPath);
   if (caps.mtp || Boolean(drafter)) return pc.green("MTP ✓");
   if (caps.architecture === "gemma4") return pc.yellow("MTP: needs drafter");

package/src/omlx-runtime.mjs CHANGED Viewed

@@ -14,7 +14,7 @@ import { join } from "node:path";
 import { promisify } from "node:util";
 import { compareVersions } from "./updates.mjs";
 import { hasHomebrew, ensureHomebrewFor } from "./config.mjs";
-import { commandExists } from "./exec.mjs";
+import { commandExists, runCommand } from "./exec.mjs";
 import { pc, renderCard, renderRows } from "./ui.mjs";
 const execFileAsync = promisify(execFile);
@@ -138,7 +138,6 @@ export async function offerManagedOmlxUpdate(prompt, { fetchImpl = globalThis.fe
   if (!shouldUpdate) return false;
   try {
-    const { runCommand } = await import("./exec.mjs");
     console.log(pc.dim("Updating oMLX via Homebrew..."));
     await runCommand("brew", ["update"], { label: "brew update" });
     await runCommand("brew", ["upgrade", "omlx"], { label: "brew upgrade omlx" });
@@ -153,6 +152,34 @@ export async function offerManagedOmlxUpdate(prompt, { fetchImpl = globalThis.fe
 // ── Installation ───────────────────────────────────────────────────────────
+/**
+ * Offer to restart oMLX so it picks up new or deleted models.
+ *
+ * If findOmlx() finds no binary, a manual-restart hint is printed and the user
+ * is not prompted. Otherwise the user is asked for confirmation and, on yes,
+ * `<bin> restart` is run with a 15 second timeout. A failed restart is caught
+ * and reported, never thrown to the caller.
+ *
+ * @param {object} prompt - UI prompt interface (yesNo)
+ * @param {string} [reason] - why we're restarting (e.g. "to load the new model")
+ * @returns {Promise<boolean>} true if oMLX was restarted
+ */
+export async function offerOmlxRestart(prompt, reason = "to update its model list") {
+  const bin = await findOmlx();
+  if (!bin) {
+    console.log(pc.dim("Restart oMLX manually: omlx restart"));
+    return false;
+  }
+  const shouldRestart = await prompt.yesNo(`Restart oMLX ${reason}?`, true); // presumably `true` is the default answer — TODO confirm against createPrompt()
+  if (!shouldRestart) {
+    console.log(pc.dim("Restart manually later: omlx restart"));
+    return false;
+  }
+  try {
+    await execFileAsync(bin, ["restart"], { timeout: 15000 });
+    console.log(pc.green("✓ oMLX restarted"));
+    return true;
+  } catch (err) {
+    console.log(pc.red(`✗ Restart failed: ${err.message}`));
+    console.log(pc.dim("Restart manually: omlx restart"));
+    return false;
+  }
+}
 /**
  * Install oMLX. Uses Homebrew if available (automating tap + install).
  * If Homebrew is not available, prompts to download the DMG from GitHub
@@ -167,7 +194,6 @@ export async function installOmlx(prompt, run) {
   if (!hasBrew) {
     if (!(await ensureHomebrewFor(prompt, run || (async (cmd, args, label) => {
-      const { runCommand } = await import("./exec.mjs");
       return runCommand(cmd, args, { label });
     }), "oMLX"))) {
       console.log(pc.dim("Install oMLX manually:"));
@@ -179,7 +205,6 @@ export async function installOmlx(prompt, run) {
   // Install oMLX via Homebrew
   const runner = run || (async (cmd, args, label) => {
-    const { runCommand } = await import("./exec.mjs");
     return runCommand(cmd, args, { label });
   });

package/src/process.mjs CHANGED Viewed

@@ -6,6 +6,8 @@ import { basename, join } from "node:path";
 import { LOG_DIR } from "./config.mjs";
 import { writeState, readState, profileDir } from "./profiles.mjs";
 import { backendFor, backendBinaryFor } from "./backends.mjs";
+import { computeFlags } from "./autodetect.mjs";
+import { findOmlx } from "./omlx-runtime.mjs";
 import { pc } from "./ui.mjs";
 const execFileAsync = promisify(execFile);
@@ -23,7 +25,6 @@ export async function computeServerCommand(profile) {
   if (!binary) throw new Error("Server binary not found. Run offgrid-ai interactively to install.");
   // llama-cpp
-  const { computeFlags } = await import("./autodetect.mjs");
   const result = computeFlags(
     profile.capabilities ?? {},
     profile.modelPath,
@@ -129,14 +130,11 @@ async function startManagedServer(profile, backend) {
   // Try to start the managed server via CLI
   if (backend.id === "omlx") {
     try {
-      const { execFile } = await import("node:child_process");
-      const { promisify } = await import("node:util");
-      const { findOmlx } = await import("./omlx-runtime.mjs");
       const omlxBin = await findOmlx();
       if (!omlxBin) {
         throw new Error(`${backend.label} is not installed. Run offgrid-ai to install it, or install manually: brew tap jundot/omlx && brew install omlx`);
       }
-      await promisify(execFile)(omlxBin, ["start"], { timeout: 10000 });
+      await execFileAsync(omlxBin, ["start"], { timeout: 10000 });
     } catch (err) {
       if (err.message.includes("not installed")) throw err;
       throw new Error(`${backend.label} could not be auto-started: ${err.message}. Run \`omlx start\` manually.`, { cause: err });

package/src/profiles.mjs CHANGED Viewed

@@ -3,6 +3,7 @@ import { mkdir, readdir, rm, unlink, writeFile, readFile } from "node:fs/promise
 import { join } from "node:path";
 import { PROFILE_DIR, RUN_DIR, LOG_DIR } from "./config.mjs";
 import { backendFor, baseUrlForFlags, defaultFlagsForBackend, BACKENDS } from "./backends.mjs";
+import { detectCapabilities } from "./autodetect.mjs";
 import { computeFlags } from "./autodetect.mjs";
 import { readJson, writeJson } from "./json.mjs";
@@ -130,7 +131,6 @@ export function normalizeProfile(profile) {
 // ── Auto-create profile from a discovered model ────────────────────────────
 export async function createProfileFromModel(model, backendId, drafterPath) {
-  const { detectCapabilities } = await import("./autodetect.mjs");
   const caps = detectCapabilities(model.path, model.mmprojPath);
   // If a drafter is provided, this model supports MTP regardless of filename
   const hasMtp = caps.mtp || Boolean(drafterPath);

package/src/runtime.mjs CHANGED Viewed

@@ -3,7 +3,7 @@ import { execFile } from "node:child_process";
 import { existsSync } from "node:fs";
 import { chmod, mkdir, mkdtemp, readFile, rm, symlink, unlink, writeFile } from "node:fs/promises";
 import { tmpdir } from "node:os";
-import { basename, join } from "node:path";
+import { join } from "node:path";
 import { promisify } from "node:util";
 import { MANAGED_LLAMA_SERVER, RUNTIME_DIR } from "./config.mjs";
 import { compareVersions } from "./updates.mjs";
@@ -137,5 +137,5 @@ function verifyDigest(bytes, digest) {
   if (!digest?.startsWith("sha256:")) return;
   const expected = digest.slice("sha256:".length);
   const actual = createHash("sha256").update(bytes).digest("hex");
-  if (actual !== expected) throw new Error(`${basename("llama.cpp")}: checksum mismatch`);
+  if (actual !== expected) throw new Error("llama.cpp: checksum mismatch");
 }

package/src/ui.mjs CHANGED Viewed

@@ -237,6 +237,8 @@ export function createPrompt() {
 export async function modelSelect(label, groups, { defaultKey, pageSize = 20 } = {}) {
   const choices = [];
+  // Blank line after the prompt message for visual separation
+  choices.push(new Separator(" "));
   for (let i = 0; i < groups.length; i++) {
     const group = groups[i];
     // Add blank line before each group (except the first)