npm - offgrid-ai - Versions diffs - 0.9.6 → 0.10.1 - Mend

offgrid-ai 0.9.6 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

package/README.md +6 -6
package/package.json +4 -3
package/resources/hf-download.py +79 -0
package/resources/mlxvlm-server-wrapper.py +112 -0
package/resources/recommendations.json +60 -0
package/src/backend-installers.mjs +1 -16
package/src/backends.mjs +18 -45
package/src/benchmark/finalize.mjs +3 -90
package/src/benchmark/flow.mjs +3 -4
package/src/benchmark/metrics.mjs +0 -44
package/src/benchmark/prepare.mjs +1 -1
package/src/benchmark.mjs +3 -1
package/src/commands/main.mjs +7 -7
package/src/commands/models.mjs +21 -18
package/src/commands/onboard.mjs +67 -9
package/src/commands/run.mjs +20 -5
package/src/commands/status.mjs +1 -1
package/src/config.mjs +11 -2
package/src/discovery-shared.mjs +44 -0
package/src/hardware.mjs +49 -0
package/src/harness-pi.mjs +25 -11
package/src/huggingface.mjs +209 -0
package/src/managed.mjs +1 -5
package/src/mlx-discovery.mjs +294 -0
package/src/mlx-flags.mjs +93 -0
package/src/model-catalog.mjs +78 -11
package/src/model-name.mjs +7 -25
package/src/model-presenters.mjs +114 -38
package/src/process.mjs +129 -32
package/src/profile-setup.mjs +105 -0
package/src/profiles.mjs +30 -0
package/src/recommendations.mjs +56 -14
package/src/scan.mjs +43 -8

package/src/huggingface.mjs ADDED Viewed

@@ -0,0 +1,209 @@
+// HuggingFace model download helpers.
+// Uses the Python huggingface_hub package (the standard, maintained downloader)
+// to download models into the standard HF cache directory.
+// Downloads go to ~/.cache/huggingface/hub, NOT a custom offgrid-ai folder.
+import { execFile } from "node:child_process";
+import { promisify } from "node:util";
+import { join, dirname } from "node:path";
+import { mkdir } from "node:fs/promises";
+import { fileURLToPath } from "node:url";
+import { HF_HUB_DIR } from "./config.mjs";
+// Promise-returning form of execFile (built with util.promisify) for awaited one-shot commands.
+const execFileAsync = promisify(execFile);
+// Path to resources/hf-download.py, resolved relative to this module's own directory
+// (one level up from the directory containing this file).
+const HF_DOWNLOAD_SCRIPT = join(dirname(fileURLToPath(import.meta.url)), "..", "resources", "hf-download.py");
+/**
+ * Probe for a usable `huggingface_hub` install by asking python3 to import the
+ * package and print its version.
+ * @returns {Promise<boolean>} true when the probe prints a non-empty version;
+ *   false when the command fails or prints nothing.
+ */
+export async function hasHuggingfaceHub() {
+  const probe = "import huggingface_hub; print(huggingface_hub.__version__)";
+  let version;
+  try {
+    const result = await execFileAsync("python3", ["-c", probe]);
+    version = result.stdout.trim();
+  } catch {
+    return false;
+  }
+  return version.length > 0;
+}
+/**
+ * Parse a HuggingFace reference into a repo ID plus an optional file path.
+ *
+ * Accepted forms:
+ *   - `org/name` or `org/name/path/to/file`
+ *   - `https://huggingface.co/org/name`
+ *   - `https://huggingface.co/org/name/{resolve|blob|tree}/<revision>/path/to/file`
+ *
+ * For URLs, the `{resolve|blob|tree}/<revision>` segment is stripped, so links
+ * copied from the browser (`/blob/main/...`) yield the same file path as direct
+ * download links (`/resolve/main/...`). The revision itself is not returned.
+ * Percent-escapes in URL path segments are decoded; query strings and
+ * fragments are ignored.
+ *
+ * @param {string} input - URL, `org/name`, or `org/name/file` reference.
+ * @returns {{ repo: string, filename: string | undefined }} `filename` is
+ *   undefined when the reference names only a repo.
+ * @throws {Error} when the reference does not contain at least `org/name`.
+ */
+export function parseHfRef(input) {
+  const trimmed = String(input ?? "").trim();
+  // First two segments are the repo ID; anything after is the file path.
+  const toRef = (segments) => ({
+    repo: segments.slice(0, 2).join("/"),
+    filename: segments.length > 2 ? segments.slice(2).join("/") : undefined,
+  });
+  if (trimmed.startsWith("https://huggingface.co/")) {
+    const decode = (segment) => {
+      try {
+        return decodeURIComponent(segment);
+      } catch {
+        // Malformed escape sequence: keep the raw segment rather than failing.
+        return segment;
+      }
+    };
+    const pathParts = new URL(trimmed).pathname.split("/").filter(Boolean).map(decode);
+    if (pathParts.length < 2) {
+      throw new Error(`Invalid HuggingFace URL: ${input}`);
+    }
+    // Layout is [org, name, marker, revision, ...file]; drop marker + revision.
+    if (["resolve", "blob", "tree"].includes(pathParts[2])) {
+      pathParts.splice(2, 2);
+    }
+    return toRef(pathParts);
+  }
+  const parts = trimmed.split("/").filter(Boolean);
+  if (parts.length < 2) {
+    throw new Error(`Invalid HuggingFace reference: "${input}". Expected at least org/name.`);
+  }
+  return toRef(parts);
+}
+/**
+ * Resolve metadata for a single file in a HuggingFace repo via the tree API.
+ *
+ * @param {string} ref - reference that names a file (`org/name/path/to/file.gguf` or a URL).
+ * @param {object} [options]
+ * @param {function} [options.fetchImpl=globalThis.fetch] - fetch implementation (injectable for tests).
+ * @returns {Promise<{ repo: string, filename: string, url: string, sizeBytes: number, sha256: string, relativePath: string }>}
+ *   `sizeBytes` prefers the entry's `lfs.size`, then `size`, then 0; `sha256`
+ *   is the entry's `lfs.oid` or "" when absent.
+ * @throws {Error} when the reference names no file, or the file is not in the repo tree.
+ */
+export async function resolveGgufFile(ref, { fetchImpl = globalThis.fetch } = {}) {
+  const { repo, filename } = parseHfRef(ref);
+  // Fail before the network round-trip: without a file part the lookup below
+  // can never match and would report "File 'undefined' not found".
+  if (!filename) {
+    throw new Error(`HuggingFace reference '${ref}' does not name a file. Expected org/name/path/to/file.gguf.`);
+  }
+  const tree = await getHfTree(repo, { fetchImpl });
+  const entry = tree.find((f) => f.path === filename && f.type === "file");
+  if (!entry) throw new Error(`File '${filename}' not found in HuggingFace repo '${repo}'.`);
+  return {
+    repo,
+    filename,
+    url: `https://huggingface.co/${repo}/resolve/main/${filename}`,
+    sizeBytes: entry.lfs?.size ?? entry.size ?? 0,
+    sha256: entry.lfs?.oid ?? "",
+    relativePath: filename,
+  };
+}
+/**
+ * List the downloadable files of a repo from the HF tree API.
+ * Keeps entries of type "file" whose path neither starts with "." nor equals
+ * "README.md", and maps each to a download descriptor.
+ *
+ * @param {string} repo - repo ID (`org/name`).
+ * @param {object} [options]
+ * @param {function} [options.fetchImpl=globalThis.fetch] - fetch implementation.
+ * @returns {Promise<Array<{ repo: string, filename: string, url: string, sizeBytes: number, sha256: string, relativePath: string }>>}
+ */
+export async function resolveMlxRepo(repo, { fetchImpl = globalThis.fetch } = {}) {
+  const isModelFile = ({ type, path }) => {
+    if (type !== "file") return false;
+    // The dot-prefix test also covers ".gitattributes".
+    if (path.startsWith(".")) return false;
+    return path !== "README.md";
+  };
+  const toDescriptor = ({ path, lfs, size }) => ({
+    repo,
+    filename: path,
+    url: `https://huggingface.co/${repo}/resolve/main/${path}`,
+    sizeBytes: lfs?.size ?? size ?? 0,
+    sha256: lfs?.oid ?? "",
+    relativePath: path,
+  });
+  const tree = await getHfTree(repo, { fetchImpl });
+  return tree.filter((entry) => isModelFile(entry)).map((entry) => toDescriptor(entry));
+}
+/**
+ * Fetch the recursive file tree of a model repo from the HuggingFace API.
+ * The request carries a 10 s abort timeout.
+ *
+ * @param {string} repo - repo ID (`org/name`).
+ * @param {object} [options]
+ * @param {string} [options.branch="main"] - revision to list.
+ * @param {function} [options.fetchImpl=globalThis.fetch] - fetch implementation.
+ * @returns {Promise<any>} the parsed JSON body of the response.
+ * @throws {Error} when the response status is not ok.
+ */
+async function getHfTree(repo, { branch = "main", fetchImpl = globalThis.fetch } = {}) {
+  const endpoint = `https://huggingface.co/api/models/${repo}/tree/${branch}?recursive=true`;
+  const res = await fetchImpl(endpoint, { signal: AbortSignal.timeout(10000) });
+  if (res.ok) {
+    return await res.json();
+  }
+  throw new Error(`HuggingFace API error: HTTP ${res.status} for ${repo}`);
+}
+/**
+ * Turn a user-provided HF reference into a download plan.
+ *
+ * Resolution order:
+ *   1. The reference names a `.gguf` file → single-file GGUF plan.
+ *   2. The repo tree contains `.gguf` files → GGUF plan for the first one listed.
+ *   3. Otherwise → MLX plan covering the files returned by resolveMlxRepo().
+ *
+ * @param {string} input - URL, `org/name`, or `org/name/file` reference.
+ * @param {object} [options]
+ * @param {function} [options.fetchImpl=globalThis.fetch] - fetch implementation.
+ * @returns {Promise<{ id: string, repo: string, format: "gguf" | "mlx", files: object[], totalSizeBytes: number }>}
+ */
+export async function resolveHfDownload(input, { fetchImpl = globalThis.fetch } = {}) {
+  const { repo, filename } = parseHfRef(input);
+  // All three outcomes share this shape; `id` is the last segment of the repo ID.
+  const makePlan = (format, files, totalSizeBytes) => ({
+    id: repo.split("/").pop() ?? repo,
+    repo,
+    format,
+    files,
+    totalSizeBytes,
+  });
+  const planSingleGguf = async (path) => {
+    const resolved = await resolveGgufFile(`${repo}/${path}`, { fetchImpl });
+    return makePlan("gguf", [resolved], resolved.sizeBytes);
+  };
+
+  if (filename && filename.endsWith(".gguf")) {
+    return planSingleGguf(filename);
+  }
+
+  const tree = await getHfTree(repo, { fetchImpl });
+  const [firstGguf] = tree.filter((f) => f.type === "file" && f.path.endsWith(".gguf"));
+  if (firstGguf) {
+    return planSingleGguf(firstGguf.path);
+  }
+
+  const files = await resolveMlxRepo(repo, { fetchImpl });
+  const totalSizeBytes = files.reduce((sum, f) => sum + f.sizeBytes, 0);
+  return makePlan("mlx", files, totalSizeBytes);
+}
+/**
+ * Download a resolved model into the HF hub cache by running the bundled
+ * hf-download.py helper under python3 and reading newline-delimited JSON
+ * events from its stdout:
+ *   - { type: "progress", downloadedBytes?, file? } → forwarded to onProgress
+ *   - { type: "complete", localDir }                → resolves the promise
+ *   - { type: "error", message }                    → rejects the promise
+ * Lines that are not valid JSON are ignored.
+ *
+ * The promise always settles: if the helper exits without a "complete" or
+ * "error" event it rejects, including the last stderr line when there is one.
+ *
+ * @param {object} model - from resolveHfDownload
+ * @param {object} [options]
+ * @param {function} [options.onProgress] - ({ downloadedBytes, totalBytes, percentage, file }) => void.
+ *   `percentage` is 0 while the total size is unknown (not a positive number).
+ * @returns {Promise<{ localDir: string, format: string }>}
+ */
+export async function downloadToHfCache(model, options = {}) {
+  await mkdir(HF_HUB_DIR, { recursive: true });
+  const args = [HF_DOWNLOAD_SCRIPT, "--repo", model.repo, "--cache-dir", HF_HUB_DIR];
+  if (model.format === "gguf") {
+    args.push("--file", model.files[0].filename);
+  }
+  const onProgress = options.onProgress ?? (() => {});
+  const totalBytes = model.totalSizeBytes;
+  return new Promise((resolve, reject) => {
+    // execFile terminates the child once stdout or stderr exceeds maxBuffer
+    // (1 MiB by default). Progress output of a long download can pass that,
+    // so the cap is lifted; output is consumed incrementally below.
+    const child = execFile("python3", args, { env: process.env, maxBuffer: Infinity });
+    let stdoutBuf = "";
+    let stderrTail = "";
+    let downloadedBytes = 0;
+    let currentFile = null;
+    const handleLine = (line) => {
+      if (!line) return;
+      let event;
+      try {
+        event = JSON.parse(line);
+      } catch {
+        // Not JSON (stray prints, progress bars): ignore the line.
+        return;
+      }
+      if (event?.type === "progress") {
+        downloadedBytes = event.downloadedBytes ?? downloadedBytes;
+        currentFile = event.file ?? currentFile;
+        // Guard the division: an unknown/zero total would yield NaN or Infinity.
+        const percentage = totalBytes > 0
+          ? Math.min(100, Math.round((downloadedBytes / totalBytes) * 100))
+          : 0;
+        try {
+          onProgress({ downloadedBytes, totalBytes, percentage, file: currentFile });
+        } catch {
+          // A faulty progress listener must not abort the download.
+        }
+      } else if (event?.type === "complete") {
+        resolve({ localDir: event.localDir, format: model.format });
+      } else if (event?.type === "error") {
+        reject(new Error(event.message));
+      }
+    };
+    // Buffer and split on complete newlines so an event split across chunk
+    // boundaries isn't silently dropped.
+    child.stdout?.on("data", (chunk) => {
+      stdoutBuf += String(chunk);
+      let nl;
+      while ((nl = stdoutBuf.indexOf("\n")) !== -1) {
+        handleLine(stdoutBuf.slice(0, nl));
+        stdoutBuf = stdoutBuf.slice(nl + 1);
+      }
+    });
+    child.stderr?.on("data", (chunk) => {
+      // Keep only the tail of stderr, for use in failure messages.
+      stderrTail = (stderrTail + String(chunk)).slice(-4096);
+    });
+    child.on("error", reject);
+    // "close" (unlike "exit") fires after the stdio streams have ended, so every
+    // event line has been delivered before the final verdict is made.
+    child.on("close", (code, signal) => {
+      // Flush any final line that lacked a trailing newline.
+      if (stdoutBuf.trim()) handleLine(stdoutBuf.trim());
+      // The reject calls below are no-ops when an event already settled the promise.
+      if (code === 0) {
+        reject(new Error("Download helper exited without reporting completion"));
+        return;
+      }
+      const detail = stderrTail.split(/[\r\n]+/).map((s) => s.trim()).filter(Boolean).pop();
+      const reason = code === null ? `was terminated by signal ${signal}` : `failed with exit code ${code}`;
+      reject(new Error(detail ? `Download ${reason}: ${detail}` : `Download ${reason}`));
+    });
+  });
+}

package/src/managed.mjs CHANGED Viewed

@@ -2,7 +2,7 @@ import { existsSync } from "node:fs";
 import { BACKENDS } from "./backends.mjs";
 import { commandExists } from "./exec.mjs";
-export const MANAGED_BACKEND_IDS = ["ollama", "omlx"];
+export const MANAGED_BACKEND_IDS = ["omlx"];
 export async function scanManagedModels() {
   const results = [];
@@ -22,10 +22,6 @@ export function hasLmStudioInstalled() {
   return existsSync("/Applications/LM Studio.app");
 }
-export function hasOllamaInstalled() {
-  return commandExists("ollama");
-}
 export function hasOmlxInstalled() {
   return commandExists("omlx");
 }

package/src/mlx-discovery.mjs ADDED Viewed

@@ -0,0 +1,294 @@
+// MLX model discovery + metadata — scans configured model directories for MLX
+// model directories and parses their config.json.
+// Ported from deprecated-offgrid-desktop/src/main/model-discovery.ts +
+// mlx-metadata.ts (MLX subset only).
+//
+// This runs ALONGSIDE offgrid-ai's existing GGUF scan (scan.mjs scanGgufModels)
+// — it does not replace it. The picker (main.mjs) will merge GGUF + MLX lists.
+//
+// An MLX model directory is one containing config.json + one or more
+// *.safetensors files. HuggingFace Hub cache layout (models--org--name) is
+// detected and scanned specially.
+import { readdir, stat, readFile } from "node:fs/promises";
+import { existsSync } from "node:fs";
+import { join, basename } from "node:path";
+import { homedir } from "node:os";
+import { getModelScanDirs } from "./config.mjs";
+import { inferSourceLabel, MIN_MODEL_SIZE_BYTES, EMBEDDING_MODEL_TYPES } from "./discovery-shared.mjs";
+// ── Folder → backend mapping ──────────────────────────────────────────────
+// The oMLX folder is oMLX-exclusive: models there are served by the oMLX
+// managed backend, NOT by mlx-vlm. Every OTHER scan dir is format-based
+// (GGUF → llama.cpp, MLX → mlx-vlm). So mlx-vlm scans all configured dirs
+// EXCEPT the oMLX folder.
+const OMLX_MODELS_DIR = join(homedir(), ".omlx", "models");
+/** True when `p` is the oMLX models folder itself or a path beneath it. */
+function isOmlxFolder(p) {
+  if (p === OMLX_MODELS_DIR) return true;
+  return p.startsWith(`${OMLX_MODELS_DIR}/`);
+}
+// ── MLX directory detection ───────────────────────────────────────────────
+/**
+ * Decide whether `dir` is an MLX model directory: it must hold a config.json
+ * and at least one entry whose name ends in ".safetensors".
+ * @param {string} dir
+ * @returns {Promise<boolean>} false as well when the directory cannot be read.
+ */
+async function isMlxModelDir(dir) {
+  const configPath = join(dir, "config.json");
+  if (!existsSync(configPath)) return false;
+  let names;
+  try {
+    names = await readdir(dir);
+  } catch {
+    return false;
+  }
+  for (const name of names) {
+    if (name.endsWith(".safetensors")) return true;
+  }
+  return false;
+}
+/**
+ * Total size in bytes of the ".safetensors" entries directly inside `dir`.
+ * @param {string} dir
+ * @returns {Promise<number>} 0 when the directory cannot be listed or any
+ *   of the files cannot be stat'ed.
+ */
+async function getMlxDirSizeBytes(dir) {
+  let total = 0;
+  try {
+    const names = await readdir(dir);
+    const weightFiles = names.filter((name) => name.endsWith(".safetensors"));
+    const stats = await Promise.all(weightFiles.map((name) => stat(join(dir, name))));
+    for (const { size } of stats) {
+      total += size;
+    }
+  } catch {
+    return 0;
+  }
+  return total;
+}
+// ── Recursive MLX scanner ─────────────────────────────────────────────────
+/**
+ * Recursively scan a directory tree for MLX model directories.
+ * Does NOT collect GGUF (that's scan.mjs).
+ *
+ * Each sub-directory is tested with isMlxModelDir(). A match is recorded
+ * (unless filtered out) and never descended into; a non-match is descended
+ * into as long as its depth does not exceed maxDepth. rootDir itself is never
+ * reported as a model, only directories below it. Entries whose name starts
+ * with "." or equals "README.md" are skipped.
+ *
+ * A detected model is filtered out when its .safetensors total is below
+ * MIN_MODEL_SIZE_BYTES or isEmbeddingMlxModel() flags its config.json.
+ *
+ * @param {string} rootDir - directory to scan.
+ * @param {string} sourceLabel - recorded as each model's `source` and id prefix.
+ * @param {number} [maxDepth=3] - deepest directory level (root = 0) whose children are listed.
+ * @returns {Promise<object[]>} model entries built by makeMlxModel(); empty
+ *   when rootDir does not exist.
+ */
+async function scanDirRecursiveForMlx(rootDir, sourceLabel, maxDepth = 3) {
+  if (!existsSync(rootDir)) return [];
+  const models = [];
+  // Record `dir` as a model unless the size / embedding filters reject it.
+  async function collect(dir, label) {
+    const sizeBytes = await getMlxDirSizeBytes(dir);
+    if (sizeBytes < MIN_MODEL_SIZE_BYTES) return;
+    if (await isEmbeddingMlxModel(join(dir, "config.json"))) return;
+    const caps = await detectMlxCapabilities(dir);
+    models.push(makeMlxModel(dir, label, sizeBytes, sourceLabel, rootDir, caps.contextLength));
+  }
+  // walk() is only ever entered for rootDir or for a directory that has just
+  // failed isMlxModelDir(), so it does not re-test `dir` itself.
+  async function walk(dir, depth) {
+    if (depth > maxDepth) return;
+    let entries;
+    try {
+      entries = await readdir(dir, { withFileTypes: true });
+    } catch {
+      return;
+    }
+    for (const entry of entries) {
+      if (entry.name.startsWith(".") || entry.name === "README.md") continue;
+      if (!entry.isDirectory()) continue;
+      const fullPath = join(dir, entry.name);
+      if (await isMlxModelDir(fullPath)) {
+        await collect(fullPath, entry.name);
+      } else {
+        await walk(fullPath, depth + 1);
+      }
+    }
+  }
+  await walk(rootDir, 0);
+  return models;
+}
+// ── HuggingFace Hub layout ────────────────────────────────────────────────
+/**
+ * Heuristic for an HF Hub cache directory: it contains at least one
+ * sub-directory whose name starts with "models--".
+ * @param {string} dir
+ * @returns {Promise<boolean>} false as well when `dir` is missing or unreadable.
+ */
+async function looksLikeHfHub(dir) {
+  if (!existsSync(dir)) return false;
+  let entries;
+  try {
+    entries = await readdir(dir, { withFileTypes: true });
+  } catch {
+    return false;
+  }
+  for (const entry of entries) {
+    if (entry.isDirectory() && entry.name.startsWith("models--")) return true;
+  }
+  return false;
+}
+/**
+ * Scan an HF Hub cache dir for MLX models.
+ * Expected layout: models--org--name/snapshots/<hash>/<files>.
+ *
+ * For each "models--*" directory, the first snapshot that resolves to a
+ * directory and passes isMlxModelDir() is used. Models below
+ * MIN_MODEL_SIZE_BYTES and embedding models are skipped. Any error while
+ * scanning stops the scan and returns the models collected so far.
+ *
+ * @param {string} dir - hub cache directory.
+ * @param {string} sourceLabel - recorded as each model's `source` and id prefix.
+ * @returns {Promise<object[]>}
+ */
+async function scanHfHubForMlx(dir, sourceLabel) {
+  const found = [];
+  if (!existsSync(dir)) return found;
+  const MODEL_PREFIX = "models--";
+
+  // Snapshots may be real directories or symlinks, and some can be
+  // incomplete or empty, so each one is checked in listing order until a
+  // valid MLX model dir turns up. stat() follows symlinks.
+  const firstMlxSnapshot = async (snapshotsDir) => {
+    const snapshots = await readdir(snapshotsDir, { withFileTypes: true });
+    for (const snap of snapshots) {
+      if (!snap.isDirectory() && !snap.isSymbolicLink()) continue;
+      const candidate = join(snapshotsDir, snap.name);
+      const info = await stat(candidate).catch(() => null);
+      if (info?.isDirectory() && await isMlxModelDir(candidate)) return candidate;
+    }
+    return null;
+  };
+
+  try {
+    for (const entry of await readdir(dir, { withFileTypes: true })) {
+      if (!(entry.isDirectory() && entry.name.startsWith(MODEL_PREFIX))) continue;
+      const snapshotsDir = join(dir, entry.name, "snapshots");
+      if (!existsSync(snapshotsDir)) continue;
+      const snapshotPath = await firstMlxSnapshot(snapshotsDir);
+      if (!snapshotPath) continue;
+      const sizeBytes = await getMlxDirSizeBytes(snapshotPath);
+      if (sizeBytes < MIN_MODEL_SIZE_BYTES) continue;
+      if (await isEmbeddingMlxModel(join(snapshotPath, "config.json"))) continue;
+      const { contextLength } = await detectMlxCapabilities(snapshotPath);
+      found.push({
+        id: `${sourceLabel}:${entry.name}`,
+        // "models--org--name" → "org/name"
+        label: entry.name.slice(MODEL_PREFIX.length).split("--").join("/"),
+        path: snapshotPath,
+        filePath: snapshotPath,
+        sizeBytes,
+        contextLength,
+        backend: "mlx-vlm",
+        format: "mlx",
+        source: sourceLabel,
+      });
+    }
+  } catch {
+    // Can't read — return what we have.
+  }
+  return found;
+}
+// ── Embedding model filtering for MLX ─────────────────────────────────────
+/**
+ * Decide from a config.json whether the model is an embedding model.
+ * True when the lower-cased model_type (taken from `text_config` if present,
+ * else the top level) or the lower-cased first architecture name is in
+ * EMBEDDING_MODEL_TYPES, or when that architecture name contains "bert".
+ * @param {string} configPath - path to the model's config.json.
+ * @returns {Promise<boolean>} false when the file is missing, unreadable, or
+ *   not usable JSON.
+ */
+async function isEmbeddingMlxModel(configPath) {
+  if (!existsSync(configPath)) return false;
+  try {
+    const raw = await readFile(configPath, "utf-8");
+    const config = JSON.parse(raw);
+    const typeSource = config.text_config ?? config;
+    const modelType = String(typeSource.model_type ?? "").toLowerCase();
+    if (EMBEDDING_MODEL_TYPES.has(modelType)) {
+      return true;
+    }
+    let arch = "";
+    if (Array.isArray(config.architectures)) {
+      arch = config.architectures[0];
+    }
+    const lowerArch = String(arch).toLowerCase();
+    if (EMBEDDING_MODEL_TYPES.has(lowerArch)) {
+      return true;
+    }
+    return lowerArch.includes("bert");
+  } catch {
+    return false;
+  }
+}
+// ── MLX model entry builder ───────────────────────────────────────────────
+/**
+ * Build the model entry for an MLX directory found by the recursive scanner.
+ * The id is `<sourceLabel>:<dir with the first "rootDir/" occurrence removed>`.
+ * @param {string} dir - model directory (used for both `path` and `filePath`).
+ * @param {string} label - display label.
+ * @param {number} sizeBytes
+ * @param {string} sourceLabel
+ * @param {string} rootDir - scan root the id is made relative to.
+ * @param {number|null} [contextLength=null]
+ * @returns {object}
+ */
+function makeMlxModel(dir, label, sizeBytes, sourceLabel, rootDir, contextLength = null) {
+  const relativeDir = dir.replace(`${rootDir}/`, "");
+  const entry = {
+    id: `${sourceLabel}:${relativeDir}`,
+    label,
+    path: dir,
+    filePath: dir,
+    sizeBytes,
+    contextLength,
+  };
+  entry.backend = "mlx-vlm";
+  entry.format = "mlx";
+  entry.source = sourceLabel;
+  return entry;
+}
+// ── Public API ─────────────────────────────────────────────────────────────
+/**
+ * Discover MLX models across the scan directories.
+ *
+ * Uses `dirs` when given, otherwise getModelScanDirs() from config.mjs. The
+ * oMLX folder is excluded (oMLX-exclusive). Directories that look like an HF
+ * Hub cache are scanned with the hub layout; all others are scanned
+ * recursively.
+ *
+ * @param {string[]} [dirs] - directories to scan instead of the configured ones.
+ * @returns {Promise<object[]>} flat list, deduplicated by `filePath` (the
+ *   first occurrence wins).
+ */
+export async function scanMlxModels(dirs) {
+  const configured = dirs ?? await getModelScanDirs();
+  const scanOne = async (dir) => {
+    const label = inferSourceLabel(dir);
+    const isHub = await looksLikeHfHub(dir);
+    return isHub ? scanHfHubForMlx(dir, label) : scanDirRecursiveForMlx(dir, label);
+  };
+  const perDir = await Promise.all(
+    configured.filter((d) => !isOmlxFolder(d)).map((dir) => scanOne(dir)),
+  );
+  // The same model may be reachable through several scan dirs; keep the first.
+  const byPath = new Map();
+  for (const model of perDir.flat()) {
+    if (!byPath.has(model.filePath)) byPath.set(model.filePath, model);
+  }
+  return [...byPath.values()];
+}
+// ── MLX capability detection ─────────────────────────────────────────────
+/**
+ * Read `<modelDir>/config.json` and derive the model's capabilities from it.
+ * @param {string} modelDir - MLX model directory.
+ * @returns {Promise<{ thinking: boolean, vision: boolean, contextLength: number|null, architecture: string|null }>}
+ *   all-negative defaults when config.json is missing, unreadable, or cannot
+ *   be processed.
+ */
+export async function detectMlxCapabilities(modelDir) {
+  const unknown = () => ({ thinking: false, vision: false, contextLength: null, architecture: null });
+  const configPath = join(modelDir, "config.json");
+  if (!existsSync(configPath)) {
+    return unknown();
+  }
+  try {
+    const text = await readFile(configPath, "utf-8");
+    return detectMlxCapabilitiesFromConfig(JSON.parse(text), modelDir);
+  } catch {
+    return unknown();
+  }
+}
+/**
+ * Derive capabilities from an already-parsed config.json object.
+ *
+ * - vision: a truthy `vision_config`, any of `image_token_id`,
+ *   `video_token_id`, `vision_start_token_id` being non-null, or a model_type
+ *   (top-level or text_config) containing "vl" or "vision".
+ * - thinking: the model name (`_name_or_path`, else the basename of modelDir)
+ *   matches qwen3 / gemma-4 / gemma4 / deepseek-r1 / deepseek-r2.
+ * - contextLength: the largest positive number among max_position_embeddings
+ *   and sliding_window (text_config and top level), or null when there is none.
+ * - architecture: the first entry of `architectures`, or null.
+ *
+ * @param {object} config - parsed config.json.
+ * @param {string} [modelDir] - used only as a fallback source for the name.
+ * @returns {{ thinking: boolean, vision: boolean, contextLength: number|null, architecture: string|null }}
+ */
+export function detectMlxCapabilitiesFromConfig(config, modelDir) {
+  const textConfig = config.text_config ?? config;
+  const label = String(config._name_or_path ?? basename(modelDir ?? ""));
+  const lowerName = label.toLowerCase();
+
+  const typeNames = [config.model_type, textConfig.model_type].map((t) => String(t ?? "").toLowerCase());
+  const hasVisionField =
+    Boolean(config.vision_config) ||
+    config.image_token_id != null ||
+    config.video_token_id != null ||
+    config.vision_start_token_id != null;
+  const vision = hasVisionField || typeNames.some((t) => t.includes("vl") || t.includes("vision"));
+
+  const thinking = /qwen3|gemma-4|gemma4|deepseek-r[12]/i.test(`${lowerName} ${label}`);
+
+  let architecture = null;
+  if (Array.isArray(config.architectures)) {
+    architecture = config.architectures[0] ?? null;
+  }
+
+  let contextLength = null;
+  const lengthHints = [
+    textConfig.max_position_embeddings,
+    textConfig.sliding_window,
+    config.max_position_embeddings,
+    config.sliding_window,
+  ];
+  for (const hint of lengthHints) {
+    if (typeof hint !== "number" || !(hint > 0)) continue;
+    if (contextLength === null || hint > contextLength) contextLength = hint;
+  }
+  return { thinking, vision, contextLength, architecture };
+}
+/**
+ * Pick a default context length for an MLX model, capped by available RAM.
+ *
+ *   RAM < 12 GB → at most 4096
+ *   RAM < 16 GB → at most 8192
+ *   RAM < 32 GB → at most 16384
+ *   otherwise   → the trained context length, uncapped
+ *
+ * @param {number} trainedCtx - the model's trained context length.
+ * @param {number} ramGb - system RAM in GB.
+ * @returns {number} 8192 when trainedCtx is missing or not positive.
+ */
+export function defaultMlxContextLength(trainedCtx, ramGb) {
+  if (!trainedCtx || trainedCtx <= 0) return 8192;
+  // [RAM upper bound in GB (exclusive), context cap], in ascending order.
+  const tiers = [
+    [12, 4096],
+    [16, 8192],
+    [32, 16384],
+  ];
+  for (const [ramLimit, cap] of tiers) {
+    if (ramGb < ramLimit) return Math.min(trainedCtx, cap);
+  }
+  return trainedCtx;
+}

package/src/mlx-flags.mjs ADDED Viewed

@@ -0,0 +1,93 @@
+// mlx-vlm server flag computation — pure functions, no side effects.
+// Ported from deprecated-offgrid-desktop/src/main/server-flags.ts (MLX subset).
+//
+// Benchmark-informed decisions (see sidequests/mlx-backend-benchmark/RESULTS.md):
+// - mlx-vlm requires APC_ENABLED=1 env var (86x TTFT improvement) — set at spawn
+//   time in process.mjs, NOT here (this module only computes args).
+// - mlx-vlm uses a strict=False wrapper script for shared-KV architectures
+//   (Gemma 4-class). Safe for all models — strict=False is a no-op for models
+//   that load fine with strict=True.
+// - mlx-vlm uses --enable-thinking for thinking-mode control.
+// - mlx-vlm uses --max-kv-size for the KV cache / context window.
+//
+// Only the mlx-vlm-relevant logic is ported here. offgrid-ai's existing GGUF
+// flag logic (autodetect.mjs / profile-setup.mjs / estimate.mjs) is unchanged.
+import { fileURLToPath } from "node:url";
+import { dirname, join } from "node:path";
+/** Bytes per mebibyte (1024 × 1024); used to convert on-disk sizes to MB. */
+const MB = 1024 ** 2;
+/** Default port for the local model server. Matches the desktop's DEFAULT_PORT. */
+export const DEFAULT_PORT = 18080;
+/**
+ * Resolved path to the bundled strict=False wrapper script: resources/ is a
+ * sibling of src/, so the path goes one level up from this module's directory.
+ */
+export const MLX_VLM_WRAPPER = join(dirname(fileURLToPath(import.meta.url)), "..", "resources", "mlxvlm-server-wrapper.py");
+/** Overhead multiplier for mlx-vlm: weights × 1.5 (covers KV cache, activations, APC cache; benchmark-validated). */
+const MLX_VLM_OVERHEAD_MULTIPLIER = 1.5;
+/** Server process overhead in MB, added on top of the scaled weight size. */
+const PROCESS_OVERHEAD_MB = 200;
+/**
+ * Estimate mlx-vlm memory usage in MB from the model's on-disk size:
+ * weights (MB) × MLX_VLM_OVERHEAD_MULTIPLIER + PROCESS_OVERHEAD_MB, rounded to
+ * the nearest integer.
+ *
+ * GGUF/llama-server estimation does not go through this function; it uses the
+ * detailed path in estimate.mjs.
+ *
+ * @param {number} fileSizeBytes - model size on disk (sum of MLX safetensors).
+ * @returns {number} estimated memory in MB.
+ */
+export function estimateMemoryMb(fileSizeBytes) {
+  const weightsMb = fileSizeBytes / MB;
+  const estimateMb = weightsMb * MLX_VLM_OVERHEAD_MULTIPLIER + PROCESS_OVERHEAD_MB;
+  return Math.round(estimateMb);
+}
+/**
+ * Build the argument list for launching the mlx-vlm server.
+ *
+ * The list starts with the wrapper script path (MLX_VLM_WRAPPER), because the
+ * binary is "python3" (resolved by backendBinaryFor in backends.mjs). There is
+ * intentionally no raw mlx_vlm.server invocation: per the module notes, the
+ * wrapper supplies strict=False loading and the BatchRotatingKVCache.merge()
+ * fix needed by shared-KV architectures (Gemma 4-class).
+ *
+ * Only args are computed here. The mandatory APC_ENABLED=1 environment
+ * variable is set at spawn time in process.mjs.
+ *
+ * @param {string} modelPath - path to the MLX model directory.
+ * @param {object} [options]
+ * @param {number} [options.port] - port (default DEFAULT_PORT).
+ * @param {number} [options.ctxSize] - context window; emitted as --max-kv-size
+ *   only when it is a positive number.
+ * @param {boolean} [options.thinkingEnabled=true] - adds --enable-thinking when true.
+ * @returns {{ args: string[], port: number }}
+ */
+export function computeMlxVlmFlags(modelPath, options = {}) {
+  const port = options.port ?? DEFAULT_PORT;
+  const wantsThinking = options.thinkingEnabled ?? true;
+  const { ctxSize } = options;
+
+  const thinkingArgs = wantsThinking ? ["--enable-thinking"] : [];
+  // mlx-vlm sizes its KV cache / context window through --max-kv-size.
+  const contextArgs = ctxSize && ctxSize > 0 ? ["--max-kv-size", String(ctxSize)] : [];
+
+  const args = [
+    MLX_VLM_WRAPPER,
+    "--model", modelPath,
+    "--host", "127.0.0.1",
+    "--port", String(port),
+    ...thinkingArgs,
+    ...contextArgs,
+  ];
+  return { args, port };
+}