npm - offgrid-ai - Versions diffs - 0.12.1 → 0.13.0 - Mend

offgrid-ai 0.12.1 → 0.13.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.

Files changed (6) hide show

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "offgrid-ai",
-  "version": "0.12.1",
+  "version": "0.13.0",
   "description": "Privacy-first CLI for running local LLMs — discover, configure, run, benchmark",
   "author": "Eeshan Srivastava (https://eeshans.com)",
   "type": "module",

package/src/autodetect.mjs CHANGED Viewed

@@ -50,7 +50,7 @@ export function detectCapabilities(modelPath, mmprojPath) {
 // ── Compute llama-server flags from capabilities ───────────────────────────
-export function computeFlags(capabilities, modelPath, mmprojPath, draftModelPath) {
+export function computeFlags(capabilities, modelPath, mmprojPath, draftModelPath, flagOverrides = {}) {
   const { thinking, mtp, quant } = capabilities;
   const isLowMem = quant && /[Qq]4[_0]/i.test(quant);
@@ -69,6 +69,7 @@ export function computeFlags(capabilities, modelPath, mmprojPath, draftModelPath
     repeatPenalty: thinking ? 1.1 : 1.0,
     parallel: 1,
     batchSize: 512,
+    ...flagOverrides,
   };
   // Thinking mode

package/src/mlx-flags.mjs CHANGED Viewed

@@ -89,5 +89,12 @@ export function computeMlxVlmFlags(modelPath, options = {}) {
     args.push("--max-kv-size", String(ctxSize));
   }
+  // Default max output tokens — used when the client doesn't specify max_tokens
+  // in the request. Pi's OpenAI completions provider never sends max_tokens
+  // (it doesn't fall back to model.maxTokens like the Anthropic provider does).
+  // llama-server defaults high; mlx-vlm defaults to 2048 which is too low for
+  // coding tasks. Set a generous server-side default.
+  args.push("--max-tokens", "16384");
   return { args, port };
 }

package/src/model-presenters.mjs CHANGED Viewed

@@ -1,9 +1,8 @@
 import { existsSync, statSync } from "node:fs";
-import { basename, dirname } from "node:path";
+import { basename, dirname, join } from "node:path";
 import { backendFor } from "./backends.mjs";
-import { readCommandArgv } from "./profiles.mjs";
-import { isProfileRunning } from "./process.mjs";
-import { buildPrettyCommand } from "./command.mjs";
+import { computeServerCommand, buildStartScript, isProfileRunning } from "./process.mjs";
+import { profileDir } from "./profiles.mjs";
 import { pc, formatBytes, renderRows, renderSection } from "./ui.mjs";
 import { capabilitySummary, ggufDetailParts, isProfileFileMissing, profileDetailParts } from "./model-summary.mjs";
 import { itemKey } from "./model-catalog.mjs";
@@ -260,9 +259,16 @@ export async function printProfileDetails(profile) {
   if (fileMissing) console.log("\n" + pc.red("⚠ This model's file is no longer on disk. Remove this setup or move the file back."));
-  if (!isManaged && profile.commandArgv) {
-    const commandArgv = await readCommandArgv(profile);
-    console.log("\n" + renderSection("llama-server command", pc.dim(buildPrettyCommand({ ...profile, commandArgv })), { columns: 120 }));
+  if (!isManaged) {
+    const command = await computeServerCommand(profile);
+    if (command) {
+      const script = buildStartScript(profile, command);
+      const scriptPath = join(profileDir(profile.id), "start.sh");
+      console.log("\n" + renderSection("Server command", renderRows([
+        ["Run manually", pc.cyan(`bash ${scriptPath}`)],
+        ["Command", pc.dim(script)],
+      ]), { columns: 120 }));
+    }
   }
 }

package/src/process.mjs CHANGED Viewed

@@ -1,14 +1,85 @@
 import { execFile, spawn } from "node:child_process";
 import { promisify } from "node:util";
 import { closeSync, openSync } from "node:fs";
-import { readFile, writeFile } from "node:fs/promises";
+import { readFile, writeFile, chmod } from "node:fs/promises";
 import { basename, join } from "node:path";
+import { quoteShell } from "./command.mjs";
 import { LOG_DIR } from "./config.mjs";
-import { writeState, readState, readCommandArgv } from "./profiles.mjs";
+import { writeState, readState, profileDir } from "./profiles.mjs";
 import { backendFor, backendBinaryFor } from "./backends.mjs";
 const execFileAsync = promisify(execFile);
+// ── Compute server command from profile config ─────────────────────────────
+// Single source of truth: derives the full command (binary + args + env) from
+// the profile's stored configuration.  Used for both launching and display so
+// they always match.  No stored commandArgv — the command is always fresh.
+export async function computeServerCommand(profile) {
+  const backend = backendFor(profile.backend);
+  if (backend.type === "managed-server") return null;
+  const binary = await backendBinaryFor(profile.backend);
+  if (!binary) throw new Error("Server binary not found. Run offgrid-ai interactively to install.");
+  let argv, extraEnv;
+  if (profile.backend === "mlx-vlm") {
+    const { computeMlxVlmFlags } = await import("./mlx-flags.mjs");
+    const result = computeMlxVlmFlags(profile.modelPath, {
+      port: profile.flags?.port,
+      ctxSize: profile.flags?.ctxSize,
+      thinkingEnabled: profile.capabilities?.thinking ?? true,
+    });
+    argv = result.args;
+    extraEnv = { APC_ENABLED: "1", MLX_VLM_MAX_TOKENS: "16384" };
+  } else {
+    // llama-cpp / llama-cpp-mtp
+    const { computeFlags } = await import("./autodetect.mjs");
+    const result = computeFlags(
+      profile.capabilities ?? {},
+      profile.modelPath,
+      profile.mmprojPath,
+      profile.drafterPath,
+      profile.flags ?? {},
+    );
+    argv = result.argv;
+    extraEnv = {};
+  }
+  return { binary, argv, extraEnv, backend };
+}
+/** Build a runnable start.sh script for the profile. */
+export function buildStartScript(profile, command) {
+  const { binary, argv, extraEnv } = command;
+  const backend = backendFor(profile.backend);
+  const lines = [
+    "#!/bin/bash",
+    `# Generated by offgrid-ai — do not edit`,
+    `# Profile: ${profile.label}`,
+    `# Backend: ${backend.label}`,
+  ];
+  for (const [key, value] of Object.entries(extraEnv)) {
+    lines.push(`export ${key}="${value}"`);
+  }
+  lines.push(""); // blank line before exec
+  // Format the exec command with backslash continuation
+  lines.push(`exec ${quoteShell(binary)} \\`);
+  for (let i = 0; i < argv.length; i++) {
+    const arg = argv[i];
+    const next = argv[i + 1];
+    const hasValue = arg.startsWith("--") && next && !next.startsWith("--");
+    if (hasValue) {
+      lines.push(`  ${arg} ${quoteShell(next)}${i + 2 < argv.length ? " \\" : ""}`);
+      i += 1;
+    } else {
+      lines.push(`  ${arg}${i + 1 < argv.length ? " \\" : ""}`);
+    }
+  }
+  return lines.join("\n") + "\n";
+}
 // ── Start server ───────────────────────────────────────────────────────────
 export async function startServer(profile) {
@@ -20,23 +91,24 @@ export async function startServer(profile) {
 }
 async function startLocalServer(profile) {
-  const binary = await backendBinaryFor(profile.backend);
-  if (!binary) {
-    throw new Error("llama-server not found. Install the managed llama.cpp runtime by running offgrid-ai interactively.");
-  }
+  const command = await computeServerCommand(profile);
+  if (!command) throw new Error("No server command for this backend.");
+  const { binary, argv, extraEnv } = command;
   const timestamp = timestampForFile();
   const rawLogPath = join(LOG_DIR, `${profile.id}-${timestamp}.raw.log`);
   const friendlyLogPath = join(LOG_DIR, `${profile.id}-${timestamp}.friendly.log`);
-  const commandArgv = await readCommandArgv(profile);
-  await writeFile(rawLogPath, `[offgrid-ai] ${new Date().toISOString()}\n[binary] ${binary}\n[argv]\n${commandArgv.join(" ")}\n`, "utf8");
-  await writeFile(friendlyLogPath, `[launch] starting llama-server for ${profile.label}\n`, "utf8");
+  // Write start.sh so the user can run the model manually
+  const scriptPath = join(profileDir(profile.id), "start.sh");
+  await writeFile(scriptPath, buildStartScript(profile, command), "utf8");
+  await chmod(scriptPath, 0o755);
+  await writeFile(rawLogPath, `[offgrid-ai] ${new Date().toISOString()}\n[binary] ${binary}\n[argv]\n${argv.join(" ")}\n`, "utf8");
+  await writeFile(friendlyLogPath, `[launch] starting ${backendFor(profile.backend).label} for ${profile.label}\n`, "utf8");
-  // Build argv: binary + command.json args
-  const argv = [...commandArgv];
-  // mlx-vlm requires APC_ENABLED=1 (86x TTFT improvement; fixes Metal cache clearing).
-  const env = profile.backend === "mlx-vlm" ? { ...process.env, APC_ENABLED: "1" } : process.env;
+  const env = { ...process.env, ...extraEnv };
   const rawFd = openSync(rawLogPath, "a");
   let child;

package/src/profiles.mjs CHANGED Viewed

@@ -69,14 +69,9 @@ export async function saveProfile(profile, options = {}) {
   };
   await writeJson(profileJsonPath(id), saved);
-  // Write JSON command file for llama-server backends
-  const backend = backendFor(saved.backend);
-  if (backend.needsCommandFile) {
-    const cmdPath = commandJsonPath(id);
-    if (options.writeCommand || !existsSync(cmdPath)) {
-      await writeJson(cmdPath, { argv: saved.commandArgv ?? [] });
-    }
-  }
+  // Note: command.json is no longer written — the server command is computed
+  // fresh from the profile config at launch time (see computeServerCommand in
+  // process.mjs).  commandArgv is kept in the profile for backwards compat.
   if (!existsSync(notesPath(id))) {
     await writeFile(notesPath(id), `# ${saved.label}\n\nNotes for this model profile.\n`, "utf8");