offgrid-ai 0.12.1 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "offgrid-ai",
3
- "version": "0.12.1",
3
+ "version": "0.13.0",
4
4
  "description": "Privacy-first CLI for running local LLMs — discover, configure, run, benchmark",
5
5
  "author": "Eeshan Srivastava (https://eeshans.com)",
6
6
  "type": "module",
@@ -50,7 +50,7 @@ export function detectCapabilities(modelPath, mmprojPath) {
50
50
 
51
51
  // ── Compute llama-server flags from capabilities ───────────────────────────
52
52
 
53
- export function computeFlags(capabilities, modelPath, mmprojPath, draftModelPath) {
53
+ export function computeFlags(capabilities, modelPath, mmprojPath, draftModelPath, flagOverrides = {}) {
54
54
  const { thinking, mtp, quant } = capabilities;
55
55
  const isLowMem = quant && /[Qq]4[_0]/i.test(quant);
56
56
 
@@ -69,6 +69,7 @@ export function computeFlags(capabilities, modelPath, mmprojPath, draftModelPath
69
69
  repeatPenalty: thinking ? 1.1 : 1.0,
70
70
  parallel: 1,
71
71
  batchSize: 512,
72
+ ...flagOverrides,
72
73
  };
73
74
 
74
75
  // Thinking mode
package/src/mlx-flags.mjs CHANGED
@@ -89,5 +89,12 @@ export function computeMlxVlmFlags(modelPath, options = {}) {
89
89
  args.push("--max-kv-size", String(ctxSize));
90
90
  }
91
91
 
92
+ // Default max output tokens — used when the client doesn't specify max_tokens
93
+ // in the request. Pi's OpenAI completions provider never sends max_tokens
94
+ // (it doesn't fall back to model.maxTokens like the Anthropic provider does).
95
+ // llama-server defaults high; mlx-vlm defaults to 2048 which is too low for
96
+ // coding tasks. Set a generous server-side default.
97
+ args.push("--max-tokens", "16384");
98
+
92
99
  return { args, port };
93
100
  }
@@ -1,9 +1,8 @@
1
1
  import { existsSync, statSync } from "node:fs";
2
- import { basename, dirname } from "node:path";
2
+ import { basename, dirname, join } from "node:path";
3
3
  import { backendFor } from "./backends.mjs";
4
- import { readCommandArgv } from "./profiles.mjs";
5
- import { isProfileRunning } from "./process.mjs";
6
- import { buildPrettyCommand } from "./command.mjs";
4
+ import { computeServerCommand, buildStartScript, isProfileRunning } from "./process.mjs";
5
+ import { profileDir } from "./profiles.mjs";
7
6
  import { pc, formatBytes, renderRows, renderSection } from "./ui.mjs";
8
7
  import { capabilitySummary, ggufDetailParts, isProfileFileMissing, profileDetailParts } from "./model-summary.mjs";
9
8
  import { itemKey } from "./model-catalog.mjs";
@@ -260,9 +259,16 @@ export async function printProfileDetails(profile) {
260
259
 
261
260
  if (fileMissing) console.log("\n" + pc.red("⚠ This model's file is no longer on disk. Remove this setup or move the file back."));
262
261
 
263
- if (!isManaged && profile.commandArgv) {
264
- const commandArgv = await readCommandArgv(profile);
265
- console.log("\n" + renderSection("llama-server command", pc.dim(buildPrettyCommand({ ...profile, commandArgv })), { columns: 120 }));
262
+ if (!isManaged) {
263
+ const command = await computeServerCommand(profile);
264
+ if (command) {
265
+ const script = buildStartScript(profile, command);
266
+ const scriptPath = join(profileDir(profile.id), "start.sh");
267
+ console.log("\n" + renderSection("Server command", renderRows([
268
+ ["Run manually", pc.cyan(`bash ${scriptPath}`)],
269
+ ["Command", pc.dim(script)],
270
+ ]), { columns: 120 }));
271
+ }
266
272
  }
267
273
  }
268
274
 
package/src/process.mjs CHANGED
@@ -1,14 +1,85 @@
1
1
  import { execFile, spawn } from "node:child_process";
2
2
  import { promisify } from "node:util";
3
3
  import { closeSync, openSync } from "node:fs";
4
- import { readFile, writeFile } from "node:fs/promises";
4
+ import { readFile, writeFile, chmod } from "node:fs/promises";
5
5
  import { basename, join } from "node:path";
6
+ import { quoteShell } from "./command.mjs";
6
7
  import { LOG_DIR } from "./config.mjs";
7
- import { writeState, readState, readCommandArgv } from "./profiles.mjs";
8
+ import { writeState, readState, profileDir } from "./profiles.mjs";
8
9
  import { backendFor, backendBinaryFor } from "./backends.mjs";
9
10
 
10
11
  const execFileAsync = promisify(execFile);
11
12
 
13
+ // ── Compute server command from profile config ─────────────────────────────
14
+ // Single source of truth: derives the full command (binary + args + env) from
15
+ // the profile's stored configuration. Used for both launching and display so
16
+ // they always match. No stored commandArgv — the command is always fresh.
17
+
18
/**
 * Derive the full launch command for a profile from its stored configuration.
 *
 * @param {object} profile - Profile record; reads `backend`, `modelPath`,
 *   `mmprojPath`, `drafterPath`, `capabilities` and `flags`.
 * @returns {Promise<{binary: *, argv: *, extraEnv: object, backend: *} | null>}
 *   `null` when the backend's `type` is "managed-server"; otherwise the binary
 *   returned by `backendBinaryFor`, the computed argument list, any extra
 *   environment variables, and the backend descriptor.
 * @throws {Error} When `backendBinaryFor` yields no binary for the backend.
 */
export async function computeServerCommand(profile) {
  const backend = backendFor(profile.backend);
  if (backend.type === "managed-server") {
    return null;
  }

  const binary = await backendBinaryFor(profile.backend);
  if (!binary) {
    throw new Error("Server binary not found. Run offgrid-ai interactively to install.");
  }

  // Every backend other than mlx-vlm goes through the llama-cpp flag builder
  // (llama-cpp / llama-cpp-mtp) and needs no extra environment.
  if (profile.backend !== "mlx-vlm") {
    const autodetect = await import("./autodetect.mjs");
    const { argv } = autodetect.computeFlags(
      profile.capabilities ?? {},
      profile.modelPath,
      profile.mmprojPath,
      profile.drafterPath,
      profile.flags ?? {},
    );
    return { binary, argv, extraEnv: {}, backend };
  }

  // mlx-vlm: flags come from the dedicated builder; thinking defaults to on
  // when the profile does not record the capability.
  const mlx = await import("./mlx-flags.mjs");
  const { args } = mlx.computeMlxVlmFlags(profile.modelPath, {
    port: profile.flags?.port,
    ctxSize: profile.flags?.ctxSize,
    thinkingEnabled: profile.capabilities?.thinking ?? true,
  });
  return {
    binary,
    argv: args,
    extraEnv: { APC_ENABLED: "1", MLX_VLM_MAX_TOKENS: "16384" },
    backend,
  };
}
52
+
53
/**
 * Build a runnable start.sh script for the profile.
 *
 * The script exports every entry of `command.extraEnv`, then `exec`s the
 * binary with one argument (or flag/value pair) per continuation line.
 *
 * @param {object} profile - Profile record; `label` and `backend` are read.
 * @param {{binary: string, argv?: Array, extraEnv?: object}} command - Command
 *   as returned by `computeServerCommand`.
 * @returns {string} The script text, terminated by a newline.
 */
export function buildStartScript(profile, command) {
  const { binary, argv = [], extraEnv = {} } = command;
  const backend = backendFor(profile.backend);

  // Header values are written into `#` comments. A line break inside one would
  // end the comment and turn the remainder into executable shell, so collapse
  // any CR/LF run into a single space.
  const commentSafe = (text) => String(text).replace(/[\r\n]+/g, " ");

  // Plain option names are emitted verbatim to keep the script readable;
  // anything else is shell-quoted.
  const plainFlag = /^--?[A-Za-z0-9][A-Za-z0-9_-]*$/;
  const word = (text) => (plainFlag.test(text) ? text : quoteShell(text));

  const lines = [
    "#!/bin/bash",
    "# Generated by offgrid-ai — do not edit",
    `# Profile: ${commentSafe(profile.label)}`,
    `# Backend: ${commentSafe(backend.label)}`,
  ];

  // Quote env values through quoteShell so `"`, `$` and backticks stay literal.
  for (const [key, value] of Object.entries(extraEnv)) {
    lines.push(`export ${key}=${quoteShell(String(value))}`);
  }
  lines.push(""); // blank line before exec

  // Coerce once so numeric argv entries do not throw on startsWith().
  const words = argv.map(String);
  const execLines = [`exec ${quoteShell(binary)}`];
  for (let i = 0; i < words.length; i++) {
    const arg = words[i];
    const next = words[i + 1];
    // Compare against undefined (not truthiness) so an empty-string value
    // still pairs with its flag instead of being dropped.
    const hasValue = arg.startsWith("--") && next !== undefined && !next.startsWith("--");
    if (hasValue) {
      execLines.push(`  ${word(arg)} ${quoteShell(next)}`);
      i += 1;
    } else {
      execLines.push(`  ${word(arg)}`);
    }
  }

  // Joining with the continuation marker guarantees the final line never ends
  // in a dangling backslash, including when argv is empty.
  lines.push(execLines.join(" \\\n"));
  return `${lines.join("\n")}\n`;
}
82
+
12
83
  // ── Start server ───────────────────────────────────────────────────────────
13
84
 
14
85
  export async function startServer(profile) {
@@ -20,23 +91,24 @@ export async function startServer(profile) {
20
91
  }
21
92
 
22
93
  async function startLocalServer(profile) {
23
- const binary = await backendBinaryFor(profile.backend);
24
- if (!binary) {
25
- throw new Error("llama-server not found. Install the managed llama.cpp runtime by running offgrid-ai interactively.");
26
- }
94
+ const command = await computeServerCommand(profile);
95
+ if (!command) throw new Error("No server command for this backend.");
96
+
97
+ const { binary, argv, extraEnv } = command;
27
98
 
28
99
  const timestamp = timestampForFile();
29
100
  const rawLogPath = join(LOG_DIR, `${profile.id}-${timestamp}.raw.log`);
30
101
  const friendlyLogPath = join(LOG_DIR, `${profile.id}-${timestamp}.friendly.log`);
31
- const commandArgv = await readCommandArgv(profile);
32
102
 
33
- await writeFile(rawLogPath, `[offgrid-ai] ${new Date().toISOString()}\n[binary] ${binary}\n[argv]\n${commandArgv.join(" ")}\n`, "utf8");
34
- await writeFile(friendlyLogPath, `[launch] starting llama-server for ${profile.label}\n`, "utf8");
103
+ // Write start.sh so the user can run the model manually
104
+ const scriptPath = join(profileDir(profile.id), "start.sh");
105
+ await writeFile(scriptPath, buildStartScript(profile, command), "utf8");
106
+ await chmod(scriptPath, 0o755);
107
+
108
+ await writeFile(rawLogPath, `[offgrid-ai] ${new Date().toISOString()}\n[binary] ${binary}\n[argv]\n${argv.join(" ")}\n`, "utf8");
109
+ await writeFile(friendlyLogPath, `[launch] starting ${backendFor(profile.backend).label} for ${profile.label}\n`, "utf8");
35
110
 
36
- // Build argv: binary + command.json args
37
- const argv = [...commandArgv];
38
- // mlx-vlm requires APC_ENABLED=1 (86x TTFT improvement; fixes Metal cache clearing).
39
- const env = profile.backend === "mlx-vlm" ? { ...process.env, APC_ENABLED: "1" } : process.env;
111
+ const env = { ...process.env, ...extraEnv };
40
112
 
41
113
  const rawFd = openSync(rawLogPath, "a");
42
114
  let child;
package/src/profiles.mjs CHANGED
@@ -69,14 +69,9 @@ export async function saveProfile(profile, options = {}) {
69
69
  };
70
70
  await writeJson(profileJsonPath(id), saved);
71
71
 
72
- // Write JSON command file for llama-server backends
73
- const backend = backendFor(saved.backend);
74
- if (backend.needsCommandFile) {
75
- const cmdPath = commandJsonPath(id);
76
- if (options.writeCommand || !existsSync(cmdPath)) {
77
- await writeJson(cmdPath, { argv: saved.commandArgv ?? [] });
78
- }
79
- }
72
+ // Note: command.json is no longer written — the server command is computed
73
+ // fresh from the profile config at launch time (see computeServerCommand in
74
+ // process.mjs). commandArgv is kept in the profile for backwards compat.
80
75
 
81
76
  if (!existsSync(notesPath(id))) {
82
77
  await writeFile(notesPath(id), `# ${saved.label}\n\nNotes for this model profile.\n`, "utf8");