offgrid-ai 0.16.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,10 +8,8 @@ import pc from "picocolors";
8
8
  // ── Pi model id ─────────────────────────────────────────────────────────────
9
9
 
10
10
  /**
11
- * The model id Pi must send in requests. mlx-vlm registers the loaded model
12
- * with the full --model path as its API id (verified via /v1/models); sending
13
- * the repo-id label instead makes mlx-vlm unload the local model and re-fetch
14
- * the repo from HuggingFace. Other backends use the friendly modelAlias.
11
+ * The model id Pi must send in requests. All backends use the friendly
12
+ * modelAlias as the API model id.
15
13
  */
16
14
  export function piApiModelId(profile) {
17
15
  return profile.modelAlias;
@@ -87,7 +85,7 @@ async function activeProviderProfiles(currentProfile) {
87
85
  const byAlias = new Map();
88
86
  for (const item of [...allProfiles, currentProfile]) {
89
87
  if (item.providerId !== currentProfile.providerId) continue;
90
- if (item.backend !== "llama-cpp" && item.backend !== "llama-cpp-mtp") {
88
+ if (item.backend !== "llama-cpp") {
91
89
  byAlias.set(item.modelAlias, item);
92
90
  continue;
93
91
  }
package/src/managed.mjs CHANGED
@@ -1,6 +1,6 @@
1
1
  import { existsSync } from "node:fs";
2
2
  import { BACKENDS } from "./backends.mjs";
3
- import { commandExists } from "./exec.mjs";
3
+ import { hasOmlx } from "./omlx-runtime.mjs";
4
4
 
5
5
  export const MANAGED_BACKEND_IDS = ["omlx"];
6
6
 
@@ -22,6 +22,6 @@ export function hasLmStudioInstalled() {
22
22
  return existsSync("/Applications/LM Studio.app");
23
23
  }
24
24
 
25
- export function hasOmlxInstalled() {
26
- return commandExists("omlx");
25
/**
 * Report whether oMLX is installed.
 *
 * Delegates entirely to hasOmlx() and resolves with whatever it resolves with.
 *
 * @returns {Promise<boolean>} result of hasOmlx()
 */
export async function hasOmlxInstalled() {
  const installed = await hasOmlx();
  return installed;
}
@@ -2,7 +2,7 @@
2
2
  // to compute sizes and publishers. The oMLX API doesn't return these, so we
3
3
  // read them from disk.
4
4
 
5
- import { readdir, stat } from "node:fs/promises";
5
+ import { readdir, stat, readFile } from "node:fs/promises";
6
6
  import { existsSync } from "node:fs";
7
7
  import { join } from "node:path";
8
8
  import { homedir } from "node:os";
@@ -65,6 +65,99 @@ export async function scanOmlxModelSizes() {
65
65
  return infoByBasename;
66
66
  }
67
67
 
68
+ // ── MTP capability detection ─────────────────────────────────────────────
69
+ // oMLX supports native MTP for Qwen 3.5/3.6 (dense + MoE) and DeepSeek-V4
70
+ // models that ship MTP heads in their weights. The check mirrors oMLX's own
71
+ // _mtp_compat_for_model: config must declare mtp_num_hidden_layers, model_type
72
+ // must be on the whitelist, and the safetensors index must contain mtp.* keys.
73
+
74
// model_type prefixes accepted by detectOmlxMtpCapability().
const MTP_MODEL_TYPES = ["qwen3_5", "qwen3_5_moe", "qwen3_6", "qwen3_6_moe", "deepseek_v4"];

/**
 * Check if an oMLX model directory supports native MTP.
 *
 * Three conditions must all hold:
 *   1. config.json declares a positive `mtp_num_hidden_layers`,
 *   2. config.json's `model_type` is a string starting with one of
 *      MTP_MODEL_TYPES,
 *   3. modelHasMtpWeights() finds MTP weights in the directory.
 *
 * Never rejects for malformed config content: a config that is valid JSON but
 * not an object, or a non-string `model_type`, yields `compatible: false`.
 *
 * @param {string} modelDir - directory expected to contain config.json
 * @returns {Promise<{ compatible: boolean, reason: string }>} `reason` is the
 *   empty string when compatible, otherwise a human-readable explanation.
 */
export async function detectOmlxMtpCapability(modelDir) {
  const configPath = join(modelDir, "config.json");
  if (!existsSync(configPath)) return { compatible: false, reason: "config.json not found" };

  let config;
  try {
    config = JSON.parse(await readFile(configPath, "utf8"));
  } catch {
    return { compatible: false, reason: "failed to read config.json" };
  }

  // `config` may legally parse to null; optional chaining keeps that case on
  // the "no MTP heads" path instead of throwing a TypeError.
  const mtpLayers = config?.mtp_num_hidden_layers;
  if (!mtpLayers || mtpLayers <= 0) {
    return { compatible: false, reason: "model has no MTP heads in config" };
  }

  // Only strings can be prefix-matched; anything else is simply "not on the
  // whitelist". startsWith() already covers the exact-match case.
  const modelType = config.model_type;
  const isWhitelisted = typeof modelType === "string"
    && MTP_MODEL_TYPES.some((prefix) => modelType.startsWith(prefix));
  if (!isWhitelisted) {
    return { compatible: false, reason: `model_type=${modelType} is not on the MTP whitelist (supported: ${MTP_MODEL_TYPES.join(", ")})` };
  }

  // Check for mtp.* weight tensors in the safetensors index
  const hasWeights = await modelHasMtpWeights(modelDir);
  if (!hasWeights) {
    return { compatible: false, reason: "Config declares MTP layers but the converted weights are missing mtp.* tensors. Re-convert from HF with a converter that preserves MTP weights." };
  }

  return { compatible: true, reason: "" };
}

/**
 * Decide whether a model directory appears to ship MTP weights.
 *
 * When model.safetensors.index.json exists, the answer is whether any key of
 * its `weight_map` contains "mtp."; an unreadable or unparsable index counts
 * as "no". Without an index, the directory listing is checked for a
 * `.safetensors` file whose name contains "mtp" (case-insensitive), because
 * tensor names inside a safetensors file are not read here.
 *
 * @param {string} modelDir
 * @returns {Promise<boolean>}
 */
async function modelHasMtpWeights(modelDir) {
  const indexPath = join(modelDir, "model.safetensors.index.json");
  if (existsSync(indexPath)) {
    try {
      const index = JSON.parse(await readFile(indexPath, "utf8"));
      const weightMap = index?.weight_map ?? {};
      return Object.keys(weightMap).some((key) => key.includes("mtp."));
    } catch {
      return false;
    }
  }

  // Single-shard fallback: look for an mtp-specific safetensors file.
  try {
    const entries = await readdir(modelDir);
    return entries.some((file) => file.endsWith(".safetensors") && /mtp/i.test(file));
  } catch {
    return false;
  }
}
130
+
131
/**
 * Locate the on-disk directory for an oMLX model id.
 *
 * The id is reduced to a directory name: the text after the last "/" when the
 * id contains one, otherwise the text after the last "--" when present,
 * otherwise the id itself. OMLX_MODELS_DIR is then searched depth-first, in
 * readdir order, for a directory with that name that isMlxModelDir() accepts.
 * Directories that cannot be listed are skipped.
 *
 * @param {string} modelId
 * @returns {Promise<string|null>} full path of the first match, or null when
 *   OMLX_MODELS_DIR does not exist or nothing matches.
 */
export async function findOmlxModelDir(modelId) {
  if (!existsSync(OMLX_MODELS_DIR)) return null;

  let targetName = modelId;
  if (modelId.includes("/")) {
    targetName = modelId.slice(modelId.lastIndexOf("/") + 1);
  } else if (modelId.includes("--")) {
    targetName = modelId.slice(modelId.lastIndexOf("--") + 2);
  }

  // Pre-order search: a directory is tested before its children are visited.
  const search = async (dir) => {
    let children;
    try {
      children = await readdir(dir, { withFileTypes: true });
    } catch {
      return null;
    }

    for (const child of children) {
      if (!child.isDirectory()) continue;

      const childPath = join(dir, child.name);
      if (child.name === targetName && (await isMlxModelDir(childPath))) {
        return childPath;
      }

      const nested = await search(childPath);
      if (nested) return nested;
    }
    return null;
  };

  return search(OMLX_MODELS_DIR);
}
160
+
68
161
  /**
69
162
  * Look up a model's info by its oMLX API id. Tries exact match, then the
70
163
  * segment after `--` (oMLX org--name format), then after `/` (HF format).
@@ -4,9 +4,9 @@
4
4
  // No other function should format, title-case, or dissect a model name.
5
5
  //
6
6
  // The returned `id` is always the raw identifier (untouched) and is used for
7
- // API calls, profile IDs, Pi config matching, and benchmark directory slugs.
7
+ // API calls, profile IDs, and Pi config matching.
8
8
  // The returned `display` is the human-readable string shown in pickers and details.
9
- // and benchmark metadata.
9
+
10
10
 
11
11
  // ── Known model families ────────────────────────────────────────────────
12
12
  //
@@ -7,9 +7,8 @@ import { pc, formatBytes, renderSectionRows } from "./ui.mjs";
7
7
  import { capabilitySummary, ggufDetailParts, isProfileFileMissing, profileDetailParts } from "./model-summary.mjs";
8
8
  import { itemKey } from "./model-catalog.mjs";
9
9
  import { DATA_DIR } from "./config.mjs";
10
- import { findBenchmarkRepo } from "./benchmark.mjs";
11
10
 
12
- const OPTION_SEPARATOR = pc.dim(" ");
11
+ const OPTION_SEPARATOR = " ";
13
12
  const OPTION_STATUS_WIDTH = 12;
14
13
  const OPTION_BACKEND_WIDTH = 14;
15
14
  const OPTION_SOURCE_WIDTH = 14;
@@ -53,7 +52,6 @@ function optionBackendTag(backendId) {
53
52
  const label = backend?.label ?? backendId ?? "unknown";
54
53
  const colors = {
55
54
  "llama-cpp": pc.cyan,
56
- "llama-cpp-mtp": pc.blue,
57
55
  omlx: pc.magenta,
58
56
  };
59
57
  return optionPad(label, colors[backendId] ?? pc.dim, OPTION_BACKEND_WIDTH);
@@ -219,15 +217,6 @@ export function printWorkspaceHeader(normalized, runningProfilesNow, modelMissin
219
217
  console.log(pc.dim(" ─────────────────────────────────────────────────────────"));
220
218
  }
221
219
 
222
- export async function printBenchmarkLine() {
223
- const repoPath = await findBenchmarkRepo();
224
- if (repoPath) {
225
- console.log(pc.green(" ✓") + " local-llm-visual-benchmark linked");
226
- } else {
227
- console.log(pc.yellow(" ○") + " to run benchmarks, pair with " + pc.cyan("local-llm-visual-benchmark"));
228
- }
229
- }
230
-
231
220
  export async function printProfileDetails(profile) {
232
221
  const backend = backendFor(profile.backend);
233
222
  const isManaged = backend.type === "managed-server";
@@ -267,8 +256,9 @@ export async function printProfileDetails(profile) {
267
256
  const scriptPath = join(profileDir(profile.id), "start.sh");
268
257
  console.log("\n" + renderSectionRows("Server command", [
269
258
  ["Run manually", pc.cyan(`bash ${scriptPath}`)],
270
- ["Command", pc.dim(script)],
271
- ], { columns: Math.min(process.stdout.columns ?? 120, 140) }));
259
+ ]));
260
+ console.log("");
261
+ console.log(pc.dim(script));
272
262
  }
273
263
  }
274
264
  }
@@ -0,0 +1,232 @@
1
+ // oMLX runtime management — discovery, version checking, update prompts,
2
+ // and installation. Mirrors the llama.cpp runtime pattern in runtime.mjs.
3
+ //
4
+ // oMLX is installed either via the macOS app (DMG from GitHub Releases, which
5
+ // installs a ~/.omlx/bin/omlx CLI shim with in-app auto-update) or via Homebrew
6
+ // (brew tap jundot/omlx && brew install omlx). offgrid-ai does NOT download
7
+ // binaries directly — it uses brew for automated installs, and prompts for the
8
+ // DMG when brew is unavailable.
9
+
10
+ import { execFile } from "node:child_process";
11
+ import { existsSync } from "node:fs";
12
+ import { homedir } from "node:os";
13
+ import { join } from "node:path";
14
+ import { promisify } from "node:util";
15
+ import { compareVersions } from "./updates.mjs";
16
+ import { hasHomebrew, ensureHomebrewFor } from "./config.mjs";
17
+ import { commandExists } from "./exec.mjs";
18
+ import { pc, renderCard, renderRows } from "./ui.mjs";
19
+
20
+ const execFileAsync = promisify(execFile);
21
+
22
+ const OMLX_CLI_SHIM = join(homedir(), ".omlx", "bin", "omlx");
23
+ const RELEASE_API = "https://api.github.com/repos/jundot/omlx/releases/latest";
24
+
25
+ // ── Discovery ──────────────────────────────────────────────────────────────
26
+
27
/**
 * Locate the oMLX CLI binary.
 *
 * The shim at OMLX_CLI_SHIM wins whenever it exists. Otherwise `which omlx`
 * is run and its trimmed output is accepted only if that path exists.
 *
 * @returns {Promise<string|null>} path to the binary, or null when neither
 *   lookup succeeds.
 */
export async function findOmlx() {
  if (existsSync(OMLX_CLI_SHIM)) {
    return OMLX_CLI_SHIM;
  }

  let resolved;
  try {
    const result = await execFileAsync("which", ["omlx"]);
    resolved = result.stdout.trim();
  } catch {
    // `which` failed: omlx is not on PATH.
    return null;
  }

  return resolved && existsSync(resolved) ? resolved : null;
}
44
+
45
/**
 * Tell whether findOmlx() can locate an oMLX binary.
 *
 * @returns {Promise<boolean>} true when findOmlx() resolves to a path.
 */
export async function hasOmlx() {
  const binary = await findOmlx();
  return binary !== null;
}
51
+
52
/**
 * Classify the oMLX installation.
 *
 * @returns {Promise<"app"|"brew"|null>} "app" when the shim at OMLX_CLI_SHIM
 *   exists, "brew" when it does not but commandExists("omlx") is truthy,
 *   null otherwise.
 */
export async function omlxInstallMethod() {
  const shimPresent = existsSync(OMLX_CLI_SHIM);
  if (shimPresent) return "app";

  const onPath = await commandExists("omlx");
  return onPath ? "brew" : null;
}
61
+
62
+ // ── Version checking ───────────────────────────────────────────────────────
63
+
64
/**
 * Read the installed oMLX version from `<binary> --version`.
 *
 * The first `digits.digits.digits` run found in the trimmed stdout is
 * returned. The command is given a 5000 ms timeout.
 *
 * @returns {Promise<string|null>} the version string, or null when no binary
 *   is found, the command fails, or stdout holds no such pattern.
 */
export async function installedOmlxVersion() {
  const binary = await findOmlx();
  if (!binary) return null;

  try {
    const { stdout } = await execFileAsync(binary, ["--version"], { timeout: 5000 });
    const found = /(\d+\.\d+\.\d+)/u.exec(stdout.trim());
    if (found === null) return null;
    return found[1];
  } catch {
    return null;
  }
}
79
+
80
/**
 * Look up the latest oMLX release via RELEASE_API.
 *
 * The request is aborted after 5000 ms. Any failure (network error, non-OK
 * response, unparsable body, missing or non-string `tag_name`) resolves to
 * null; callers must treat null as "check failed, skip prompt".
 *
 * @param {typeof globalThis.fetch} [fetchImpl] - fetch implementation to use.
 * @returns {Promise<{ tag: string, version: string } | null>} `version` is
 *   `tag` with one leading "v" removed.
 */
export async function latestOmlxRelease(fetchImpl = globalThis.fetch) {
  let payload;
  try {
    const response = await fetchImpl(RELEASE_API, { signal: AbortSignal.timeout(5000) });
    if (!response.ok) return null;
    payload = await response.json();
  } catch {
    return null;
  }

  const tagName = payload?.tag_name;
  if (typeof tagName !== "string" || tagName === "") return null;

  return { tag: tagName, version: tagName.replace(/^v/u, "") };
}
98
+
99
+ // ── Update prompt ──────────────────────────────────────────────────────────
100
+
101
/**
 * Check if an oMLX update is available and offer to install it.
 *
 * Outcomes, in order:
 *   - release lookup failed, or the installed version is not older than the
 *     latest release: nothing is printed, resolves false;
 *   - a binary is present (omlxInstallMethod() is non-null) but its version
 *     could not be read: nothing is printed, resolves false. Without a version
 *     there is nothing to compare, and the binary must not be reported as
 *     "not installed" or reinstalled over;
 *   - app install with an update available: prints a hint, resolves false;
 *   - nothing installed: asks whether to install and delegates to installOmlx();
 *   - otherwise: asks, then runs `brew update` and `brew upgrade omlx`.
 *
 * @param {object} prompt - prompt interface; only `yesNo(question, default)`
 *   is called here (it is also forwarded to installOmlx()).
 * @param {{ fetchImpl?: typeof globalThis.fetch }} [options]
 * @returns {Promise<boolean>} true when an install or upgrade completed,
 *   false otherwise.
 */
export async function offerManagedOmlxUpdate(prompt, { fetchImpl = globalThis.fetch } = {}) {
  const latest = await latestOmlxRelease(fetchImpl);
  if (!latest) return false;

  const installed = await installedOmlxVersion();
  if (installed && compareVersions(installed, latest.version) >= 0) return false;

  const method = await omlxInstallMethod();

  // Installed, but `--version` failed or was unparsable: skip the prompt
  // rather than treating an existing install as missing.
  if (method !== null && !installed) return false;

  console.log("\n" + renderCard("oMLX runtime", renderRows([
    ["Installed", installed ?? pc.yellow("not installed")],
    ["Latest", pc.green(latest.version)],
    ["Source", method === "app" ? "macOS app (in-app auto-update)" : "Homebrew"],
  ]), { formatBorder: pc.cyan }));

  // App installs have their own auto-update — just inform the user
  if (method === "app") {
    console.log(pc.dim(" Update oMLX via the app's in-app updater, then restart offgrid-ai."));
    return false;
  }

  // Nothing installed — offer a fresh install
  if (!installed) {
    const shouldInstall = await prompt.yesNo("Install oMLX runtime?", false);
    if (!shouldInstall) return false;
    return await installOmlx(prompt);
  }

  // Brew install with an update available
  const shouldUpdate = await prompt.yesNo("Update oMLX runtime?", false);
  if (!shouldUpdate) return false;

  try {
    const { runCommand } = await import("./exec.mjs");
    console.log(pc.dim("Updating oMLX via Homebrew..."));
    await runCommand("brew", ["update"], { label: "brew update" });
    await runCommand("brew", ["upgrade", "omlx"], { label: "brew upgrade omlx" });
    console.log(pc.green("✓ Updated oMLX to latest"));
    return true;
  } catch (err) {
    console.log(pc.red(`✗ Update failed: ${err.message}`));
    console.log(pc.dim("Update manually: brew update && brew upgrade omlx"));
    return false;
  }
}
153
+
154
+ // ── Installation ───────────────────────────────────────────────────────────
155
+
156
/**
 * Install oMLX through Homebrew.
 *
 * When hasHomebrew() reports no Homebrew, ensureHomebrewFor() is asked to
 * provide it first; if that does not succeed, manual instructions are printed
 * and the install is abandoned. Otherwise the jundot/omlx tap is added and
 * `brew install omlx` is run.
 *
 * @param {object} prompt - UI prompt interface, forwarded to ensureHomebrewFor()
 * @param {function} [run] - command runner called as `run(cmd, args, label)`;
 *   defaults to a wrapper around runCommand from ./exec.mjs
 * @returns {Promise<boolean>} true if installation succeeded
 */
export async function installOmlx(prompt, run) {
  // Built once and used for both the Homebrew bootstrap and the oMLX install.
  const runner = run || (async (cmd, args, label) => {
    const { runCommand } = await import("./exec.mjs");
    return runCommand(cmd, args, { label });
  });

  const brewAvailable = await hasHomebrew();
  if (!brewAvailable) {
    const brewReady = await ensureHomebrewFor(prompt, runner, "oMLX");
    if (!brewReady) {
      console.log(pc.dim("Install oMLX manually:"));
      console.log(pc.dim(" brew tap jundot/omlx && brew install omlx"));
      console.log(pc.dim(" — or download the macOS app from https://github.com/jundot/omlx/releases"));
      return false;
    }
  }

  console.log(pc.cyan("Installing oMLX via Homebrew..."));
  try {
    await runner("brew", ["tap", "jundot/omlx", "https://github.com/jundot/omlx"], "oMLX tap");
    await runner("brew", ["install", "omlx"], "oMLX");
  } catch (err) {
    console.log(pc.red(`✗ oMLX installation failed: ${err.message}`));
    console.log(pc.dim("Install manually: brew tap jundot/omlx && brew install omlx"));
    console.log(pc.dim(" — or download the macOS app from https://github.com/jundot/omlx/releases"));
    return false;
  }

  console.log(pc.green("✓ oMLX installed"));
  return true;
}
199
+
200
/**
 * Make sure an oMLX binary is available, offering to install it when
 * findOmlx() comes back empty.
 *
 * If the user declines, or the binary still cannot be found after
 * installOmlx() ran, a notice is printed and false is returned. On success the
 * binary path is printed, together with the version when it can be read.
 *
 * @param {object} prompt - UI prompt interface (`yesNo` is used here)
 * @param {function} [run] - command runner forwarded to installOmlx()
 * @returns {Promise<boolean>} true if oMLX is available (installed or pre-existing)
 */
export async function ensureOmlxRuntime(prompt, run) {
  let binaryPath = await findOmlx();

  if (!binaryPath) {
    const statusCard = renderCard("oMLX runtime", renderRows([
      ["Status", pc.yellow("not installed")],
      ["Used for", "local MLX models (Apple Silicon optimized)"],
      ["Install", "managed by offgrid-ai via Homebrew"],
    ]), { formatBorder: pc.cyan });
    console.log(statusCard);

    if (await prompt.yesNo("Install oMLX runtime?", true)) {
      await installOmlx(prompt, run);
      // Re-probe instead of trusting the installer's return value.
      binaryPath = await findOmlx();
    }
  }

  if (!binaryPath) {
    console.log(pc.yellow("Skipping oMLX for now. You can still use llama.cpp, or run offgrid-ai again to install."));
    return false;
  }

  const version = await installedOmlxVersion();
  const versionSuffix = version ? ` (v${version})` : "";
  console.log(pc.green(`✓ oMLX: ${binaryPath}${versionSuffix}`));
  return true;
}
package/src/process.mjs CHANGED
@@ -6,6 +6,7 @@ import { basename, join } from "node:path";
6
6
  import { LOG_DIR } from "./config.mjs";
7
7
  import { writeState, readState, profileDir } from "./profiles.mjs";
8
8
  import { backendFor, backendBinaryFor } from "./backends.mjs";
9
+ import { pc } from "./ui.mjs";
9
10
 
10
11
  const execFileAsync = promisify(execFile);
11
12
 
@@ -21,7 +22,7 @@ export async function computeServerCommand(profile) {
21
22
  const binary = await backendBinaryFor(profile.backend);
22
23
  if (!binary) throw new Error("Server binary not found. Run offgrid-ai interactively to install.");
23
24
 
24
- // llama-cpp / llama-cpp-mtp
25
+ // llama-cpp
25
26
  const { computeFlags } = await import("./autodetect.mjs");
26
27
  const result = computeFlags(
27
28
  profile.capabilities ?? {},
@@ -118,6 +119,10 @@ async function startLocalServer(profile) {
118
119
 
119
120
  async function startManagedServer(profile, backend) {
120
121
  if (await serverReady(profile.baseUrl)) {
122
+ // Apply per-model settings (MTP) even when server is already running.
123
+ if (backend.id === "omlx" && profile.capabilities?.mtp) {
124
+ await ensureOmlxMtpSetting(profile);
125
+ }
121
126
  return writeManagedState(profile, backend);
122
127
  }
123
128
 
@@ -126,9 +131,15 @@ async function startManagedServer(profile, backend) {
126
131
  try {
127
132
  const { execFile } = await import("node:child_process");
128
133
  const { promisify } = await import("node:util");
129
- await promisify(execFile)("omlx", ["start"], { timeout: 10000 });
130
- } catch {
131
- throw new Error(`${backend.label} is not running and could not be auto-started. Install oMLX or run \`omlx start\` manually.`);
134
+ const { findOmlx } = await import("./omlx-runtime.mjs");
135
+ const omlxBin = await findOmlx();
136
+ if (!omlxBin) {
137
+ throw new Error(`${backend.label} is not installed. Run offgrid-ai to install it, or install manually: brew tap jundot/omlx && brew install omlx`);
138
+ }
139
+ await promisify(execFile)(omlxBin, ["start"], { timeout: 10000 });
140
+ } catch (err) {
141
+ if (err.message.includes("not installed")) throw err;
142
+ throw new Error(`${backend.label} could not be auto-started: ${err.message}. Run \`omlx start\` manually.`, { cause: err });
132
143
  }
133
144
  }
134
145
 
@@ -141,9 +152,48 @@ async function startManagedServer(profile, backend) {
141
152
  if (!(await serverReady(profile.baseUrl))) {
142
153
  throw new Error(`${backend.label} is not responding at ${profile.baseUrl}. Start it and try again.`);
143
154
  }
155
+
156
+ // Apply per-model settings (MTP) before the model is loaded.
157
+ // oMLX applies MTP patches at load time, so the setting must be in
158
+ // model_settings.json before any request triggers a load.
159
+ if (backend.id === "omlx" && profile.capabilities?.mtp) {
160
+ await ensureOmlxMtpSetting(profile);
161
+ }
162
+
144
163
  return writeManagedState(profile, backend);
145
164
  }
146
165
 
166
/**
 * Ask the oMLX server to enable MTP for a profile's model.
 *
 * Sends `PUT {base}/admin/api/models/{modelId}/settings` with
 * `{ mtp_enabled: true }`, where `{base}` is profile.baseUrl without a
 * trailing `/v1` and `{modelId}` is the first defined of profile.omlxModel,
 * profile.modelAlias and profile.id. The request is aborted after 5000 ms.
 *
 * Best-effort: every outcome is reported with a `[mtp]` console line and the
 * function never throws.
 *
 * @param {object} profile - reads baseUrl, omlxModel, modelAlias and id
 * @returns {Promise<void>}
 */
async function ensureOmlxMtpSetting(profile) {
  const serverRoot = profile.baseUrl?.replace(/\/v1\/?$/u, "") || "";
  const modelId = profile.omlxModel ?? profile.modelAlias ?? profile.id;

  try {
    const endpoint = `${serverRoot}/admin/api/models/${encodeURIComponent(modelId)}/settings`;
    const response = await fetch(endpoint, {
      method: "PUT",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ mtp_enabled: true }),
      signal: AbortSignal.timeout(5000),
    });

    if (response.ok) {
      console.log(pc.green(`[mtp] Enabled MTP speculative decoding for ${modelId}`));
      return;
    }

    switch (response.status) {
      case 401:
      case 403:
        console.log(pc.yellow(`[mtp] Could not enable MTP: oMLX admin authentication required. Enable skip_api_key_verification in oMLX settings, or enable MTP manually from the admin panel.`));
        break;
      case 404:
        console.log(pc.yellow(`[mtp] Model ${modelId} not found on oMLX server. MTP setting not applied.`));
        break;
      default: {
        // The body is only read for unexpected statuses, to aid diagnosis.
        const detail = await response.text().catch(() => "");
        console.log(pc.yellow(`[mtp] Could not enable MTP: HTTP ${response.status} ${detail}`));
      }
    }
  } catch (err) {
    console.log(pc.yellow(`[mtp] Could not enable MTP: ${err.message}`));
  }
}
196
+
147
197
  async function writeManagedState(profile, backend) {
148
198
  const state = {
149
199
  pid: null,
@@ -223,7 +273,7 @@ async function processGone(pid) {
223
273
  export async function unloadModelFromServer(profile) {
224
274
  const backend = backendFor(profile.backend);
225
275
 
226
- if (backend.id === "llama-cpp" || backend.id === "llama-cpp-mtp") {
276
+ if (backend.id === "llama-cpp") {
227
277
  return { unloaded: false, backend: backend.id, reason: "stop server to unload" };
228
278
  }
229
279