offgrid-ai 0.16.0 → 0.16.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -4
- package/package.json +1 -1
- package/src/autodetect.mjs +1 -1
- package/src/backends.mjs +0 -12
- package/src/benchmark/flow.mjs +14 -13
- package/src/benchmark/metrics.mjs +14 -20
- package/src/commands/main.mjs +4 -2
- package/src/commands/models.mjs +10 -9
- package/src/commands/onboard.mjs +3 -31
- package/src/commands/run.mjs +1 -1
- package/src/config.mjs +46 -0
- package/src/harness-pi.mjs +3 -5
- package/src/managed.mjs +3 -3
- package/src/mlx-discovery.mjs +94 -1
- package/src/model-presenters.mjs +0 -1
- package/src/omlx-runtime.mjs +232 -0
- package/src/process.mjs +55 -5
- package/src/profile-setup.mjs +50 -24
- package/src/profiles.mjs +11 -3
- package/src/ui.mjs +2 -19
package/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
# offgrid-ai
|
|
4
4
|
|
|
5
|
-
**Helper CLI for running local AI models on Mac with llama-server
|
|
5
|
+
**Helper CLI for running local AI models on Mac with llama-server and oMLX.**
|
|
6
6
|
|
|
7
7
|
[](package.json)
|
|
8
8
|
[]()
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
|
|
13
13
|
## What is offgrid-ai?
|
|
14
14
|
|
|
15
|
-
offgrid-ai is a command-line tool that lets you run AI models locally. Running local models with llama-server
|
|
15
|
+
offgrid-ai is a command-line tool that lets you run AI models locally. Running local models with llama-server or oMLX has a steep learning curve compared to cloud-based models, so offgrid-ai is designed to abstract away the complexity, while still providing a powerful and flexible way to run local models.
|
|
16
16
|
|
|
17
17
|
This is the recommended workflow:
|
|
18
18
|
|
|
@@ -23,8 +23,8 @@ This is the recommended workflow:
|
|
|
23
23
|
## Core Features
|
|
24
24
|
- Auto-detects available models from LM Studio, oMLX, and HuggingFace
|
|
25
25
|
- Auto-detects MTP (multi-token prediction) or QAT (quantization-aware training) models, and applies the correct flags for llama.cpp
|
|
26
|
-
- Auto-applies the optimal flags for the model type (llama.cpp server flags,
|
|
27
|
-
- Start / stop local servers automatically for chat sessions (llama-server and
|
|
26
|
+
- Auto-applies the optimal flags for the model type (llama.cpp server flags, oMLX auto-start and cache management)
|
|
27
|
+
- Start / stop local servers automatically for chat sessions (llama-server and oMLX)
|
|
28
28
|
|
|
29
29
|
## Quick start
|
|
30
30
|
|
package/package.json
CHANGED
package/src/autodetect.mjs
CHANGED
|
@@ -55,7 +55,7 @@ export function computeFlags(capabilities, modelPath, mmprojPath, draftModelPath
|
|
|
55
55
|
const isLowMem = quant && /[Qq]4[_0]/i.test(quant);
|
|
56
56
|
|
|
57
57
|
const flags = {
|
|
58
|
-
...defaultFlagsForBackend(
|
|
58
|
+
...defaultFlagsForBackend("llama-cpp"),
|
|
59
59
|
ctxSize: capabilities.ctxSize,
|
|
60
60
|
flashAttention: "on",
|
|
61
61
|
cacheTypeK: isLowMem ? "f16" : "bf16",
|
package/src/backends.mjs
CHANGED
|
@@ -7,7 +7,6 @@ import { scanOmlxModelSizes, lookupOmlxModelInfo } from "./mlx-discovery.mjs";
|
|
|
7
7
|
|
|
8
8
|
export const LOCAL_HOST = "127.0.0.1";
|
|
9
9
|
export const LLAMA_CPP_PORT = 8080;
|
|
10
|
-
export const LLAMA_CPP_MTP_PORT = 8081;
|
|
11
10
|
export const OMLX_PORT = 8000;
|
|
12
11
|
|
|
13
12
|
export function baseUrlFor({ host = LOCAL_HOST, port, path = "/v1" }) {
|
|
@@ -30,17 +29,6 @@ export const BACKENDS = {
|
|
|
30
29
|
needsCommandFile: true,
|
|
31
30
|
scanModels: async () => (await scanGgufModels()).models,
|
|
32
31
|
},
|
|
33
|
-
"llama-cpp-mtp": {
|
|
34
|
-
id: "llama-cpp-mtp",
|
|
35
|
-
label: "llama.cpp MTP",
|
|
36
|
-
type: "local-server",
|
|
37
|
-
providerId: "llama-cpp-mtp",
|
|
38
|
-
defaultHost: LOCAL_HOST,
|
|
39
|
-
defaultPort: LLAMA_CPP_MTP_PORT,
|
|
40
|
-
defaultBaseUrl: baseUrlFor({ port: LLAMA_CPP_MTP_PORT }),
|
|
41
|
-
needsCommandFile: true,
|
|
42
|
-
scanModels: async () => (await scanGgufModels()).models,
|
|
43
|
-
},
|
|
44
32
|
"omlx": {
|
|
45
33
|
id: "omlx",
|
|
46
34
|
label: "oMLX",
|
package/src/benchmark/flow.mjs
CHANGED
|
@@ -15,7 +15,7 @@ import { finalizeBenchmarkRun, renderBenchmarkSummary } from "./finalize.mjs";
|
|
|
15
15
|
|
|
16
16
|
function benchmarkModelSource(profile) {
|
|
17
17
|
if (!profile) return "cloud";
|
|
18
|
-
return profile.
|
|
18
|
+
return profile.backend === "omlx" ? "omlx" : "llama-cpp";
|
|
19
19
|
}
|
|
20
20
|
|
|
21
21
|
async function chooseBenchmarkAction(prompt, canRun) {
|
|
@@ -150,16 +150,22 @@ async function selectBenchmark(prompt, repoPath) {
|
|
|
150
150
|
return { kind, benchmark };
|
|
151
151
|
}
|
|
152
152
|
|
|
153
|
-
// ──
|
|
153
|
+
// ── Shared benchmark setup ───────────────────────────────────────────────
|
|
154
154
|
|
|
155
|
-
|
|
155
|
+
async function benchmarkSetup() {
|
|
156
156
|
await ensureDirs();
|
|
157
157
|
const prompt = createPrompt();
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
158
|
+
const repoPath = await linkBenchmarkRepo(prompt);
|
|
159
|
+
if (!repoPath) return { prompt, repoPath: null, selected: null };
|
|
160
|
+
const selected = await selectBenchmark(prompt, repoPath);
|
|
161
|
+
return { prompt, repoPath, selected };
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
// ── Benchmark from a selected profile (from model picker) ────────────────
|
|
161
165
|
|
|
162
|
-
|
|
166
|
+
export async function benchmarkForProfile(profile) {
|
|
167
|
+
const { prompt, repoPath, selected } = await benchmarkSetup();
|
|
168
|
+
try {
|
|
163
169
|
if (!selected) return;
|
|
164
170
|
const { kind, benchmark: selectedBenchmark } = selected;
|
|
165
171
|
|
|
@@ -185,13 +191,8 @@ export async function benchmarkForProfile(profile) {
|
|
|
185
191
|
// ── Standalone benchmark flow (offgrid-ai benchmark) ──────────────────────
|
|
186
192
|
|
|
187
193
|
export async function benchmarkFlow() {
|
|
188
|
-
await
|
|
189
|
-
const prompt = createPrompt();
|
|
194
|
+
const { prompt, repoPath, selected } = await benchmarkSetup();
|
|
190
195
|
try {
|
|
191
|
-
const repoPath = await linkBenchmarkRepo(prompt);
|
|
192
|
-
if (!repoPath) return;
|
|
193
|
-
|
|
194
|
-
const selected = await selectBenchmark(prompt, repoPath);
|
|
195
196
|
if (!selected) return;
|
|
196
197
|
const { kind, benchmark: selectedBenchmark } = selected;
|
|
197
198
|
|
|
package/src/benchmark/metrics.mjs
CHANGED
|
@@ -9,7 +9,7 @@ const SPEED_QUERY_MAX_TOKENS = 64;
|
|
|
9
9
|
export async function queryServerMetrics(profile) {
|
|
10
10
|
const backend = backendFor(profile.backend);
|
|
11
11
|
|
|
12
|
-
if (backend.id === "llama-cpp"
|
|
12
|
+
if (backend.id === "llama-cpp") {
|
|
13
13
|
return await queryLlamaCppMetrics(profile);
|
|
14
14
|
}
|
|
15
15
|
if (backend.id === "omlx") {
|
|
@@ -19,12 +19,13 @@ export async function queryServerMetrics(profile) {
|
|
|
19
19
|
throw new Error(`Unsupported backend for benchmark speed metrics: ${backend.id}`);
|
|
20
20
|
}
|
|
21
21
|
|
|
22
|
-
async function
|
|
22
|
+
async function speedQueryFetch(profile, { stream = false, streamOptions = null, errorLabel = "speed query" } = {}) {
|
|
23
23
|
const body = {
|
|
24
24
|
model: profile.modelAlias,
|
|
25
25
|
messages: [{ role: "user", content: BENCH_SPEED_PROMPT }],
|
|
26
|
-
stream
|
|
26
|
+
stream,
|
|
27
27
|
max_tokens: SPEED_QUERY_MAX_TOKENS,
|
|
28
|
+
...(streamOptions ? { stream_options: streamOptions } : {}),
|
|
28
29
|
};
|
|
29
30
|
|
|
30
31
|
const response = await fetch(profile.baseUrl.replace(/\/$/u, "") + "/chat/completions", {
|
|
@@ -35,9 +36,15 @@ async function queryLlamaCppMetrics(profile) {
|
|
|
35
36
|
});
|
|
36
37
|
|
|
37
38
|
if (!response.ok) {
|
|
38
|
-
throw new Error(
|
|
39
|
+
throw new Error(`${errorLabel} failed: ${response.status} ${response.statusText}`);
|
|
39
40
|
}
|
|
40
41
|
|
|
42
|
+
return response;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
async function queryLlamaCppMetrics(profile) {
|
|
46
|
+
const response = await speedQueryFetch(profile, { errorLabel: "llama.cpp speed query" });
|
|
47
|
+
|
|
41
48
|
const data = await response.json();
|
|
42
49
|
const timings = data.timings;
|
|
43
50
|
if (!timings || typeof timings.prompt_per_second !== "number" || typeof timings.predicted_per_second !== "number") {
|
|
@@ -60,25 +67,12 @@ async function queryLlamaCppMetrics(profile) {
|
|
|
60
67
|
}
|
|
61
68
|
|
|
62
69
|
async function queryOmlxMetrics(profile) {
|
|
63
|
-
const
|
|
64
|
-
model: profile.modelAlias,
|
|
65
|
-
messages: [{ role: "user", content: BENCH_SPEED_PROMPT }],
|
|
70
|
+
const response = await speedQueryFetch(profile, {
|
|
66
71
|
stream: true,
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
};
|
|
70
|
-
|
|
71
|
-
const response = await fetch(profile.baseUrl.replace(/\/$/u, "") + "/chat/completions", {
|
|
72
|
-
method: "POST",
|
|
73
|
-
headers: { "Content-Type": "application/json" },
|
|
74
|
-
body: JSON.stringify(body),
|
|
75
|
-
signal: AbortSignal.timeout(SPEED_QUERY_TIMEOUT_MS),
|
|
72
|
+
streamOptions: { include_usage: true },
|
|
73
|
+
errorLabel: "oMLX speed query",
|
|
76
74
|
});
|
|
77
75
|
|
|
78
|
-
if (!response.ok) {
|
|
79
|
-
throw new Error(`oMLX speed query failed: ${response.status} ${response.statusText}`);
|
|
80
|
-
}
|
|
81
|
-
|
|
82
76
|
const text = await response.text();
|
|
83
77
|
let usage = null;
|
|
84
78
|
for (const line of text.split("\n").reverse()) {
|
package/src/commands/main.mjs
CHANGED
|
@@ -4,7 +4,8 @@ import { scanGgufModels } from "../scan.mjs";
|
|
|
4
4
|
import { loadProfiles } from "../profiles.mjs";
|
|
5
5
|
import { hasPi } from "../harness-pi.mjs";
|
|
6
6
|
import { offerManagedLlamaRuntimeUpdate } from "../runtime.mjs";
|
|
7
|
-
import {
|
|
7
|
+
import { offerManagedOmlxUpdate, hasOmlx } from "../omlx-runtime.mjs";
|
|
8
|
+
import { hasLmStudioInstalled, scanManagedModels } from "../managed.mjs";
|
|
8
9
|
import { recommendedModel } from "../recommendations.mjs";
|
|
9
10
|
import { pc, startInteractive, createPrompt } from "../ui.mjs";
|
|
10
11
|
import { onboardFlow } from "./onboard.mjs";
|
|
@@ -18,6 +19,7 @@ export async function mainFlow() {
|
|
|
18
19
|
const runtimePrompt = createPrompt();
|
|
19
20
|
try {
|
|
20
21
|
await offerManagedLlamaRuntimeUpdate(runtimePrompt);
|
|
22
|
+
await offerManagedOmlxUpdate(runtimePrompt);
|
|
21
23
|
} finally {
|
|
22
24
|
runtimePrompt.close();
|
|
23
25
|
}
|
|
@@ -63,7 +65,7 @@ async function printNoModelsHelp(llamaBinary) {
|
|
|
63
65
|
console.log(pc.yellow("No models found."));
|
|
64
66
|
console.log(pc.dim("You need to download a model to use offgrid-ai.\n"));
|
|
65
67
|
|
|
66
|
-
const omlxInstalled = await
|
|
68
|
+
const omlxInstalled = await hasOmlx();
|
|
67
69
|
const lmStudioInstalled = hasLmStudioInstalled();
|
|
68
70
|
const hasBackends = llamaBinary || omlxInstalled || lmStudioInstalled;
|
|
69
71
|
if (!hasBackends) {
|
package/src/commands/models.mjs
CHANGED
|
@@ -3,7 +3,7 @@ import { backendFor, BACKENDS } from "../backends.mjs";
|
|
|
3
3
|
import { createProfileFromModel, readProfile, saveProfile, deleteProfile, profileJsonPath } from "../profiles.mjs";
|
|
4
4
|
import { isProfileRunning, isProfileServerUp, modelAvailableOnServer, stopProfile } from "../process.mjs";
|
|
5
5
|
import { syncPiConfig, removeFromPiConfig } from "../harness-pi.mjs";
|
|
6
|
-
import { configureLocalProfile } from "../profile-setup.mjs";
|
|
6
|
+
import { configureLocalProfile, configureManagedProfile } from "../profile-setup.mjs";
|
|
7
7
|
import { pc, startInteractive, createPrompt, modelSelect } from "../ui.mjs";
|
|
8
8
|
import { buildCatalogItems, createManagedProfile, itemKey, loadModelCatalog, normalizeCatalog } from "../model-catalog.mjs";
|
|
9
9
|
import { modelSelectOption, modelNameWidth, inferBackendId, formatSourceLabel, discoverySourceForItem, printGgufModelDetails, printManagedModelDetails, printWorkspaceHeader, printBenchmarkLine, printProfileDetails } from "../model-presenters.mjs";
|
|
@@ -84,7 +84,6 @@ export async function modelCommandCenter(initialCatalog) {
|
|
|
84
84
|
const groups = [];
|
|
85
85
|
const backendColors = {
|
|
86
86
|
"llama-cpp": pc.cyan,
|
|
87
|
-
"llama-cpp-mtp": pc.blue,
|
|
88
87
|
omlx: pc.magenta,
|
|
89
88
|
};
|
|
90
89
|
for (const { backendId, sourceId, items } of byBackend.values()) {
|
|
@@ -195,7 +194,7 @@ async function performAction(prompt, action, item) {
|
|
|
195
194
|
return await benchmarkFlow();
|
|
196
195
|
}
|
|
197
196
|
if (action === "run") return await runItem(item);
|
|
198
|
-
if (action === "reconfigure" || action === "setup") return await setupItem(prompt, item
|
|
197
|
+
if (action === "reconfigure" || action === "setup") return await setupItem(prompt, item);
|
|
199
198
|
if (action === "remove" && item.type === "profile") return await removeProfileInteractive(item.profile.id);
|
|
200
199
|
}
|
|
201
200
|
|
|
@@ -207,26 +206,28 @@ function printProfileSaved(id) {
|
|
|
207
206
|
console.log(pc.dim(` Profile: ${profileJsonPath(id)}`));
|
|
208
207
|
}
|
|
209
208
|
|
|
210
|
-
async function setupItem(prompt, item
|
|
209
|
+
async function setupItem(prompt, item) {
|
|
211
210
|
if (item.type === "profile") {
|
|
212
211
|
const configured = await configureLocalProfile(prompt, await readProfile(item.profile.id));
|
|
213
212
|
if (!configured) return;
|
|
214
|
-
await saveProfile(configured
|
|
213
|
+
await saveProfile(configured);
|
|
215
214
|
await syncPiConfig(configured);
|
|
216
215
|
printProfileSaved(configured.id);
|
|
217
216
|
return;
|
|
218
217
|
}
|
|
219
218
|
if (item.type === "managed") {
|
|
220
219
|
const profile = createManagedProfile(item.model, item.backendId);
|
|
221
|
-
await
|
|
222
|
-
|
|
223
|
-
|
|
220
|
+
const configured = await configureManagedProfile(prompt, profile);
|
|
221
|
+
if (!configured) return;
|
|
222
|
+
await saveProfile(configured);
|
|
223
|
+
await syncPiConfig(configured);
|
|
224
|
+
printProfileSaved(configured.id);
|
|
224
225
|
return;
|
|
225
226
|
}
|
|
226
227
|
const profile = await createProfileFromModel(item.model, null, item.drafter?.path);
|
|
227
228
|
const configured = await configureLocalProfile(prompt, profile);
|
|
228
229
|
if (!configured) return;
|
|
229
|
-
await saveProfile(configured
|
|
230
|
+
await saveProfile(configured);
|
|
230
231
|
await syncPiConfig(configured);
|
|
231
232
|
printProfileSaved(configured.id);
|
|
232
233
|
}
|
package/src/commands/onboard.mjs
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { ensureDirs, findLlamaServer, hasHomebrew, HF_HUB_DIR } from "../config.mjs";
|
|
1
|
+
import { ensureDirs, findLlamaServer, ensureHomebrewFor, HF_HUB_DIR } from "../config.mjs";
|
|
3
2
|
import { BACKENDS } from "../backends.mjs";
|
|
4
3
|
import { scanGgufModels } from "../scan.mjs";
|
|
5
4
|
import { hasPi } from "../harness-pi.mjs";
|
|
6
5
|
import { offerManagedLlamaRuntimeUpdate } from "../runtime.mjs";
|
|
6
|
+
import { ensureOmlxRuntime } from "../omlx-runtime.mjs";
|
|
7
7
|
import { scanManagedModels } from "../managed.mjs";
|
|
8
8
|
import { BACKEND_INSTALL_CHOICES, BACKEND_INSTALLERS } from "../backend-installers.mjs";
|
|
9
9
|
import { recommendedModel, selectFormat, allFittingModels } from "../recommendations.mjs";
|
|
@@ -24,6 +24,7 @@ export async function onboardFlow() {
|
|
|
24
24
|
console.log(pc.dim("Let's make sure you have everything you need to run local models.\n"));
|
|
25
25
|
|
|
26
26
|
const llamaBinary = await ensureLlamaRuntime(prompt);
|
|
27
|
+
await ensureOmlxRuntime(prompt, run);
|
|
27
28
|
if (!(await ensurePi(prompt, run))) return;
|
|
28
29
|
|
|
29
30
|
const [{ models: ggufModels }, managedModels] = await Promise.all([
|
|
@@ -169,35 +170,6 @@ async function offerBackendInstall(prompt, run) {
|
|
|
169
170
|
await installBackend(prompt, run, choice, model);
|
|
170
171
|
}
|
|
171
172
|
|
|
172
|
-
async function ensureHomebrewFor(prompt, run, label) {
|
|
173
|
-
if (await hasHomebrew()) return true;
|
|
174
|
-
const install = await prompt.yesNo(`Homebrew is needed to install ${label}. Install Homebrew now?`, true);
|
|
175
|
-
if (!install) {
|
|
176
|
-
console.log(pc.dim(`Install ${label} manually, or install Homebrew from https://brew.sh and run offgrid-ai again.`));
|
|
177
|
-
return false;
|
|
178
|
-
}
|
|
179
|
-
console.log(pc.cyan("Installing Homebrew..."));
|
|
180
|
-
try {
|
|
181
|
-
await run("/bin/bash", ["-c", "NONINTERACTIVE=1 /bin/bash -c \"$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)\""], "Homebrew");
|
|
182
|
-
for (const path of ["/opt/homebrew/bin", "/usr/local/bin"]) {
|
|
183
|
-
if (existsSync(path)) {
|
|
184
|
-
process.env.PATH = `${path}:${process.env.PATH}`;
|
|
185
|
-
break;
|
|
186
|
-
}
|
|
187
|
-
}
|
|
188
|
-
} catch {
|
|
189
|
-
console.log(pc.red("✗ Homebrew installation failed."));
|
|
190
|
-
console.log(pc.dim("Install it manually from https://brew.sh, then run offgrid-ai again."));
|
|
191
|
-
return false;
|
|
192
|
-
}
|
|
193
|
-
if (!(await hasHomebrew())) {
|
|
194
|
-
console.log(pc.red("Homebrew was installed but not found on PATH. Restart your terminal and run offgrid-ai again."));
|
|
195
|
-
return false;
|
|
196
|
-
}
|
|
197
|
-
console.log(pc.green("✓ Homebrew found"));
|
|
198
|
-
return true;
|
|
199
|
-
}
|
|
200
|
-
|
|
201
173
|
async function installBackend(prompt, run, backendId, model) {
|
|
202
174
|
const installer = BACKEND_INSTALLERS[backendId];
|
|
203
175
|
if (!(await ensureHomebrewFor(prompt, run, installer.label))) return;
|
package/src/commands/run.mjs
CHANGED
|
@@ -77,7 +77,7 @@ async function ensureLocalServer(profile, backend, options) {
|
|
|
77
77
|
console.log(pc.yellow("Vision projector is not supported by this llama.cpp build. Retrying text-only."));
|
|
78
78
|
console.log(pc.dim("Update llama.cpp later to re-enable vision for this model."));
|
|
79
79
|
const textOnly = textOnlyProfile(profile);
|
|
80
|
-
await saveProfile(textOnly
|
|
80
|
+
await saveProfile(textOnly);
|
|
81
81
|
return { handled: true, result: await runProfile(textOnly, { ...options, textOnlyRetry: true }) };
|
|
82
82
|
}
|
|
83
83
|
throw err;
|
package/src/config.mjs
CHANGED
|
@@ -3,6 +3,7 @@ import { existsSync } from "node:fs";
|
|
|
3
3
|
import { homedir } from "node:os";
|
|
4
4
|
import { dirname, join } from "node:path";
|
|
5
5
|
import { readFile, writeFile } from "node:fs/promises";
|
|
6
|
+
import { pc } from "./ui.mjs";
|
|
6
7
|
|
|
7
8
|
// ── Base directories ──────────────────────────────────────────────────────
|
|
8
9
|
|
|
@@ -124,4 +125,49 @@ export async function hasHomebrew() {
|
|
|
124
125
|
} catch {
|
|
125
126
|
return false;
|
|
126
127
|
}
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
/**
|
|
131
|
+
* Install Homebrew non-interactively and add it to PATH for this process.
|
|
132
|
+
* Returns true if Homebrew is available after installation.
|
|
133
|
+
*/
|
|
134
|
+
export async function installHomebrew(run) {
|
|
135
|
+
await run("/bin/bash", ["-c", 'NONINTERACTIVE=1 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"'], "Homebrew");
|
|
136
|
+
for (const path of ["/opt/homebrew/bin", "/usr/local/bin"]) {
|
|
137
|
+
if (existsSync(path)) {
|
|
138
|
+
process.env.PATH = `${path}:${process.env.PATH}`;
|
|
139
|
+
break;
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
return await hasHomebrew();
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
/**
|
|
146
|
+
* Ensure Homebrew is installed, prompting the user if necessary.
|
|
147
|
+
* @param {object} prompt - UI prompt interface (needs yesNo)
|
|
148
|
+
* @param {function} run - runCommand function for verbose command execution
|
|
149
|
+
* @param {string} label - what we're installing (for the prompt message)
|
|
150
|
+
* @returns {Promise<boolean>} true if Homebrew is available
|
|
151
|
+
*/
|
|
152
|
+
export async function ensureHomebrewFor(prompt, run, label) {
|
|
153
|
+
if (await hasHomebrew()) return true;
|
|
154
|
+
const install = await prompt.yesNo(`Homebrew is needed to install ${label}. Install Homebrew now?`, true);
|
|
155
|
+
if (!install) {
|
|
156
|
+
console.log(pc.dim(`Install ${label} manually, or install Homebrew from https://brew.sh and run offgrid-ai again.`));
|
|
157
|
+
return false;
|
|
158
|
+
}
|
|
159
|
+
console.log(pc.cyan("Installing Homebrew..."));
|
|
160
|
+
try {
|
|
161
|
+
const success = await installHomebrew(run);
|
|
162
|
+
if (!success) {
|
|
163
|
+
console.log(pc.red("Homebrew was installed but not found on PATH. Restart your terminal and run offgrid-ai again."));
|
|
164
|
+
return false;
|
|
165
|
+
}
|
|
166
|
+
} catch {
|
|
167
|
+
console.log(pc.red("✗ Homebrew installation failed."));
|
|
168
|
+
console.log(pc.dim("Install it manually from https://brew.sh, then run offgrid-ai again."));
|
|
169
|
+
return false;
|
|
170
|
+
}
|
|
171
|
+
console.log(pc.green("✓ Homebrew found"));
|
|
172
|
+
return true;
|
|
127
173
|
}
|
package/src/harness-pi.mjs
CHANGED
|
@@ -8,10 +8,8 @@ import pc from "picocolors";
|
|
|
8
8
|
// ── Pi model id ─────────────────────────────────────────────────────────────
|
|
9
9
|
|
|
10
10
|
/**
|
|
11
|
-
* The model id Pi must send in requests.
|
|
12
|
-
*
|
|
13
|
-
* the repo-id label instead makes mlx-vlm unload the local model and re-fetch
|
|
14
|
-
* the repo from HuggingFace. Other backends use the friendly modelAlias.
|
|
11
|
+
* The model id Pi must send in requests. All backends use the friendly
|
|
12
|
+
* modelAlias as the API model id.
|
|
15
13
|
*/
|
|
16
14
|
export function piApiModelId(profile) {
|
|
17
15
|
return profile.modelAlias;
|
|
@@ -87,7 +85,7 @@ async function activeProviderProfiles(currentProfile) {
|
|
|
87
85
|
const byAlias = new Map();
|
|
88
86
|
for (const item of [...allProfiles, currentProfile]) {
|
|
89
87
|
if (item.providerId !== currentProfile.providerId) continue;
|
|
90
|
-
if (item.backend !== "llama-cpp"
|
|
88
|
+
if (item.backend !== "llama-cpp") {
|
|
91
89
|
byAlias.set(item.modelAlias, item);
|
|
92
90
|
continue;
|
|
93
91
|
}
|
package/src/managed.mjs
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { existsSync } from "node:fs";
|
|
2
2
|
import { BACKENDS } from "./backends.mjs";
|
|
3
|
-
import {
|
|
3
|
+
import { hasOmlx } from "./omlx-runtime.mjs";
|
|
4
4
|
|
|
5
5
|
export const MANAGED_BACKEND_IDS = ["omlx"];
|
|
6
6
|
|
|
@@ -22,6 +22,6 @@ export function hasLmStudioInstalled() {
|
|
|
22
22
|
return existsSync("/Applications/LM Studio.app");
|
|
23
23
|
}
|
|
24
24
|
|
|
25
|
-
export function hasOmlxInstalled() {
|
|
26
|
-
return
|
|
25
|
+
export async function hasOmlxInstalled() {
|
|
26
|
+
return await hasOmlx();
|
|
27
27
|
}
|
package/src/mlx-discovery.mjs
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
// to compute sizes and publishers. The oMLX API doesn't return these, so we
|
|
3
3
|
// read them from disk.
|
|
4
4
|
|
|
5
|
-
import { readdir, stat } from "node:fs/promises";
|
|
5
|
+
import { readdir, stat, readFile } from "node:fs/promises";
|
|
6
6
|
import { existsSync } from "node:fs";
|
|
7
7
|
import { join } from "node:path";
|
|
8
8
|
import { homedir } from "node:os";
|
|
@@ -65,6 +65,99 @@ export async function scanOmlxModelSizes() {
|
|
|
65
65
|
return infoByBasename;
|
|
66
66
|
}
|
|
67
67
|
|
|
68
|
+
// ── MTP capability detection ─────────────────────────────────────────────
|
|
69
|
+
// oMLX supports native MTP for Qwen 3.5/3.6 (dense + MoE) and DeepSeek-V4
|
|
70
|
+
// models that ship MTP heads in their weights. The check mirrors oMLX's own
|
|
71
|
+
// _mtp_compat_for_model: config must declare mtp_num_hidden_layers, model_type
|
|
72
|
+
// must be on the whitelist, and the safetensors index must contain mtp.* keys.
|
|
73
|
+
|
|
74
|
+
const MTP_MODEL_TYPES = ["qwen3_5", "qwen3_5_moe", "qwen3_6", "qwen3_6_moe", "deepseek_v4"];
|
|
75
|
+
|
|
76
|
+
/**
|
|
77
|
+
* Check if an oMLX model directory supports native MTP.
|
|
78
|
+
* Returns { compatible: boolean, reason: string }.
|
|
79
|
+
*/
|
|
80
|
+
export async function detectOmlxMtpCapability(modelDir) {
|
|
81
|
+
const configPath = join(modelDir, "config.json");
|
|
82
|
+
if (!existsSync(configPath)) return { compatible: false, reason: "config.json not found" };
|
|
83
|
+
|
|
84
|
+
let config;
|
|
85
|
+
try {
|
|
86
|
+
config = JSON.parse(await readFile(configPath, "utf8"));
|
|
87
|
+
} catch {
|
|
88
|
+
return { compatible: false, reason: "failed to read config.json" };
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
const mtpLayers = config.mtp_num_hidden_layers;
|
|
92
|
+
if (!mtpLayers || mtpLayers <= 0) {
|
|
93
|
+
return { compatible: false, reason: "model has no MTP heads in config" };
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
const modelType = config.model_type;
|
|
97
|
+
if (!MTP_MODEL_TYPES.some((t) => modelType === t || modelType?.startsWith(t))) {
|
|
98
|
+
return { compatible: false, reason: `model_type=${modelType} is not on the MTP whitelist (supported: ${MTP_MODEL_TYPES.join(", ")})` };
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
// Check for mtp.* weight tensors in the safetensors index
|
|
102
|
+
const hasWeights = await modelHasMtpWeights(modelDir);
|
|
103
|
+
if (!hasWeights) {
|
|
104
|
+
return { compatible: false, reason: "Config declares MTP layers but the converted weights are missing mtp.* tensors. Re-convert from HF with a converter that preserves MTP weights." };
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
return { compatible: true, reason: "" };
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
async function modelHasMtpWeights(modelDir) {
|
|
111
|
+
const indexPath = join(modelDir, "model.safetensors.index.json");
|
|
112
|
+
if (existsSync(indexPath)) {
|
|
113
|
+
try {
|
|
114
|
+
const index = JSON.parse(await readFile(indexPath, "utf8"));
|
|
115
|
+
const weightMap = index.weight_map ?? {};
|
|
116
|
+
return Object.keys(weightMap).some((key) => key.includes("mtp."));
|
|
117
|
+
} catch {
|
|
118
|
+
return false;
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
// Single-shard fallback: can't easily read safetensors keys in Node without
|
|
122
|
+
// the safetensors library. Check for an mtp-specific safetensors file.
|
|
123
|
+
try {
|
|
124
|
+
const entries = await readdir(modelDir);
|
|
125
|
+
return entries.some((f) => f.endsWith(".safetensors") && /mtp/i.test(f));
|
|
126
|
+
} catch {
|
|
127
|
+
return false;
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
/**
|
|
132
|
+
* Find the model directory for a given oMLX model ID.
|
|
133
|
+
* Searches ~/.omlx/models/ recursively for a directory matching the model ID.
|
|
134
|
+
*/
|
|
135
|
+
export async function findOmlxModelDir(modelId) {
|
|
136
|
+
if (!existsSync(OMLX_MODELS_DIR)) return null;
|
|
137
|
+
const basename = modelId.includes("/") ? modelId.slice(modelId.lastIndexOf("/") + 1)
|
|
138
|
+
: modelId.includes("--") ? modelId.slice(modelId.lastIndexOf("--") + 2)
|
|
139
|
+
: modelId;
|
|
140
|
+
|
|
141
|
+
async function walk(dir) {
|
|
142
|
+
let entries;
|
|
143
|
+
try {
|
|
144
|
+
entries = await readdir(dir, { withFileTypes: true });
|
|
145
|
+
} catch {
|
|
146
|
+
return null;
|
|
147
|
+
}
|
|
148
|
+
for (const entry of entries) {
|
|
149
|
+
if (!entry.isDirectory()) continue;
|
|
150
|
+
const fullPath = join(dir, entry.name);
|
|
151
|
+
if (entry.name === basename && await isMlxModelDir(fullPath)) return fullPath;
|
|
152
|
+
const found = await walk(fullPath);
|
|
153
|
+
if (found) return found;
|
|
154
|
+
}
|
|
155
|
+
return null;
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
return await walk(OMLX_MODELS_DIR);
|
|
159
|
+
}
|
|
160
|
+
|
|
68
161
|
/**
|
|
69
162
|
* Look up a model's info by its oMLX API id. Tries exact match, then the
|
|
70
163
|
* segment after `--` (oMLX org--name format), then after `/` (HF format).
|
package/src/model-presenters.mjs
CHANGED
|
@@ -53,7 +53,6 @@ function optionBackendTag(backendId) {
|
|
|
53
53
|
const label = backend?.label ?? backendId ?? "unknown";
|
|
54
54
|
const colors = {
|
|
55
55
|
"llama-cpp": pc.cyan,
|
|
56
|
-
"llama-cpp-mtp": pc.blue,
|
|
57
56
|
omlx: pc.magenta,
|
|
58
57
|
};
|
|
59
58
|
return optionPad(label, colors[backendId] ?? pc.dim, OPTION_BACKEND_WIDTH);
|
|
package/src/omlx-runtime.mjs
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
// oMLX runtime management — discovery, version checking, update prompts,
|
|
2
|
+
// and installation. Mirrors the llama.cpp runtime pattern in runtime.mjs.
|
|
3
|
+
//
|
|
4
|
+
// oMLX is installed either via the macOS app (DMG from GitHub Releases, which
|
|
5
|
+
// installs a ~/.omlx/bin/omlx CLI shim with in-app auto-update) or via Homebrew
|
|
6
|
+
// (brew tap jundot/omlx && brew install omlx). offgrid-ai does NOT download
|
|
7
|
+
// binaries directly — it uses brew for automated installs, and prompts for the
|
|
8
|
+
// DMG when brew is unavailable.
|
|
9
|
+
|
|
10
|
+
import { execFile } from "node:child_process";
|
|
11
|
+
import { existsSync } from "node:fs";
|
|
12
|
+
import { homedir } from "node:os";
|
|
13
|
+
import { join } from "node:path";
|
|
14
|
+
import { promisify } from "node:util";
|
|
15
|
+
import { compareVersions } from "./updates.mjs";
|
|
16
|
+
import { hasHomebrew, ensureHomebrewFor } from "./config.mjs";
|
|
17
|
+
import { commandExists } from "./exec.mjs";
|
|
18
|
+
import { pc, renderCard, renderRows } from "./ui.mjs";
|
|
19
|
+
|
|
20
|
+
const execFileAsync = promisify(execFile);
|
|
21
|
+
|
|
22
|
+
const OMLX_CLI_SHIM = join(homedir(), ".omlx", "bin", "omlx");
|
|
23
|
+
const RELEASE_API = "https://api.github.com/repos/jundot/omlx/releases/latest";
|
|
24
|
+
|
|
25
|
+
// ── Discovery ──────────────────────────────────────────────────────────────
|
|
26
|
+
|
|
27
|
+
/**
|
|
28
|
+
* Find the oMLX CLI binary. Checks the app-installed shim first (it shadows
|
|
29
|
+
* brew on PATH), then falls back to PATH. Returns the path or null.
|
|
30
|
+
*/
|
|
31
|
+
export async function findOmlx() {
|
|
32
|
+
// 1. macOS app CLI shim (shadows brew on PATH)
|
|
33
|
+
if (existsSync(OMLX_CLI_SHIM)) return OMLX_CLI_SHIM;
|
|
34
|
+
|
|
35
|
+
// 2. PATH (brew install)
|
|
36
|
+
try {
|
|
37
|
+
const { stdout } = await execFileAsync("which", ["omlx"]);
|
|
38
|
+
const path = stdout.trim();
|
|
39
|
+
if (path && existsSync(path)) return path;
|
|
40
|
+
} catch { /* not on PATH */ }
|
|
41
|
+
|
|
42
|
+
return null;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* Check if oMLX is installed (app shim or on PATH).
|
|
47
|
+
*/
|
|
48
|
+
export async function hasOmlx() {
|
|
49
|
+
return (await findOmlx()) !== null;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
/**
|
|
53
|
+
* Detect how oMLX was installed: "app" (macOS app CLI shim) or "brew" (on PATH
|
|
54
|
+
* but not the shim) or null (not installed).
|
|
55
|
+
*/
|
|
56
|
+
export async function omlxInstallMethod() {
|
|
57
|
+
if (existsSync(OMLX_CLI_SHIM)) return "app";
|
|
58
|
+
if (await commandExists("omlx")) return "brew";
|
|
59
|
+
return null;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
// ── Version checking ───────────────────────────────────────────────────────
|
|
63
|
+
|
|
64
|
+
/**
|
|
65
|
+
* Get the installed oMLX version by running `omlx --version`.
|
|
66
|
+
* Returns null if oMLX is not installed or the version can't be parsed.
|
|
67
|
+
*/
|
|
68
|
+
export async function installedOmlxVersion() {
|
|
69
|
+
const bin = await findOmlx();
|
|
70
|
+
if (!bin) return null;
|
|
71
|
+
try {
|
|
72
|
+
const { stdout } = await execFileAsync(bin, ["--version"], { timeout: 5000 });
|
|
73
|
+
const match = stdout.trim().match(/(\d+\.\d+\.\d+)/u);
|
|
74
|
+
return match ? match[1] : null;
|
|
75
|
+
} catch {
|
|
76
|
+
return null;
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
/**
 * Look up the newest oMLX release through the release API endpoint.
 *
 * Every failure mode (network error, 5 s timeout, non-OK status, malformed
 * body, missing or empty tag) collapses to null, which callers must read as
 * "check failed, skip the prompt" rather than "no update available".
 *
 * @param {function} [fetchImpl=globalThis.fetch] - fetch-compatible function,
 *   injectable for tests
 * @returns {Promise<{tag: string, version: string}|null>} the raw tag name and
 *   the same tag with a single leading "v" removed, or null on failure
 */
export async function latestOmlxRelease(fetchImpl = globalThis.fetch) {
  try {
    const reply = await fetchImpl(RELEASE_API, { signal: AbortSignal.timeout(5000) });
    if (!reply.ok) return null;

    const payload = await reply.json();
    const tagName = payload?.tag_name;
    if (typeof tagName !== "string" || tagName === "") return null;

    return { tag: tagName, version: tagName.replace(/^v/u, "") };
  } catch {
    return null;
  }
}
|
|
98
|
+
|
|
99
|
+
// ── Update prompt ──────────────────────────────────────────────────────────
|
|
100
|
+
|
|
101
|
+
/**
 * Check whether a newer oMLX release exists and, where possible, offer to
 * install or upgrade it.
 *
 * Behaviour by state:
 *  - release lookup failed, or the installed version is already current:
 *    returns false without printing anything
 *  - not installed: asks whether to install, then delegates to installOmlx()
 *  - macOS app install: prints a hint to use the in-app updater; never runs brew
 *  - Homebrew install: asks whether to update, then runs `brew update` and
 *    `brew upgrade omlx`
 *
 * @param {object} prompt - UI prompt interface; `yesNo(question, defaultAnswer)` is used
 * @param {object} [options]
 * @param {function} [options.fetchImpl=globalThis.fetch] - fetch implementation
 *   forwarded to latestOmlxRelease()
 * @returns {Promise<boolean>} true if an install or upgrade was performed,
 *   false otherwise (including declined prompts and failed upgrades)
 */
export async function offerManagedOmlxUpdate(prompt, { fetchImpl = globalThis.fetch } = {}) {
  const latest = await latestOmlxRelease(fetchImpl);
  if (!latest) return false;

  const installed = await installedOmlxVersion();
  if (installed && compareVersions(installed, latest.version) >= 0) return false;

  const method = await omlxInstallMethod();

  // installedOmlxVersion() also returns null when `--version` fails or its
  // output cannot be parsed, so a missing version alone does not prove oMLX
  // is absent. Only treat it as absent when no install method is detected.
  const isInstalled = Boolean(installed) || Boolean(method);
  const installedLabel = installed
    ?? pc.yellow(isInstalled ? "version unknown" : "not installed");

  console.log("\n" + renderCard("oMLX runtime", renderRows([
    ["Installed", installedLabel],
    ["Latest", pc.green(latest.version)],
    ["Source", method === "app" ? "macOS app (in-app auto-update)" : "Homebrew"],
  ]), { formatBorder: pc.cyan }));

  // Nothing installed at all — offer a fresh install.
  if (!isInstalled) {
    const shouldInstall = await prompt.yesNo("Install oMLX runtime?", false);
    if (!shouldInstall) return false;
    return await installOmlx(prompt);
  }

  // App installs have their own auto-update — just inform the user.
  if (method === "app") {
    console.log(pc.dim(" Update oMLX via the app's in-app updater, then restart offgrid-ai."));
    return false;
  }

  // Homebrew install with an update available (or an undeterminable version).
  const shouldUpdate = await prompt.yesNo("Update oMLX runtime?", false);
  if (!shouldUpdate) return false;

  try {
    const { runCommand } = await import("./exec.mjs");
    console.log(pc.dim("Updating oMLX via Homebrew..."));
    await runCommand("brew", ["update"], { label: "brew update" });
    await runCommand("brew", ["upgrade", "omlx"], { label: "brew upgrade omlx" });
    console.log(pc.green(`✓ Updated oMLX to latest`));
    return true;
  } catch (err) {
    console.log(pc.red(`✗ Update failed: ${err.message}`));
    console.log(pc.dim("Update manually: brew update && brew upgrade omlx"));
    return false;
  }
}
|
|
153
|
+
|
|
154
|
+
// ── Installation ───────────────────────────────────────────────────────────
|
|
155
|
+
|
|
156
|
+
/**
 * Install oMLX through Homebrew (`brew tap` followed by `brew install`).
 *
 * When Homebrew is missing, ensureHomebrewFor() is asked to provide it first.
 * If that does not succeed, manual install instructions are printed and the
 * function gives up.
 *
 * @param {object} prompt - UI prompt interface, forwarded to ensureHomebrewFor()
 * @param {function} [run] - command runner invoked as `run(cmd, args, label)`;
 *   defaults to a wrapper around runCommand from ./exec.mjs
 * @returns {Promise<boolean>} true if installation succeeded
 */
export async function installOmlx(prompt, run) {
  // Single runner shared by the Homebrew bootstrap and the oMLX install.
  // ./exec.mjs is only loaded if the default runner is actually invoked.
  const runner = run || (async (cmd, args, label) => {
    const { runCommand } = await import("./exec.mjs");
    return runCommand(cmd, args, { label });
  });

  const brewAvailable = await hasHomebrew();
  if (!brewAvailable) {
    const brewReady = await ensureHomebrewFor(prompt, runner, "oMLX");
    if (!brewReady) {
      console.log(pc.dim("Install oMLX manually:"));
      console.log(pc.dim(" brew tap jundot/omlx && brew install omlx"));
      console.log(pc.dim(" — or download the macOS app from https://github.com/jundot/omlx/releases"));
      return false;
    }
  }

  console.log(pc.cyan("Installing oMLX via Homebrew..."));
  try {
    await runner("brew", ["tap", "jundot/omlx", "https://github.com/jundot/omlx"], "oMLX tap");
    await runner("brew", ["install", "omlx"], "oMLX");
  } catch (err) {
    console.log(pc.red(`✗ oMLX installation failed: ${err.message}`));
    console.log(pc.dim("Install manually: brew tap jundot/omlx && brew install omlx"));
    console.log(pc.dim(" — or download the macOS app from https://github.com/jundot/omlx/releases"));
    return false;
  }

  console.log(pc.green("✓ oMLX installed"));
  return true;
}
|
|
199
|
+
|
|
200
|
+
/**
 * Make sure an oMLX executable is available, offering to install it when it
 * is not. Intended for onboarding; the oMLX counterpart of ensureLlamaRuntime().
 *
 * @param {object} prompt - UI prompt interface; `yesNo(question, defaultAnswer)` is used
 * @param {function} [run] - command runner forwarded to installOmlx()
 * @returns {Promise<boolean>} true if oMLX can be located afterwards (whether
 *   pre-existing or freshly installed), false if it is still missing
 */
export async function ensureOmlxRuntime(prompt, run) {
  let omlxBin = await findOmlx();

  if (!omlxBin) {
    const statusRows = renderRows([
      ["Status", pc.yellow("not installed")],
      ["Used for", "local MLX models (Apple Silicon optimized)"],
      ["Install", "managed by offgrid-ai via Homebrew"],
    ]);
    console.log(renderCard("oMLX runtime", statusRows, { formatBorder: pc.cyan }));

    const accepted = await prompt.yesNo("Install oMLX runtime?", true);
    if (accepted) {
      // The install result is not trusted directly; re-probe for the binary.
      await installOmlx(prompt, run);
      omlxBin = await findOmlx();
    }
  }

  if (!omlxBin) {
    console.log(pc.yellow("Skipping oMLX for now. You can still use llama.cpp, or run offgrid-ai again to install."));
    return false;
  }

  const version = await installedOmlxVersion();
  const versionSuffix = version ? ` (v${version})` : "";
  console.log(pc.green(`✓ oMLX: ${omlxBin}${versionSuffix}`));
  return true;
}
|
package/src/process.mjs
CHANGED
|
@@ -6,6 +6,7 @@ import { basename, join } from "node:path";
|
|
|
6
6
|
import { LOG_DIR } from "./config.mjs";
|
|
7
7
|
import { writeState, readState, profileDir } from "./profiles.mjs";
|
|
8
8
|
import { backendFor, backendBinaryFor } from "./backends.mjs";
|
|
9
|
+
import { pc } from "./ui.mjs";
|
|
9
10
|
|
|
10
11
|
const execFileAsync = promisify(execFile);
|
|
11
12
|
|
|
@@ -21,7 +22,7 @@ export async function computeServerCommand(profile) {
|
|
|
21
22
|
const binary = await backendBinaryFor(profile.backend);
|
|
22
23
|
if (!binary) throw new Error("Server binary not found. Run offgrid-ai interactively to install.");
|
|
23
24
|
|
|
24
|
-
// llama-cpp
|
|
25
|
+
// llama-cpp
|
|
25
26
|
const { computeFlags } = await import("./autodetect.mjs");
|
|
26
27
|
const result = computeFlags(
|
|
27
28
|
profile.capabilities ?? {},
|
|
@@ -118,6 +119,10 @@ async function startLocalServer(profile) {
|
|
|
118
119
|
|
|
119
120
|
async function startManagedServer(profile, backend) {
|
|
120
121
|
if (await serverReady(profile.baseUrl)) {
|
|
122
|
+
// Apply per-model settings (MTP) even when server is already running.
|
|
123
|
+
if (backend.id === "omlx" && profile.capabilities?.mtp) {
|
|
124
|
+
await ensureOmlxMtpSetting(profile);
|
|
125
|
+
}
|
|
121
126
|
return writeManagedState(profile, backend);
|
|
122
127
|
}
|
|
123
128
|
|
|
@@ -126,9 +131,15 @@ async function startManagedServer(profile, backend) {
|
|
|
126
131
|
try {
|
|
127
132
|
const { execFile } = await import("node:child_process");
|
|
128
133
|
const { promisify } = await import("node:util");
|
|
129
|
-
await
|
|
130
|
-
|
|
131
|
-
|
|
134
|
+
const { findOmlx } = await import("./omlx-runtime.mjs");
|
|
135
|
+
const omlxBin = await findOmlx();
|
|
136
|
+
if (!omlxBin) {
|
|
137
|
+
throw new Error(`${backend.label} is not installed. Run offgrid-ai to install it, or install manually: brew tap jundot/omlx && brew install omlx`);
|
|
138
|
+
}
|
|
139
|
+
await promisify(execFile)(omlxBin, ["start"], { timeout: 10000 });
|
|
140
|
+
} catch (err) {
|
|
141
|
+
if (err.message.includes("not installed")) throw err;
|
|
142
|
+
throw new Error(`${backend.label} could not be auto-started: ${err.message}. Run \`omlx start\` manually.`, { cause: err });
|
|
132
143
|
}
|
|
133
144
|
}
|
|
134
145
|
|
|
@@ -141,9 +152,48 @@ async function startManagedServer(profile, backend) {
|
|
|
141
152
|
if (!(await serverReady(profile.baseUrl))) {
|
|
142
153
|
throw new Error(`${backend.label} is not responding at ${profile.baseUrl}. Start it and try again.`);
|
|
143
154
|
}
|
|
155
|
+
|
|
156
|
+
// Apply per-model settings (MTP) before the model is loaded.
|
|
157
|
+
// oMLX applies MTP patches at load time, so the setting must be in
|
|
158
|
+
// model_settings.json before any request triggers a load.
|
|
159
|
+
if (backend.id === "omlx" && profile.capabilities?.mtp) {
|
|
160
|
+
await ensureOmlxMtpSetting(profile);
|
|
161
|
+
}
|
|
162
|
+
|
|
144
163
|
return writeManagedState(profile, backend);
|
|
145
164
|
}
|
|
146
165
|
|
|
166
|
+
/**
 * Ask the oMLX server to enable MTP for this profile's model through its
 * admin settings endpoint.
 *
 * The setting is sent before the model is used because, per the upstream
 * behaviour this code relies on, oMLX applies MTP at model load time; an
 * already-loaded model picks it up on its next reload.
 *
 * Best-effort: every outcome (success, auth failure, unknown model, other
 * HTTP error, network error or 5 s timeout) is reported with a `[mtp]` log
 * line and never thrown to the caller.
 *
 * @param {object} profile - profile providing `baseUrl` (a trailing `/v1` is
 *   stripped) and a model id taken from `omlxModel`, `modelAlias` or `id`
 * @returns {Promise<void>}
 */
async function ensureOmlxMtpSetting(profile) {
  const serverRoot = profile.baseUrl?.replace(/\/v1\/?$/u, "") || "";
  const modelId = profile.omlxModel ?? profile.modelAlias ?? profile.id;

  try {
    const endpoint = `${serverRoot}/admin/api/models/${encodeURIComponent(modelId)}/settings`;
    const response = await fetch(endpoint, {
      method: "PUT",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ mtp_enabled: true }),
      signal: AbortSignal.timeout(5000),
    });

    if (response.ok) {
      console.log(pc.green(`[mtp] Enabled MTP speculative decoding for ${modelId}`));
      return;
    }

    switch (response.status) {
      case 401:
      case 403:
        console.log(pc.yellow(`[mtp] Could not enable MTP: oMLX admin authentication required. Enable skip_api_key_verification in oMLX settings, or enable MTP manually from the admin panel.`));
        break;
      case 404:
        console.log(pc.yellow(`[mtp] Model ${modelId} not found on oMLX server. MTP setting not applied.`));
        break;
      default: {
        // Include whatever body the server sent; an unreadable body is ignored.
        const detail = await response.text().catch(() => "");
        console.log(pc.yellow(`[mtp] Could not enable MTP: HTTP ${response.status} ${detail}`));
      }
    }
  } catch (err) {
    console.log(pc.yellow(`[mtp] Could not enable MTP: ${err.message}`));
  }
}
|
|
196
|
+
|
|
147
197
|
async function writeManagedState(profile, backend) {
|
|
148
198
|
const state = {
|
|
149
199
|
pid: null,
|
|
@@ -223,7 +273,7 @@ async function processGone(pid) {
|
|
|
223
273
|
export async function unloadModelFromServer(profile) {
|
|
224
274
|
const backend = backendFor(profile.backend);
|
|
225
275
|
|
|
226
|
-
if (backend.id === "llama-cpp"
|
|
276
|
+
if (backend.id === "llama-cpp") {
|
|
227
277
|
return { unloaded: false, backend: backend.id, reason: "stop server to unload" };
|
|
228
278
|
}
|
|
229
279
|
|
package/src/profile-setup.mjs
CHANGED
|
@@ -3,12 +3,13 @@ import { execFile } from "node:child_process";
|
|
|
3
3
|
import { promisify } from "node:util";
|
|
4
4
|
import { estimateMemory } from "./estimate.mjs";
|
|
5
5
|
import { findLlamaServer } from "./config.mjs";
|
|
6
|
-
import { baseUrlForFlags
|
|
6
|
+
import { baseUrlForFlags } from "./backends.mjs";
|
|
7
7
|
import { pc, formatBytes, renderRows, renderSection } from "./ui.mjs";
|
|
8
8
|
import { detectCapabilities } from "./autodetect.mjs";
|
|
9
9
|
import { matchDrafter } from "./scan.mjs";
|
|
10
10
|
import { scanGgufModels } from "./scan.mjs";
|
|
11
11
|
import { capabilitySummary } from "./model-summary.mjs";
|
|
12
|
+
import { detectOmlxMtpCapability, findOmlxModelDir } from "./mlx-discovery.mjs";
|
|
12
13
|
|
|
13
14
|
const execFileAsync = promisify(execFile);
|
|
14
15
|
|
|
@@ -49,19 +50,9 @@ export async function configureLocalProfile(prompt, profile) {
|
|
|
49
50
|
}
|
|
50
51
|
const hasMtp = freshCaps.mtp || Boolean(drafterPath);
|
|
51
52
|
const caps = { ...freshCaps, mtp: hasMtp };
|
|
52
|
-
//
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
}
|
|
56
|
-
// If the profile was MTP but the drafter is now gone (and the model isn't
|
|
57
|
-
// natively MTP), switch back to plain llama.cpp so the server can start.
|
|
58
|
-
if (!hasMtp && configured.backend === "llama-cpp-mtp") {
|
|
59
|
-
console.log(pc.yellow("MTP drafter no longer found — switching to llama.cpp without speculative decoding."));
|
|
60
|
-
configured = removeMtpDefaults(configured);
|
|
61
|
-
}
|
|
62
|
-
if (drafterPath && !configured.drafterPath) {
|
|
63
|
-
configured = { ...configured, drafterPath };
|
|
64
|
-
}
|
|
53
|
+
// MTP is a capability, not a separate backend. Just update the profile's
|
|
54
|
+
// capabilities and drafter path — flag computation handles the rest.
|
|
55
|
+
configured = { ...configured, drafterPath: drafterPath ?? null, capabilities: { ...configured.capabilities, mtp: hasMtp } };
|
|
65
56
|
// If vision was previously disabled but mmproj is back, re-enable
|
|
66
57
|
if (configured.disabledMmprojPath && configured.mmprojPath === null && freshCaps.vision) {
|
|
67
58
|
configured = { ...configured, mmprojPath: configured.disabledMmprojPath, disabledMmprojPath: undefined, capabilities: { ...configured.capabilities, vision: true, visionDisabledReason: undefined } };
|
|
@@ -81,8 +72,7 @@ export async function configureLocalProfile(prompt, profile) {
|
|
|
81
72
|
if (caps.mtp) {
|
|
82
73
|
const drafterInfo = configured.drafterPath ? `\n Drafter: ${configured.drafterPath}` : "";
|
|
83
74
|
console.log(renderSection("MTP detected", renderRows([
|
|
84
|
-
["
|
|
85
|
-
["Port", String(LLAMA_CPP_MTP_PORT)],
|
|
75
|
+
["Feature", "Multi-Token Prediction (speculative decoding)"],
|
|
86
76
|
["Flags", `--spec-type draft-mtp --spec-draft-n-max 4${configured.drafterPath ? " --spec-draft-model <drafter>" : ""}`],
|
|
87
77
|
])));
|
|
88
78
|
if (drafterInfo) console.log(pc.dim(drafterInfo));
|
|
@@ -150,24 +140,60 @@ export function applyRuntimeFlagOverrides(profile, overrides) {
|
|
|
150
140
|
}
|
|
151
141
|
|
|
152
142
|
/**
 * Mark a profile as MTP-capable and re-apply its existing flags.
 *
 * MTP is modelled as a capability on the profile; the backend, provider and
 * port are left untouched.
 *
 * @param {object} profile - profile to copy; not mutated
 * @returns {object} result of applyProfileFlags() for the updated copy
 */
export function applyMtpDefaults(profile) {
  const capabilities = { ...(profile.capabilities ?? {}), mtp: true };
  const withMtp = { ...profile, capabilities };
  return applyProfileFlags(withMtp, profile.flags);
}
|
|
161
148
|
|
|
162
149
|
/**
 * Clear MTP from a profile: drops the drafter path, sets the `mtp`
 * capability to false, and re-applies the profile's existing flags.
 *
 * @param {object} profile - profile to copy; not mutated
 * @returns {object} result of applyProfileFlags() for the updated copy
 */
export function removeMtpDefaults(profile) {
  const capabilities = { ...(profile.capabilities ?? {}), mtp: false };
  const withoutMtp = { ...profile, drafterPath: null, capabilities };
  return applyProfileFlags(withoutMtp, profile.flags);
}
|
|
156
|
+
|
|
157
|
+
// ── oMLX (managed server) profile configuration ───────────────────────────
|
|
158
|
+
|
|
159
|
+
/**
 * Interactive setup for a profile served by oMLX (a managed server).
 *
 * Looks up the model's directory, probes it for MTP support, and — when MTP
 * is usable — asks whether to enable it. A summary is then shown and the user
 * confirms whether to keep the result.
 *
 * @param {object} prompt - UI prompt interface; `yesNo(question, defaultAnswer)` is used
 * @param {object} profile - profile being configured; not mutated
 * @returns {Promise<object|null>} the (possibly updated) profile, or null if
 *   the user declines to save
 */
export async function configureManagedProfile(prompt, profile) {
  // Print a titled section preceded by a blank line.
  const showSection = (title, rows) => {
    console.log("");
    console.log(renderSection(title, renderRows(rows)));
  };

  const modelId = profile.omlxModel ?? profile.modelAlias ?? profile.id;
  let configured = profile;

  // MTP capability is derived from the model's on-disk configuration.
  const modelDir = await findOmlxModelDir(modelId);
  if (modelDir) {
    const mtpResult = await detectOmlxMtpCapability(modelDir);
    const featureRow = ["Feature", "Multi-Token Prediction (speculative decoding)"];

    if (mtpResult.compatible) {
      showSection("MTP detected", [
        featureRow,
        ["Mechanism", "oMLX native MTP (enabled via admin API at load time)"],
      ]);
      const useMtp = await prompt.yesNo("Use MTP speculative decoding?", true);
      const capabilities = { ...(configured.capabilities ?? {}), mtp: useMtp };
      configured = { ...configured, capabilities };
    } else if (mtpResult.reason !== "model has no MTP heads in config") {
      // The model declares MTP but cannot use it — tell the user why.
      // Models without MTP heads are the common case and stay silent.
      showSection("MTP not available", [
        featureRow,
        ["Reason", pc.yellow(mtpResult.reason)],
      ]);
    }
  }

  const summaryRows = [
    ["Model", pc.bold(profile.label)],
    ["Backend", "oMLX"],
  ];
  if (configured.capabilities?.mtp) summaryRows.push(["MTP", "enabled"]);
  showSection("Model setup", summaryRows);

  const confirmed = await prompt.yesNo("Save profile with these settings?", true);
  return confirmed ? configured : null;
}
|
|
172
198
|
|
|
173
199
|
function applyVisionDefaults(profile) {
|
package/src/profiles.mjs
CHANGED
|
@@ -40,7 +40,15 @@ export async function loadProfiles() {
|
|
|
40
40
|
.filter((e) => e.isDirectory() && existsSync(profileJsonPath(e.name)))
|
|
41
41
|
.map((e) => e.name)
|
|
42
42
|
.sort();
|
|
43
|
-
return (await Promise.all(ids.map((id) => readProfile(id))))
|
|
43
|
+
return (await Promise.all(ids.map((id) => readProfile(id))))
|
|
44
|
+
.map((p) => {
|
|
45
|
+
// Migrate legacy llama-cpp-mtp backend → llama-cpp with mtp capability
|
|
46
|
+
if (p.backend === "llama-cpp-mtp") {
|
|
47
|
+
return { ...p, backend: "llama-cpp", providerId: "llama-cpp", capabilities: { ...(p.capabilities ?? {}), mtp: true } };
|
|
48
|
+
}
|
|
49
|
+
return p;
|
|
50
|
+
})
|
|
51
|
+
.filter((p) => BACKENDS[p.backend]);
|
|
44
52
|
}
|
|
45
53
|
|
|
46
54
|
export async function readProfile(id) {
|
|
@@ -49,7 +57,7 @@ export async function readProfile(id) {
|
|
|
49
57
|
return JSON.parse(await readFile(path, "utf8"));
|
|
50
58
|
}
|
|
51
59
|
|
|
52
|
-
export async function saveProfile(profile
|
|
60
|
+
export async function saveProfile(profile) {
|
|
53
61
|
const id = sanitizeProfileId(profile.id);
|
|
54
62
|
const dir = profileDir(id);
|
|
55
63
|
await mkdir(dir, { recursive: true });
|
|
@@ -126,7 +134,7 @@ export async function createProfileFromModel(model, backendId, drafterPath) {
|
|
|
126
134
|
const caps = detectCapabilities(model.path, model.mmprojPath);
|
|
127
135
|
// If a drafter is provided, this model supports MTP regardless of filename
|
|
128
136
|
const hasMtp = caps.mtp || Boolean(drafterPath);
|
|
129
|
-
const backend = backendId ??
|
|
137
|
+
const backend = backendId ?? "llama-cpp";
|
|
130
138
|
const { flags } = computeFlags(
|
|
131
139
|
{ ...caps, mtp: hasMtp },
|
|
132
140
|
model.path,
|
package/src/ui.mjs
CHANGED
|
@@ -106,25 +106,7 @@ export function renderCard(title, body, options = {}) {
|
|
|
106
106
|
}
|
|
107
107
|
|
|
108
108
|
/**
 * Wrap `text` to `width` by delegating to wrapText().
 *
 * @param {string} text - text to wrap
 * @param {number} width - maximum line width passed through to wrapText()
 * @returns {*} whatever wrapText() returns for the same arguments
 */
function wrapVisible(text, width) {
  const wrapped = wrapText(text, width);
  return wrapped;
}
|
|
129
111
|
|
|
130
112
|
export function renderSectionRows(title, rows, options = {}) {
|
|
@@ -221,6 +203,7 @@ export function createPrompt() {
|
|
|
221
203
|
if (!Number.isFinite(input) || input < min || input > max) {
|
|
222
204
|
return `Enter a number from ${min} to ${max}.`;
|
|
223
205
|
}
|
|
206
|
+
return true;
|
|
224
207
|
},
|
|
225
208
|
});
|
|
226
209
|
return Number(value);
|