offgrid-ai 0.15.9 → 0.16.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -4
- package/package.json +1 -1
- package/src/autodetect.mjs +1 -1
- package/src/backends.mjs +4 -41
- package/src/benchmark/flow.mjs +14 -13
- package/src/benchmark/metrics.mjs +14 -20
- package/src/commands/main.mjs +7 -7
- package/src/commands/models.mjs +8 -21
- package/src/commands/onboard.mjs +10 -43
- package/src/commands/run.mjs +1 -1
- package/src/commands/status.mjs +19 -0
- package/src/config.mjs +48 -2
- package/src/harness-pi.mjs +5 -7
- package/src/managed.mjs +3 -3
- package/src/mlx-discovery.mjs +77 -258
- package/src/model-catalog.mjs +9 -14
- package/src/model-presenters.mjs +0 -30
- package/src/omlx-runtime.mjs +232 -0
- package/src/process.mjs +87 -48
- package/src/profile-setup.mjs +50 -113
- package/src/profiles.mjs +12 -28
- package/src/ui.mjs +2 -19
- package/resources/mlxvlm-server-wrapper.py +0 -112
- package/src/mlx-flags.mjs +0 -100
package/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
# offgrid-ai
|
|
4
4
|
|
|
5
|
-
**Helper CLI for running local AI models on Mac with llama-server
|
|
5
|
+
**Helper CLI for running local AI models on Mac with llama-server and oMLX.**
|
|
6
6
|
|
|
7
7
|
[](package.json)
|
|
8
8
|
[]()
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
|
|
13
13
|
## What is offgrid-ai?
|
|
14
14
|
|
|
15
|
-
offgrid-ai is a command-line tool that lets you run AI models locally. Running local models with llama-server
|
|
15
|
+
offgrid-ai is a command-line tool that lets you run AI models locally. Running local models with llama-server or oMLX has a steep learning curve compared to cloud-based models, so offgrid-ai is designed to abstract away the complexity, while still providing a powerful and flexible way to run local models.
|
|
16
16
|
|
|
17
17
|
This is the recommended workflow:
|
|
18
18
|
|
|
@@ -23,8 +23,8 @@ This is the recommended workflow:
|
|
|
23
23
|
## Core Features
|
|
24
24
|
- Auto-detects available models from LM Studio, oMLX, and HuggingFace
|
|
25
25
|
- Auto-detects MTP (multi-token prediction) or QAT (quantization aware training) models, and applies the correct flags for llama.cpp
|
|
26
|
-
- Auto-applies the optimal flags for the model type (llama.cpp server flags,
|
|
27
|
-
- Start / stop local servers automatically for chat sessions (llama-server and
|
|
26
|
+
- Auto-applies the optimal flags for the model type (llama.cpp server flags, oMLX auto-start and cache management)
|
|
27
|
+
- Start / stop local servers automatically for chat sessions (llama-server and oMLX)
|
|
28
28
|
|
|
29
29
|
## Quick start
|
|
30
30
|
|
package/package.json
CHANGED
package/src/autodetect.mjs
CHANGED
|
@@ -55,7 +55,7 @@ export function computeFlags(capabilities, modelPath, mmprojPath, draftModelPath
|
|
|
55
55
|
const isLowMem = quant && /[Qq]4[_0]/i.test(quant);
|
|
56
56
|
|
|
57
57
|
const flags = {
|
|
58
|
-
...defaultFlagsForBackend(
|
|
58
|
+
...defaultFlagsForBackend("llama-cpp"),
|
|
59
59
|
ctxSize: capabilities.ctxSize,
|
|
60
60
|
flashAttention: "on",
|
|
61
61
|
cacheTypeK: isLowMem ? "f16" : "bf16",
|
package/src/backends.mjs
CHANGED
|
@@ -1,14 +1,12 @@
|
|
|
1
1
|
import { findLlamaServer } from "./config.mjs";
|
|
2
2
|
import { scanGgufModels } from "./scan.mjs";
|
|
3
3
|
import { parseModelName } from "./model-name.mjs";
|
|
4
|
-
import {
|
|
5
|
-
import { DEFAULT_PORT as MLX_VLM_PORT } from "./mlx-flags.mjs";
|
|
4
|
+
import { scanOmlxModelSizes, lookupOmlxModelInfo } from "./mlx-discovery.mjs";
|
|
6
5
|
|
|
7
6
|
// ── Backend definitions ────────────────────────────────────────────────────
|
|
8
7
|
|
|
9
8
|
export const LOCAL_HOST = "127.0.0.1";
|
|
10
9
|
export const LLAMA_CPP_PORT = 8080;
|
|
11
|
-
export const LLAMA_CPP_MTP_PORT = 8081;
|
|
12
10
|
export const OMLX_PORT = 8000;
|
|
13
11
|
|
|
14
12
|
export function baseUrlFor({ host = LOCAL_HOST, port, path = "/v1" }) {
|
|
@@ -31,17 +29,6 @@ export const BACKENDS = {
|
|
|
31
29
|
needsCommandFile: true,
|
|
32
30
|
scanModels: async () => (await scanGgufModels()).models,
|
|
33
31
|
},
|
|
34
|
-
"llama-cpp-mtp": {
|
|
35
|
-
id: "llama-cpp-mtp",
|
|
36
|
-
label: "llama.cpp MTP",
|
|
37
|
-
type: "local-server",
|
|
38
|
-
providerId: "llama-cpp-mtp",
|
|
39
|
-
defaultHost: LOCAL_HOST,
|
|
40
|
-
defaultPort: LLAMA_CPP_MTP_PORT,
|
|
41
|
-
defaultBaseUrl: baseUrlFor({ port: LLAMA_CPP_MTP_PORT }),
|
|
42
|
-
needsCommandFile: true,
|
|
43
|
-
scanModels: async () => (await scanGgufModels()).models,
|
|
44
|
-
},
|
|
45
32
|
"omlx": {
|
|
46
33
|
id: "omlx",
|
|
47
34
|
label: "oMLX",
|
|
@@ -54,17 +41,6 @@ export const BACKENDS = {
|
|
|
54
41
|
needsCommandFile: false,
|
|
55
42
|
scanModels: () => scanOmlxModels(),
|
|
56
43
|
},
|
|
57
|
-
"mlx-vlm": {
|
|
58
|
-
id: "mlx-vlm",
|
|
59
|
-
label: "mlx-vlm",
|
|
60
|
-
type: "local-server",
|
|
61
|
-
providerId: "mlx-vlm",
|
|
62
|
-
defaultHost: LOCAL_HOST,
|
|
63
|
-
defaultPort: MLX_VLM_PORT,
|
|
64
|
-
defaultBaseUrl: baseUrlFor({ port: MLX_VLM_PORT }),
|
|
65
|
-
needsCommandFile: true,
|
|
66
|
-
scanModels: async () => scanMlxModels(),
|
|
67
|
-
},
|
|
68
44
|
};
|
|
69
45
|
|
|
70
46
|
export function backendFor(backendId) {
|
|
@@ -75,10 +51,8 @@ export function backendFor(backendId) {
|
|
|
75
51
|
|
|
76
52
|
export async function backendBinaryFor(backendId) {
|
|
77
53
|
const backend = BACKENDS[backendId ?? "llama-cpp"];
|
|
78
|
-
if (backend.id === "mlx-vlm") return "python3"; // mlx-vlm spawns via python3 + the strict=False wrapper
|
|
79
54
|
if (backend.type === "managed-server") return null;
|
|
80
|
-
|
|
81
|
-
return discovered; // null means "not found — trigger onboarding"
|
|
55
|
+
return await findLlamaServer();
|
|
82
56
|
}
|
|
83
57
|
|
|
84
58
|
export function defaultFlagsForBackend(backendId) {
|
|
@@ -96,21 +70,15 @@ async function scanOmlxModels() {
|
|
|
96
70
|
const body = await response.json();
|
|
97
71
|
if (!Array.isArray(body?.data)) return [];
|
|
98
72
|
|
|
99
|
-
// The oMLX API doesn't return model sizes or publishers — look them up from disk.
|
|
100
73
|
const infoMap = await scanOmlxModelSizes();
|
|
101
74
|
|
|
102
|
-
//
|
|
103
|
-
// ID formats (e.g. "Qwen3.6-35B-A3B-OptiQ-4bit" and
|
|
104
|
-
// "mlx-community--Qwen3.6-35B-A3B-OptiQ-4bit"). Deduplicate by the
|
|
105
|
-
// normalized full name (publisher/model with / separator), keeping
|
|
106
|
-
// the first entry (which has the most complete metadata).
|
|
75
|
+
// Deduplicate by normalized full name (publisher/model with / separator)
|
|
107
76
|
const seen = new Set();
|
|
108
77
|
const deduped = [];
|
|
109
78
|
for (const model of body.data.filter(isChatOmlxModel)) {
|
|
110
79
|
const info = lookupOmlxModelInfo(model.id, infoMap);
|
|
111
80
|
const hasPublisher = model.id.includes("/") || model.id.includes("--");
|
|
112
81
|
const fullName = (!hasPublisher && info?.publisher) ? `${info.publisher}/${model.id}` : model.id;
|
|
113
|
-
// Normalize: convert -- separator to / for dedup comparison
|
|
114
82
|
const normalized = fullName.replace(/--/g, "/");
|
|
115
83
|
if (seen.has(normalized)) continue;
|
|
116
84
|
seen.add(normalized);
|
|
@@ -137,15 +105,10 @@ async function scanOmlxModels() {
|
|
|
137
105
|
}).sort((a, b) => a.label.localeCompare(b.label));
|
|
138
106
|
}
|
|
139
107
|
|
|
140
|
-
// ── Labels ──────────────────────────────────────────────────────────────
|
|
141
|
-
|
|
142
108
|
function isChatOmlxModel(model) {
|
|
143
109
|
if (typeof model?.id !== "string" || !model.id.trim()) return false;
|
|
144
110
|
const type = String(model.type ?? model.model_type ?? "").toLowerCase();
|
|
145
111
|
if (["embedding", "embeddings", "reranker", "tool", "converter", "markitdown"].includes(type)) return false;
|
|
146
112
|
if (Object.hasOwn(model, "max_model_len") && model.max_model_len === null) return false;
|
|
147
113
|
return true;
|
|
148
|
-
}
|
|
149
|
-
|
|
150
|
-
// (ollamaLabel and omlxLabel removed — parseModelName in model-name.mjs is the single path)
|
|
151
|
-
// (Ollama backend removed — offgrid-ai now uses llama-server + mlx-vlm + oMLX)
|
|
114
|
+
}
|
package/src/benchmark/flow.mjs
CHANGED
|
@@ -15,7 +15,7 @@ import { finalizeBenchmarkRun, renderBenchmarkSummary } from "./finalize.mjs";
|
|
|
15
15
|
|
|
16
16
|
function benchmarkModelSource(profile) {
|
|
17
17
|
if (!profile) return "cloud";
|
|
18
|
-
return profile.
|
|
18
|
+
return profile.backend === "omlx" ? "omlx" : "llama-cpp";
|
|
19
19
|
}
|
|
20
20
|
|
|
21
21
|
async function chooseBenchmarkAction(prompt, canRun) {
|
|
@@ -150,16 +150,22 @@ async function selectBenchmark(prompt, repoPath) {
|
|
|
150
150
|
return { kind, benchmark };
|
|
151
151
|
}
|
|
152
152
|
|
|
153
|
-
// ──
|
|
153
|
+
// ── Shared benchmark setup ───────────────────────────────────────────────
|
|
154
154
|
|
|
155
|
-
|
|
155
|
+
async function benchmarkSetup() {
|
|
156
156
|
await ensureDirs();
|
|
157
157
|
const prompt = createPrompt();
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
158
|
+
const repoPath = await linkBenchmarkRepo(prompt);
|
|
159
|
+
if (!repoPath) return { prompt, repoPath: null, selected: null };
|
|
160
|
+
const selected = await selectBenchmark(prompt, repoPath);
|
|
161
|
+
return { prompt, repoPath, selected };
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
// ── Benchmark from a selected profile (from model picker) ────────────────
|
|
161
165
|
|
|
162
|
-
|
|
166
|
+
export async function benchmarkForProfile(profile) {
|
|
167
|
+
const { prompt, repoPath, selected } = await benchmarkSetup();
|
|
168
|
+
try {
|
|
163
169
|
if (!selected) return;
|
|
164
170
|
const { kind, benchmark: selectedBenchmark } = selected;
|
|
165
171
|
|
|
@@ -185,13 +191,8 @@ export async function benchmarkForProfile(profile) {
|
|
|
185
191
|
// ── Standalone benchmark flow (offgrid-ai benchmark) ──────────────────────
|
|
186
192
|
|
|
187
193
|
export async function benchmarkFlow() {
|
|
188
|
-
await
|
|
189
|
-
const prompt = createPrompt();
|
|
194
|
+
const { prompt, repoPath, selected } = await benchmarkSetup();
|
|
190
195
|
try {
|
|
191
|
-
const repoPath = await linkBenchmarkRepo(prompt);
|
|
192
|
-
if (!repoPath) return;
|
|
193
|
-
|
|
194
|
-
const selected = await selectBenchmark(prompt, repoPath);
|
|
195
196
|
if (!selected) return;
|
|
196
197
|
const { kind, benchmark: selectedBenchmark } = selected;
|
|
197
198
|
|
|
@@ -9,7 +9,7 @@ const SPEED_QUERY_MAX_TOKENS = 64;
|
|
|
9
9
|
export async function queryServerMetrics(profile) {
|
|
10
10
|
const backend = backendFor(profile.backend);
|
|
11
11
|
|
|
12
|
-
if (backend.id === "llama-cpp"
|
|
12
|
+
if (backend.id === "llama-cpp") {
|
|
13
13
|
return await queryLlamaCppMetrics(profile);
|
|
14
14
|
}
|
|
15
15
|
if (backend.id === "omlx") {
|
|
@@ -19,12 +19,13 @@ export async function queryServerMetrics(profile) {
|
|
|
19
19
|
throw new Error(`Unsupported backend for benchmark speed metrics: ${backend.id}`);
|
|
20
20
|
}
|
|
21
21
|
|
|
22
|
-
async function
|
|
22
|
+
async function speedQueryFetch(profile, { stream = false, streamOptions = null, errorLabel = "speed query" } = {}) {
|
|
23
23
|
const body = {
|
|
24
24
|
model: profile.modelAlias,
|
|
25
25
|
messages: [{ role: "user", content: BENCH_SPEED_PROMPT }],
|
|
26
|
-
stream
|
|
26
|
+
stream,
|
|
27
27
|
max_tokens: SPEED_QUERY_MAX_TOKENS,
|
|
28
|
+
...(streamOptions ? { stream_options: streamOptions } : {}),
|
|
28
29
|
};
|
|
29
30
|
|
|
30
31
|
const response = await fetch(profile.baseUrl.replace(/\/$/u, "") + "/chat/completions", {
|
|
@@ -35,9 +36,15 @@ async function queryLlamaCppMetrics(profile) {
|
|
|
35
36
|
});
|
|
36
37
|
|
|
37
38
|
if (!response.ok) {
|
|
38
|
-
throw new Error(
|
|
39
|
+
throw new Error(`${errorLabel} failed: ${response.status} ${response.statusText}`);
|
|
39
40
|
}
|
|
40
41
|
|
|
42
|
+
return response;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
async function queryLlamaCppMetrics(profile) {
|
|
46
|
+
const response = await speedQueryFetch(profile, { errorLabel: "llama.cpp speed query" });
|
|
47
|
+
|
|
41
48
|
const data = await response.json();
|
|
42
49
|
const timings = data.timings;
|
|
43
50
|
if (!timings || typeof timings.prompt_per_second !== "number" || typeof timings.predicted_per_second !== "number") {
|
|
@@ -60,25 +67,12 @@ async function queryLlamaCppMetrics(profile) {
|
|
|
60
67
|
}
|
|
61
68
|
|
|
62
69
|
async function queryOmlxMetrics(profile) {
|
|
63
|
-
const
|
|
64
|
-
model: profile.modelAlias,
|
|
65
|
-
messages: [{ role: "user", content: BENCH_SPEED_PROMPT }],
|
|
70
|
+
const response = await speedQueryFetch(profile, {
|
|
66
71
|
stream: true,
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
};
|
|
70
|
-
|
|
71
|
-
const response = await fetch(profile.baseUrl.replace(/\/$/u, "") + "/chat/completions", {
|
|
72
|
-
method: "POST",
|
|
73
|
-
headers: { "Content-Type": "application/json" },
|
|
74
|
-
body: JSON.stringify(body),
|
|
75
|
-
signal: AbortSignal.timeout(SPEED_QUERY_TIMEOUT_MS),
|
|
72
|
+
streamOptions: { include_usage: true },
|
|
73
|
+
errorLabel: "oMLX speed query",
|
|
76
74
|
});
|
|
77
75
|
|
|
78
|
-
if (!response.ok) {
|
|
79
|
-
throw new Error(`oMLX speed query failed: ${response.status} ${response.statusText}`);
|
|
80
|
-
}
|
|
81
|
-
|
|
82
76
|
const text = await response.text();
|
|
83
77
|
let usage = null;
|
|
84
78
|
for (const line of text.split("\n").reverse()) {
|
package/src/commands/main.mjs
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import { findLlamaServer, ensureDirs } from "../config.mjs";
|
|
2
2
|
import { backendFor } from "../backends.mjs";
|
|
3
3
|
import { scanGgufModels } from "../scan.mjs";
|
|
4
|
-
import { scanMlxModels } from "../mlx-discovery.mjs";
|
|
5
4
|
import { loadProfiles } from "../profiles.mjs";
|
|
6
5
|
import { hasPi } from "../harness-pi.mjs";
|
|
7
6
|
import { offerManagedLlamaRuntimeUpdate } from "../runtime.mjs";
|
|
8
|
-
import {
|
|
7
|
+
import { offerManagedOmlxUpdate, hasOmlx } from "../omlx-runtime.mjs";
|
|
8
|
+
import { hasLmStudioInstalled, scanManagedModels } from "../managed.mjs";
|
|
9
9
|
import { recommendedModel } from "../recommendations.mjs";
|
|
10
10
|
import { pc, startInteractive, createPrompt } from "../ui.mjs";
|
|
11
11
|
import { onboardFlow } from "./onboard.mjs";
|
|
@@ -19,6 +19,7 @@ export async function mainFlow() {
|
|
|
19
19
|
const runtimePrompt = createPrompt();
|
|
20
20
|
try {
|
|
21
21
|
await offerManagedLlamaRuntimeUpdate(runtimePrompt);
|
|
22
|
+
await offerManagedOmlxUpdate(runtimePrompt);
|
|
22
23
|
} finally {
|
|
23
24
|
runtimePrompt.close();
|
|
24
25
|
}
|
|
@@ -27,10 +28,9 @@ export async function mainFlow() {
|
|
|
27
28
|
const llamaBinary = await findLlamaServer();
|
|
28
29
|
const { models: ggufModels, drafters } = await scanGgufModels();
|
|
29
30
|
const managedModels = await scanManagedModels();
|
|
30
|
-
const mlxModels = await scanMlxModels();
|
|
31
31
|
const profiles = await loadProfiles();
|
|
32
32
|
const hasAnyBackend = llamaBinary || managedModels.some((item) => item.status === "ok" && item.models.length > 0);
|
|
33
|
-
const hasAnyModels = ggufModels.length > 0 ||
|
|
33
|
+
const hasAnyModels = ggufModels.length > 0 || managedModels.some((item) => item.status === "ok" && item.models.length > 0);
|
|
34
34
|
|
|
35
35
|
const piInstalled = await hasPi();
|
|
36
36
|
const needsLlama = ggufModels.length > 0 || profiles.some((profile) => backendFor(profile.backend).type === "local-server");
|
|
@@ -58,14 +58,14 @@ export async function mainFlow() {
|
|
|
58
58
|
if (!process.stdin.isTTY) return await statusCommand();
|
|
59
59
|
|
|
60
60
|
startInteractive("offgrid-ai");
|
|
61
|
-
return await modelCommandCenter({ profiles, ggufModels, managedModels,
|
|
61
|
+
return await modelCommandCenter({ profiles, ggufModels, managedModels, drafters });
|
|
62
62
|
}
|
|
63
63
|
|
|
64
64
|
async function printNoModelsHelp(llamaBinary) {
|
|
65
65
|
console.log(pc.yellow("No models found."));
|
|
66
66
|
console.log(pc.dim("You need to download a model to use offgrid-ai.\n"));
|
|
67
67
|
|
|
68
|
-
const omlxInstalled = await
|
|
68
|
+
const omlxInstalled = await hasOmlx();
|
|
69
69
|
const lmStudioInstalled = hasLmStudioInstalled();
|
|
70
70
|
const hasBackends = llamaBinary || omlxInstalled || lmStudioInstalled;
|
|
71
71
|
if (!hasBackends) {
|
|
@@ -86,4 +86,4 @@ async function printNoModelsHelp(llamaBinary) {
|
|
|
86
86
|
console.log(pc.dim(` Recommended: ${model.label}`));
|
|
87
87
|
}
|
|
88
88
|
if (omlxInstalled) console.log(pc.bold(" omlx start"));
|
|
89
|
-
}
|
|
89
|
+
}
|
package/src/commands/models.mjs
CHANGED
|
@@ -3,10 +3,10 @@ import { backendFor, BACKENDS } from "../backends.mjs";
|
|
|
3
3
|
import { createProfileFromModel, readProfile, saveProfile, deleteProfile, profileJsonPath } from "../profiles.mjs";
|
|
4
4
|
import { isProfileRunning, isProfileServerUp, modelAvailableOnServer, stopProfile } from "../process.mjs";
|
|
5
5
|
import { syncPiConfig, removeFromPiConfig } from "../harness-pi.mjs";
|
|
6
|
-
import { configureLocalProfile } from "../profile-setup.mjs";
|
|
6
|
+
import { configureLocalProfile, configureManagedProfile } from "../profile-setup.mjs";
|
|
7
7
|
import { pc, startInteractive, createPrompt, modelSelect } from "../ui.mjs";
|
|
8
8
|
import { buildCatalogItems, createManagedProfile, itemKey, loadModelCatalog, normalizeCatalog } from "../model-catalog.mjs";
|
|
9
|
-
import { modelSelectOption, modelNameWidth, inferBackendId, formatSourceLabel, discoverySourceForItem, printGgufModelDetails,
|
|
9
|
+
import { modelSelectOption, modelNameWidth, inferBackendId, formatSourceLabel, discoverySourceForItem, printGgufModelDetails, printManagedModelDetails, printWorkspaceHeader, printBenchmarkLine, printProfileDetails } from "../model-presenters.mjs";
|
|
10
10
|
import { runProfile } from "./run.mjs";
|
|
11
11
|
|
|
12
12
|
const { stripVTControlCharacters } = await import("node:util");
|
|
@@ -83,9 +83,7 @@ export async function modelCommandCenter(initialCatalog) {
|
|
|
83
83
|
|
|
84
84
|
const groups = [];
|
|
85
85
|
const backendColors = {
|
|
86
|
-
"mlx-vlm": pc.yellow,
|
|
87
86
|
"llama-cpp": pc.cyan,
|
|
88
|
-
"llama-cpp-mtp": pc.blue,
|
|
89
87
|
omlx: pc.magenta,
|
|
90
88
|
};
|
|
91
89
|
for (const { backendId, sourceId, items } of byBackend.values()) {
|
|
@@ -185,7 +183,6 @@ async function performAction(prompt, action, item) {
|
|
|
185
183
|
if (action === "inspect") {
|
|
186
184
|
if (item.type === "profile") return await printProfileDetails(await readProfile(item.profile.id));
|
|
187
185
|
if (item.type === "managed") return printManagedModelDetails(item.model, BACKENDS[item.backendId]);
|
|
188
|
-
if (item.model?.format === "mlx") return await printMlxModelDetails(item.model);
|
|
189
186
|
return printGgufModelDetails(item.model, item.drafter);
|
|
190
187
|
}
|
|
191
188
|
if (action === "benchmark") {
|
|
@@ -197,7 +194,7 @@ async function performAction(prompt, action, item) {
|
|
|
197
194
|
return await benchmarkFlow();
|
|
198
195
|
}
|
|
199
196
|
if (action === "run") return await runItem(item);
|
|
200
|
-
if (action === "reconfigure" || action === "setup") return await setupItem(prompt, item
|
|
197
|
+
if (action === "reconfigure" || action === "setup") return await setupItem(prompt, item);
|
|
201
198
|
if (action === "remove" && item.type === "profile") return await removeProfileInteractive(item.profile.id);
|
|
202
199
|
}
|
|
203
200
|
|
|
@@ -209,30 +206,20 @@ function printProfileSaved(id) {
|
|
|
209
206
|
console.log(pc.dim(` Profile: ${profileJsonPath(id)}`));
|
|
210
207
|
}
|
|
211
208
|
|
|
212
|
-
async function setupItem(prompt, item
|
|
209
|
+
async function setupItem(prompt, item) {
|
|
213
210
|
if (item.type === "profile") {
|
|
214
211
|
const configured = await configureLocalProfile(prompt, await readProfile(item.profile.id));
|
|
215
212
|
if (!configured) return;
|
|
216
|
-
await saveProfile(configured
|
|
213
|
+
await saveProfile(configured);
|
|
217
214
|
await syncPiConfig(configured);
|
|
218
215
|
printProfileSaved(configured.id);
|
|
219
216
|
return;
|
|
220
217
|
}
|
|
221
218
|
if (item.type === "managed") {
|
|
222
219
|
const profile = createManagedProfile(item.model, item.backendId);
|
|
223
|
-
await
|
|
224
|
-
await syncPiConfig(profile);
|
|
225
|
-
printProfileSaved(profile.id);
|
|
226
|
-
return;
|
|
227
|
-
}
|
|
228
|
-
// MLX models: build a mlx-vlm profile and run interactive config.
|
|
229
|
-
if (item.model.format === "mlx") {
|
|
230
|
-
const { createProfileFromMlxModel } = await import("../profiles.mjs");
|
|
231
|
-
const { configureMlxProfile } = await import("../profile-setup.mjs");
|
|
232
|
-
const profile = await createProfileFromMlxModel(item.model);
|
|
233
|
-
const configured = await configureMlxProfile(prompt, profile);
|
|
220
|
+
const configured = await configureManagedProfile(prompt, profile);
|
|
234
221
|
if (!configured) return;
|
|
235
|
-
await saveProfile(configured
|
|
222
|
+
await saveProfile(configured);
|
|
236
223
|
await syncPiConfig(configured);
|
|
237
224
|
printProfileSaved(configured.id);
|
|
238
225
|
return;
|
|
@@ -240,7 +227,7 @@ async function setupItem(prompt, item, action) {
|
|
|
240
227
|
const profile = await createProfileFromModel(item.model, null, item.drafter?.path);
|
|
241
228
|
const configured = await configureLocalProfile(prompt, profile);
|
|
242
229
|
if (!configured) return;
|
|
243
|
-
await saveProfile(configured
|
|
230
|
+
await saveProfile(configured);
|
|
244
231
|
await syncPiConfig(configured);
|
|
245
232
|
printProfileSaved(configured.id);
|
|
246
233
|
}
|
package/src/commands/onboard.mjs
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { ensureDirs, findLlamaServer, hasHomebrew, HF_HUB_DIR } from "../config.mjs";
|
|
1
|
+
import { ensureDirs, findLlamaServer, ensureHomebrewFor, HF_HUB_DIR } from "../config.mjs";
|
|
3
2
|
import { BACKENDS } from "../backends.mjs";
|
|
4
3
|
import { scanGgufModels } from "../scan.mjs";
|
|
5
|
-
import { scanMlxModels } from "../mlx-discovery.mjs";
|
|
6
4
|
import { hasPi } from "../harness-pi.mjs";
|
|
7
5
|
import { offerManagedLlamaRuntimeUpdate } from "../runtime.mjs";
|
|
6
|
+
import { ensureOmlxRuntime } from "../omlx-runtime.mjs";
|
|
8
7
|
import { scanManagedModels } from "../managed.mjs";
|
|
9
8
|
import { BACKEND_INSTALL_CHOICES, BACKEND_INSTALLERS } from "../backend-installers.mjs";
|
|
10
9
|
import { recommendedModel, selectFormat, allFittingModels } from "../recommendations.mjs";
|
|
@@ -25,18 +24,18 @@ export async function onboardFlow() {
|
|
|
25
24
|
console.log(pc.dim("Let's make sure you have everything you need to run local models.\n"));
|
|
26
25
|
|
|
27
26
|
const llamaBinary = await ensureLlamaRuntime(prompt);
|
|
27
|
+
await ensureOmlxRuntime(prompt, run);
|
|
28
28
|
if (!(await ensurePi(prompt, run))) return;
|
|
29
29
|
|
|
30
|
-
const [{ models: ggufModels }, managedModels
|
|
30
|
+
const [{ models: ggufModels }, managedModels] = await Promise.all([
|
|
31
31
|
scanGgufModels(),
|
|
32
32
|
scanManagedModels(),
|
|
33
|
-
scanMlxModels(),
|
|
34
33
|
]);
|
|
35
34
|
const totalManaged = managedModels.reduce((sum, item) => sum + item.models.length, 0);
|
|
36
|
-
const hasModels = ggufModels.length > 0 || totalManaged > 0
|
|
35
|
+
const hasModels = ggufModels.length > 0 || totalManaged > 0;
|
|
37
36
|
|
|
38
37
|
if (hasModels) {
|
|
39
|
-
printFoundModels(ggufModels, managedModels,
|
|
38
|
+
printFoundModels(ggufModels, managedModels, llamaBinary);
|
|
40
39
|
} else {
|
|
41
40
|
const canDownload = await hasHuggingfaceHub();
|
|
42
41
|
if (canDownload) {
|
|
@@ -96,14 +95,11 @@ async function ensurePi(prompt, run) {
|
|
|
96
95
|
return true;
|
|
97
96
|
}
|
|
98
97
|
|
|
99
|
-
function printFoundModels(ggufModels, managedModels,
|
|
98
|
+
function printFoundModels(ggufModels, managedModels, llamaBinary) {
|
|
100
99
|
if (ggufModels.length > 0) {
|
|
101
100
|
console.log(pc.green(`✓ Found ${ggufModels.length} GGUF model${ggufModels.length === 1 ? "" : "s"}`));
|
|
102
101
|
if (!llamaBinary) console.log(pc.yellow("Install the managed llama.cpp runtime to run these GGUF models."));
|
|
103
102
|
}
|
|
104
|
-
if (mlxModels.length > 0) {
|
|
105
|
-
console.log(pc.green(`✓ Found ${mlxModels.length} MLX model${mlxModels.length === 1 ? "" : "s"}`));
|
|
106
|
-
}
|
|
107
103
|
for (const { backendId, models, status, reason } of managedModels) {
|
|
108
104
|
if (status === "unavailable") {
|
|
109
105
|
console.log(pc.yellow(`${BACKENDS[backendId].label}: unavailable${reason ? ` — ${reason}` : ""}`));
|
|
@@ -117,7 +113,7 @@ async function offerModelDownload(prompt) {
|
|
|
117
113
|
const hardware = detectHardware();
|
|
118
114
|
const candidates = allFittingModels(hardware)
|
|
119
115
|
.map((entry) => ({ entry, format: selectFormat(entry, hardware) }))
|
|
120
|
-
.filter((item) => item.format
|
|
116
|
+
.filter((item) => item.format === "gguf");
|
|
121
117
|
if (candidates.length === 0) {
|
|
122
118
|
console.log(pc.yellow("No curated models fit your hardware."));
|
|
123
119
|
return false;
|
|
@@ -134,7 +130,7 @@ async function offerModelDownload(prompt) {
|
|
|
134
130
|
const shouldDownload = await prompt.yesNo("Download " + primary.entry.label + " (" + primary.format + ")?", true);
|
|
135
131
|
if (!shouldDownload) return false;
|
|
136
132
|
|
|
137
|
-
const hfRef = primary.
|
|
133
|
+
const hfRef = primary.entry.gguf;
|
|
138
134
|
try {
|
|
139
135
|
const plan = await resolveHfDownload(hfRef);
|
|
140
136
|
console.log(pc.dim("Total size: " + formatBytes(plan.totalSizeBytes)));
|
|
@@ -174,35 +170,6 @@ async function offerBackendInstall(prompt, run) {
|
|
|
174
170
|
await installBackend(prompt, run, choice, model);
|
|
175
171
|
}
|
|
176
172
|
|
|
177
|
-
async function ensureHomebrewFor(prompt, run, label) {
|
|
178
|
-
if (await hasHomebrew()) return true;
|
|
179
|
-
const install = await prompt.yesNo(`Homebrew is needed to install ${label}. Install Homebrew now?`, true);
|
|
180
|
-
if (!install) {
|
|
181
|
-
console.log(pc.dim(`Install ${label} manually, or install Homebrew from https://brew.sh and run offgrid-ai again.`));
|
|
182
|
-
return false;
|
|
183
|
-
}
|
|
184
|
-
console.log(pc.cyan("Installing Homebrew..."));
|
|
185
|
-
try {
|
|
186
|
-
await run("/bin/bash", ["-c", "NONINTERACTIVE=1 /bin/bash -c \"$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)\""], "Homebrew");
|
|
187
|
-
for (const path of ["/opt/homebrew/bin", "/usr/local/bin"]) {
|
|
188
|
-
if (existsSync(path)) {
|
|
189
|
-
process.env.PATH = `${path}:${process.env.PATH}`;
|
|
190
|
-
break;
|
|
191
|
-
}
|
|
192
|
-
}
|
|
193
|
-
} catch {
|
|
194
|
-
console.log(pc.red("✗ Homebrew installation failed."));
|
|
195
|
-
console.log(pc.dim("Install it manually from https://brew.sh, then run offgrid-ai again."));
|
|
196
|
-
return false;
|
|
197
|
-
}
|
|
198
|
-
if (!(await hasHomebrew())) {
|
|
199
|
-
console.log(pc.red("Homebrew was installed but not found on PATH. Restart your terminal and run offgrid-ai again."));
|
|
200
|
-
return false;
|
|
201
|
-
}
|
|
202
|
-
console.log(pc.green("✓ Homebrew found"));
|
|
203
|
-
return true;
|
|
204
|
-
}
|
|
205
|
-
|
|
206
173
|
async function installBackend(prompt, run, backendId, model) {
|
|
207
174
|
const installer = BACKEND_INSTALLERS[backendId];
|
|
208
175
|
if (!(await ensureHomebrewFor(prompt, run, installer.label))) return;
|
|
@@ -236,4 +203,4 @@ async function installAllBackends(prompt, run, model) {
|
|
|
236
203
|
|
|
237
204
|
async function runInstallerCommands(run, installer) {
|
|
238
205
|
for (const [cmd, args, label] of installer.commands) await run(cmd, args, label);
|
|
239
|
-
}
|
|
206
|
+
}
|
package/src/commands/run.mjs
CHANGED
|
@@ -77,7 +77,7 @@ async function ensureLocalServer(profile, backend, options) {
|
|
|
77
77
|
console.log(pc.yellow("Vision projector is not supported by this llama.cpp build. Retrying text-only."));
|
|
78
78
|
console.log(pc.dim("Update llama.cpp later to re-enable vision for this model."));
|
|
79
79
|
const textOnly = textOnlyProfile(profile);
|
|
80
|
-
await saveProfile(textOnly
|
|
80
|
+
await saveProfile(textOnly);
|
|
81
81
|
return { handled: true, result: await runProfile(textOnly, { ...options, textOnlyRetry: true }) };
|
|
82
82
|
}
|
|
83
83
|
throw err;
|
package/src/commands/status.mjs
CHANGED
|
@@ -2,6 +2,10 @@ import { ensureDirs } from "../config.mjs";
|
|
|
2
2
|
import { backendFor } from "../backends.mjs";
|
|
3
3
|
import { loadProfiles } from "../profiles.mjs";
|
|
4
4
|
import { profileRuntimeStatus } from "../process.mjs";
|
|
5
|
+
import { existsSync } from "node:fs";
|
|
6
|
+
import { execFileSync } from "node:child_process";
|
|
7
|
+
import { homedir } from "node:os";
|
|
8
|
+
import { join } from "node:path";
|
|
5
9
|
import { pc, renderRows, renderCard } from "../ui.mjs";
|
|
6
10
|
|
|
7
11
|
export async function statusCommand() {
|
|
@@ -38,6 +42,21 @@ export async function statusCommand() {
|
|
|
38
42
|
|
|
39
43
|
console.log(renderCard("Status", renderRows(summaryRows), { formatBorder: running.length > 0 ? pc.green : pc.dim }));
|
|
40
44
|
|
|
45
|
+
// Show oMLX cache disk usage if cache exists
|
|
46
|
+
const omlxCacheDir = join(homedir(), ".omlx", "cache");
|
|
47
|
+
if (existsSync(omlxCacheDir)) {
|
|
48
|
+
try {
|
|
49
|
+
const duOutput = execFileSync("du", ["-sh", omlxCacheDir], { encoding: "utf8" });
|
|
50
|
+
const cacheSize = duOutput.split(/\s+/)[0];
|
|
51
|
+
console.log("\n" + renderCard("oMLX cache", renderRows([
|
|
52
|
+
["Location", pc.dim(omlxCacheDir)],
|
|
53
|
+
["Disk usage", pc.bold(cacheSize)],
|
|
54
|
+
]), { formatBorder: pc.magenta }));
|
|
55
|
+
} catch {
|
|
56
|
+
// du not available — skip
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
|
|
41
60
|
if (managedUpMissing.length > 0 || managedUpNotLoaded.length > 0) {
|
|
42
61
|
const detailRows = [];
|
|
43
62
|
for (const { profile, status } of [...managedUpMissing, ...managedUpNotLoaded]) {
|
package/src/config.mjs
CHANGED
|
@@ -3,6 +3,7 @@ import { existsSync } from "node:fs";
|
|
|
3
3
|
import { homedir } from "node:os";
|
|
4
4
|
import { dirname, join } from "node:path";
|
|
5
5
|
import { readFile, writeFile } from "node:fs/promises";
|
|
6
|
+
import { pc } from "./ui.mjs";
|
|
6
7
|
|
|
7
8
|
// ── Base directories ──────────────────────────────────────────────────────
|
|
8
9
|
|
|
@@ -18,8 +19,8 @@ export const MANAGED_LLAMA_SERVER = join(RUNTIME_DIR, "bin", "llama-server");
|
|
|
18
19
|
// HuggingFace hub cache: $HF_HUB_CACHE, else $HF_HOME/hub, else
|
|
19
20
|
// ~/.cache/huggingface/hub. This is where huggingface_hub stores
|
|
20
21
|
// models--org--name/... and where offgrid-ai scans + downloads. Pointing at the
|
|
21
|
-
// hub (not the HF root) keeps the
|
|
22
|
-
//
|
|
22
|
+
// hub (not the HF root) keeps the GGUF scanner and the downloader on the
|
|
23
|
+
// same layout.
|
|
23
24
|
export const HF_HUB_DIR = process.env.HF_HUB_CACHE
|
|
24
25
|
|| (process.env.HF_HOME ? join(process.env.HF_HOME, "hub") : join(homedir(), ".cache", "huggingface", "hub"));
|
|
25
26
|
|
|
@@ -124,4 +125,49 @@ export async function hasHomebrew() {
|
|
|
124
125
|
} catch {
|
|
125
126
|
return false;
|
|
126
127
|
}
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
/**
|
|
131
|
+
* Install Homebrew non-interactively and add it to PATH for this process.
|
|
132
|
+
* Returns true if Homebrew is available after installation.
|
|
133
|
+
*/
|
|
134
|
+
export async function installHomebrew(run) {
|
|
135
|
+
await run("/bin/bash", ["-c", 'NONINTERACTIVE=1 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"'], "Homebrew");
|
|
136
|
+
for (const path of ["/opt/homebrew/bin", "/usr/local/bin"]) {
|
|
137
|
+
if (existsSync(path)) {
|
|
138
|
+
process.env.PATH = `${path}:${process.env.PATH}`;
|
|
139
|
+
break;
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
return await hasHomebrew();
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
/**
|
|
146
|
+
* Ensure Homebrew is installed, prompting the user if necessary.
|
|
147
|
+
* @param {object} prompt - UI prompt interface (needs yesNo)
|
|
148
|
+
* @param {function} run - runCommand function for verbose command execution
|
|
149
|
+
* @param {string} label - what we're installing (for the prompt message)
|
|
150
|
+
* @returns {Promise<boolean>} true if Homebrew is available
|
|
151
|
+
*/
|
|
152
|
+
export async function ensureHomebrewFor(prompt, run, label) {
|
|
153
|
+
if (await hasHomebrew()) return true;
|
|
154
|
+
const install = await prompt.yesNo(`Homebrew is needed to install ${label}. Install Homebrew now?`, true);
|
|
155
|
+
if (!install) {
|
|
156
|
+
console.log(pc.dim(`Install ${label} manually, or install Homebrew from https://brew.sh and run offgrid-ai again.`));
|
|
157
|
+
return false;
|
|
158
|
+
}
|
|
159
|
+
console.log(pc.cyan("Installing Homebrew..."));
|
|
160
|
+
try {
|
|
161
|
+
const success = await installHomebrew(run);
|
|
162
|
+
if (!success) {
|
|
163
|
+
console.log(pc.red("Homebrew was installed but not found on PATH. Restart your terminal and run offgrid-ai again."));
|
|
164
|
+
return false;
|
|
165
|
+
}
|
|
166
|
+
} catch {
|
|
167
|
+
console.log(pc.red("✗ Homebrew installation failed."));
|
|
168
|
+
console.log(pc.dim("Install it manually from https://brew.sh, then run offgrid-ai again."));
|
|
169
|
+
return false;
|
|
170
|
+
}
|
|
171
|
+
console.log(pc.green("✓ Homebrew found"));
|
|
172
|
+
return true;
|
|
127
173
|
}
|