offgrid-ai 0.9.5 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -6
- package/package.json +4 -3
- package/resources/hf-download.py +79 -0
- package/resources/mlxvlm-server-wrapper.py +112 -0
- package/resources/recommendations.json +60 -0
- package/src/backend-installers.mjs +1 -16
- package/src/backends.mjs +17 -45
- package/src/benchmark/finalize.mjs +9 -91
- package/src/benchmark/flow.mjs +8 -6
- package/src/benchmark/metrics.mjs +6 -45
- package/src/benchmark/pi-runner.mjs +5 -2
- package/src/benchmark/prepare.mjs +1 -1
- package/src/benchmark/stream-renderer.mjs +31 -2
- package/src/benchmark.mjs +3 -1
- package/src/commands/main.mjs +3 -5
- package/src/commands/models.mjs +27 -19
- package/src/commands/onboard.mjs +67 -9
- package/src/commands/run.mjs +20 -5
- package/src/commands/status.mjs +1 -1
- package/src/config.mjs +11 -2
- package/src/discovery-shared.mjs +44 -0
- package/src/hardware.mjs +49 -0
- package/src/harness-pi.mjs +25 -11
- package/src/huggingface.mjs +209 -0
- package/src/managed.mjs +1 -5
- package/src/mlx-discovery.mjs +290 -0
- package/src/mlx-flags.mjs +93 -0
- package/src/model-catalog.mjs +12 -6
- package/src/model-name.mjs +7 -25
- package/src/model-presenters.mjs +138 -28
- package/src/process.mjs +129 -32
- package/src/profile-setup.mjs +116 -0
- package/src/profiles.mjs +30 -0
- package/src/recommendations.mjs +56 -14
- package/src/scan.mjs +39 -8
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
// ── Backend-aware server speed metrics ───────────────────────────────────────
|
|
2
2
|
|
|
3
3
|
import { backendFor } from "../backends.mjs";
|
|
4
|
-
import { apiRootUrl } from "../process.mjs";
|
|
5
4
|
|
|
6
5
|
const BENCH_SPEED_PROMPT = "Write a one-sentence summary of machine learning.";
|
|
6
|
+
const SPEED_QUERY_TIMEOUT_MS = 120_000;
|
|
7
|
+
const SPEED_QUERY_MAX_TOKENS = 64;
|
|
7
8
|
|
|
8
9
|
export async function queryServerMetrics(profile) {
|
|
9
10
|
const backend = backendFor(profile.backend);
|
|
@@ -14,9 +15,6 @@ export async function queryServerMetrics(profile) {
|
|
|
14
15
|
if (backend.id === "omlx") {
|
|
15
16
|
return await queryOmlxMetrics(profile);
|
|
16
17
|
}
|
|
17
|
-
if (backend.id === "ollama") {
|
|
18
|
-
return await queryOllamaMetrics(profile);
|
|
19
|
-
}
|
|
20
18
|
|
|
21
19
|
throw new Error(`Unsupported backend for benchmark speed metrics: ${backend.id}`);
|
|
22
20
|
}
|
|
@@ -26,13 +24,14 @@ async function queryLlamaCppMetrics(profile) {
|
|
|
26
24
|
model: profile.modelAlias,
|
|
27
25
|
messages: [{ role: "user", content: BENCH_SPEED_PROMPT }],
|
|
28
26
|
stream: false,
|
|
27
|
+
max_tokens: SPEED_QUERY_MAX_TOKENS,
|
|
29
28
|
};
|
|
30
29
|
|
|
31
30
|
const response = await fetch(profile.baseUrl.replace(/\/$/u, "") + "/chat/completions", {
|
|
32
31
|
method: "POST",
|
|
33
32
|
headers: { "Content-Type": "application/json" },
|
|
34
33
|
body: JSON.stringify(body),
|
|
35
|
-
signal: AbortSignal.timeout(
|
|
34
|
+
signal: AbortSignal.timeout(SPEED_QUERY_TIMEOUT_MS),
|
|
36
35
|
});
|
|
37
36
|
|
|
38
37
|
if (!response.ok) {
|
|
@@ -66,13 +65,14 @@ async function queryOmlxMetrics(profile) {
|
|
|
66
65
|
messages: [{ role: "user", content: BENCH_SPEED_PROMPT }],
|
|
67
66
|
stream: true,
|
|
68
67
|
stream_options: { include_usage: true },
|
|
68
|
+
max_tokens: SPEED_QUERY_MAX_TOKENS,
|
|
69
69
|
};
|
|
70
70
|
|
|
71
71
|
const response = await fetch(profile.baseUrl.replace(/\/$/u, "") + "/chat/completions", {
|
|
72
72
|
method: "POST",
|
|
73
73
|
headers: { "Content-Type": "application/json" },
|
|
74
74
|
body: JSON.stringify(body),
|
|
75
|
-
signal: AbortSignal.timeout(
|
|
75
|
+
signal: AbortSignal.timeout(SPEED_QUERY_TIMEOUT_MS),
|
|
76
76
|
});
|
|
77
77
|
|
|
78
78
|
if (!response.ok) {
|
|
@@ -111,42 +111,3 @@ async function queryOmlxMetrics(profile) {
|
|
|
111
111
|
metricSource: "oMLX /v1/chat/completions streaming include_usage",
|
|
112
112
|
};
|
|
113
113
|
}
|
|
114
|
-
|
|
115
|
-
async function queryOllamaMetrics(profile) {
|
|
116
|
-
const body = {
|
|
117
|
-
model: profile.modelAlias,
|
|
118
|
-
prompt: BENCH_SPEED_PROMPT,
|
|
119
|
-
stream: false,
|
|
120
|
-
};
|
|
121
|
-
|
|
122
|
-
const apiBaseUrl = apiRootUrl(profile.baseUrl || backendFor(profile.backend).apiBaseUrl || "");
|
|
123
|
-
|
|
124
|
-
const response = await fetch(`${apiBaseUrl}/api/generate`, {
|
|
125
|
-
method: "POST",
|
|
126
|
-
headers: { "Content-Type": "application/json" },
|
|
127
|
-
body: JSON.stringify(body),
|
|
128
|
-
signal: AbortSignal.timeout(60000),
|
|
129
|
-
});
|
|
130
|
-
|
|
131
|
-
if (!response.ok) {
|
|
132
|
-
throw new Error(`Ollama speed query failed: ${response.status} ${response.statusText}`);
|
|
133
|
-
}
|
|
134
|
-
|
|
135
|
-
const data = await response.json();
|
|
136
|
-
const promptEvalNs = data.prompt_eval_duration ?? 0;
|
|
137
|
-
const evalNs = data.eval_duration ?? 0;
|
|
138
|
-
const loadNs = data.load_duration ?? 0;
|
|
139
|
-
|
|
140
|
-
const promptEvalCount = data.prompt_eval_count ?? 0;
|
|
141
|
-
const evalCount = data.eval_count ?? 0;
|
|
142
|
-
|
|
143
|
-
return {
|
|
144
|
-
prefillTokensPerSecond: promptEvalNs > 0 ? (promptEvalCount / (promptEvalNs / 1e9)) : null,
|
|
145
|
-
generationTokensPerSecond: evalNs > 0 ? (evalCount / (evalNs / 1e9)) : null,
|
|
146
|
-
ttftMs: promptEvalNs / 1e6,
|
|
147
|
-
modelLoadMs: loadNs / 1e6,
|
|
148
|
-
speculativeDecodeAcceptance: null,
|
|
149
|
-
kvCacheTokens: null,
|
|
150
|
-
metricSource: "Ollama /api/generate",
|
|
151
|
-
};
|
|
152
|
-
}
|
|
@@ -5,7 +5,7 @@ import { join } from "node:path";
|
|
|
5
5
|
import { spawn } from "node:child_process";
|
|
6
6
|
import {
|
|
7
7
|
BENCH_COLORS, renderStreamEvent,
|
|
8
|
-
formatToolCall, printFinalLine,
|
|
8
|
+
formatToolCall, printFinalLine, stopExecTimer,
|
|
9
9
|
} from "./stream-renderer.mjs";
|
|
10
10
|
import { piModelString } from "./shared.mjs";
|
|
11
11
|
|
|
@@ -58,7 +58,8 @@ export async function runBenchmarkInPi(profile, runDirectory, { signal } = {}) {
|
|
|
58
58
|
turnHadToolError: false,
|
|
59
59
|
modelPrinted: false,
|
|
60
60
|
activeTool: null,
|
|
61
|
-
|
|
61
|
+
execTimer: null,
|
|
62
|
+
status: { mode: "idle", toolName: null, bytes: 0, tokens: 0, execStartedAt: null },
|
|
62
63
|
};
|
|
63
64
|
|
|
64
65
|
function appendResponse(text) {
|
|
@@ -193,6 +194,7 @@ export async function runBenchmarkInPi(profile, runDirectory, { signal } = {}) {
|
|
|
193
194
|
return new Promise((resolve) => {
|
|
194
195
|
child.on("exit", async (code) => {
|
|
195
196
|
if (signal) signal.removeEventListener("abort", abortListener);
|
|
197
|
+
stopExecTimer(renderState);
|
|
196
198
|
if (streamBuffer.trim()) {
|
|
197
199
|
processLine(streamBuffer);
|
|
198
200
|
}
|
|
@@ -225,6 +227,7 @@ export async function runBenchmarkInPi(profile, runDirectory, { signal } = {}) {
|
|
|
225
227
|
|
|
226
228
|
child.on("error", async (err) => {
|
|
227
229
|
if (signal) signal.removeEventListener("abort", abortListener);
|
|
230
|
+
stopExecTimer(renderState);
|
|
228
231
|
await streamHandle.close();
|
|
229
232
|
await stderrHandle.close();
|
|
230
233
|
runResult.error = { message: err.message };
|
|
@@ -55,7 +55,7 @@ export async function prepareBenchmarkRun({ repoPath, benchmark, kind, modelId,
|
|
|
55
55
|
kind,
|
|
56
56
|
runId,
|
|
57
57
|
benchmark: { id: benchmark.id, title: benchmark.title, description: benchmark.description, prompt: benchmark.prompt },
|
|
58
|
-
model: { id: modelId, slug: modelSlug, displayName: parseModelName(modelId, modelSource === "
|
|
58
|
+
model: { id: modelId, slug: modelSlug, displayName: parseModelName(modelId, modelSource === "omlx" ? "omlx" : "local-gguf").display },
|
|
59
59
|
status: "prepared",
|
|
60
60
|
createdAt: now.toISOString(),
|
|
61
61
|
updatedAt: now.toISOString(),
|
|
@@ -107,6 +107,7 @@ export function renderStreamEvent(parsed, state, opts = {}) {
|
|
|
107
107
|
};
|
|
108
108
|
resetStatus(state, "exec", parsed.toolName);
|
|
109
109
|
printFinalLine(BENCH_COLORS.tool(formatToolStart(parsed.toolName, parsed.args ?? {}, state)));
|
|
110
|
+
startExecTimer(state);
|
|
110
111
|
break;
|
|
111
112
|
}
|
|
112
113
|
case "tool_execution_update": {
|
|
@@ -114,11 +115,13 @@ export function renderStreamEvent(parsed, state, opts = {}) {
|
|
|
114
115
|
if (text) {
|
|
115
116
|
if (verbose) process.stdout.write(BENCH_COLORS.toolOutput(text));
|
|
116
117
|
if (state.activeTool) state.activeTool.outputText = text;
|
|
117
|
-
|
|
118
|
+
state.status.bytes += Buffer.byteLength(text, "utf8");
|
|
119
|
+
printExecStatus(state);
|
|
118
120
|
}
|
|
119
121
|
break;
|
|
120
122
|
}
|
|
121
123
|
case "tool_execution_end": {
|
|
124
|
+
stopExecTimer(state);
|
|
122
125
|
const lines = formatToolEnd(parsed, state);
|
|
123
126
|
if (parsed.isError) state.turnHadToolError = true;
|
|
124
127
|
for (const line of lines) printFinalLine(line);
|
|
@@ -133,6 +136,7 @@ export function renderStreamEvent(parsed, state, opts = {}) {
|
|
|
133
136
|
break;
|
|
134
137
|
}
|
|
135
138
|
case "turn_end": {
|
|
139
|
+
stopExecTimer(state);
|
|
136
140
|
const usage = parsed.message?.usage;
|
|
137
141
|
const tokenPart = usage ? ` · ${formatTokens(usage.output ?? usage.totalTokens ?? 0)} tokens` : "";
|
|
138
142
|
const marker = state.turnHadToolError ? BENCH_COLORS.warning("⚠") : BENCH_COLORS.success("✓");
|
|
@@ -141,7 +145,7 @@ export function renderStreamEvent(parsed, state, opts = {}) {
|
|
|
141
145
|
break;
|
|
142
146
|
}
|
|
143
147
|
case "agent_end":
|
|
144
|
-
|
|
148
|
+
stopExecTimer(state);
|
|
145
149
|
break;
|
|
146
150
|
default:
|
|
147
151
|
break;
|
|
@@ -172,6 +176,31 @@ export function updateStatusFromDelta(state, delta, mode = state.status.mode) {
|
|
|
172
176
|
printStatusLine(BENCH_COLORS.dim(`Turn ${state.turn} ${modeLabel}${label} · ${bytes} (~${tokens} tokens)`));
|
|
173
177
|
}
|
|
174
178
|
|
|
179
|
+
/**
 * Begin tracking a new execution phase on the render state.
 *
 * Any ticker that is still running is stopped first, then the start time is
 * recorded and the byte counter is reset. On a TTY the status line is painted
 * immediately and repainted once per second; when stdout is not a TTY no
 * interval is created.
 *
 * @param {object} state - Render state; `state.status` and `state.execTimer` are mutated.
 */
export function startExecTimer(state) {
  // Drop a ticker left over from an earlier call before starting a new one.
  stopExecTimer(state);

  const { status } = state;
  status.execStartedAt = Date.now();
  status.bytes = 0;

  if (process.stdout.isTTY) {
    const repaint = () => printExecStatus(state);
    repaint();
    state.execTimer = setInterval(repaint, 1000);
  }
}
|
|
187
|
+
|
|
188
|
+
/**
 * Cancel the repaint interval stored on the render state, if there is one,
 * and clear the status line. The status line is cleared whether or not a
 * timer was running.
 *
 * @param {object} state - Render state; `state.execTimer` is reset to `null` when a timer was active.
 */
export function stopExecTimer(state) {
  const { execTimer } = state;
  if (execTimer) {
    clearInterval(execTimer);
    state.execTimer = null;
  }
  clearStatusLine();
}
|
|
195
|
+
|
|
196
|
+
/**
 * Paint the "running <tool>" status line: turn number, tool name, whole
 * seconds elapsed since `status.execStartedAt`, and the formatted byte count.
 * Does nothing when stdout is not a TTY.
 *
 * @param {object} state - Render state; reads `state.turn` and `state.status`.
 */
export function printExecStatus(state) {
  if (!process.stdout.isTTY) return;

  const { execStartedAt, toolName, bytes } = state.status;

  // No recorded start time means nothing has been timed yet: show 0s.
  let elapsedSeconds = 0;
  if (execStartedAt) {
    elapsedSeconds = Math.floor((Date.now() - execStartedAt) / 1000);
  }

  const toolLabel = toolName ?? "tool";
  const received = formatBytes(bytes);
  printStatusLine(BENCH_COLORS.dim(`Turn ${state.turn} running ${toolLabel}… ${elapsedSeconds}s · ${received}`));
}
|
|
203
|
+
|
|
175
204
|
export function formatToolStart(toolName, args, state) {
|
|
176
205
|
if (toolName === "read") return `→ read ${displayPath(args.path, state)}`;
|
|
177
206
|
if (toolName === "write") {
|
package/src/benchmark.mjs
CHANGED
|
@@ -6,5 +6,7 @@ export { findBenchmarkRepo, linkBenchmarkRepo } from "./benchmark/repo.mjs";
|
|
|
6
6
|
export { prepareBenchmarkRun } from "./benchmark/prepare.mjs";
|
|
7
7
|
export { runBenchmarkInPi } from "./benchmark/pi-runner.mjs";
|
|
8
8
|
export { queryServerMetrics } from "./benchmark/metrics.mjs";
|
|
9
|
-
|
|
9
|
+
// unloadModelFromServer now lives in src/process.mjs (managed-server counterpart to stopProfile).
|
|
10
|
+
export { unloadModelFromServer } from "./process.mjs";
|
|
11
|
+
export { finalizeBenchmarkRun, renderBenchmarkSummary } from "./benchmark/finalize.mjs";
|
|
10
12
|
export { benchmarkForProfile, benchmarkFlow } from "./benchmark/flow.mjs";
|
package/src/commands/main.mjs
CHANGED
|
@@ -4,7 +4,7 @@ import { scanGgufModels } from "../scan.mjs";
|
|
|
4
4
|
import { loadProfiles } from "../profiles.mjs";
|
|
5
5
|
import { hasPi } from "../harness-pi.mjs";
|
|
6
6
|
import { offerManagedLlamaRuntimeUpdate } from "../runtime.mjs";
|
|
7
|
-
import { hasLmStudioInstalled,
|
|
7
|
+
import { hasLmStudioInstalled, hasOmlxInstalled, scanManagedModels } from "../managed.mjs";
|
|
8
8
|
import { recommendedModel } from "../recommendations.mjs";
|
|
9
9
|
import { pc, startInteractive, createPrompt } from "../ui.mjs";
|
|
10
10
|
import { onboardFlow } from "./onboard.mjs";
|
|
@@ -63,9 +63,9 @@ async function printNoModelsHelp(llamaBinary) {
|
|
|
63
63
|
console.log(pc.yellow("No models found."));
|
|
64
64
|
console.log(pc.dim("You need to download a model to use offgrid-ai.\n"));
|
|
65
65
|
|
|
66
|
-
const
|
|
66
|
+
const omlxInstalled = await hasOmlxInstalled();
|
|
67
67
|
const lmStudioInstalled = hasLmStudioInstalled();
|
|
68
|
-
const hasBackends = llamaBinary ||
|
|
68
|
+
const hasBackends = llamaBinary || omlxInstalled || lmStudioInstalled;
|
|
69
69
|
if (!hasBackends) {
|
|
70
70
|
console.log(pc.dim("Run offgrid-ai to install a backend and download a model."));
|
|
71
71
|
return;
|
|
@@ -73,7 +73,6 @@ async function printNoModelsHelp(llamaBinary) {
|
|
|
73
73
|
|
|
74
74
|
console.log(pc.bold("Backend status:"));
|
|
75
75
|
console.log(` ${lmStudioInstalled ? pc.green("✓") : pc.red("✗")} LM Studio ${lmStudioInstalled ? "— installed" : "— not installed"}`);
|
|
76
|
-
console.log(` ${ollamaInstalled ? pc.green("✓") : pc.red("✗")} Ollama ${ollamaInstalled ? "— installed" : "— not installed"}`);
|
|
77
76
|
console.log(` ${omlxInstalled ? pc.green("✓") : pc.red("✗")} oMLX ${omlxInstalled ? "— installed" : "— not installed"}`);
|
|
78
77
|
console.log(` ${llamaBinary ? pc.green("✓") : pc.red("✗")} llama-server ${llamaBinary ? "— installed" : "— not installed"}`);
|
|
79
78
|
console.log();
|
|
@@ -84,6 +83,5 @@ async function printNoModelsHelp(llamaBinary) {
|
|
|
84
83
|
console.log(" Open LM Studio → browse models → download");
|
|
85
84
|
console.log(pc.dim(` Recommended: ${model.label}`));
|
|
86
85
|
}
|
|
87
|
-
if (ollamaInstalled) console.log(pc.bold(` ollama pull ${model.ollama}`));
|
|
88
86
|
if (omlxInstalled) console.log(pc.bold(" omlx start"));
|
|
89
87
|
}
|
package/src/commands/models.mjs
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import { ensureDirs } from "../config.mjs";
|
|
2
2
|
import { backendFor, BACKENDS } from "../backends.mjs";
|
|
3
3
|
import { createProfileFromModel, readProfile, saveProfile, deleteProfile, profileJsonPath } from "../profiles.mjs";
|
|
4
|
-
import { isProfileRunning, isProfileServerUp, stopProfile } from "../process.mjs";
|
|
4
|
+
import { isProfileRunning, isProfileServerUp, modelAvailableOnServer, stopProfile } from "../process.mjs";
|
|
5
5
|
import { syncPiConfig, removeFromPiConfig } from "../harness-pi.mjs";
|
|
6
6
|
import { configureLocalProfile } from "../profile-setup.mjs";
|
|
7
7
|
import { pc, startInteractive, createPrompt } from "../ui.mjs";
|
|
8
8
|
import { buildCatalogItems, createManagedProfile, itemKey, loadModelCatalog, normalizeCatalog } from "../model-catalog.mjs";
|
|
9
|
-
import { modelSelectOption, modelNameWidth, printGgufModelDetails, printManagedModelDetails, printWorkspaceHeader, printBenchmarkLine, printProfileDetails } from "../model-presenters.mjs";
|
|
9
|
+
import { modelSelectOption, modelNameWidth, printGgufModelDetails, printMlxModelDetails, printManagedModelDetails, printWorkspaceHeader, printBenchmarkLine, printProfileDetails } from "../model-presenters.mjs";
|
|
10
10
|
import { runProfile } from "./run.mjs";
|
|
11
11
|
|
|
12
12
|
const { stripVTControlCharacters } = await import("node:util");
|
|
@@ -40,15 +40,17 @@ export async function modelCommandCenter(initialCatalog) {
|
|
|
40
40
|
}
|
|
41
41
|
|
|
42
42
|
const runningProfilesNow = [];
|
|
43
|
-
const
|
|
43
|
+
const modelMissingIds = new Set();
|
|
44
44
|
for (const profile of normalized.profiles) {
|
|
45
45
|
if (await isProfileRunning(profile)) {
|
|
46
46
|
runningProfilesNow.push(profile);
|
|
47
47
|
continue;
|
|
48
48
|
}
|
|
49
|
-
if (backendFor(profile.backend).type === "managed-server" && await isProfileServerUp(profile))
|
|
49
|
+
if (backendFor(profile.backend).type === "managed-server" && await isProfileServerUp(profile)) {
|
|
50
|
+
if (!(await modelAvailableOnServer(profile))) modelMissingIds.add(profile.id);
|
|
51
|
+
}
|
|
50
52
|
}
|
|
51
|
-
printWorkspaceHeader(normalized, runningProfilesNow,
|
|
53
|
+
printWorkspaceHeader(normalized, runningProfilesNow, modelMissingIds);
|
|
52
54
|
await printBenchmarkLine();
|
|
53
55
|
|
|
54
56
|
const nameWidth = modelNameWidth(allItems);
|
|
@@ -57,13 +59,13 @@ export async function modelCommandCenter(initialCatalog) {
|
|
|
57
59
|
if (item.type === "profile") {
|
|
58
60
|
if (item.fileMissing) return "missing";
|
|
59
61
|
if (runningProfilesNow.some((profile) => profile.id === item.profile.id)) return "running";
|
|
60
|
-
if (
|
|
62
|
+
if (modelMissingIds.has(item.profile.id)) return "missing";
|
|
61
63
|
return "ready";
|
|
62
64
|
}
|
|
63
65
|
return "setup";
|
|
64
66
|
};
|
|
65
67
|
|
|
66
|
-
const groupOrder = ["running", "
|
|
68
|
+
const groupOrder = ["running", "ready", "setup", "missing"];
|
|
67
69
|
const grouped = new Map(groupOrder.map((key) => [key, []]));
|
|
68
70
|
for (const item of allItems) grouped.get(statusFor(item)).push(item);
|
|
69
71
|
|
|
@@ -72,8 +74,8 @@ export async function modelCommandCenter(initialCatalog) {
|
|
|
72
74
|
const bucket = grouped.get(group);
|
|
73
75
|
if (!bucket || bucket.length === 0) continue;
|
|
74
76
|
for (const item of bucket) {
|
|
75
|
-
const opt = modelSelectOption(item, { runningProfilesNow,
|
|
76
|
-
choices.push({ value: opt.value, label: opt.label });
|
|
77
|
+
const opt = modelSelectOption(item, { runningProfilesNow, modelMissingIds, nameWidth, managedModels: catalog.managedModels });
|
|
78
|
+
choices.push({ value: opt.value, label: opt.label, hint: opt.hint });
|
|
77
79
|
}
|
|
78
80
|
}
|
|
79
81
|
|
|
@@ -154,6 +156,7 @@ async function performAction(prompt, action, item) {
|
|
|
154
156
|
if (action === "inspect") {
|
|
155
157
|
if (item.type === "profile") return await printProfileDetails(await readProfile(item.profile.id));
|
|
156
158
|
if (item.type === "managed") return printManagedModelDetails(item.model, BACKENDS[item.backendId]);
|
|
159
|
+
if (item.model?.format === "mlx") return await printMlxModelDetails(item.model);
|
|
157
160
|
return printGgufModelDetails(item.model, item.drafter);
|
|
158
161
|
}
|
|
159
162
|
if (action === "benchmark") {
|
|
@@ -164,20 +167,13 @@ async function performAction(prompt, action, item) {
|
|
|
164
167
|
const { benchmarkFlow } = await import("../benchmark.mjs");
|
|
165
168
|
return await benchmarkFlow();
|
|
166
169
|
}
|
|
167
|
-
if (action === "run") return await runItem(
|
|
170
|
+
if (action === "run") return await runItem(item);
|
|
168
171
|
if (action === "reconfigure" || action === "setup") return await setupItem(prompt, item, action);
|
|
169
172
|
if (action === "remove" && item.type === "profile") return await removeProfileInteractive(item.profile.id);
|
|
170
173
|
}
|
|
171
174
|
|
|
172
|
-
async function runItem(
|
|
173
|
-
|
|
174
|
-
const profile = await createProfileFromModel(item.model, null, item.drafter?.path);
|
|
175
|
-
const configured = await configureLocalProfile(prompt, profile);
|
|
176
|
-
if (!configured) return;
|
|
177
|
-
await saveProfile(configured);
|
|
178
|
-
await syncPiConfig(configured);
|
|
179
|
-
printProfileSaved(configured.id);
|
|
180
|
-
return await runProfile(configured);
|
|
175
|
+
async function runItem(item) {
|
|
176
|
+
return await runProfile(await readProfile(item.profile.id));
|
|
181
177
|
}
|
|
182
178
|
|
|
183
179
|
function printProfileSaved(id) {
|
|
@@ -200,6 +196,18 @@ async function setupItem(prompt, item, action) {
|
|
|
200
196
|
printProfileSaved(profile.id);
|
|
201
197
|
return;
|
|
202
198
|
}
|
|
199
|
+
// MLX models: build a mlx-vlm profile and run interactive config.
|
|
200
|
+
if (item.model.format === "mlx") {
|
|
201
|
+
const { createProfileFromMlxModel } = await import("../profiles.mjs");
|
|
202
|
+
const { configureMlxProfile } = await import("../profile-setup.mjs");
|
|
203
|
+
const profile = await createProfileFromMlxModel(item.model);
|
|
204
|
+
const configured = await configureMlxProfile(prompt, profile);
|
|
205
|
+
if (!configured) return;
|
|
206
|
+
await saveProfile(configured, { writeCommand: true });
|
|
207
|
+
await syncPiConfig(configured);
|
|
208
|
+
printProfileSaved(configured.id);
|
|
209
|
+
return;
|
|
210
|
+
}
|
|
203
211
|
const profile = await createProfileFromModel(item.model, null, item.drafter?.path);
|
|
204
212
|
const configured = await configureLocalProfile(prompt, profile);
|
|
205
213
|
if (!configured) return;
|
package/src/commands/onboard.mjs
CHANGED
|
@@ -1,14 +1,17 @@
|
|
|
1
1
|
import { existsSync } from "node:fs";
|
|
2
|
-
import { ensureDirs, findLlamaServer, hasHomebrew } from "../config.mjs";
|
|
2
|
+
import { ensureDirs, findLlamaServer, hasHomebrew, HF_HUB_DIR } from "../config.mjs";
|
|
3
3
|
import { BACKENDS } from "../backends.mjs";
|
|
4
4
|
import { scanGgufModels } from "../scan.mjs";
|
|
5
|
+
import { scanMlxModels } from "../mlx-discovery.mjs";
|
|
5
6
|
import { hasPi } from "../harness-pi.mjs";
|
|
6
7
|
import { offerManagedLlamaRuntimeUpdate } from "../runtime.mjs";
|
|
7
8
|
import { scanManagedModels } from "../managed.mjs";
|
|
8
9
|
import { BACKEND_INSTALL_CHOICES, BACKEND_INSTALLERS } from "../backend-installers.mjs";
|
|
9
|
-
import {
|
|
10
|
+
import { recommendedModel, selectFormat, allFittingModels } from "../recommendations.mjs";
|
|
11
|
+
import { hasHuggingfaceHub, resolveHfDownload, downloadToHfCache } from "../huggingface.mjs";
|
|
12
|
+
import { detectHardware, getFreeDiskBytes, installedRamGB } from "../hardware.mjs";
|
|
10
13
|
import { runCommand } from "../exec.mjs";
|
|
11
|
-
import { pc, renderRows, renderSection, startInteractive, createPrompt } from "../ui.mjs";
|
|
14
|
+
import { pc, formatBytes, renderRows, renderSection, startInteractive, createPrompt } from "../ui.mjs";
|
|
12
15
|
|
|
13
16
|
export async function onboardFlow() {
|
|
14
17
|
await ensureDirs();
|
|
@@ -24,14 +27,22 @@ export async function onboardFlow() {
|
|
|
24
27
|
const llamaBinary = await ensureLlamaRuntime(prompt);
|
|
25
28
|
if (!(await ensurePi(prompt, run))) return;
|
|
26
29
|
|
|
27
|
-
const { models: ggufModels } = await
|
|
28
|
-
|
|
30
|
+
const [{ models: ggufModels }, managedModels, mlxModels] = await Promise.all([
|
|
31
|
+
scanGgufModels(),
|
|
32
|
+
scanManagedModels(),
|
|
33
|
+
scanMlxModels(),
|
|
34
|
+
]);
|
|
29
35
|
const totalManaged = managedModels.reduce((sum, item) => sum + item.models.length, 0);
|
|
30
|
-
const hasModels = ggufModels.length > 0 || totalManaged > 0;
|
|
36
|
+
const hasModels = ggufModels.length > 0 || totalManaged > 0 || mlxModels.length > 0;
|
|
31
37
|
|
|
32
38
|
if (hasModels) {
|
|
33
|
-
printFoundModels(ggufModels, managedModels, llamaBinary);
|
|
39
|
+
printFoundModels(ggufModels, managedModels, mlxModels, llamaBinary);
|
|
34
40
|
} else {
|
|
41
|
+
const canDownload = await hasHuggingfaceHub();
|
|
42
|
+
if (canDownload) {
|
|
43
|
+
const downloaded = await offerModelDownload(prompt);
|
|
44
|
+
if (downloaded) return;
|
|
45
|
+
}
|
|
35
46
|
await offerBackendInstall(prompt, run);
|
|
36
47
|
return;
|
|
37
48
|
}
|
|
@@ -52,7 +63,7 @@ async function ensureLlamaRuntime(prompt) {
|
|
|
52
63
|
]), { formatBorder: pc.cyan }));
|
|
53
64
|
await offerManagedLlamaRuntimeUpdate(prompt);
|
|
54
65
|
llamaBinary = await findLlamaServer();
|
|
55
|
-
if (!llamaBinary) console.log(pc.yellow("Skipping llama.cpp for now. You can still use
|
|
66
|
+
if (!llamaBinary) console.log(pc.yellow("Skipping llama.cpp for now. You can still use oMLX, or run offgrid-ai again to install the managed runtime."));
|
|
56
67
|
}
|
|
57
68
|
if (llamaBinary) console.log(pc.green(`✓ llama-server: ${llamaBinary}`));
|
|
58
69
|
return llamaBinary;
|
|
@@ -85,11 +96,14 @@ async function ensurePi(prompt, run) {
|
|
|
85
96
|
return true;
|
|
86
97
|
}
|
|
87
98
|
|
|
88
|
-
function printFoundModels(ggufModels, managedModels, llamaBinary) {
|
|
99
|
+
function printFoundModels(ggufModels, managedModels, mlxModels, llamaBinary) {
|
|
89
100
|
if (ggufModels.length > 0) {
|
|
90
101
|
console.log(pc.green(`✓ Found ${ggufModels.length} GGUF model${ggufModels.length === 1 ? "" : "s"}`));
|
|
91
102
|
if (!llamaBinary) console.log(pc.yellow("Install the managed llama.cpp runtime to run these GGUF models."));
|
|
92
103
|
}
|
|
104
|
+
if (mlxModels.length > 0) {
|
|
105
|
+
console.log(pc.green(`✓ Found ${mlxModels.length} MLX model${mlxModels.length === 1 ? "" : "s"}`));
|
|
106
|
+
}
|
|
93
107
|
for (const { backendId, models, status, reason } of managedModels) {
|
|
94
108
|
if (status === "unavailable") {
|
|
95
109
|
console.log(pc.yellow(`${BACKENDS[backendId].label}: unavailable${reason ? ` — ${reason}` : ""}`));
|
|
@@ -99,6 +113,50 @@ function printFoundModels(ggufModels, managedModels, llamaBinary) {
|
|
|
99
113
|
}
|
|
100
114
|
}
|
|
101
115
|
|
|
116
|
+
/**
 * Offer to download the first curated model that fits the detected hardware.
 *
 * Flow: pick the first fitting candidate that has a usable format, show a
 * summary, ask for confirmation, resolve the download plan, check free disk
 * space in the HuggingFace hub cache (10% headroom over the plan size), then
 * download while rewriting a single progress line.
 *
 * @param {object} prompt - Interactive prompt helper; only `prompt.yesNo` is used.
 * @returns {Promise<boolean>} `true` only when the download completed; `false`
 *   when nothing fits, the user declines, disk space is short, or the
 *   download fails (the failure is reported, not thrown).
 */
async function offerModelDownload(prompt) {
  const hardware = detectHardware();
  const candidates = allFittingModels(hardware)
    .map((entry) => ({ entry, format: selectFormat(entry, hardware) }))
    .filter((item) => item.format != null);
  if (candidates.length === 0) {
    console.log(pc.yellow("No curated models fit your hardware."));
    return false;
  }

  const primary = candidates[0];
  console.log(renderSection("Download a recommended model", renderRows([
    ["Model", pc.bold(primary.entry.label)],
    ["Format", primary.format],
    ["Minimum RAM", `${String(primary.entry.minRamGb)} GB`],
    ["Your RAM", `${installedRamGB()} GB`],
  ]), { formatBorder: pc.cyan }));

  const shouldDownload = await prompt.yesNo(`Download ${primary.entry.label} (${primary.format})?`, true);
  if (!shouldDownload) return false;

  const hfRef = primary.format === "mlx" ? primary.entry.mlx : primary.entry.gguf;

  // The progress indicator rewrites one line using "\r" and no newline, so we
  // must remember whether that line is open in order to terminate it before
  // printing anything else, including on the failure path.
  let progressLineOpen = false;
  try {
    const plan = await resolveHfDownload(hfRef);
    console.log(pc.dim(`Total size: ${formatBytes(plan.totalSizeBytes)}`));
    const freeBytes = getFreeDiskBytes(HF_HUB_DIR);
    if (plan.totalSizeBytes > 0 && freeBytes < plan.totalSizeBytes * 1.1) {
      console.log(pc.red(`Not enough disk space in ${HF_HUB_DIR}: need ~${formatBytes(plan.totalSizeBytes)}, only ${formatBytes(freeBytes)} free.`));
      return false;
    }
    await downloadToHfCache(plan, {
      onProgress({ percentage }) {
        process.stdout.write(pc.cyan(`\r ${percentage}% downloaded`));
        progressLineOpen = true;
      },
    });
    process.stdout.write("\n");
    progressLineOpen = false;
    console.log(pc.green("✓ Download complete. Run offgrid-ai to use the model."));
    return true;
  } catch (err) {
    // Without this, the error text would be appended to the half-drawn
    // progress line when a download fails partway through.
    if (progressLineOpen) process.stdout.write("\n");
    console.log(pc.red(`Download failed: ${err.message}`));
    return false;
  }
}
|
|
159
|
+
|
|
102
160
|
async function offerBackendInstall(prompt, run) {
|
|
103
161
|
console.log(pc.yellow("\nNo models found."));
|
|
104
162
|
console.log(pc.dim("You need at least one model backend to use offgrid-ai.\n"));
|
package/src/commands/run.mjs
CHANGED
|
@@ -2,7 +2,7 @@ import { existsSync } from "node:fs";
|
|
|
2
2
|
import { ensureDirs } from "../config.mjs";
|
|
3
3
|
import { backendFor } from "../backends.mjs";
|
|
4
4
|
import { normalizeProfile, readProfile, saveProfile } from "../profiles.mjs";
|
|
5
|
-
import { startServer, stopProfile, waitForReady, serverReady, serverMatchesProfile, modelAvailableOnServer } from "../process.mjs";
|
|
5
|
+
import { startServer, stopProfile, waitForReady, serverReady, serverMatchesProfile, modelAvailableOnServer, unloadModelFromServer } from "../process.mjs";
|
|
6
6
|
import { syncPiConfig, hasPiModel, launchPi, hasPi } from "../harness-pi.mjs";
|
|
7
7
|
import { tailFriendly } from "../logs.mjs";
|
|
8
8
|
import { estimateMemory } from "../estimate.mjs";
|
|
@@ -35,7 +35,7 @@ export async function runProfile(profile, options = {}) {
|
|
|
35
35
|
}
|
|
36
36
|
const available = await modelAvailableOnServer(profile);
|
|
37
37
|
if (!available) {
|
|
38
|
-
const modelId = profile.omlxModel ?? profile.
|
|
38
|
+
const modelId = profile.omlxModel ?? profile.modelAlias ?? profile.label;
|
|
39
39
|
throw new Error(`${modelId} is not available on ${backend.label} at ${profile.baseUrl}.`);
|
|
40
40
|
}
|
|
41
41
|
console.log(pc.green(`[ready] ${backend.label} at ${profile.baseUrl}`));
|
|
@@ -116,9 +116,24 @@ async function launchHarness(profile, options, isManaged, withHarness, backend)
|
|
|
116
116
|
try {
|
|
117
117
|
await launchPi(profile);
|
|
118
118
|
} finally {
|
|
119
|
-
if (!
|
|
120
|
-
|
|
121
|
-
|
|
119
|
+
if (!options["keep-server"]) {
|
|
120
|
+
if (!isManaged) {
|
|
121
|
+
const result = await stopProfile(profile);
|
|
122
|
+
console.log(result.stopped ? pc.green(`[stop] ${result.message}`) : pc.dim(`[stop] ${result.message}`));
|
|
123
|
+
} else {
|
|
124
|
+
// Managed-server backends (oMLX): unload the model from the
|
|
125
|
+
// server's memory via its HTTP API. The server itself stays running
|
|
126
|
+
// (offgrid-ai doesn't manage it), but the model is released — same UX
|
|
127
|
+
// as local-server backends where stopProfile kills the process.
|
|
128
|
+
const result = await unloadModelFromServer(profile);
|
|
129
|
+
if (result.unloaded) {
|
|
130
|
+
console.log(pc.green(`[unload] ${backend.label}: model unloaded`));
|
|
131
|
+
} else if (result.reason) {
|
|
132
|
+
console.log(pc.dim(`[unload] ${backend.label}: ${result.reason}`));
|
|
133
|
+
} else if (result.error) {
|
|
134
|
+
console.log(pc.yellow(`[unload] ${backend.label}: ${result.error}`));
|
|
135
|
+
}
|
|
136
|
+
}
|
|
122
137
|
}
|
|
123
138
|
}
|
|
124
139
|
}
|
package/src/commands/status.mjs
CHANGED
|
@@ -42,7 +42,7 @@ export async function statusCommand() {
|
|
|
42
42
|
const detailRows = [];
|
|
43
43
|
for (const { profile, status } of [...managedUpMissing, ...managedUpNotLoaded]) {
|
|
44
44
|
const backend = backendFor(profile.backend);
|
|
45
|
-
const modelId = profile.omlxModel ?? profile.
|
|
45
|
+
const modelId = profile.omlxModel ?? profile.modelAlias ?? profile.id;
|
|
46
46
|
const state = status.modelAvailable
|
|
47
47
|
? pc.yellow("server up · model not loaded")
|
|
48
48
|
: pc.red("server up · model missing");
|
package/src/config.mjs
CHANGED
|
@@ -15,9 +15,17 @@ export const MANAGED_LLAMA_SERVER = join(RUNTIME_DIR, "bin", "llama-server");
|
|
|
15
15
|
|
|
16
16
|
// ── Default scan directories ──────────────────────────────────────────────
|
|
17
17
|
|
|
18
|
+
// HuggingFace hub cache: $HF_HUB_CACHE, else $HF_HOME/hub, else
|
|
19
|
+
// ~/.cache/huggingface/hub. This is where huggingface_hub stores
|
|
20
|
+
// models--org--name/... and where offgrid-ai scans + downloads. Pointing at the
|
|
21
|
+
// hub (not the HF root) keeps the HF-hub MLX/GGUF scanners and the downloader
|
|
22
|
+
// on the same layout.
|
|
23
|
+
export const HF_HUB_DIR = process.env.HF_HUB_CACHE
|
|
24
|
+
|| (process.env.HF_HOME ? join(process.env.HF_HOME, "hub") : join(homedir(), ".cache", "huggingface", "hub"));
|
|
25
|
+
|
|
18
26
|
export const DEFAULT_MODEL_DIRS = [
|
|
19
27
|
join(homedir(), ".lmstudio", "models"),
|
|
20
|
-
|
|
28
|
+
HF_HUB_DIR,
|
|
21
29
|
];
|
|
22
30
|
|
|
23
31
|
// ── External config paths ─────────────────────────────────────────────────
|
|
@@ -65,7 +73,8 @@ export async function saveConfig(config) {
|
|
|
65
73
|
|
|
66
74
|
export async function getModelScanDirs() {
|
|
67
75
|
const config = await loadConfig();
|
|
68
|
-
|
|
76
|
+
// Dedupe (a user may list a default dir explicitly) so we never scan twice.
|
|
77
|
+
return [...DEFAULT_MODEL_DIRS, ...config.modelScanDirs].filter((dir, i, arr) => arr.indexOf(dir) === i);
|
|
69
78
|
}
|
|
70
79
|
|
|
71
80
|
// ── Binary discovery ──────────────────────────────────────────────────────
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
// Shared discovery helpers used by both the GGUF scanner (scan.mjs) and the
|
|
2
|
+
// MLX scanner (mlx-discovery.mjs). Keeping these here avoids a cross-dependency
|
|
3
|
+
// between the two format-specific scanners.
|
|
4
|
+
|
|
5
|
+
import { basename, dirname } from "node:path";
|
|
6
|
+
|
|
7
|
+
/** Minimum on-disk size for a model to count as real (skips tiny test/embedding files). */
|
|
8
|
+
export const MIN_MODEL_SIZE_BYTES = 10 * 1024 * 1024; // 10 MB
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* Model-type / architecture names that indicate an embedding model. Shared by
|
|
12
|
+
* GGUF filtering (general.architecture) and MLX filtering (config.model_type /
|
|
13
|
+
* architectures[0]). Format-specific heuristics (e.g. GGUF filename patterns)
|
|
14
|
+
* live alongside this set in each scanner.
|
|
15
|
+
*/
|
|
16
|
+
export const EMBEDDING_MODEL_TYPES = new Set([
|
|
17
|
+
"bert",
|
|
18
|
+
"roberta",
|
|
19
|
+
"mpnet",
|
|
20
|
+
"nomic_bert",
|
|
21
|
+
"nomic-bert",
|
|
22
|
+
"jina",
|
|
23
|
+
"e5",
|
|
24
|
+
"gte",
|
|
25
|
+
"bge",
|
|
26
|
+
"all_minilm",
|
|
27
|
+
"all-minilm",
|
|
28
|
+
"sentence_transformers",
|
|
29
|
+
"sentence-transformers",
|
|
30
|
+
]);
|
|
31
|
+
|
|
32
|
+
/**
 * Infer a human-readable source label from a model scan path.
 *
 * A leading dot is stripped from the chosen path segment. Generic container
 * folders (models, hub, cache) defer to their parent's name
 * (e.g. ~/.cache/huggingface/hub -> "huggingface"; ~/.lmstudio/models -> "lmstudio").
 * When such a container has no named parent (it sits directly under the
 * filesystem root, or the path is a bare relative name), the container's own
 * name is returned instead of an empty label.
 *
 * @param {string} scanPath - Directory path that is being scanned for models.
 * @returns {string} Label derived from the last or second-to-last path segment.
 */
export function inferSourceLabel(scanPath) {
  const stripLeadingDot = (segment) => segment.replace(/^\./, "");

  const name = stripLeadingDot(basename(scanPath));
  if (name !== "models" && name !== "hub" && name !== "cache") {
    return name;
  }

  // basename(dirname(...)) is "" for "/models" and "." for a bare "models";
  // both collapse to "" after stripping, so fall back to the folder's own name.
  const parent = stripLeadingDot(basename(dirname(scanPath)));
  return parent || name;
}
|