offgrid-ai 0.9.6 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -6
- package/package.json +4 -3
- package/resources/hf-download.py +79 -0
- package/resources/mlxvlm-server-wrapper.py +112 -0
- package/resources/recommendations.json +60 -0
- package/src/backend-installers.mjs +1 -16
- package/src/backends.mjs +18 -45
- package/src/benchmark/finalize.mjs +3 -90
- package/src/benchmark/flow.mjs +3 -4
- package/src/benchmark/metrics.mjs +0 -44
- package/src/benchmark/prepare.mjs +1 -1
- package/src/benchmark.mjs +3 -1
- package/src/commands/main.mjs +7 -7
- package/src/commands/models.mjs +21 -18
- package/src/commands/onboard.mjs +67 -9
- package/src/commands/run.mjs +20 -5
- package/src/commands/status.mjs +1 -1
- package/src/config.mjs +11 -2
- package/src/discovery-shared.mjs +44 -0
- package/src/hardware.mjs +49 -0
- package/src/harness-pi.mjs +25 -11
- package/src/huggingface.mjs +209 -0
- package/src/managed.mjs +1 -5
- package/src/mlx-discovery.mjs +294 -0
- package/src/mlx-flags.mjs +93 -0
- package/src/model-catalog.mjs +78 -11
- package/src/model-name.mjs +7 -25
- package/src/model-presenters.mjs +114 -38
- package/src/process.mjs +129 -32
- package/src/profile-setup.mjs +105 -0
- package/src/profiles.mjs +30 -0
- package/src/recommendations.mjs +56 -14
- package/src/scan.mjs +43 -8
package/src/model-catalog.mjs
CHANGED
|
@@ -1,35 +1,41 @@
|
|
|
1
1
|
import { scanGgufModels, matchDrafter } from "./scan.mjs";
|
|
2
2
|
import { loadProfiles, normalizeProfile, sanitizeProfileId } from "./profiles.mjs";
|
|
3
3
|
import { scanManagedModels } from "./managed.mjs";
|
|
4
|
+
import { scanMlxModels } from "./mlx-discovery.mjs";
|
|
4
5
|
import { isProfileFileMissing } from "./model-summary.mjs";
|
|
6
|
+
import { backendFor } from "./backends.mjs";
|
|
5
7
|
|
|
6
8
|
export async function loadModelCatalog() {
|
|
7
|
-
const [profiles, { models: ggufModels, drafters }, managedModels] = await Promise.all([
|
|
9
|
+
const [profiles, { models: ggufModels, drafters }, managedModels, mlxModels] = await Promise.all([
|
|
8
10
|
loadProfiles(),
|
|
9
11
|
scanGgufModels(),
|
|
10
12
|
scanManagedModels(),
|
|
13
|
+
scanMlxModels(),
|
|
11
14
|
]);
|
|
12
|
-
return normalizeCatalog({ profiles, ggufModels, drafters, managedModels });
|
|
15
|
+
return normalizeCatalog({ profiles, ggufModels, drafters, managedModels, mlxModels });
|
|
13
16
|
}
|
|
14
17
|
|
|
15
18
|
export function normalizeCatalog(catalog) {
|
|
16
19
|
if (catalog.newModels && catalog.managedItems) return catalog;
|
|
17
|
-
const { profiles, ggufModels, drafters, managedModels } = catalog;
|
|
20
|
+
const { profiles, ggufModels, drafters, managedModels, mlxModels = [] } = catalog;
|
|
18
21
|
const profiledPaths = new Set(profiles.map((profile) => profile.modelPath).filter(Boolean));
|
|
19
|
-
const newModels =
|
|
22
|
+
const newModels = [
|
|
23
|
+
...ggufModels.filter((model) => !profiledPaths.has(model.path)),
|
|
24
|
+
...mlxModels.filter((model) => !profiledPaths.has(model.path)),
|
|
25
|
+
];
|
|
20
26
|
const managedItems = [];
|
|
21
27
|
for (const { backendId, models, status } of managedModels) {
|
|
22
28
|
if (status === "unavailable") continue;
|
|
23
29
|
const profiledAliases = new Set(
|
|
24
30
|
profiles
|
|
25
31
|
.filter((profile) => profile.backend === backendId)
|
|
26
|
-
.map((profile) =>
|
|
32
|
+
.map((profile) => `omlx:${profile.omlxModel ?? profile.modelAlias}`),
|
|
27
33
|
);
|
|
28
34
|
for (const model of models) {
|
|
29
35
|
if (!profiledAliases.has(`${backendId}:${model.id}`)) managedItems.push({ model, backendId });
|
|
30
36
|
}
|
|
31
37
|
}
|
|
32
|
-
return { profiles, ggufModels, drafters, managedModels, newModels, managedItems };
|
|
38
|
+
return { profiles, ggufModels, drafters, managedModels, mlxModels, newModels, managedItems };
|
|
33
39
|
}
|
|
34
40
|
|
|
35
41
|
export function itemKey(item) {
|
|
@@ -51,13 +57,73 @@ function compareRecency(a, b) {
|
|
|
51
57
|
}
|
|
52
58
|
|
|
53
59
|
export function buildCatalogItems(normalized) {
|
|
54
|
-
const { profiles, newModels, managedItems, drafters } = normalized;
|
|
55
|
-
|
|
60
|
+
const { profiles, newModels, managedItems, drafters, ggufModels = [], mlxModels = [], managedModels = [] } = normalized;
|
|
61
|
+
|
|
62
|
+
// Lookup maps for enriching profile items with scan data (size + context).
|
|
63
|
+
const scanByPath = new Map();
|
|
64
|
+
for (const m of ggufModels) scanByPath.set(m.path, m);
|
|
65
|
+
for (const m of mlxModels) scanByPath.set(m.filePath ?? m.path, m);
|
|
66
|
+
|
|
67
|
+
const managedByKey = new Map();
|
|
68
|
+
for (const { backendId, models } of managedModels) {
|
|
69
|
+
for (const m of models) managedByKey.set(`${backendId}:${m.id}`, m);
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
const profileItems = profiles.map((profile) => {
|
|
73
|
+
const item = { type: "profile", profile, label: profile.label, fileMissing: isProfileFileMissing(profile) };
|
|
74
|
+
|
|
75
|
+
// Resolve size: profile.modelSizeBytes → scan lookup → managed lookup
|
|
76
|
+
let sizeBytes = profile.modelSizeBytes || 0;
|
|
77
|
+
if (!sizeBytes && profile.modelPath) {
|
|
78
|
+
const scanModel = scanByPath.get(profile.modelPath);
|
|
79
|
+
if (scanModel?.sizeBytes) sizeBytes = scanModel.sizeBytes;
|
|
80
|
+
}
|
|
81
|
+
if (!sizeBytes) {
|
|
82
|
+
const backend = backendFor(profile.backend);
|
|
83
|
+
if (backend.type === "managed-server" && profile.omlxModel) {
|
|
84
|
+
const managedModel = managedByKey.get(`${profile.backend}:${profile.omlxModel}`);
|
|
85
|
+
if (managedModel?.sizeBytes) sizeBytes = managedModel.sizeBytes;
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
item.sizeBytes = sizeBytes || null;
|
|
89
|
+
|
|
90
|
+
// Resolve context: flags.ctxSize (configured) → capabilities.ctxSize (trained) → scan → managed
|
|
91
|
+
let contextLength = profile.flags?.ctxSize ?? null;
|
|
92
|
+
if (!contextLength) contextLength = profile.capabilities?.ctxSize ?? null;
|
|
93
|
+
if (!contextLength && profile.modelPath) {
|
|
94
|
+
const scanModel = scanByPath.get(profile.modelPath);
|
|
95
|
+
if (scanModel?.contextLength) contextLength = scanModel.contextLength;
|
|
96
|
+
}
|
|
97
|
+
if (!contextLength) {
|
|
98
|
+
const backend = backendFor(profile.backend);
|
|
99
|
+
if (backend.type === "managed-server" && profile.omlxModel) {
|
|
100
|
+
const managedModel = managedByKey.get(`${profile.backend}:${profile.omlxModel}`);
|
|
101
|
+
if (managedModel?.contextLength) contextLength = managedModel.contextLength;
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
item.contextLength = contextLength;
|
|
105
|
+
|
|
106
|
+
return item;
|
|
107
|
+
});
|
|
56
108
|
profileItems.sort(compareRecency);
|
|
57
109
|
return [
|
|
58
110
|
...profileItems,
|
|
59
|
-
...newModels.map((model) => ({
|
|
60
|
-
|
|
111
|
+
...newModels.map((model) => ({
|
|
112
|
+
type: "new",
|
|
113
|
+
model,
|
|
114
|
+
label: model.label,
|
|
115
|
+
drafter: matchDrafter(model.path, drafters),
|
|
116
|
+
sizeBytes: model.sizeBytes || null,
|
|
117
|
+
contextLength: model.contextLength ?? null,
|
|
118
|
+
})),
|
|
119
|
+
...managedItems.map(({ model, backendId }) => ({
|
|
120
|
+
type: "managed",
|
|
121
|
+
model,
|
|
122
|
+
backendId,
|
|
123
|
+
label: model.label,
|
|
124
|
+
sizeBytes: model.sizeBytes || null,
|
|
125
|
+
contextLength: model.contextLength ?? null,
|
|
126
|
+
})),
|
|
61
127
|
];
|
|
62
128
|
}
|
|
63
129
|
|
|
@@ -66,8 +132,9 @@ export function createManagedProfile(model, backendId) {
|
|
|
66
132
|
id: `${backendId}-${sanitizeProfileId(model.id)}`,
|
|
67
133
|
label: model.label,
|
|
68
134
|
backend: backendId,
|
|
135
|
+
source: backendId,
|
|
69
136
|
modelAlias: model.aliasSuggestion,
|
|
70
|
-
|
|
137
|
+
modelSizeBytes: model.sizeBytes || 0,
|
|
71
138
|
...(backendId === "omlx" ? { omlxModel: model.id } : {}),
|
|
72
139
|
});
|
|
73
140
|
}
|
package/src/model-name.mjs
CHANGED
|
@@ -68,9 +68,8 @@ const TAG_TOKENS = [
|
|
|
68
68
|
/**
|
|
69
69
|
* Parse a raw model identifier into a structured display name.
|
|
70
70
|
*
|
|
71
|
-
* @param {string} rawId The raw identifier: GGUF filename (no .gguf)
|
|
72
|
-
*
|
|
73
|
-
* @param {"local-gguf"|"ollama"|"omlx"} source Where this name came from.
|
|
71
|
+
* @param {string} rawId The raw identifier: GGUF filename (no .gguf) or oMLX model id.
|
|
72
|
+
* @param {"local-gguf"|"omlx"} source Where this name came from.
|
|
74
73
|
* @returns {{ publisher: string|null, model: string, params: string|null,
|
|
75
74
|
* quant: string|null, tags: string[], display: string,
|
|
76
75
|
* sort: string, id: string }}
|
|
@@ -87,18 +86,7 @@ export function parseModelName(rawId, source) {
|
|
|
87
86
|
name = rawId.slice(slashIdx + 1);
|
|
88
87
|
}
|
|
89
88
|
|
|
90
|
-
// 2.
|
|
91
|
-
// The tag after : is a model size/variant identifier — not a GGUF quant.
|
|
92
|
-
let ollamaTag = null;
|
|
93
|
-
if (source === "ollama") {
|
|
94
|
-
const colonIdx = name.lastIndexOf(":");
|
|
95
|
-
if (colonIdx !== -1) {
|
|
96
|
-
ollamaTag = name.slice(colonIdx + 1);
|
|
97
|
-
name = name.slice(0, colonIdx);
|
|
98
|
-
}
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
// 3. Extract quant (GGUF quantization suffix)
|
|
89
|
+
// 2. Extract quant (GGUF quantization suffix)
|
|
102
90
|
let quant = null;
|
|
103
91
|
for (const pattern of QUANT_PATTERNS) {
|
|
104
92
|
const match = name.match(pattern);
|
|
@@ -125,13 +113,7 @@ export function parseModelName(rawId, source) {
|
|
|
125
113
|
// Clean up leftover separators
|
|
126
114
|
name = name.replace(/[-_]{2,}/g, "-").replace(/^[-_]+|[-_]+$/g, "");
|
|
127
115
|
|
|
128
|
-
// 5.
|
|
129
|
-
// (Ollama tags like "4b" or "30b-a3b" are size variants, not quants)
|
|
130
|
-
if (ollamaTag) {
|
|
131
|
-
name = name + "-" + ollamaTag;
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
// 6. Title-case the remaining model name
|
|
116
|
+
// 5. Title-case the remaining model name
|
|
135
117
|
let model = titleCaseModel(name);
|
|
136
118
|
|
|
137
119
|
// If nothing is left after parsing, fall back to the raw name
|
|
@@ -139,13 +121,13 @@ export function parseModelName(rawId, source) {
|
|
|
139
121
|
model = rawId.includes("/") ? rawId : rawId.replace(/[-_]/g, " ");
|
|
140
122
|
}
|
|
141
123
|
|
|
142
|
-
//
|
|
124
|
+
// 6. Extract params (size like 30B, 12B) for sort/filter convenience
|
|
143
125
|
const params = extractParams(model);
|
|
144
126
|
|
|
145
|
-
//
|
|
127
|
+
// 7. Build display string
|
|
146
128
|
const display = buildDisplay(publisher, model, tags, quant);
|
|
147
129
|
|
|
148
|
-
//
|
|
130
|
+
// 8. Build sort key (lowercase, no publisher, for alphabetical ordering)
|
|
149
131
|
const sort = model.toLowerCase().replace(/[-_]/g, " ");
|
|
150
132
|
|
|
151
133
|
return { publisher, model, params, quant, tags, display, sort, id };
|
package/src/model-presenters.mjs
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import { existsSync, statSync } from "node:fs";
|
|
2
|
-
import {
|
|
2
|
+
import { basename, dirname } from "node:path";
|
|
3
|
+
import { backendFor } from "./backends.mjs";
|
|
3
4
|
import { readCommandArgv } from "./profiles.mjs";
|
|
4
|
-
import { isProfileRunning
|
|
5
|
+
import { isProfileRunning } from "./process.mjs";
|
|
5
6
|
import { buildPrettyCommand } from "./command.mjs";
|
|
6
7
|
import { pc, formatBytes, renderRows, renderSection } from "./ui.mjs";
|
|
7
8
|
import { capabilitySummary, ggufDetailParts, isProfileFileMissing, profileDetailParts } from "./model-summary.mjs";
|
|
@@ -11,6 +12,7 @@ import { findBenchmarkRepo } from "./benchmark.mjs";
|
|
|
11
12
|
|
|
12
13
|
const OPTION_SEPARATOR = pc.dim(" │ ");
|
|
13
14
|
const OPTION_STATUS_WIDTH = 10;
|
|
15
|
+
const OPTION_BACKEND_WIDTH = 14;
|
|
14
16
|
const OPTION_SOURCE_WIDTH = 14;
|
|
15
17
|
const OPTION_CTX_WIDTH = 5;
|
|
16
18
|
|
|
@@ -25,7 +27,7 @@ function optionPad(text, color, width) {
|
|
|
25
27
|
function optionStatusTag(kind) {
|
|
26
28
|
const statuses = {
|
|
27
29
|
running: ["RUNNING", pc.green],
|
|
28
|
-
serverup: ["
|
|
30
|
+
serverup: ["READY", pc.blue],
|
|
29
31
|
ready: ["READY", pc.blue],
|
|
30
32
|
missing: ["MISSING", pc.red],
|
|
31
33
|
setup: ["SETUP", pc.yellow],
|
|
@@ -34,38 +36,80 @@ function optionStatusTag(kind) {
|
|
|
34
36
|
return optionPad(text, color, OPTION_STATUS_WIDTH);
|
|
35
37
|
}
|
|
36
38
|
|
|
37
|
-
function optionSourceTag(sourceId
|
|
39
|
+
function optionSourceTag(sourceId) {
|
|
40
|
+
const label = formatSourceLabel(sourceId);
|
|
38
41
|
const colors = {
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
ollama: pc.green,
|
|
42
|
+
huggingface: pc.cyan,
|
|
43
|
+
lmstudio: pc.blue,
|
|
42
44
|
omlx: pc.magenta,
|
|
45
|
+
"llama.cpp": pc.cyan,
|
|
43
46
|
gguf: pc.cyan,
|
|
47
|
+
mlx: pc.yellow,
|
|
48
|
+
"mlx-vlm": pc.yellow,
|
|
44
49
|
};
|
|
45
50
|
return optionPad(label, colors[sourceId] ?? pc.dim, OPTION_SOURCE_WIDTH);
|
|
46
51
|
}
|
|
47
52
|
|
|
53
|
+
function optionBackendTag(backendId) {
|
|
54
|
+
const backend = backendId ? backendFor(backendId) : null;
|
|
55
|
+
const label = backend?.label ?? backendId ?? "unknown";
|
|
56
|
+
const colors = {
|
|
57
|
+
"llama-cpp": pc.cyan,
|
|
58
|
+
"llama-cpp-mtp": pc.blue,
|
|
59
|
+
omlx: pc.magenta,
|
|
60
|
+
"mlx-vlm": pc.yellow,
|
|
61
|
+
};
|
|
62
|
+
return optionPad(label, colors[backendId] ?? pc.dim, OPTION_BACKEND_WIDTH);
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
function formatSourceLabel(sourceId) {
|
|
66
|
+
if (!sourceId) return "unknown";
|
|
67
|
+
const map = {
|
|
68
|
+
huggingface: "HuggingFace",
|
|
69
|
+
lmstudio: "LM Studio",
|
|
70
|
+
omlx: "oMLX",
|
|
71
|
+
"llama.cpp": "llama.cpp",
|
|
72
|
+
gguf: "GGUF file",
|
|
73
|
+
mlx: "MLX",
|
|
74
|
+
"mlx-vlm": "MLX",
|
|
75
|
+
};
|
|
76
|
+
return map[sourceId] ?? String(sourceId);
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
function inferSourceFromPath(modelPath) {
|
|
80
|
+
if (!modelPath) return null;
|
|
81
|
+
const normalized = modelPath.toLowerCase().replace(/\\/g, "/");
|
|
82
|
+
if (normalized.includes("/.omlx/models")) return "omlx";
|
|
83
|
+
if (normalized.includes("/.lmstudio/models")) return "lmstudio";
|
|
84
|
+
if (normalized.includes("/.cache/huggingface")) return "huggingface";
|
|
85
|
+
if (normalized.includes("/.cache/llama.cpp")) return "llama.cpp";
|
|
86
|
+
const parent = basename(dirname(modelPath));
|
|
87
|
+
if (parent && parent !== ".") return parent.replace(/^\./, "");
|
|
88
|
+
return null;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
function discoverySourceForProfile(profile) {
|
|
92
|
+
const backend = backendFor(profile.backend);
|
|
93
|
+
if (backend.type === "managed-server") return backend.id;
|
|
94
|
+
if (profile.source && profile.source !== "local-gguf") return profile.source;
|
|
95
|
+
return inferSourceFromPath(profile.modelPath);
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
function discoverySourceForItem(item) {
|
|
99
|
+
if (item.type === "profile") return discoverySourceForProfile(item.profile);
|
|
100
|
+
return item.model?.source ?? null;
|
|
101
|
+
}
|
|
102
|
+
|
|
48
103
|
function optionCtxLabel(item) {
|
|
49
|
-
if (item.
|
|
50
|
-
return optionPad(`${(item.
|
|
104
|
+
if (item.contextLength) {
|
|
105
|
+
return optionPad(`${(item.contextLength / 1000).toFixed(0)}k`, null, OPTION_CTX_WIDTH);
|
|
51
106
|
}
|
|
52
107
|
return optionPad("—", null, OPTION_CTX_WIDTH);
|
|
53
108
|
}
|
|
54
109
|
|
|
55
110
|
function optionSizeLabel(item) {
|
|
56
|
-
if (item.type === "profile")
|
|
57
|
-
|
|
58
|
-
if (item.profile.modelPath && existsSync(item.profile.modelPath)) {
|
|
59
|
-
return formatBytes(statSync(item.profile.modelPath).size);
|
|
60
|
-
}
|
|
61
|
-
return "—";
|
|
62
|
-
}
|
|
63
|
-
if (item.type === "new") {
|
|
64
|
-
return formatBytes(item.model.sizeBytes);
|
|
65
|
-
}
|
|
66
|
-
// managed
|
|
67
|
-
if (item.model.sizeBytes) return formatBytes(item.model.sizeBytes);
|
|
68
|
-
if (item.model.quant) return item.model.quant;
|
|
111
|
+
if (item.type === "profile" && item.fileMissing) return "—";
|
|
112
|
+
if (item.sizeBytes) return formatBytes(item.sizeBytes);
|
|
69
113
|
return "—";
|
|
70
114
|
}
|
|
71
115
|
|
|
@@ -76,17 +120,18 @@ export function modelNameWidth(items) {
|
|
|
76
120
|
return Math.max(20, maxName + 2);
|
|
77
121
|
}
|
|
78
122
|
|
|
79
|
-
function optionLabel({ status, source, name, ctx, size, nameWidth }) {
|
|
80
|
-
return [status, source, pc.bold(optionPad(name, null, nameWidth)), ctx, pc.dim(size)].join(OPTION_SEPARATOR);
|
|
123
|
+
function optionLabel({ status, backend, source, name, ctx, size, nameWidth }) {
|
|
124
|
+
return [status, backend, source, pc.bold(optionPad(name, null, nameWidth)), ctx, pc.dim(size)].join(OPTION_SEPARATOR);
|
|
81
125
|
}
|
|
82
126
|
|
|
83
|
-
export function modelSelectOption(item, { runningProfilesNow,
|
|
127
|
+
export function modelSelectOption(item, { runningProfilesNow, modelMissingIds, nameWidth }) {
|
|
128
|
+
const sourceId = discoverySourceForItem(item) ?? "unknown";
|
|
129
|
+
const backendId = inferBackendId(item);
|
|
84
130
|
if (item.type === "profile") {
|
|
85
131
|
const backend = backendFor(item.profile.backend);
|
|
86
132
|
const running = runningProfilesNow.some((profile) => profile.id === item.profile.id);
|
|
87
|
-
const serverUp = !running && !item.fileMissing && serverUpIds?.has(item.profile.id);
|
|
88
133
|
const modelMissing = !item.fileMissing && modelMissingIds?.has(item.profile.id);
|
|
89
|
-
const status = item.fileMissing || modelMissing ? "missing" : running ? "running" :
|
|
134
|
+
const status = item.fileMissing || modelMissing ? "missing" : running ? "running" : "ready";
|
|
90
135
|
const drafterMissing = Boolean(item.profile.drafterPath) && !existsSync(item.profile.drafterPath);
|
|
91
136
|
const hint = drafterMissing ? "MTP drafter missing — reconfigure"
|
|
92
137
|
: modelMissing ? `${backend.label} model no longer available`
|
|
@@ -95,7 +140,8 @@ export function modelSelectOption(item, { runningProfilesNow, serverUpIds, model
|
|
|
95
140
|
value: itemKey(item),
|
|
96
141
|
label: optionLabel({
|
|
97
142
|
status: optionStatusTag(status),
|
|
98
|
-
|
|
143
|
+
backend: optionBackendTag(backendId),
|
|
144
|
+
source: optionSourceTag(sourceId),
|
|
99
145
|
name: item.profile.label,
|
|
100
146
|
nameWidth,
|
|
101
147
|
ctx: optionCtxLabel(item),
|
|
@@ -109,7 +155,8 @@ export function modelSelectOption(item, { runningProfilesNow, serverUpIds, model
|
|
|
109
155
|
value: itemKey(item),
|
|
110
156
|
label: optionLabel({
|
|
111
157
|
status: optionStatusTag("setup"),
|
|
112
|
-
|
|
158
|
+
backend: optionBackendTag(backendId),
|
|
159
|
+
source: optionSourceTag(sourceId),
|
|
113
160
|
name: item.model.label,
|
|
114
161
|
nameWidth,
|
|
115
162
|
ctx: optionCtxLabel(item),
|
|
@@ -117,12 +164,12 @@ export function modelSelectOption(item, { runningProfilesNow, serverUpIds, model
|
|
|
117
164
|
}),
|
|
118
165
|
};
|
|
119
166
|
}
|
|
120
|
-
const backend = BACKENDS[item.backendId];
|
|
121
167
|
return {
|
|
122
168
|
value: itemKey(item),
|
|
123
169
|
label: optionLabel({
|
|
124
170
|
status: optionStatusTag("setup"),
|
|
125
|
-
|
|
171
|
+
backend: optionBackendTag(backendId),
|
|
172
|
+
source: optionSourceTag(sourceId),
|
|
126
173
|
name: item.model.label,
|
|
127
174
|
nameWidth,
|
|
128
175
|
ctx: optionCtxLabel(item),
|
|
@@ -131,19 +178,26 @@ export function modelSelectOption(item, { runningProfilesNow, serverUpIds, model
|
|
|
131
178
|
};
|
|
132
179
|
}
|
|
133
180
|
|
|
134
|
-
|
|
181
|
+
function inferBackendId(item) {
|
|
182
|
+
if (item.type === "profile") return item.profile.backend;
|
|
183
|
+
if (item.type === "managed") return item.backendId;
|
|
184
|
+
// new model: derive from format
|
|
185
|
+
if (item.model?.format === "mlx") return "mlx-vlm";
|
|
186
|
+
if (item.model?.backend) return item.model.backend;
|
|
187
|
+
return "llama-cpp";
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
export function printWorkspaceHeader(normalized, runningProfilesNow, modelMissingIds = new Set()) {
|
|
135
191
|
const profiles = normalized.profiles;
|
|
136
192
|
const isRunning = (p) => runningProfilesNow.some((r) => r.id === p.id);
|
|
137
193
|
const isMissing = (p) => isProfileFileMissing(p) || modelMissingIds.has(p.id);
|
|
138
|
-
const readyCount = profiles.filter((p) => !isMissing(p) && !isRunning(p)
|
|
194
|
+
const readyCount = profiles.filter((p) => !isMissing(p) && !isRunning(p)).length;
|
|
139
195
|
const runningCount = runningProfilesNow.length;
|
|
140
|
-
const serverUpCount = profiles.filter((p) => !isMissing(p) && serverUpIds.has(p.id) && !isRunning(p)).length;
|
|
141
196
|
const missingCount = profiles.filter(isMissing).length;
|
|
142
197
|
const setupCount = normalized.newModels.length + normalized.managedItems.length;
|
|
143
198
|
|
|
144
199
|
const countParts = [];
|
|
145
200
|
if (runningCount > 0) countParts.push(pc.green(`${runningCount} running`));
|
|
146
|
-
if (serverUpCount > 0) countParts.push(pc.yellow(`${serverUpCount} server up, model not loaded`));
|
|
147
201
|
if (readyCount > 0) countParts.push(pc.blue(`${readyCount} model${readyCount === 1 ? "" : "s"} ready`));
|
|
148
202
|
if (missingCount > 0) countParts.push(pc.red(`${missingCount} model${missingCount === 1 ? "" : "s"} missing`));
|
|
149
203
|
if (setupCount > 0) countParts.push(pc.yellow(`${setupCount} model${setupCount === 1 ? "" : "s"} need${setupCount === 1 ? "s" : ""} setup`));
|
|
@@ -166,11 +220,10 @@ export async function printProfileDetails(profile) {
|
|
|
166
220
|
const backend = backendFor(profile.backend);
|
|
167
221
|
const isManaged = backend.type === "managed-server";
|
|
168
222
|
const running = await isProfileRunning(profile);
|
|
169
|
-
const serverUp = !running && isManaged && await isProfileServerUp(profile);
|
|
170
223
|
const fileMissing = !isManaged && isProfileFileMissing(profile);
|
|
171
224
|
console.log("\n" + renderSection("Model overview", renderRows([
|
|
172
225
|
["Name", pc.bold(profile.label)],
|
|
173
|
-
["Status", fileMissing ? pc.red("File missing") : running ? pc.green("Running now") :
|
|
226
|
+
["Status", fileMissing ? pc.red("File missing") : running ? pc.green("Running now") : pc.blue("Ready")],
|
|
174
227
|
["Details", profileDetailParts(profile, { fileMissing }).join(pc.dim(" · "))],
|
|
175
228
|
["Server", fileMissing ? pc.red(profile.baseUrl) : profile.baseUrl],
|
|
176
229
|
])));
|
|
@@ -185,7 +238,7 @@ export async function printProfileDetails(profile) {
|
|
|
185
238
|
detailRows.push(
|
|
186
239
|
["Local file", fileMissing ? pc.red(`${profile.modelPath} (not found)`) : profile.modelPath ?? "unknown"],
|
|
187
240
|
["Vision file", profile.mmprojPath ? (existsSync(profile.mmprojPath) ? profile.mmprojPath : pc.red(`${profile.mmprojPath} (not found)`)) : "none"],
|
|
188
|
-
["Model size", profile.modelPath && existsSync(profile.modelPath) ? formatBytes(statSync(profile.modelPath).size) : "unknown"],
|
|
241
|
+
["Model size", profile.modelSizeBytes ? formatBytes(profile.modelSizeBytes) : (profile.modelPath && existsSync(profile.modelPath) && statSync(profile.modelPath).isFile() ? formatBytes(statSync(profile.modelPath).size) : "unknown")],
|
|
189
242
|
);
|
|
190
243
|
if (profile.drafterPath) {
|
|
191
244
|
detailRows.push(["Drafter", existsSync(profile.drafterPath) ? profile.drafterPath : pc.red(`${profile.drafterPath} (not found)`)]);
|
|
@@ -219,6 +272,29 @@ export function printGgufModelDetails(model, drafter) {
|
|
|
219
272
|
console.log("\n" + renderSection("Model details", renderRows(detailRows), { columns: 110 }));
|
|
220
273
|
}
|
|
221
274
|
|
|
275
|
+
export async function printMlxModelDetails(model) {
|
|
276
|
+
const { detectMlxCapabilities } = await import("./mlx-discovery.mjs");
|
|
277
|
+
const caps = await detectMlxCapabilities(model.filePath ?? model.path);
|
|
278
|
+
const parts = [];
|
|
279
|
+
if (caps.architecture) parts.push(caps.architecture);
|
|
280
|
+
if (caps.thinking) parts.push("thinking");
|
|
281
|
+
if (caps.vision) parts.push("vision");
|
|
282
|
+
const summary = parts.length > 0 ? parts.join(pc.dim(" · ")) : "standard MLX";
|
|
283
|
+
console.log("\n" + renderSection("Downloaded model", renderRows([
|
|
284
|
+
["Name", pc.bold(model.label)],
|
|
285
|
+
["Status", pc.yellow("Needs one-time setup")],
|
|
286
|
+
["Details", summary],
|
|
287
|
+
])));
|
|
288
|
+
console.log("\n" + renderSection("Model details", renderRows([
|
|
289
|
+
["Model dir", model.path],
|
|
290
|
+
["Backend", "mlx-vlm"],
|
|
291
|
+
["Source", formatSourceLabel(model.source)],
|
|
292
|
+
["Detected", summary],
|
|
293
|
+
["Size", formatBytes(model.sizeBytes)],
|
|
294
|
+
["Context", caps.contextLength ? `${caps.contextLength.toLocaleString()} trained` : "unknown"],
|
|
295
|
+
]), { columns: 110 }));
|
|
296
|
+
}
|
|
297
|
+
|
|
222
298
|
export function printManagedModelDetails(model, backend) {
|
|
223
299
|
console.log("\n" + renderSection(`${backend.label} model`, renderRows([
|
|
224
300
|
["Name", pc.bold(model.label)],
|
package/src/process.mjs
CHANGED
|
@@ -35,11 +35,13 @@ async function startLocalServer(profile) {
|
|
|
35
35
|
|
|
36
36
|
// Build argv: binary + command.json args
|
|
37
37
|
const argv = [...commandArgv];
|
|
38
|
+
// mlx-vlm requires APC_ENABLED=1 (86x TTFT improvement; fixes Metal cache clearing).
|
|
39
|
+
const env = profile.backend === "mlx-vlm" ? { ...process.env, APC_ENABLED: "1" } : process.env;
|
|
38
40
|
|
|
39
41
|
const rawFd = openSync(rawLogPath, "a");
|
|
40
42
|
let child;
|
|
41
43
|
try {
|
|
42
|
-
child = spawn(binary, argv, { detached: true, stdio: ["ignore", rawFd, rawFd] });
|
|
44
|
+
child = spawn(binary, argv, { detached: true, stdio: ["ignore", rawFd, rawFd], env });
|
|
43
45
|
} finally {
|
|
44
46
|
closeSync(rawFd);
|
|
45
47
|
}
|
|
@@ -96,16 +98,134 @@ export async function stopProfile(profile) {
|
|
|
96
98
|
await writeState(profile.id, { ...state, pid: null, stoppedAt: new Date().toISOString(), stopReason: "pid-not-running" });
|
|
97
99
|
return { stopped: false, message: `${profile.id} pid ${state.pid} is no longer running.` };
|
|
98
100
|
}
|
|
101
|
+
const pid = state.pid;
|
|
99
102
|
try {
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
}
|
|
103
|
-
process.kill(state.pid, "SIGTERM");
|
|
104
|
-
}
|
|
105
|
-
await writeState(profile.id, { ...state, pid: null, stoppedAt: new Date().toISOString(), stopSignal: "SIGTERM" });
|
|
106
|
-
return { stopped: true, message: `Stopped ${profile.id} pid ${state.pid}` };
|
|
103
|
+
const signal = await terminateProcess(pid);
|
|
104
|
+
await writeState(profile.id, { ...state, pid: null, stoppedAt: new Date().toISOString(), stopSignal: signal });
|
|
105
|
+
return { stopped: true, message: `Stopped ${profile.id} pid ${pid}` };
|
|
107
106
|
} catch (error) {
|
|
108
|
-
return { stopped: false, message: `Could not stop pid ${
|
|
107
|
+
return { stopped: false, message: `Could not stop pid ${pid}: ${error.message}` };
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
// Reliably terminate a detached local-server process group: SIGTERM with a
|
|
112
|
+
// grace period for graceful shutdown (lets mlx-vlm/llama-server release the
|
|
113
|
+
// model), then SIGKILL if still alive. Guarantees the model is unloaded when a
|
|
114
|
+
// profile stops — consistent across backends (llama-server exits on SIGTERM;
|
|
115
|
+
// mlx-vlm/uvicorn often does not, hence the SIGKILL fallback).
|
|
116
|
+
async function terminateProcess(pid) {
|
|
117
|
+
const signalGroup = (sig) => {
|
|
118
|
+
try { process.kill(-pid, sig); }
|
|
119
|
+
catch { process.kill(pid, sig); } // not a group leader — kill the proc itself
|
|
120
|
+
};
|
|
121
|
+
signalGroup("SIGTERM");
|
|
122
|
+
for (let i = 0; i < 50; i++) { // 5s grace for graceful shutdown
|
|
123
|
+
if (await processGone(pid)) return "SIGTERM";
|
|
124
|
+
await sleep(100);
|
|
125
|
+
}
|
|
126
|
+
signalGroup("SIGKILL");
|
|
127
|
+
for (let i = 0; i < 30; i++) { // 3s for SIGKILL to take effect
|
|
128
|
+
if (await processGone(pid)) return "SIGKILL";
|
|
129
|
+
await sleep(100);
|
|
130
|
+
}
|
|
131
|
+
throw new Error(`pid ${pid} did not exit after SIGKILL`);
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
// True if the process is dead (or a zombie about to be reaped).
|
|
135
|
+
async function processGone(pid) {
|
|
136
|
+
try { process.kill(pid, 0); }
|
|
137
|
+
catch { return true; } // no such process
|
|
138
|
+
// Alive to signal(0) — but a detached setsid child can briefly appear as a
|
|
139
|
+
// zombie before launchd reaps it. Treat zombie as gone.
|
|
140
|
+
try {
|
|
141
|
+
const { stdout } = await execFileAsync("ps", ["-o", "stat=", "-p", String(pid)]);
|
|
142
|
+
return /^Z/.test(stdout.trim());
|
|
143
|
+
} catch {
|
|
144
|
+
return false;
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
// ── Unload model from a managed server (oMLX) ─────────────────────────────
|
|
149
|
+
// Counterpart to stopProfile for local-server backends: stopProfile kills the
|
|
150
|
+
// server process (which unloads the model); unloadModelFromServer asks a
|
|
151
|
+
// managed server to release the model from memory via its HTTP API, leaving the
|
|
152
|
+
// server itself running. Together they give a consistent UX: quitting Pi
|
|
153
|
+
// unloads the model regardless of backend type.
|
|
154
|
+
|
|
155
|
+
export async function unloadModelFromServer(profile) {
|
|
156
|
+
const backend = backendFor(profile.backend);
|
|
157
|
+
|
|
158
|
+
if (backend.id === "llama-cpp" || backend.id === "llama-cpp-mtp") {
|
|
159
|
+
// llama.cpp unloads when the server process exits; no HTTP unload API exists.
|
|
160
|
+
// If offgrid-ai started the server, stopProfile already handled it.
|
|
161
|
+
return { unloaded: false, backend: backend.id, reason: "stop server to unload" };
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
if (backend.id === "omlx") {
|
|
165
|
+
return await unloadOmlxModel(profile);
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
if (backend.id === "mlx-vlm") {
|
|
169
|
+
// mlx-vlm is a local-server backend — stopProfile handles unload by killing
|
|
170
|
+
// the process. No HTTP unload API.
|
|
171
|
+
return { unloaded: false, backend: backend.id, reason: "stop server to unload" };
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
return { unloaded: false, backend: backend.id, reason: "unsupported backend" };
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
async function unloadOmlxModel(profile) {
|
|
178
|
+
const baseUrl = profile.baseUrl?.replace(/\/v1\/?$/u, "") || "";
|
|
179
|
+
const adminUrl = `${baseUrl}/admin/api/models`;
|
|
180
|
+
const modelId = profile.modelAlias || profile.omlxModel || profile.id;
|
|
181
|
+
|
|
182
|
+
try {
|
|
183
|
+
const ids = await serverModelIds(profile.baseUrl);
|
|
184
|
+
const match = ids.find((id) => id.toLowerCase() === modelId.toLowerCase());
|
|
185
|
+
const targetId = match ?? modelId;
|
|
186
|
+
|
|
187
|
+
const response = await fetch(`${adminUrl}/${encodeURIComponent(targetId)}/unload`, {
|
|
188
|
+
method: "POST",
|
|
189
|
+
headers: { "Content-Type": "application/json" },
|
|
190
|
+
signal: AbortSignal.timeout(30000),
|
|
191
|
+
});
|
|
192
|
+
|
|
193
|
+
if (response.ok) {
|
|
194
|
+
return { unloaded: true, backend: "omlx", modelId: targetId };
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
const detail = await responseErrorDetail(response);
|
|
198
|
+
|
|
199
|
+
if (response.status === 400 && /not loaded/i.test(detail)) {
|
|
200
|
+
return { unloaded: true, backend: "omlx", modelId: targetId, reason: "model was not loaded" };
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
if (response.status === 401 || response.status === 403) {
|
|
204
|
+
return {
|
|
205
|
+
unloaded: false,
|
|
206
|
+
backend: "omlx",
|
|
207
|
+
modelId: targetId,
|
|
208
|
+
error: "oMLX admin authentication required. Enable skip_api_key_verification in oMLX settings, or unload manually from the admin panel.",
|
|
209
|
+
};
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
return { unloaded: false, backend: "omlx", modelId: targetId, error: `HTTP ${response.status}: ${detail}` };
|
|
213
|
+
} catch (err) {
|
|
214
|
+
if (err?.name === "AbortError" || err?.name === "TimeoutError") {
|
|
215
|
+
return { unloaded: false, backend: "omlx", modelId, error: "Unload request timed out. The model may still be unloading in the background." };
|
|
216
|
+
}
|
|
217
|
+
return { unloaded: false, backend: "omlx", modelId, error: err.message };
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
async function responseErrorDetail(response) {
|
|
222
|
+
const text = await response.text().catch(() => "");
|
|
223
|
+
if (!text) return "";
|
|
224
|
+
try {
|
|
225
|
+
const body = JSON.parse(text);
|
|
226
|
+
return body?.detail ?? body?.message ?? text;
|
|
227
|
+
} catch {
|
|
228
|
+
return text;
|
|
109
229
|
}
|
|
110
230
|
}
|
|
111
231
|
|
|
@@ -126,7 +246,6 @@ export async function isProfileServerUp(profile) {
|
|
|
126
246
|
|
|
127
247
|
export async function modelLoadedOnServer(profile) {
|
|
128
248
|
const backend = backendFor(profile.backend);
|
|
129
|
-
if (backend.id === "ollama") return modelIdsMatch(await ollamaLoadedModelIds(profile), expectedModelIds(profile));
|
|
130
249
|
if (backend.id === "omlx") return modelIdsMatch(await omlxLoadedModelIds(profile), expectedModelIds(profile));
|
|
131
250
|
const { matches } = await serverMatchesProfile(profile);
|
|
132
251
|
return matches;
|
|
@@ -134,9 +253,6 @@ export async function modelLoadedOnServer(profile) {
|
|
|
134
253
|
|
|
135
254
|
export async function modelAvailableOnServer(profile) {
|
|
136
255
|
const backend = backendFor(profile.backend);
|
|
137
|
-
if (backend.id === "ollama") {
|
|
138
|
-
return modelIdsMatch(await ollamaAvailableModelIds(profile), expectedModelIds(profile));
|
|
139
|
-
}
|
|
140
256
|
if (backend.id === "omlx") {
|
|
141
257
|
// /v1/models lists discovered models; an ID must exist there to be usable.
|
|
142
258
|
return modelIdsMatch(await serverModelIds(profile.baseUrl), expectedModelIds(profile));
|
|
@@ -217,24 +333,6 @@ export async function serverModelIds(baseUrl) {
|
|
|
217
333
|
.filter(Boolean);
|
|
218
334
|
}
|
|
219
335
|
|
|
220
|
-
async function ollamaLoadedModelIds(profile) {
|
|
221
|
-
const result = await fetchJson(`${apiRootUrl(profile.baseUrl)}/api/ps`);
|
|
222
|
-
if (!result.ok) return [];
|
|
223
|
-
return (Array.isArray(result.data?.models) ? result.data.models : [])
|
|
224
|
-
.flatMap((model) => [model?.name, model?.model])
|
|
225
|
-
.map((id) => String(id ?? "").trim())
|
|
226
|
-
.filter(Boolean);
|
|
227
|
-
}
|
|
228
|
-
|
|
229
|
-
async function ollamaAvailableModelIds(profile) {
|
|
230
|
-
const result = await fetchJson(`${apiRootUrl(profile.baseUrl)}/api/tags`);
|
|
231
|
-
if (!result.ok) return [];
|
|
232
|
-
return (Array.isArray(result.data?.models) ? result.data.models : [])
|
|
233
|
-
.flatMap((model) => [model?.name, model?.model])
|
|
234
|
-
.map((id) => String(id ?? "").trim())
|
|
235
|
-
.filter(Boolean);
|
|
236
|
-
}
|
|
237
|
-
|
|
238
336
|
async function omlxLoadedModelIds(profile) {
|
|
239
337
|
const statusResult = await fetchJson(`${profile.baseUrl.replace(/\/+$/u, "")}/models/status`);
|
|
240
338
|
const fromStatus = statusResult.ok
|
|
@@ -305,7 +403,6 @@ function expectedModelIds(profile) {
|
|
|
305
403
|
return [
|
|
306
404
|
profile.modelAlias,
|
|
307
405
|
profile.label,
|
|
308
|
-
profile.ollamaModel,
|
|
309
406
|
profile.omlxModel,
|
|
310
407
|
profile.modelPath,
|
|
311
408
|
fileName,
|