offgrid-ai 0.9.5 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -6
- package/package.json +4 -3
- package/resources/hf-download.py +79 -0
- package/resources/mlxvlm-server-wrapper.py +112 -0
- package/resources/recommendations.json +60 -0
- package/src/backend-installers.mjs +1 -16
- package/src/backends.mjs +17 -45
- package/src/benchmark/finalize.mjs +9 -91
- package/src/benchmark/flow.mjs +8 -6
- package/src/benchmark/metrics.mjs +6 -45
- package/src/benchmark/pi-runner.mjs +5 -2
- package/src/benchmark/prepare.mjs +1 -1
- package/src/benchmark/stream-renderer.mjs +31 -2
- package/src/benchmark.mjs +3 -1
- package/src/commands/main.mjs +3 -5
- package/src/commands/models.mjs +27 -19
- package/src/commands/onboard.mjs +67 -9
- package/src/commands/run.mjs +20 -5
- package/src/commands/status.mjs +1 -1
- package/src/config.mjs +11 -2
- package/src/discovery-shared.mjs +44 -0
- package/src/hardware.mjs +49 -0
- package/src/harness-pi.mjs +25 -11
- package/src/huggingface.mjs +209 -0
- package/src/managed.mjs +1 -5
- package/src/mlx-discovery.mjs +290 -0
- package/src/mlx-flags.mjs +93 -0
- package/src/model-catalog.mjs +12 -6
- package/src/model-name.mjs +7 -25
- package/src/model-presenters.mjs +138 -28
- package/src/process.mjs +129 -32
- package/src/profile-setup.mjs +116 -0
- package/src/profiles.mjs +30 -0
- package/src/recommendations.mjs +56 -14
- package/src/scan.mjs +39 -8
package/src/hardware.mjs
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
// Hardware detection — pure functions, no side effects.
|
|
2
|
+
// Ported from deprecated-offgrid-desktop/src/main/hardware.ts.
|
|
3
|
+
//
|
|
4
|
+
// This is the canonical source for hardware facts the model-serving logic
|
|
5
|
+
// needs: recommendations, onboarding, MLX context sizing, disk-space guards,
|
|
6
|
+
// and memory estimation.
|
|
7
|
+
|
|
8
|
+
import { totalmem } from "node:os";
|
|
9
|
+
import { statfsSync, existsSync } from "node:fs";
|
|
10
|
+
import { dirname } from "node:path";
|
|
11
|
+
|
|
12
|
+
/**
 * Report basic facts about the host machine.
 *
 * Reads total memory from `node:os` and the platform/architecture strings
 * from the global `process` object.
 *
 * @returns {{ totalRamBytes: number, platform: string, arch: string }}
 */
export function detectHardware() {
  const { platform, arch } = process;
  const totalRamBytes = totalmem();
  return { totalRamBytes, platform, arch };
}
|
|
23
|
+
|
|
24
|
+
/**
 * Installed RAM, expressed in whole GiB (bytes / 1024^3, rounded to nearest).
 *
 * @returns {number}
 */
export function installedRamGB() {
  const bytesPerGiB = 1024 ** 3;
  const gib = totalmem() / bytesPerGiB;
  return Math.round(gib);
}
|
|
28
|
+
|
|
29
|
+
/**
 * Free disk space (bytes) on the filesystem that holds `dir`.
 *
 * When `dir` does not exist, the nearest existing ancestor is probed instead.
 * Any failure yields `Number.MAX_SAFE_INTEGER`, so a failed check never makes
 * the caller think the disk is full.
 *
 * @param {string} dir - Directory (existing or not) to check.
 * @returns {number} Available bytes, or `Number.MAX_SAFE_INTEGER` on error.
 */
export function getFreeDiskBytes(dir) {
  try {
    let probe = dir;
    for (;;) {
      if (existsSync(probe)) break;
      const up = dirname(probe);
      // dirname() of the filesystem root is the root itself — stop climbing.
      if (up === probe) break;
      probe = up;
    }
    const { bavail, bsize } = statfsSync(probe);
    return bavail * bsize;
  } catch {
    return Number.MAX_SAFE_INTEGER;
  }
}
|
package/src/harness-pi.mjs
CHANGED
|
@@ -5,6 +5,18 @@ import { loadProfiles } from "./profiles.mjs";
|
|
|
5
5
|
import { readJson, writeJson } from "./json.mjs";
|
|
6
6
|
import pc from "picocolors";
|
|
7
7
|
|
|
8
|
+
// ── Pi model id ─────────────────────────────────────────────────────────────
|
|
9
|
+
|
|
10
|
+
/**
 * Model id that Pi must put in its API requests for this profile.
 *
 * For the "mlx-vlm" backend this is the profile's `modelPath`; every other
 * backend uses the profile's `modelAlias`.
 *
 * @param {{ backend: string, modelPath?: string, modelAlias?: string }} profile
 * @returns {string | undefined}
 */
export function piApiModelId(profile) {
  if (profile.backend === "mlx-vlm") {
    return profile.modelPath;
  }
  return profile.modelAlias;
}
|
|
19
|
+
|
|
8
20
|
// ── Sync Pi config ─────────────────────────────────────────────────────────
|
|
9
21
|
|
|
10
22
|
export async function syncPiConfig(profile) {
|
|
@@ -30,26 +42,27 @@ export async function removeFromPiConfig(profile) {
|
|
|
30
42
|
const provider = config.providers[profile.providerId];
|
|
31
43
|
if (!provider?.models) return { cleaned: false, reason: `no ${profile.providerId} provider in Pi config` };
|
|
32
44
|
const before = provider.models.length;
|
|
33
|
-
|
|
45
|
+
const apiId = piApiModelId(profile);
|
|
46
|
+
provider.models = provider.models.filter((m) => m.id !== apiId);
|
|
34
47
|
if (provider.models.length === 0) delete config.providers[profile.providerId];
|
|
35
48
|
if (before > provider.models.length) {
|
|
36
49
|
await writeJson(PI_CONFIG, config);
|
|
37
50
|
return { cleaned: true, removed: before - provider.models.length };
|
|
38
51
|
}
|
|
39
|
-
return { cleaned: false, reason: `${
|
|
52
|
+
return { cleaned: false, reason: `${apiId} not in Pi config` };
|
|
40
53
|
}
|
|
41
54
|
|
|
42
55
|
// ── Check if Pi has the model ──────────────────────────────────────────────
|
|
43
56
|
|
|
44
57
|
export async function hasPiModel(profile) {
|
|
45
58
|
const config = await readJson(PI_CONFIG, null);
|
|
46
|
-
return Boolean(config?.providers?.[profile.providerId]?.models?.some?.((m) => m.id === profile
|
|
59
|
+
return Boolean(config?.providers?.[profile.providerId]?.models?.some?.((m) => m.id === piApiModelId(profile)));
|
|
47
60
|
}
|
|
48
61
|
|
|
49
62
|
// ── Launch Pi ──────────────────────────────────────────────────────────────
|
|
50
63
|
|
|
51
64
|
export async function launchPi(profile) {
|
|
52
|
-
const model =
|
|
65
|
+
const model = `${profile.providerId}/${piApiModelId(profile)}`;
|
|
53
66
|
console.log(pc.bold(`[pi] pi --model ${model}`));
|
|
54
67
|
await runForeground("pi", ["--model", model]);
|
|
55
68
|
}
|
|
@@ -88,7 +101,7 @@ function piModelConfig(profile) {
|
|
|
88
101
|
const compat = modelCompat(profile);
|
|
89
102
|
const reasoning = modelReasoning(profile);
|
|
90
103
|
return {
|
|
91
|
-
id: profile
|
|
104
|
+
id: piApiModelId(profile),
|
|
92
105
|
name: profile.label,
|
|
93
106
|
input: modelInput(profile),
|
|
94
107
|
...(reasoning === undefined ? {} : { reasoning }),
|
|
@@ -99,7 +112,9 @@ function piModelConfig(profile) {
|
|
|
99
112
|
}
|
|
100
113
|
|
|
101
114
|
function modelInput(profile) {
|
|
102
|
-
|
|
115
|
+
if (profile.mmprojPath && existsSync(profile.mmprojPath)) return ["text", "image"];
|
|
116
|
+
if (profile.capabilities?.vision) return ["text", "image"];
|
|
117
|
+
return ["text"];
|
|
103
118
|
}
|
|
104
119
|
|
|
105
120
|
function modelCompat(profile) {
|
|
@@ -119,15 +134,14 @@ function modelReasoning(profile) {
|
|
|
119
134
|
}
|
|
120
135
|
|
|
121
136
|
function modelFamily(profile) {
|
|
122
|
-
return [profile.id, profile.label, profile.modelAlias, profile.modelPath, profile.
|
|
137
|
+
return [profile.id, profile.label, profile.modelAlias, profile.modelPath, profile.omlxModel].filter(Boolean).join(" ").toLowerCase();
|
|
123
138
|
}
|
|
124
139
|
|
|
125
|
-
function piApiKey(
|
|
126
|
-
return
|
|
140
|
+
function piApiKey() {
|
|
141
|
+
return "none";
|
|
127
142
|
}
|
|
128
143
|
|
|
129
|
-
function providerCompat(
|
|
130
|
-
if (providerId === "ollama") return { supportsDeveloperRole: true, supportsReasoningEffort: false };
|
|
144
|
+
function providerCompat() {
|
|
131
145
|
return { supportsDeveloperRole: false, supportsReasoningEffort: false };
|
|
132
146
|
}
|
|
133
147
|
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
// HuggingFace model download helpers.
|
|
2
|
+
// Uses the Python huggingface_hub package (the standard, maintained downloader)
|
|
3
|
+
// to download models into the standard HF cache directory.
|
|
4
|
+
// Downloads go to ~/.cache/huggingface/hub, NOT a custom offgrid-ai folder.
|
|
5
|
+
|
|
6
|
+
import { execFile } from "node:child_process";
|
|
7
|
+
import { promisify } from "node:util";
|
|
8
|
+
import { join, dirname } from "node:path";
|
|
9
|
+
import { mkdir } from "node:fs/promises";
|
|
10
|
+
import { fileURLToPath } from "node:url";
|
|
11
|
+
import { HF_HUB_DIR } from "./config.mjs";
|
|
12
|
+
|
|
13
|
+
const execFileAsync = promisify(execFile);
|
|
14
|
+
|
|
15
|
+
const HF_DOWNLOAD_SCRIPT = join(dirname(fileURLToPath(import.meta.url)), "..", "resources", "hf-download.py");
|
|
16
|
+
|
|
17
|
+
/**
 * Probe for a usable `python3` with the `huggingface_hub` package installed.
 *
 * Runs a one-line Python snippet that imports the package and prints its
 * version; any failure (missing interpreter, import error) counts as "no".
 *
 * @returns {Promise<boolean>}
 */
export async function hasHuggingfaceHub() {
  const probe = "import huggingface_hub; print(huggingface_hub.__version__)";
  let version;
  try {
    const result = await execFileAsync("python3", ["-c", probe]);
    version = result.stdout.trim();
  } catch {
    return false;
  }
  return Boolean(version);
}
|
|
26
|
+
|
|
27
|
+
/**
 * Parse a HuggingFace reference into a repo id and an optional file path.
 *
 * Accepted forms:
 *   - `org/name`
 *   - `org/name/path/to/file.gguf`
 *   - `https://huggingface.co/org/name`
 *   - `https://huggingface.co/org/name/resolve/main/path/to/file.gguf`
 *   - `https://huggingface.co/org/name/blob/main/path/to/file.gguf` (the URL
 *     shown in the browser when viewing a file)
 *   - `https://huggingface.co/org/name/tree/main[/sub/dir]`
 *
 * Only the `main` revision marker is stripped; other revisions are left in
 * the path exactly as before.
 *
 * @param {string} input - User-supplied reference.
 * @returns {{ repo: string, filename: string | undefined }} `filename` is
 *   `undefined` when the reference names no file.
 * @throws {Error} When the reference is not a string or has fewer than two
 *   path segments.
 */
export function parseHfRef(input) {
  if (typeof input !== "string") {
    throw new Error(`Invalid HuggingFace reference: "${input}". Expected at least org/name.`);
  }
  const trimmed = input.trim();

  if (trimmed.startsWith("https://huggingface.co/")) {
    const segments = new URL(trimmed).pathname.split("/").filter(Boolean);

    // "<repo>/(resolve|blob|tree)/main/<file>" — the marker and revision are
    // URL routing, not part of the file path inside the repo.
    const markerIdx = segments.findIndex(
      (seg, i) => i > 0 && (seg === "resolve" || seg === "blob" || seg === "tree") && segments[i + 1] === "main",
    );
    if (markerIdx > 0) {
      return {
        repo: segments.slice(0, markerIdx).join("/"),
        filename: segments.slice(markerIdx + 2).join("/") || undefined,
      };
    }

    if (segments.length >= 2) {
      return {
        repo: segments.slice(0, 2).join("/"),
        filename: segments.length > 2 ? segments.slice(2).join("/") : undefined,
      };
    }
    throw new Error(`Invalid HuggingFace URL: ${input}`);
  }

  const parts = trimmed.split("/").filter(Boolean);
  if (parts.length < 2) {
    throw new Error(`Invalid HuggingFace reference: "${input}". Expected at least org/name.`);
  }
  return {
    repo: parts.slice(0, 2).join("/"),
    filename: parts.length > 2 ? parts.slice(2).join("/") : undefined,
  };
}
|
|
59
|
+
|
|
60
|
+
/**
 * Look up one GGUF file in a repo's file tree and describe it for download.
 *
 * @param {string} ref - HuggingFace reference that names a file (see parseHfRef).
 * @param {object} [options]
 * @param {typeof fetch} [options.fetchImpl] - fetch implementation to use.
 * @returns {Promise<{ repo: string, filename: string, url: string, sizeBytes: number, sha256: string, relativePath: string }>}
 * @throws {Error} When the file is not listed in the repo tree.
 */
export async function resolveGgufFile(ref, { fetchImpl = globalThis.fetch } = {}) {
  const parsed = parseHfRef(ref);
  const { repo, filename } = parsed;

  const listing = await getHfTree(repo, { fetchImpl });
  const match = listing.find((item) => item.path === filename && item.type === "file");
  if (!match) {
    throw new Error(`File '${filename}' not found in HuggingFace repo '${repo}'.`);
  }

  // Prefer the LFS size when present; fall back to the plain entry size.
  const sizeBytes = match.lfs?.size ?? match.size ?? 0;
  const sha256 = match.lfs?.oid ?? "";
  const url = `https://huggingface.co/${repo}/resolve/main/${filename}`;

  return { repo, filename, url, sizeBytes, sha256, relativePath: filename };
}
|
|
75
|
+
|
|
76
|
+
/**
 * List every downloadable file of an MLX repo from the HF tree API.
 *
 * Dot-files (which includes `.gitattributes`) and `README.md` are left out.
 *
 * @param {string} repo - Repo id, e.g. `org/name`.
 * @param {object} [options]
 * @param {typeof fetch} [options.fetchImpl] - fetch implementation to use.
 * @returns {Promise<Array<{ repo: string, filename: string, url: string, sizeBytes: number, sha256: string, relativePath: string }>>}
 */
export async function resolveMlxRepo(repo, { fetchImpl = globalThis.fetch } = {}) {
  const listing = await getHfTree(repo, { fetchImpl });

  const descriptors = [];
  for (const item of listing) {
    if (item.type !== "file") continue;
    if (item.path.startsWith(".") || item.path === ".gitattributes" || item.path === "README.md") continue;
    descriptors.push({
      repo,
      filename: item.path,
      url: `https://huggingface.co/${repo}/resolve/main/${item.path}`,
      sizeBytes: item.lfs?.size ?? item.size ?? 0,
      sha256: item.lfs?.oid ?? "",
      relativePath: item.path,
    });
  }
  return descriptors;
}
|
|
91
|
+
|
|
92
|
+
/**
 * Fetch the recursive file tree of a model repo from the HuggingFace API.
 *
 * The request is aborted after 10 seconds.
 *
 * @param {string} repo - Repo id, e.g. `org/name`.
 * @param {object} [options]
 * @param {string} [options.branch="main"] - Revision to list.
 * @param {typeof fetch} [options.fetchImpl] - fetch implementation to use.
 * @returns {Promise<any>} The parsed JSON body of the response.
 * @throws {Error} When the response status is not OK.
 */
async function getHfTree(repo, { branch = "main", fetchImpl = globalThis.fetch } = {}) {
  const endpoint = `https://huggingface.co/api/models/${repo}/tree/${branch}?recursive=true`;
  const signal = AbortSignal.timeout(10000);

  const res = await fetchImpl(endpoint, { signal });
  if (!res.ok) {
    throw new Error(`HuggingFace API error: HTTP ${res.status} for ${repo}`);
  }
  const body = await res.json();
  return body;
}
|
|
98
|
+
|
|
99
|
+
/**
 * Turn a user-provided HuggingFace reference into a download plan.
 *
 * Resolution order:
 *   1. The reference names a `.gguf` file → plan for that single file.
 *   2. The repo tree contains `.gguf` files → plan for the first one listed.
 *   3. Otherwise → treat the repo as MLX and plan for all of its files.
 *
 * @param {string} input - HuggingFace reference (see parseHfRef).
 * @param {object} [options]
 * @param {typeof fetch} [options.fetchImpl] - fetch implementation to use.
 * @returns {Promise<{ id: string, repo: string, format: "gguf" | "mlx", files: object[], totalSizeBytes: number }>}
 */
export async function resolveHfDownload(input, { fetchImpl = globalThis.fetch } = {}) {
  const { repo, filename } = parseHfRef(input);
  const id = repo.split("/").pop() ?? repo;

  // Shared shape for both GGUF branches: exactly one resolved file.
  const singleGgufPlan = async (pathInRepo) => {
    const resolved = await resolveGgufFile(`${repo}/${pathInRepo}`, { fetchImpl });
    return {
      id,
      repo,
      format: "gguf",
      files: [resolved],
      totalSizeBytes: resolved.sizeBytes,
    };
  };

  if (filename && filename.endsWith(".gguf")) {
    return singleGgufPlan(filename);
  }

  const listing = await getHfTree(repo, { fetchImpl });
  const ggufEntries = listing.filter((item) => item.type === "file" && item.path.endsWith(".gguf"));
  if (ggufEntries.length > 0) {
    return singleGgufPlan(ggufEntries[0].path);
  }

  const files = await resolveMlxRepo(repo, { fetchImpl });
  let totalSizeBytes = 0;
  for (const file of files) totalSizeBytes += file.sizeBytes;
  return { id, repo, format: "mlx", files, totalSizeBytes };
}
|
|
137
|
+
|
|
138
|
+
/**
 * Download a resolved model into the HF hub cache by running the bundled
 * Python helper script with `python3`.
 *
 * The helper's stdout is read as newline-delimited JSON. Events of type
 * "progress", "complete" and "error" are handled; anything that is not valid
 * JSON is ignored.
 *
 * @param {object} model - Download plan from resolveHfDownload.
 * @param {object} [options]
 * @param {function} [options.onProgress] - ({ downloadedBytes, totalBytes, percentage, file }) => void
 * @returns {Promise<{ localDir: string, format: string }>}
 * @throws {Error} When the helper reports an error, exits with a non-zero
 *   code, or exits without ever reporting completion.
 */
export async function downloadToHfCache(model, options = {}) {
  await mkdir(HF_HUB_DIR, { recursive: true });

  const args = [HF_DOWNLOAD_SCRIPT, "--repo", model.repo, "--cache-dir", HF_HUB_DIR];
  if (model.format === "gguf") {
    args.push("--file", model.files[0].filename);
  }

  const onProgress = options.onProgress ?? (() => {});
  const totalBytes = model.totalSizeBytes;

  return new Promise((resolve, reject) => {
    // execFile enforces maxBuffer (1 MiB by default) on stdout/stderr and
    // kills the child when it is exceeded. A long download can stream more
    // progress output than that, so lift the cap.
    const child = execFile("python3", args, { env: process.env, maxBuffer: Infinity });

    let stdoutBuf = "";
    let downloadedBytes = 0;
    let currentFile = null;
    let settled = false;

    const succeed = (value) => {
      settled = true;
      resolve(value);
    };
    const fail = (error) => {
      settled = true;
      reject(error);
    };

    const handleLine = (line) => {
      if (!line) return;
      let event;
      try {
        event = JSON.parse(line);
      } catch {
        // Ignore non-JSON output (progress bars, etc.)
        return;
      }

      if (event?.type === "progress") {
        downloadedBytes = event.downloadedBytes ?? downloadedBytes;
        currentFile = event.file ?? currentFile;
        // Guard against an unknown/zero total, which would yield NaN.
        const percentage = totalBytes > 0 ? Math.min(100, Math.round((downloadedBytes / totalBytes) * 100)) : 0;
        try {
          onProgress({ downloadedBytes, totalBytes, percentage, file: currentFile });
        } catch {
          // A throwing progress callback must not abort the download.
        }
      } else if (event?.type === "complete") {
        succeed({ localDir: event.localDir, format: model.format });
      } else if (event?.type === "error") {
        fail(new Error(event.message));
      }
    };

    // Buffer and split on complete newlines so an event split across chunk
    // boundaries isn't silently dropped.
    child.stdout?.on("data", (chunk) => {
      stdoutBuf += String(chunk);
      let nl;
      while ((nl = stdoutBuf.indexOf("\n")) !== -1) {
        handleLine(stdoutBuf.slice(0, nl));
        stdoutBuf = stdoutBuf.slice(nl + 1);
      }
    });

    child.stderr?.on("data", () => {
      // stderr output is not part of the event protocol; ignore.
    });

    child.on("error", fail);

    // "close" (not "exit") fires only after the stdio streams have ended, so
    // every stdout line has been seen before the outcome is decided.
    child.on("close", (code) => {
      // Flush any final line that lacked a trailing newline.
      if (stdoutBuf.trim()) handleLine(stdoutBuf.trim());
      if (settled) return;
      if (code !== 0) {
        fail(new Error(`Download failed with exit code ${code}`));
      } else {
        // Clean exit but no "complete" event: without this the promise would
        // stay pending forever.
        fail(new Error("Download process exited without reporting completion"));
      }
    });
  });
}
|
package/src/managed.mjs
CHANGED
|
@@ -2,7 +2,7 @@ import { existsSync } from "node:fs";
|
|
|
2
2
|
import { BACKENDS } from "./backends.mjs";
|
|
3
3
|
import { commandExists } from "./exec.mjs";
|
|
4
4
|
|
|
5
|
-
export const MANAGED_BACKEND_IDS = ["
|
|
5
|
+
export const MANAGED_BACKEND_IDS = ["omlx"];
|
|
6
6
|
|
|
7
7
|
export async function scanManagedModels() {
|
|
8
8
|
const results = [];
|
|
@@ -22,10 +22,6 @@ export function hasLmStudioInstalled() {
|
|
|
22
22
|
return existsSync("/Applications/LM Studio.app");
|
|
23
23
|
}
|
|
24
24
|
|
|
25
|
-
export function hasOllamaInstalled() {
|
|
26
|
-
return commandExists("ollama");
|
|
27
|
-
}
|
|
28
|
-
|
|
29
25
|
export function hasOmlxInstalled() {
|
|
30
26
|
return commandExists("omlx");
|
|
31
27
|
}
|
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
// MLX model discovery + metadata — scans configured model directories for MLX
|
|
2
|
+
// model directories and parses their config.json.
|
|
3
|
+
// Ported from deprecated-offgrid-desktop/src/main/model-discovery.ts +
|
|
4
|
+
// mlx-metadata.ts (MLX subset only).
|
|
5
|
+
//
|
|
6
|
+
// This runs ALONGSIDE offgrid-ai's existing GGUF scan (scan.mjs scanGgufModels)
|
|
7
|
+
// — it does not replace it. The picker (main.mjs) will merge GGUF + MLX lists.
|
|
8
|
+
//
|
|
9
|
+
// An MLX model directory is one containing config.json + one or more
|
|
10
|
+
// *.safetensors files. HuggingFace Hub cache layout (models--org--name) is
|
|
11
|
+
// detected and scanned specially.
|
|
12
|
+
|
|
13
|
+
import { readdir, stat, readFile } from "node:fs/promises";
|
|
14
|
+
import { existsSync } from "node:fs";
|
|
15
|
+
import { join, basename } from "node:path";
|
|
16
|
+
import { homedir } from "node:os";
|
|
17
|
+
import { getModelScanDirs } from "./config.mjs";
|
|
18
|
+
import { inferSourceLabel, MIN_MODEL_SIZE_BYTES, EMBEDDING_MODEL_TYPES } from "./discovery-shared.mjs";
|
|
19
|
+
|
|
20
|
+
// ── Folder → backend mapping ──────────────────────────────────────────────
|
|
21
|
+
// The oMLX folder is oMLX-exclusive: models there are served by the oMLX
|
|
22
|
+
// managed backend, NOT by mlx-vlm. Every OTHER scan dir is format-based
|
|
23
|
+
// (GGUF → llama.cpp, MLX → mlx-vlm). So mlx-vlm scans all configured dirs
|
|
24
|
+
// EXCEPT the oMLX folder.
|
|
25
|
+
const OMLX_MODELS_DIR = join(homedir(), ".omlx", "models");

/**
 * True when `p` is the oMLX models directory itself or a path beneath it.
 *
 * @param {string} p - Path to test.
 * @returns {boolean}
 */
function isOmlxFolder(p) {
  if (p === OMLX_MODELS_DIR) return true;
  const prefix = OMLX_MODELS_DIR + "/";
  return p.startsWith(prefix);
}
|
|
29
|
+
|
|
30
|
+
// ── MLX directory detection ───────────────────────────────────────────────
|
|
31
|
+
|
|
32
|
+
/**
 * Decide whether `dir` is an MLX model directory: it must hold a
 * `config.json` and at least one entry whose name ends in `.safetensors`.
 * An unreadable directory counts as "not a model".
 *
 * @param {string} dir
 * @returns {Promise<boolean>}
 */
async function isMlxModelDir(dir) {
  const hasConfig = existsSync(join(dir, "config.json"));
  if (!hasConfig) return false;

  let names;
  try {
    names = await readdir(dir);
  } catch {
    return false;
  }
  for (const name of names) {
    if (name.endsWith(".safetensors")) return true;
  }
  return false;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Total size in bytes of the `.safetensors` files directly inside `dir`.
 * Returns 0 when the directory or any of those files cannot be read.
 *
 * @param {string} dir
 * @returns {Promise<number>}
 */
async function getMlxDirSizeBytes(dir) {
  try {
    const names = await readdir(dir);
    const weightFiles = names.filter((name) => name.endsWith(".safetensors"));
    const stats = await Promise.all(weightFiles.map((name) => stat(join(dir, name))));

    let total = 0;
    for (const { size } of stats) total += size;
    return total;
  } catch {
    return 0;
  }
}
|
|
58
|
+
|
|
59
|
+
// ── Recursive MLX scanner ─────────────────────────────────────────────────
|
|
60
|
+
|
|
61
|
+
/**
 * Walk `rootDir` looking for MLX model directories.
 *
 * Descends at most `maxDepth` levels. A directory recognised as an MLX model
 * is never descended into. Models below the minimum size and embedding
 * models are skipped. Dot-entries and non-directories are ignored. GGUF
 * files are not collected here.
 *
 * @param {string} rootDir - Directory to scan.
 * @param {string} sourceLabel - Label recorded on every model found.
 * @param {number} [maxDepth=3] - Maximum recursion depth.
 * @returns {Promise<object[]>} Model entries built by makeMlxModel.
 */
async function scanDirRecursiveForMlx(rootDir, sourceLabel, maxDepth = 3) {
  const found = [];
  if (!existsSync(rootDir)) return found;

  // Returns true when `candidate` is an MLX model dir (whether or not it was
  // kept), false when the caller should keep looking inside it.
  const considerModelDir = async (candidate, name) => {
    if (!(await isMlxModelDir(candidate))) return false;
    const sizeBytes = await getMlxDirSizeBytes(candidate);
    if (sizeBytes < MIN_MODEL_SIZE_BYTES) return true;
    if (await isEmbeddingMlxModel(join(candidate, "config.json"))) return true;
    found.push(makeMlxModel(candidate, name, sizeBytes, sourceLabel, rootDir));
    return true;
  };

  const visit = async (dir, depth) => {
    if (depth > maxDepth) return;

    let entries;
    try {
      entries = await readdir(dir, { withFileTypes: true });
    } catch {
      return;
    }

    // The scan root itself (depth 0) is never treated as a model.
    if (depth > 0 && (await considerModelDir(dir, basename(dir)))) return;

    for (const entry of entries) {
      const { name } = entry;
      if (name.startsWith(".") || name === "README.md" || name === ".gitattributes") continue;
      if (!entry.isDirectory()) continue;

      const childPath = join(dir, name);
      const wasModel = await considerModelDir(childPath, name);
      if (!wasModel) await visit(childPath, depth + 1);
    }
  };

  await visit(rootDir, 0);
  return found;
}
|
|
106
|
+
|
|
107
|
+
// ── HuggingFace Hub layout ────────────────────────────────────────────────
|
|
108
|
+
|
|
109
|
+
/**
 * Heuristic: `dir` is an HF Hub cache when it has at least one sub-directory
 * whose name starts with `models--`. Missing or unreadable dirs yield false.
 *
 * @param {string} dir
 * @returns {Promise<boolean>}
 */
async function looksLikeHfHub(dir) {
  if (!existsSync(dir)) return false;

  let entries;
  try {
    entries = await readdir(dir, { withFileTypes: true });
  } catch {
    return false;
  }
  for (const entry of entries) {
    if (entry.isDirectory() && entry.name.startsWith("models--")) return true;
  }
  return false;
}
|
|
119
|
+
|
|
120
|
+
/**
 * Scan an HF Hub cache directory for MLX models.
 *
 * Expected layout: `models--org--name/snapshots/<hash>/<files>`. For each
 * `models--*` directory the first snapshot that is a valid MLX model dir is
 * used; models below the minimum size and embedding models are skipped.
 * If reading fails part-way, the models collected so far are returned.
 *
 * @param {string} dir - HF Hub cache directory.
 * @param {string} sourceLabel - Label recorded on every model found.
 * @returns {Promise<object[]>}
 */
async function scanHfHubForMlx(dir, sourceLabel) {
  const found = [];
  if (!existsSync(dir)) return found;

  const PREFIX = "models--";

  // First snapshot (directory, or symlink resolving to one) that holds a
  // usable MLX model; null when none qualifies.
  const pickSnapshot = async (snapshotsDir) => {
    const listing = await readdir(snapshotsDir, { withFileTypes: true });
    for (const item of listing) {
      if (!item.isDirectory() && !item.isSymbolicLink()) continue;
      const candidate = join(snapshotsDir, item.name);
      // stat() follows symlinks; a dangling link simply yields null here.
      const info = await stat(candidate).catch(() => null);
      if (info?.isDirectory() && (await isMlxModelDir(candidate))) return candidate;
    }
    return null;
  };

  try {
    const entries = await readdir(dir, { withFileTypes: true });
    for (const entry of entries) {
      if (!entry.isDirectory() || !entry.name.startsWith(PREFIX)) continue;

      const label = entry.name.slice(PREFIX.length).split("--").join("/");
      const snapshotsDir = join(dir, entry.name, "snapshots");
      if (!existsSync(snapshotsDir)) continue;

      const snapshotPath = await pickSnapshot(snapshotsDir);
      if (!snapshotPath) continue;

      const sizeBytes = await getMlxDirSizeBytes(snapshotPath);
      if (sizeBytes < MIN_MODEL_SIZE_BYTES) continue;
      if (await isEmbeddingMlxModel(join(snapshotPath, "config.json"))) continue;

      found.push({
        id: `${sourceLabel}:${entry.name}`,
        label,
        path: snapshotPath,
        filePath: snapshotPath,
        sizeBytes,
        backend: "mlx-vlm",
        format: "mlx",
        source: sourceLabel,
      });
    }
  } catch {
    // Can't read — return what we have.
  }
  return found;
}
|
|
168
|
+
|
|
169
|
+
// ── Embedding model filtering for MLX ─────────────────────────────────────
|
|
170
|
+
|
|
171
|
+
/**
 * Decide from a model's config.json whether it is an embedding model.
 *
 * It is one when the text config's `model_type` or the first entry of
 * `architectures` is in EMBEDDING_MODEL_TYPES, or when that architecture name
 * contains "bert". A missing or unparsable config yields false.
 *
 * @param {string} configPath - Path to the model's config.json.
 * @returns {Promise<boolean>}
 */
async function isEmbeddingMlxModel(configPath) {
  if (!existsSync(configPath)) return false;

  try {
    const raw = await readFile(configPath, "utf-8");
    const config = JSON.parse(raw);

    const textSection = config.text_config ?? config;
    const modelType = String(textSection.model_type ?? "").toLowerCase();
    if (EMBEDDING_MODEL_TYPES.has(modelType)) return true;

    const firstArch = Array.isArray(config.architectures) ? config.architectures[0] : "";
    const archName = String(firstArch).toLowerCase();
    if (EMBEDDING_MODEL_TYPES.has(archName)) return true;
    return archName.includes("bert");
  } catch {
    return false;
  }
}
|
|
185
|
+
|
|
186
|
+
// ── MLX model entry builder ───────────────────────────────────────────────
|
|
187
|
+
|
|
188
|
+
/**
 * Build the picker entry for an MLX model directory.
 *
 * The id is `<sourceLabel>:<path of dir relative to rootDir>`. The root is
 * removed only when it is a true leading prefix of `dir` (a trailing slash on
 * `rootDir` is tolerated); otherwise the full `dir` is used in the id.
 *
 * @param {string} dir - Absolute path of the model directory.
 * @param {string} label - Display label.
 * @param {number} sizeBytes - Total weight size in bytes.
 * @param {string} sourceLabel - Label of the scan source.
 * @param {string} rootDir - Scan root that `dir` was found under.
 * @returns {{ id: string, label: string, path: string, filePath: string, sizeBytes: number, backend: string, format: string, source: string }}
 */
function makeMlxModel(dir, label, sizeBytes, sourceLabel, rootDir) {
  let relativeDir = dir;
  if (rootDir != null) {
    // Normalise "root/" and "root" to the same prefix, then strip it only
    // from the start — String.replace would also hit a match mid-path.
    const prefix = `${String(rootDir).replace(/\/+$/, "")}/`;
    if (dir.startsWith(prefix)) relativeDir = dir.slice(prefix.length);
  }
  return {
    id: `${sourceLabel}:${relativeDir}`,
    label,
    path: dir,
    filePath: dir,
    sizeBytes,
    backend: "mlx-vlm",
    format: "mlx",
    source: sourceLabel,
  };
}
|
|
200
|
+
|
|
201
|
+
// ── Public API ─────────────────────────────────────────────────────────────
|
|
202
|
+
|
|
203
|
+
/**
 * Discover MLX models across the scan directories.
 *
 * Uses `dirs` when given, otherwise the directories from getModelScanDirs().
 * The oMLX folder is excluded. Directories that look like an HF Hub cache
 * are scanned with the hub-aware scanner, all others recursively. The result
 * is flat and de-duplicated by `filePath`, keeping the first occurrence.
 *
 * @param {string[]} [dirs] - Directories to scan.
 * @returns {Promise<object[]>}
 */
export async function scanMlxModels(dirs) {
  const configured = dirs ?? (await getModelScanDirs());
  const scanDirs = configured.filter((candidate) => !isOmlxFolder(candidate));

  const scanOne = async (dir) => {
    const label = inferSourceLabel(dir);
    const isHub = await looksLikeHfHub(dir);
    return isHub ? scanHfHubForMlx(dir, label) : scanDirRecursiveForMlx(dir, label);
  };
  const perDir = await Promise.all(scanDirs.map(scanOne));

  // Map preserves insertion order, so the first model seen for a path wins.
  const byPath = new Map();
  for (const model of perDir.flat()) {
    if (!byPath.has(model.filePath)) byPath.set(model.filePath, model);
  }
  return [...byPath.values()];
}
|
|
227
|
+
|
|
228
|
+
// ── MLX capability detection ─────────────────────────────────────────────
|
|
229
|
+
|
|
230
|
+
/**
 * Read `<modelDir>/config.json` and derive the model's capabilities from it.
 *
 * When the config is missing or cannot be read/parsed/interpreted, a neutral
 * result is returned: no thinking, no vision, unknown context length and
 * architecture.
 *
 * @param {string} modelDir - MLX model directory.
 * @returns {Promise<{ thinking: boolean, vision: boolean, contextLength: number | null, architecture: string | null }>}
 */
export async function detectMlxCapabilities(modelDir) {
  const unknown = () => ({ thinking: false, vision: false, contextLength: null, architecture: null });

  const configPath = join(modelDir, "config.json");
  if (!existsSync(configPath)) return unknown();

  try {
    const raw = await readFile(configPath, "utf-8");
    return detectMlxCapabilitiesFromConfig(JSON.parse(raw), modelDir);
  } catch {
    return unknown();
  }
}
|
|
244
|
+
|
|
245
|
+
/**
 * Derive model capabilities from a parsed MLX `config.json`.
 *
 * - `vision`: the config has a `vision_config`, any of the image/video/
 *   vision-start token ids, or a `model_type` (top-level or text config)
 *   containing "vl" or "vision".
 * - `thinking`: name-based heuristic (qwen3, gemma-4/gemma4, deepseek-r1/r2).
 *   The name is `config._name_or_path` when present. Otherwise the whole
 *   `modelDir` path is searched, not just its last segment: in an HF Hub
 *   cache the last segment is a snapshot hash, and the model name only
 *   appears in the `models--org--name` ancestor.
 * - `architecture`: first entry of `config.architectures`, or null.
 * - `contextLength`: largest positive number among `max_position_embeddings`
 *   and `sliding_window` (text config and top level), or null.
 *
 * @param {object} config - Parsed config.json contents.
 * @param {string} [modelDir] - Model directory, used as a name fallback.
 * @returns {{ thinking: boolean, vision: boolean, contextLength: number | null, architecture: string | null }}
 */
export function detectMlxCapabilitiesFromConfig(config, modelDir) {
  const textConfig = config.text_config ?? config;
  const nameSource = String(config._name_or_path ?? modelDir ?? "");

  const modelType = String(config.model_type ?? "").toLowerCase();
  const textModelType = String(textConfig.model_type ?? "").toLowerCase();

  const vision = Boolean(
    config.vision_config ||
      config.image_token_id != null ||
      config.video_token_id != null ||
      config.vision_start_token_id != null ||
      modelType.includes("vl") ||
      modelType.includes("vision") ||
      textModelType.includes("vl") ||
      textModelType.includes("vision"),
  );

  const thinking = /qwen3|gemma-4|gemma4|deepseek-r[12]/i.test(nameSource);

  const architectures = Array.isArray(config.architectures) ? config.architectures : [];
  const architecture = architectures[0] ?? null;

  const candidates = [
    textConfig.max_position_embeddings,
    textConfig.sliding_window,
    config.max_position_embeddings,
    config.sliding_window,
  ].filter((v) => typeof v === "number" && v > 0);
  const contextLength = candidates.length > 0 ? Math.max(...candidates) : null;

  return { thinking, vision, contextLength, architecture };
}
|
|
280
|
+
|
|
281
|
+
/**
 * Choose a default context length for an MLX model, capped by installed RAM.
 *
 * Without a usable trained context the default is 8192. Otherwise the
 * trained context is capped at 4096 below 12 GB, 8192 below 16 GB and 16384
 * below 32 GB of RAM; from 32 GB up it is used as-is.
 *
 * @param {number | null | undefined} trainedCtx - Context length the model was trained with.
 * @param {number} ramGb - Installed RAM in GB.
 * @returns {number}
 */
export function defaultMlxContextLength(trainedCtx, ramGb) {
  if (!trainedCtx || trainedCtx <= 0) return 8192;

  // [RAM upper bound (exclusive), context cap], checked in ascending order.
  const tiers = [
    [12, 4096],
    [16, 8192],
    [32, 16384],
  ];
  for (const [ramLimit, cap] of tiers) {
    if (ramGb < ramLimit) return Math.min(trainedCtx, cap);
  }
  return trainedCtx;
}
|