offgrid-ai 0.9.6 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -6
- package/package.json +4 -3
- package/resources/hf-download.py +79 -0
- package/resources/mlxvlm-server-wrapper.py +112 -0
- package/resources/recommendations.json +60 -0
- package/src/backend-installers.mjs +1 -16
- package/src/backends.mjs +18 -45
- package/src/benchmark/finalize.mjs +3 -90
- package/src/benchmark/flow.mjs +3 -4
- package/src/benchmark/metrics.mjs +0 -44
- package/src/benchmark/prepare.mjs +1 -1
- package/src/benchmark.mjs +3 -1
- package/src/commands/main.mjs +7 -7
- package/src/commands/models.mjs +21 -18
- package/src/commands/onboard.mjs +67 -9
- package/src/commands/run.mjs +20 -5
- package/src/commands/status.mjs +1 -1
- package/src/config.mjs +11 -2
- package/src/discovery-shared.mjs +44 -0
- package/src/hardware.mjs +49 -0
- package/src/harness-pi.mjs +25 -11
- package/src/huggingface.mjs +209 -0
- package/src/managed.mjs +1 -5
- package/src/mlx-discovery.mjs +294 -0
- package/src/mlx-flags.mjs +93 -0
- package/src/model-catalog.mjs +78 -11
- package/src/model-name.mjs +7 -25
- package/src/model-presenters.mjs +114 -38
- package/src/process.mjs +129 -32
- package/src/profile-setup.mjs +105 -0
- package/src/profiles.mjs +30 -0
- package/src/recommendations.mjs +56 -14
- package/src/scan.mjs +43 -8
package/src/profile-setup.mjs
CHANGED
|
@@ -8,6 +8,7 @@ import { pc, formatBytes, renderRows, renderSection } from "./ui.mjs";
|
|
|
8
8
|
import { detectCapabilities } from "./autodetect.mjs";
|
|
9
9
|
import { matchDrafter } from "./scan.mjs";
|
|
10
10
|
import { scanGgufModels } from "./scan.mjs";
|
|
11
|
+
import { estimateMemoryMb } from "./mlx-flags.mjs";
|
|
11
12
|
|
|
12
13
|
const execFileAsync = promisify(execFile);
|
|
13
14
|
|
|
@@ -297,3 +298,107 @@ function detectionSummary(caps) {
|
|
|
297
298
|
/**
 * One-line, human-readable summary of the sampling settings.
 *
 * @param {{ temperature: *, topP: *, topK: * }} flags - Sampling flags to describe.
 * @returns {string} Text of the form "temp T, top-p P, top-k K".
 */
function samplingSummary(flags) {
  const { temperature, topP, topK } = flags;
  return [`temp ${temperature}`, `top-p ${topP}`, `top-k ${topK}`].join(", ");
}
|
|
301
|
+
|
|
302
|
+
// ── MLX profile configuration ─────────────────────────────────────────────
|
|
303
|
+
|
|
304
|
+
/**
 * Walk the user through configuring an mlx-vlm profile.
 *
 * Prints a summary of the detected model, optionally asks whether thinking
 * mode should be enabled (only when the profile advertises the capability),
 * asks for the context window size (1024 – 1048576 tokens), shows a memory
 * estimate plus the resulting defaults, and finally asks for confirmation.
 *
 * @param {object} prompt - Prompt helper exposing async `yesNo` and `number`.
 * @param {object} profile - Profile to start from; read for `label`,
 *   `capabilities`, `flags`, `backend`, `baseUrl` and `commandArgv`.
 * @returns {Promise<object|null>} The configured profile, or `null` when the
 *   user declines to save.
 */
export async function configureMlxProfile(prompt, profile) {
  // Small local helpers so every section is rendered the same way.
  const printSection = (title, rows) => {
    console.log(renderSection(title, renderRows(rows)));
  };
  const asTokens = (count) => `${count} tokens`;

  let draft = profile;

  console.log("");
  printSection("Model setup", [
    ["Model", pc.bold(profile.label)],
    ["Detected", mlxDetectionSummary(draft.capabilities)],
    ["Context", asTokens(draft.flags.ctxSize)],
  ]);
  console.log(pc.dim("Larger context windows use more memory. You can edit the profile later if needed.\n"));

  if (draft.capabilities.vision) {
    printSection("Vision detected", [
      ["Capability", "image / multimodal input"],
      ["Note", "mlx-vlm loads vision from the model directory automatically."],
    ]);
  }

  if (draft.capabilities.thinking) {
    console.log("");
    printSection("Thinking mode", [
      ["Flag", "--enable-thinking"],
      ["Default", "on for Qwen 3 / Gemma 4 / DeepSeek-R class models"],
    ]);
    const wantsThinking = await prompt.yesNo("Enable thinking mode?", true);
    draft = await applyMlxThinkingToggle(draft, wantsThinking);
  }

  const chosenCtxSize = await prompt.number("Context window tokens", draft.flags.ctxSize, 1024, 1048576);
  draft = applyMlxContextSize(draft, chosenCtxSize);

  console.log("\n" + renderMlxMemoryEstimate(draft));

  console.log("");
  printSection("Defaults", [
    ["Backend", draft.backend],
    ["Endpoint", draft.baseUrl],
    ["Context", asTokens(draft.flags.ctxSize)],
    // "on" only when the model supports thinking AND the flag is really in argv.
    [
      "Thinking",
      draft.capabilities.thinking && draft.commandArgv.includes("--enable-thinking") ? "on" : "off",
    ],
    ["Vision", draft.capabilities.vision ? "yes" : "no"],
  ]);

  const confirmed = await prompt.yesNo("Save profile with these settings?", true);
  return confirmed ? draft : null;
}
|
|
352
|
+
|
|
353
|
+
/**
 * Return a copy of `profile` whose launch arguments reflect the requested
 * thinking-mode setting.
 *
 * Profiles whose capabilities do not include thinking are returned as-is
 * (same object). Otherwise the argv is recomputed through
 * `computeMlxVlmFlags` using the profile's model path, port and context size.
 *
 * @param {object} profile - Profile with `capabilities`, `flags`, `modelPath`.
 * @param {boolean} enabled - Whether thinking mode should be switched on.
 * @returns {Promise<object>} The original profile or an updated shallow copy.
 */
async function applyMlxThinkingToggle(profile, enabled) {
  const supportsThinking = profile.capabilities.thinking;
  if (!supportsThinking) {
    return profile;
  }

  const { computeMlxVlmFlags } = await import("./mlx-flags.mjs");
  const launchOptions = {
    port: profile.flags.port,
    ctxSize: profile.flags.ctxSize,
    thinkingEnabled: enabled,
  };
  const { args: commandArgv } = computeMlxVlmFlags(profile.modelPath, launchOptions);

  const capabilities = { ...profile.capabilities, thinkingEnabled: enabled };
  return { ...profile, commandArgv, capabilities };
}
|
|
367
|
+
|
|
368
|
+
/**
 * Return a copy of `profile` with a new context size applied to both its
 * `flags` and its launch arguments (`--max-kv-size`).
 *
 * The input profile is never modified: `commandArgv` is copied before it is
 * edited, because a shallow object spread would otherwise share the caller's
 * array and leak the edit back into the original profile (for example when
 * the user later declines to save the configured profile).
 *
 * @param {object} profile - Profile with `flags` and `commandArgv`.
 * @param {number} ctxSize - Context window size in tokens.
 * @returns {object} New profile with updated `flags`, `baseUrl`, `commandArgv`.
 */
function applyMlxContextSize(profile, ctxSize) {
  const flags = { ...profile.flags, ctxSize };
  const commandArgv = [...(profile.commandArgv ?? [])];

  const flagIndex = commandArgv.indexOf("--max-kv-size");
  const hasFlag = flagIndex !== -1;
  const hasValue = hasFlag && commandArgv[flagIndex + 1] != null;

  if (hasValue) {
    commandArgv[flagIndex + 1] = String(ctxSize);
  } else if (ctxSize && ctxSize > 0) {
    if (hasFlag) {
      // Flag present without a value: fill the value in rather than
      // appending a second "--max-kv-size".
      commandArgv[flagIndex + 1] = String(ctxSize);
    } else {
      commandArgv.push("--max-kv-size", String(ctxSize));
    }
  }

  return {
    ...profile,
    flags,
    baseUrl: baseUrlForFlags(flags),
    commandArgv,
  };
}
|
|
383
|
+
|
|
384
|
+
/**
 * Render the "Memory estimate" section for a profile.
 *
 * When the profile has no (or a zero) `modelSizeBytes`, a dimmed hint is
 * shown instead. Otherwise the total comes from `estimateMemoryMb` and the
 * overhead is whatever remains after subtracting the model itself (never
 * negative).
 *
 * @param {object} profile - Profile; only `modelSizeBytes` is read.
 * @returns {string} Rendered section text.
 */
function renderMlxMemoryEstimate(profile) {
  const modelBytes = profile.modelSizeBytes || 0;
  if (!modelBytes) {
    const hint = pc.dim("Model size unknown — save the profile to estimate.");
    return renderSection("Memory estimate", hint);
  }

  const totalBytes = estimateMemoryMb(modelBytes) * 1024 * 1024;
  const overheadBytes = Math.max(0, totalBytes - modelBytes);
  const rows = [
    ["Estimated total", pc.bold(`~${formatBytes(totalBytes)}`)],
    ["Model", formatBytes(modelBytes)],
    ["Overhead", `~${formatBytes(overheadBytes)} (KV cache, APC, runtime)`],
  ];
  return renderSection("Memory estimate", renderRows(rows));
}
|
|
397
|
+
|
|
398
|
+
/**
 * Short description of what was detected for an MLX model.
 *
 * @param {object} caps - Capabilities; reads `architecture`, `thinking`, `vision`.
 * @returns {string} The detected traits joined by " · ", or "standard MLX"
 *   when none of them is set.
 */
function mlxDetectionSummary(caps) {
  const traits = [
    caps.architecture,
    caps.thinking && "thinking",
    caps.vision && "vision",
  ].filter(Boolean);

  if (traits.length === 0) {
    return "standard MLX";
  }
  return traits.join(" · ");
}
|
package/src/profiles.mjs
CHANGED
|
@@ -4,6 +4,8 @@ import { join } from "node:path";
|
|
|
4
4
|
import { PROFILE_DIR, RUN_DIR, LOG_DIR } from "./config.mjs";
|
|
5
5
|
import { backendFor, baseUrlForFlags, defaultFlagsForBackend } from "./backends.mjs";
|
|
6
6
|
import { computeFlags } from "./autodetect.mjs";
|
|
7
|
+
import { detectMlxCapabilities, defaultMlxContextLength } from "./mlx-discovery.mjs";
|
|
8
|
+
import { detectHardware } from "./hardware.mjs";
|
|
7
9
|
import { readJson, writeJson } from "./json.mjs";
|
|
8
10
|
|
|
9
11
|
// ── Path helpers ───────────────────────────────────────────────────────────
|
|
@@ -161,6 +163,34 @@ export async function createProfileFromModel(model, backendId, drafterPath) {
|
|
|
161
163
|
});
|
|
162
164
|
}
|
|
163
165
|
|
|
166
|
+
// ── Auto-create profile from a discovered MLX model ────────────────────────
|
|
167
|
+
|
|
168
|
+
/**
 * Build a normalized mlx-vlm profile for a discovered MLX model.
 *
 * Capabilities are detected from the model path, the default context size is
 * derived from the model's context length and the machine's total RAM (in
 * GiB), and the launch arguments come from `computeMlxVlmFlags` using the
 * default port. Thinking mode starts enabled whenever it was detected.
 *
 * @param {object} model - Discovered model; reads `filePath`, `label`,
 *   `source` and `sizeBytes`.
 * @returns {Promise<object>} Result of `normalizeProfile` for the new profile.
 */
export async function createProfileFromMlxModel(model) {
  const { computeMlxVlmFlags, DEFAULT_PORT } = await import("./mlx-flags.mjs");

  const capabilities = await detectMlxCapabilities(model.filePath);
  const modelContextLength = capabilities.contextLength;
  const totalRamGb = detectHardware().totalRamBytes / (1024 ** 3);
  const ctxSize = defaultMlxContextLength(modelContextLength, totalRamGb);

  const launchOptions = {
    port: DEFAULT_PORT,
    ctxSize,
    thinkingEnabled: capabilities.thinking,
  };
  const { args: commandArgv } = computeMlxVlmFlags(model.filePath, launchOptions);

  const draft = {
    id: slugFromLabel(model.label),
    label: model.label,
    backend: "mlx-vlm",
    providerId: "mlx-vlm",
    modelAlias: model.label,
    source: model.source,
    modelPath: model.filePath,
    // No projector or drafter path is set for MLX profiles.
    mmprojPath: null,
    drafterPath: null,
    modelSizeBytes: model.sizeBytes,
    capabilities,
    flags: { host: "127.0.0.1", port: DEFAULT_PORT, ctxSize },
    commandArgv,
  };
  return normalizeProfile(draft);
}
|
|
193
|
+
|
|
164
194
|
function summarizeCapabilities(caps) {
|
|
165
195
|
return {
|
|
166
196
|
architecture: caps.architecture,
|
package/src/recommendations.mjs
CHANGED
|
@@ -1,17 +1,59 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
1
|
+
import { readFileSync } from "node:fs";
|
|
2
|
+
import { dirname, join } from "node:path";
|
|
3
|
+
import { fileURLToPath } from "node:url";
|
|
4
|
+
import { detectHardware } from "./hardware.mjs";
|
|
5
|
+
|
|
6
|
+
// Bytes per GiB (1024^3); multiplied with each entry's `minRamGb` before comparing against `totalRamBytes`.
const GB = 1024 ** 3;
|
|
7
|
+
|
|
8
|
+
// Path to resources/recommendations.json, resolved one directory above this module's own directory.
const RECOMMENDATIONS_PATH = join(dirname(fileURLToPath(import.meta.url)), "..", "resources", "recommendations.json");
|
|
9
|
+
|
|
10
|
+
/**
 * Read the curated model entries from the recommendations JSON file.
 *
 * This is deliberately best-effort: a missing, unreadable or malformed file
 * yields an empty list so that recommendations simply disappear instead of
 * crashing the CLI.
 *
 * @returns {object[]} The file's `models` array, or `[]` when it is
 *   unavailable or not an array.
 */
function loadRecommendations() {
  try {
    const parsed = JSON.parse(readFileSync(RECOMMENDATIONS_PATH, "utf8"));
    const models = parsed?.models;
    // Callers immediately call `.filter` on the result, so anything that is
    // not an array (e.g. an object or a string) must not leak through.
    return Array.isArray(models) ? models : [];
  } catch {
    return [];
  }
}
|
|
18
|
+
|
|
19
|
+
/**
 * Every curated model entry, unfiltered.
 *
 * @returns {object[]} Whatever `loadRecommendations` produces.
 */
export function getModelEntries() {
  const everyEntry = loadRecommendations();
  return everyEntry;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Recommend the curated models from the highest RAM tier this machine can run.
 *
 * @param {{ totalRamBytes: number }} hardware - Detected hardware.
 * @returns {object[]} Entries whose `minRamGb` equals the largest fitting
 *   tier, ordered by label; `[]` when nothing fits.
 */
export function recommendModels(hardware) {
  const fitsInRam = (entry) => entry.minRamGb * GB <= hardware.totalRamBytes;
  const candidates = loadRecommendations().filter(fitsInRam);
  if (candidates.length === 0) {
    return [];
  }

  const tiers = candidates.map((entry) => entry.minRamGb);
  const topTier = Math.max(...tiers);

  // Everything in the top tier is an equally valid pick; ordering by label
  // keeps the result stable no matter how the JSON file is ordered.
  const topTierEntries = candidates.filter((entry) => entry.minRamGb === topTier);
  topTierEntries.sort((left, right) => left.label.localeCompare(right.label));
  return topTierEntries;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Choose which model format to use for an entry on the given machine.
 *
 * On macOS/arm64 the MLX build is preferred when the entry has one; in every
 * other case GGUF is used if available.
 *
 * @param {{ mlx?: *, gguf?: * }} entry - Curated model entry.
 * @param {{ platform: string, arch: string }} hardware - Detected hardware.
 * @returns {"mlx"|"gguf"|null} Selected format, or `null` when none applies.
 */
export function selectFormat(entry, hardware) {
  const isAppleSilicon = hardware.platform === "darwin" && hardware.arch === "arm64";
  if (isAppleSilicon && entry.mlx) {
    return "mlx";
  }
  return entry.gguf ? "gguf" : null;
}
|
|
47
|
+
|
|
48
|
+
/**
 * The single top recommendation for a machine.
 *
 * @param {object} [hardware] - Hardware description; detected via
 *   `detectHardware()` when omitted or nullish.
 * @returns {object|null} First entry from `recommendModels`, or `null`.
 */
export function recommendedModel(hardware) {
  const machine = hardware ?? detectHardware();
  const [topPick] = recommendModels(machine);
  return topPick ?? null;
}
|
|
14
53
|
|
|
15
|
-
|
|
16
|
-
|
|
54
|
+
/**
 * Every curated model that fits in the machine's RAM, best first.
 *
 * @param {{ totalRamBytes: number }} hardware - Detected hardware.
 * @returns {object[]} Fitting entries ordered by `minRamGb` descending, with
 *   ties broken by label.
 */
export function allFittingModels(hardware) {
  const byTierThenLabel = (left, right) => {
    const tierDelta = right.minRamGb - left.minRamGb;
    return tierDelta || left.label.localeCompare(right.label);
  };

  const candidates = loadRecommendations().filter(
    (entry) => entry.minRamGb * GB <= hardware.totalRamBytes,
  );
  candidates.sort(byTierThenLabel);
  return candidates;
}
|
package/src/scan.mjs
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import { statSync } from "node:fs";
|
|
2
|
-
import { readdir } from "node:fs/promises";
|
|
2
|
+
import { readdir, stat } from "node:fs/promises";
|
|
3
3
|
import { basename, dirname, join } from "node:path";
|
|
4
4
|
import { getModelScanDirs } from "./config.mjs";
|
|
5
5
|
import { readGgufMetadata } from "./gguf.mjs";
|
|
6
6
|
import { parseModelName } from "./model-name.mjs";
|
|
7
|
+
import { inferSourceLabel, MIN_MODEL_SIZE_BYTES, EMBEDDING_MODEL_TYPES } from "./discovery-shared.mjs";
|
|
7
8
|
|
|
8
9
|
// ── Scan for GGUF models and MTP drafters ────────────────────────────────
|
|
9
10
|
|
|
@@ -13,7 +14,8 @@ export async function scanGgufModels(dirs) {
|
|
|
13
14
|
const allDrafters = [];
|
|
14
15
|
|
|
15
16
|
for (const root of scanDirs) {
|
|
16
|
-
const
|
|
17
|
+
const sourceLabel = inferSourceLabel(root);
|
|
18
|
+
const { models, drafters } = await scanOneDir(root, sourceLabel);
|
|
17
19
|
allModels.push(...models);
|
|
18
20
|
allDrafters.push(...drafters);
|
|
19
21
|
}
|
|
@@ -36,7 +38,7 @@ export async function scanGgufModels(dirs) {
|
|
|
36
38
|
return { models, drafters };
|
|
37
39
|
}
|
|
38
40
|
|
|
39
|
-
async function scanOneDir(root) {
|
|
41
|
+
async function scanOneDir(root, sourceLabel = "local-gguf") {
|
|
40
42
|
const files = await findFiles(root, (path) => path.toLowerCase().endsWith(".gguf"));
|
|
41
43
|
const mmprojs = files.filter((path) => basename(path).toLowerCase().includes("mmproj"));
|
|
42
44
|
const candidates = files.filter((path) => !basename(path).toLowerCase().includes("mmproj"));
|
|
@@ -49,11 +51,17 @@ async function scanOneDir(root) {
|
|
|
49
51
|
const mmprojPath = mmprojs.find((candidate) => dirname(candidate) === dir) ?? null;
|
|
50
52
|
const name = basename(path).replace(/\.gguf$/i, "");
|
|
51
53
|
const sizeBytes = statSync(path).size;
|
|
54
|
+
if (sizeBytes < MIN_MODEL_SIZE_BYTES) continue;
|
|
52
55
|
const parsed = parseModelName(name, "local-gguf");
|
|
53
56
|
|
|
54
|
-
// Read GGUF metadata to detect drafter architecture
|
|
57
|
+
// Read GGUF metadata to detect drafter architecture and embeddings
|
|
55
58
|
const meta = safeReadGgufMetadata(path);
|
|
56
59
|
const architecture = typeof meta["general.architecture"] === "string" ? meta["general.architecture"] : null;
|
|
60
|
+
const contextLength = architecture && typeof meta[`${architecture}.context_length`] === "number"
|
|
61
|
+
? meta[`${architecture}.context_length`]
|
|
62
|
+
: null;
|
|
63
|
+
|
|
64
|
+
if (isEmbeddingArchitecture(architecture, name)) continue;
|
|
57
65
|
|
|
58
66
|
if (architecture === "gemma4-assistant" || architecture === "gemma4_assistant") {
|
|
59
67
|
// This is an MTP drafter model, not a main model
|
|
@@ -66,7 +74,7 @@ async function scanOneDir(root) {
|
|
|
66
74
|
architecture,
|
|
67
75
|
targetHint: drafterTargetHint(name),
|
|
68
76
|
backend: "llama-cpp",
|
|
69
|
-
source:
|
|
77
|
+
source: sourceLabel,
|
|
70
78
|
});
|
|
71
79
|
} else {
|
|
72
80
|
models.push({
|
|
@@ -76,8 +84,9 @@ async function scanOneDir(root) {
|
|
|
76
84
|
aliasSuggestion: parsed.id,
|
|
77
85
|
quant: parsed.quant,
|
|
78
86
|
sizeBytes,
|
|
87
|
+
contextLength,
|
|
79
88
|
backend: "llama-cpp",
|
|
80
|
-
source:
|
|
89
|
+
source: sourceLabel,
|
|
81
90
|
});
|
|
82
91
|
}
|
|
83
92
|
}
|
|
@@ -85,6 +94,26 @@ async function scanOneDir(root) {
|
|
|
85
94
|
return { models, drafters };
|
|
86
95
|
}
|
|
87
96
|
|
|
97
|
+
// ── Embedding model filtering ─────────────────────────────────────────────
|
|
98
|
+
|
|
99
|
+
// Case-insensitive filename patterns that mark embedding / reranker models.
// Every fragment must sit at the start of the name or directly after a "-"
// or "_" separator.
const EMBEDDING_FILENAME_PATTERNS = [
  "bge[-_]",
  "jina[-_]",
  "e5[-_]",
  "gte[-_]",
  "all[-_]minilm",
  "mpnet",
  "nomic[-_]embed",
  "embed",
  "rerank",
].map((fragment) => new RegExp(`(?:^|[-_])${fragment}`, "i"));
|
|
110
|
+
|
|
111
|
+
/**
 * Decide whether a model is an embedding / reranker model that should be
 * excluded from the list of runnable models.
 *
 * @param {string|null} architecture - Architecture name from the model
 *   metadata; matched case-insensitively against `EMBEDDING_MODEL_TYPES`.
 * @param {string} [filename=""] - Model file name, checked against
 *   `EMBEDDING_FILENAME_PATTERNS`.
 * @returns {boolean} `true` when either the architecture or the name matches.
 */
export function isEmbeddingArchitecture(architecture, filename = "") {
  const isKnownEmbeddingType =
    Boolean(architecture) && EMBEDDING_MODEL_TYPES.has(architecture.toLowerCase());
  if (isKnownEmbeddingType) {
    return true;
  }

  const normalizedName = filename.toLowerCase();
  for (const pattern of EMBEDDING_FILENAME_PATTERNS) {
    if (pattern.test(normalizedName)) {
      return true;
    }
  }
  return false;
}
|
|
116
|
+
|
|
88
117
|
// ── Match drafters to target models ────────────────────────────────────
|
|
89
118
|
|
|
90
119
|
// Map a drafter filename to a regex that matches its target model filenames.
|
|
@@ -137,8 +166,14 @@ async function findFiles(root, predicate) {
|
|
|
137
166
|
}
|
|
138
167
|
for (const entry of entries) {
|
|
139
168
|
const path = join(dir, entry.name);
|
|
140
|
-
if (entry.isDirectory()
|
|
141
|
-
|
|
169
|
+
if (entry.isDirectory() || entry.isSymbolicLink()) {
|
|
170
|
+
// Follow symlinks (HF cache uses them) and avoid recursion loops.
|
|
171
|
+
const stats = await stat(path).catch(() => null);
|
|
172
|
+
if (stats?.isDirectory()) await walk(path);
|
|
173
|
+
else if (stats?.isFile() && predicate(path)) result.push(path);
|
|
174
|
+
} else if (entry.isFile() && predicate(path)) {
|
|
175
|
+
result.push(path);
|
|
176
|
+
}
|
|
142
177
|
}
|
|
143
178
|
}
|
|
144
179
|
await walk(root);
|