offgrid-ai 0.3.15 → 0.3.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/autodetect.mjs +11 -5
- package/src/cli.mjs +100 -246
- package/src/profile-setup.mjs +118 -13
- package/src/profiles.mjs +18 -2
package/package.json
CHANGED
package/src/autodetect.mjs
CHANGED
|
@@ -7,20 +7,26 @@ import { readGgufMetadata } from "./gguf.mjs";
|
|
|
7
7
|
export function detectCapabilities(modelPath, mmprojPath) {
|
|
8
8
|
const meta = safeReadGgufMetadata(modelPath);
|
|
9
9
|
const name = basename(modelPath).toLowerCase();
|
|
10
|
+
const pathHints = String(modelPath).toLowerCase();
|
|
10
11
|
|
|
11
12
|
// Architecture
|
|
12
13
|
const architecture = meta["general.architecture"] ?? null;
|
|
13
14
|
|
|
14
15
|
// Thinking / reasoning mode
|
|
15
16
|
const hasThinkingKwargs = meta["chat_template_kwargs"] !== undefined;
|
|
16
|
-
const nameHintsThinking = /qwen3|gemma-4|gemma4|deepseek-r[12]/i.test(
|
|
17
|
+
const nameHintsThinking = /qwen3|qwen3\.\d|gemma-4|gemma4|deepseek-r[12]/i.test(pathHints);
|
|
17
18
|
const thinking = hasThinkingKwargs || nameHintsThinking;
|
|
18
19
|
|
|
20
|
+
// Quantization-aware / imatrix quantization hints. These mostly affect
|
|
21
|
+
// display and defaults transparency; llama-server does not need a QAT flag.
|
|
22
|
+
const qat = /qat|imatrix|i-?matrix/i.test(pathHints) || Object.keys(meta).some((key) => key.startsWith("quantize.imatrix."));
|
|
23
|
+
|
|
19
24
|
// Vision — mmproj present
|
|
20
25
|
const vision = Boolean(mmprojPath && existsSync(mmprojPath));
|
|
21
26
|
|
|
22
|
-
// MTP (multi-token prediction) — detect speculative decoding
|
|
23
|
-
|
|
27
|
+
// MTP (multi-token prediction) — detect speculative decoding.
|
|
28
|
+
// Do not treat all Qwen models as MTP; require an explicit filename or metadata hint.
|
|
29
|
+
const mtp = /\bmtp\b|draft-mtp|multi-token/i.test(pathHints) || Object.keys(meta).some((key) => /mtp|draft|speculative/i.test(key));
|
|
24
30
|
|
|
25
31
|
// Quantization
|
|
26
32
|
const quant = name.match(/(Q\d_K_[A-Z]+|UD-[A-Z0-9_]+)/i)?.[1] ?? null;
|
|
@@ -31,7 +37,7 @@ export function detectCapabilities(modelPath, mmprojPath) {
|
|
|
31
37
|
: undefined;
|
|
32
38
|
const ctxSize = metaCtx ?? (thinking ? 80000 : 32768);
|
|
33
39
|
|
|
34
|
-
return { architecture, thinking, vision, mtp, quant, metaCtx, ctxSize, meta };
|
|
40
|
+
return { architecture, thinking, vision, mtp, qat, quant, metaCtx, ctxSize, meta };
|
|
35
41
|
}
|
|
36
42
|
|
|
37
43
|
// ── Compute llama-server flags from capabilities ───────────────────────────
|
|
@@ -42,7 +48,7 @@ export function computeFlags(capabilities, modelPath, mmprojPath, draftModelPath
|
|
|
42
48
|
|
|
43
49
|
const flags = {
|
|
44
50
|
host: "127.0.0.1",
|
|
45
|
-
port: 8080,
|
|
51
|
+
port: mtp ? 8081 : 8080,
|
|
46
52
|
ctxSize: capabilities.ctxSize,
|
|
47
53
|
flashAttention: "on",
|
|
48
54
|
cacheTypeK: isLowMem ? "f16" : "bf16",
|
package/src/cli.mjs
CHANGED
|
@@ -14,6 +14,7 @@ import { checkForUpdate, currentPackageVersion, detectInvocation, updateCommand,
|
|
|
14
14
|
import { removeInstallerPathEntries } from "./shell-path.mjs";
|
|
15
15
|
import { configureLocalProfile } from "./profile-setup.mjs";
|
|
16
16
|
import { buildPrettyCommand } from "./command.mjs";
|
|
17
|
+
import { detectCapabilities } from "./autodetect.mjs";
|
|
17
18
|
|
|
18
19
|
// ── Entry point ────────────────────────────────────────────────────────────
|
|
19
20
|
|
|
@@ -153,76 +154,15 @@ export async function mainFlow() {
|
|
|
153
154
|
return;
|
|
154
155
|
}
|
|
155
156
|
|
|
156
|
-
// 6. Interactive:
|
|
157
|
+
// 6. Interactive: one command center after onboarding.
|
|
157
158
|
startInteractive("offgrid-ai");
|
|
158
|
-
|
|
159
|
-
try {
|
|
160
|
-
// Show what we found
|
|
161
|
-
const profiledPaths = new Set(profiles.map((p) => p.modelPath).filter(Boolean));
|
|
162
|
-
const newModels = ggufModels.filter((m) => !profiledPaths.has(m.path));
|
|
163
|
-
|
|
164
|
-
// Managed backend models
|
|
165
|
-
const managedItems = [];
|
|
166
|
-
for (const { backendId, models } of managedModels) {
|
|
167
|
-
const profiledAliases = new Set(
|
|
168
|
-
profiles.filter((p) => p.backend === backendId).map((p) => backendId === "ollama" ? `ollama:${p.ollamaModel ?? p.modelAlias}` : `omlx:${p.omlxModel ?? p.modelAlias}`)
|
|
169
|
-
);
|
|
170
|
-
for (const model of models) {
|
|
171
|
-
if (!profiledAliases.has(`${backendId}:${model.id}`)) {
|
|
172
|
-
managedItems.push({ model, backendId });
|
|
173
|
-
}
|
|
174
|
-
}
|
|
175
|
-
}
|
|
176
|
-
|
|
177
|
-
// Show what we found
|
|
178
|
-
if (profiles.length > 0) {
|
|
179
|
-
console.log(pc.bold("\nSaved profiles"));
|
|
180
|
-
for (const profile of profiles) {
|
|
181
|
-
const backend = backendFor(profile.backend);
|
|
182
|
-
const colorMap = { "llama-cpp": pc.yellow, "llama-cpp-mtp": pc.blue, "ollama": pc.magenta, "omlx": pc.cyan };
|
|
183
|
-
const running = await isProfileRunning(profile);
|
|
184
|
-
const c = colorMap[profile.backend] ?? pc.magenta;
|
|
185
|
-
console.log(` ${running ? pc.green("●") : pc.dim("○")} ${pc.bold(profile.label)} ${c(`[${backend.label}]`)} · ${pc.cyan(profile.modelAlias)}`);
|
|
186
|
-
}
|
|
187
|
-
}
|
|
188
|
-
if (newModels.length > 0) {
|
|
189
|
-
console.log(pc.bold("\nNew models"));
|
|
190
|
-
for (const model of newModels.slice(0, 10)) {
|
|
191
|
-
console.log(` ${pc.cyan(model.label)} ${pc.dim(model.quant ?? "")} · ${pc.dim(formatBytes(model.sizeBytes))}`);
|
|
192
|
-
}
|
|
193
|
-
if (newModels.length > 10) console.log(pc.dim(` ... and ${newModels.length - 10} more`));
|
|
194
|
-
}
|
|
195
|
-
for (const { backendId, models } of managedModels) {
|
|
196
|
-
if (models.length > 0) {
|
|
197
|
-
const be = BACKENDS[backendId];
|
|
198
|
-
console.log(pc.bold(`\n${be.label} models`));
|
|
199
|
-
for (const model of models.slice(0, 5)) {
|
|
200
|
-
console.log(` ${pc.cyan(model.label)}`);
|
|
201
|
-
}
|
|
202
|
-
if (models.length > 5) console.log(pc.dim(` ... and ${models.length - 5} more`));
|
|
203
|
-
}
|
|
204
|
-
}
|
|
205
|
-
|
|
206
|
-
// Pick what to do
|
|
207
|
-
const action = await prompt.choice("What next?", [
|
|
208
|
-
{ value: "run", label: "Run a model", hint: "Start server and launch Pi" },
|
|
209
|
-
...(profiles.length > 0 ? [{ value: "manage", label: "Manage profiles", hint: "Sync, remove, or inspect" }] : []),
|
|
210
|
-
{ value: "benchmark", label: "Benchmark", hint: "Run a benchmark prompt" },
|
|
211
|
-
], "run");
|
|
212
|
-
|
|
213
|
-
if (action === "run") return await pickAndRun(prompt, profiles, newModels, managedItems);
|
|
214
|
-
if (action === "manage") return await manageProfiles(prompt, profiles);
|
|
215
|
-
if (action === "benchmark") return await benchmarkFlow(prompt, profiles);
|
|
216
|
-
} finally {
|
|
217
|
-
prompt.close();
|
|
218
|
-
}
|
|
159
|
+
return await modelCommandCenter({ profiles, ggufModels, managedModels });
|
|
219
160
|
}
|
|
220
161
|
|
|
221
|
-
// ──
|
|
162
|
+
// ── Model command center ────────────────────────────────────────────────────
|
|
222
163
|
|
|
223
164
|
async function modelsCommand(argv) {
|
|
224
165
|
await ensureDirs();
|
|
225
|
-
if (process.stdin.isTTY) startInteractive("offgrid-ai models");
|
|
226
166
|
const catalog = await loadModelCatalog();
|
|
227
167
|
|
|
228
168
|
if (argv[0]) {
|
|
@@ -231,20 +171,28 @@ async function modelsCommand(argv) {
|
|
|
231
171
|
return;
|
|
232
172
|
}
|
|
233
173
|
|
|
234
|
-
|
|
174
|
+
if (process.stdin.isTTY) startInteractive("offgrid-ai");
|
|
175
|
+
return await modelCommandCenter(catalog);
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
async function modelCommandCenter(catalog) {
|
|
179
|
+
const normalized = normalizeCatalog(catalog);
|
|
180
|
+
const items = modelCatalogItems(normalized);
|
|
181
|
+
await printModelCatalog(normalized, items);
|
|
235
182
|
if (!process.stdin.isTTY) return;
|
|
236
183
|
|
|
237
|
-
const items = modelCatalogItems(catalog);
|
|
238
184
|
if (items.length === 0) return;
|
|
239
185
|
|
|
240
186
|
const prompt = createPrompt();
|
|
241
187
|
try {
|
|
242
|
-
const action = await prompt.choice("
|
|
243
|
-
{ value: "inspect", label: "Inspect", hint: "View
|
|
188
|
+
const action = await prompt.choice("What do you want to do?", [
|
|
189
|
+
{ value: "inspect", label: "Inspect", hint: "View details" },
|
|
244
190
|
{ value: "setup", label: "Set up / sync", hint: "Create profile or sync Pi" },
|
|
245
191
|
{ value: "run", label: "Run", hint: "Start server and launch Pi" },
|
|
192
|
+
{ value: "benchmark", label: "Benchmark", hint: "Coming soon: local benchmark project" },
|
|
246
193
|
{ value: "remove", label: "Remove", hint: "Delete a saved profile" },
|
|
247
|
-
], "
|
|
194
|
+
], "run");
|
|
195
|
+
if (action === "benchmark") return await benchmarkFlow();
|
|
248
196
|
const item = await chooseCatalogItem(prompt, items, action);
|
|
249
197
|
if (!item) return;
|
|
250
198
|
return await handleCatalogAction(prompt, action, item);
|
|
@@ -256,21 +204,9 @@ async function modelsCommand(argv) {
|
|
|
256
204
|
async function runCommand(argv) {
|
|
257
205
|
await ensureDirs();
|
|
258
206
|
const { positional } = parseOptions(argv);
|
|
259
|
-
if (positional[0])
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
}
|
|
263
|
-
|
|
264
|
-
const catalog = await loadModelCatalog();
|
|
265
|
-
if (!process.stdin.isTTY) throw new Error("Run requires a profile id in non-interactive mode: offgrid-ai run <profile>");
|
|
266
|
-
startInteractive("offgrid-ai run");
|
|
267
|
-
await printModelCatalog(catalog);
|
|
268
|
-
const prompt = createPrompt();
|
|
269
|
-
try {
|
|
270
|
-
return await pickAndRun(prompt, catalog.profiles, catalog.newModels, catalog.managedItems);
|
|
271
|
-
} finally {
|
|
272
|
-
prompt.close();
|
|
273
|
-
}
|
|
207
|
+
if (!positional[0]) return await mainFlow();
|
|
208
|
+
const profile = await readProfile(positional[0]);
|
|
209
|
+
return await runProfile(profile);
|
|
274
210
|
}
|
|
275
211
|
|
|
276
212
|
async function loadModelCatalog() {
|
|
@@ -279,6 +215,12 @@ async function loadModelCatalog() {
|
|
|
279
215
|
scanGgufModels(),
|
|
280
216
|
scanManagedModels(),
|
|
281
217
|
]);
|
|
218
|
+
return normalizeCatalog({ profiles, ggufModels, managedModels });
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
function normalizeCatalog(catalog) {
|
|
222
|
+
if (catalog.newModels && catalog.managedItems) return catalog;
|
|
223
|
+
const { profiles, ggufModels, managedModels } = catalog;
|
|
282
224
|
const profiledPaths = new Set(profiles.map((p) => p.modelPath).filter(Boolean));
|
|
283
225
|
const newModels = ggufModels.filter((m) => !profiledPaths.has(m.path));
|
|
284
226
|
const managedItems = [];
|
|
@@ -293,36 +235,54 @@ async function loadModelCatalog() {
|
|
|
293
235
|
return { profiles, ggufModels, managedModels, newModels, managedItems };
|
|
294
236
|
}
|
|
295
237
|
|
|
296
|
-
async function printModelCatalog({ profiles, newModels,
|
|
297
|
-
|
|
298
|
-
|
|
238
|
+
async function printModelCatalog({ profiles, newModels, managedItems }, items = modelCatalogItems({ profiles, newModels, managedItems })) {
|
|
239
|
+
const itemNumber = (predicate) => {
|
|
240
|
+
const index = items.findIndex(predicate);
|
|
241
|
+
return index === -1 ? " " : String(index + 1).padStart(2, " ");
|
|
242
|
+
};
|
|
243
|
+
|
|
244
|
+
console.log(pc.bold("\nSaved profiles"));
|
|
245
|
+
if (profiles.length === 0) {
|
|
246
|
+
console.log(pc.dim(" None yet."));
|
|
247
|
+
} else {
|
|
299
248
|
for (const profile of profiles) {
|
|
300
249
|
const backend = backendFor(profile.backend);
|
|
301
250
|
const colorMap = { "llama-cpp": pc.yellow, "llama-cpp-mtp": pc.blue, "ollama": pc.magenta, "omlx": pc.cyan };
|
|
302
251
|
const running = await isProfileRunning(profile);
|
|
303
252
|
const piConfigured = await hasPiModel(profile);
|
|
304
253
|
const c = colorMap[profile.backend] ?? pc.magenta;
|
|
305
|
-
|
|
254
|
+
const num = itemNumber((item) => item.type === "profile" && item.profile.id === profile.id);
|
|
255
|
+
console.log(`${num}. ${running ? pc.green("●") : pc.dim("○")} ${pc.bold(profile.label)} ${c(`[${backend.label}]`)} · ${pc.cyan(profile.modelAlias)} ${piConfigured ? pc.green("· Pi synced") : pc.yellow("· Pi not synced")}`);
|
|
306
256
|
}
|
|
307
|
-
} else {
|
|
308
|
-
console.log(pc.bold("\nSaved profiles"));
|
|
309
|
-
console.log(pc.dim(" None yet."));
|
|
310
257
|
}
|
|
311
258
|
|
|
312
|
-
|
|
313
|
-
|
|
259
|
+
console.log("");
|
|
260
|
+
console.log(pc.bold("Downloaded models not set up yet"));
|
|
261
|
+
if (newModels.length === 0) {
|
|
262
|
+
console.log(pc.dim(" None. Every downloaded GGUF has a profile."));
|
|
263
|
+
} else {
|
|
314
264
|
for (const model of newModels.slice(0, 20)) {
|
|
315
|
-
|
|
265
|
+
const caps = detectCapabilities(model.path, model.mmprojPath);
|
|
266
|
+
const num = itemNumber((item) => item.type === "new" && item.model.path === model.path);
|
|
267
|
+
console.log(`${num}. ${pc.cyan(model.label)} ${capabilityBadges(caps)} ${pc.dim(model.quant ?? "")}`);
|
|
268
|
+
console.log(` alias: ${pc.cyan(model.aliasSuggestion)}`);
|
|
269
|
+
console.log(` size: ${formatBytes(model.sizeBytes)}`);
|
|
316
270
|
}
|
|
317
271
|
if (newModels.length > 20) console.log(pc.dim(` ... and ${newModels.length - 20} more`));
|
|
318
272
|
}
|
|
319
273
|
|
|
320
|
-
for (const
|
|
321
|
-
|
|
274
|
+
for (const backendId of ["ollama", "omlx"]) {
|
|
275
|
+
const backendItems = managedItems.filter((item) => item.backendId === backendId);
|
|
276
|
+
if (backendItems.length === 0) continue;
|
|
322
277
|
const be = BACKENDS[backendId];
|
|
323
|
-
console.log(
|
|
324
|
-
|
|
325
|
-
|
|
278
|
+
console.log("");
|
|
279
|
+
console.log(pc.bold(`${be.label} models`));
|
|
280
|
+
for (const { model } of backendItems.slice(0, 10)) {
|
|
281
|
+
const num = itemNumber((item) => item.type === "managed" && item.backendId === backendId && item.model.id === model.id);
|
|
282
|
+
console.log(`${num}. ${pc.cyan(model.label)} ${pc.dim(model.quant ?? "")}`);
|
|
283
|
+
console.log(` id: ${pc.cyan(model.id)}`);
|
|
284
|
+
}
|
|
285
|
+
if (backendItems.length > 10) console.log(pc.dim(` ... and ${backendItems.length - 10} more`));
|
|
326
286
|
}
|
|
327
287
|
}
|
|
328
288
|
|
|
@@ -335,17 +295,25 @@ function modelCatalogItems({ profiles, newModels, managedItems }) {
|
|
|
335
295
|
}
|
|
336
296
|
|
|
337
297
|
async function chooseCatalogItem(prompt, items, action) {
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
console.log(pc.yellow(action === "remove" ? "No saved profiles to remove." : "No models available."));
|
|
298
|
+
if (action === "remove" && !items.some((item) => item.type === "profile")) {
|
|
299
|
+
console.log(pc.yellow("No saved profiles to remove."));
|
|
341
300
|
return null;
|
|
342
301
|
}
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
302
|
+
|
|
303
|
+
const input = await prompt.text("Select a number", "");
|
|
304
|
+
if (!input) return null;
|
|
305
|
+
const index = Number(input) - 1;
|
|
306
|
+
if (!Number.isInteger(index) || index < 0 || index >= items.length) {
|
|
307
|
+
console.log(pc.yellow(`No item ${input}.`));
|
|
308
|
+
return null;
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
const item = items[index];
|
|
312
|
+
if (action === "remove" && item.type !== "profile") {
|
|
313
|
+
console.log(pc.yellow("Only saved profiles can be removed."));
|
|
314
|
+
return null;
|
|
315
|
+
}
|
|
316
|
+
return item;
|
|
349
317
|
}
|
|
350
318
|
|
|
351
319
|
async function handleCatalogAction(prompt, action, item) {
|
|
@@ -396,6 +364,7 @@ async function printProfileDetails(profile) {
|
|
|
396
364
|
["ID", pc.cyan(profile.id)],
|
|
397
365
|
["Label", pc.bold(profile.label)],
|
|
398
366
|
["Backend", backend.label],
|
|
367
|
+
...(profile.capabilities ? [["Detected", capabilitySummary(profile.capabilities)]] : []),
|
|
399
368
|
["Endpoint", pc.green(profile.baseUrl)],
|
|
400
369
|
...(!isManaged ? [
|
|
401
370
|
["Model", profile.modelPath ?? "unknown"],
|
|
@@ -413,8 +382,10 @@ async function printProfileDetails(profile) {
|
|
|
413
382
|
}
|
|
414
383
|
|
|
415
384
|
function printGgufModelDetails(model) {
|
|
385
|
+
const caps = detectCapabilities(model.path, model.mmprojPath);
|
|
416
386
|
console.log("\n" + renderSection("GGUF model", renderRows([
|
|
417
387
|
["Label", pc.bold(model.label)],
|
|
388
|
+
["Detected", capabilitySummary(caps)],
|
|
418
389
|
["Model", model.path],
|
|
419
390
|
["MMProj", model.mmprojPath ?? "none"],
|
|
420
391
|
["Quant", model.quant ?? "unknown"],
|
|
@@ -431,6 +402,26 @@ function printManagedModelDetails(model, backend) {
|
|
|
431
402
|
])));
|
|
432
403
|
}
|
|
433
404
|
|
|
405
|
+
function capabilitySummary(caps) {
|
|
406
|
+
const parts = [];
|
|
407
|
+
if (caps.architecture) parts.push(caps.architecture);
|
|
408
|
+
if (caps.quant) parts.push(caps.quant);
|
|
409
|
+
if (caps.mtp) parts.push("MTP");
|
|
410
|
+
if (caps.qat) parts.push("QAT/imatrix");
|
|
411
|
+
if (caps.thinking) parts.push("thinking");
|
|
412
|
+
if (caps.vision) parts.push("vision");
|
|
413
|
+
return parts.length > 0 ? parts.join(" · ") : "standard GGUF";
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
function capabilityBadges(caps) {
|
|
417
|
+
const badges = [];
|
|
418
|
+
if (caps.mtp) badges.push(pc.blue("[MTP]"));
|
|
419
|
+
if (caps.qat) badges.push(pc.green("[QAT]"));
|
|
420
|
+
if (caps.thinking) badges.push(pc.magenta("[thinking]"));
|
|
421
|
+
if (caps.vision) badges.push(pc.cyan("[vision]"));
|
|
422
|
+
return badges.join(" ");
|
|
423
|
+
}
|
|
424
|
+
|
|
434
425
|
function createManagedProfile(model, backendId) {
|
|
435
426
|
return normalizeProfile({
|
|
436
427
|
id: model.id.replace(/[^a-z0-9._-]+/gi, "-").toLowerCase(),
|
|
@@ -442,91 +433,6 @@ function createManagedProfile(model, backendId) {
|
|
|
442
433
|
});
|
|
443
434
|
}
|
|
444
435
|
|
|
445
|
-
// ── Pick and run ────────────────────────────────────────────────────────────
|
|
446
|
-
|
|
447
|
-
async function pickAndRun(prompt, profiles, newModels, managedItems) {
|
|
448
|
-
// If there's exactly one profile and it's already running, offer to connect or start fresh
|
|
449
|
-
const choices = [];
|
|
450
|
-
|
|
451
|
-
// Existing profiles
|
|
452
|
-
for (const profile of profiles) {
|
|
453
|
-
const running = await isProfileRunning(profile);
|
|
454
|
-
const backend = backendFor(profile.backend);
|
|
455
|
-
const colorMap = { "llama-cpp": pc.yellow, "llama-cpp-mtp": pc.blue, "ollama": pc.magenta, "omlx": pc.cyan };
|
|
456
|
-
const c = colorMap[profile.backend] ?? pc.magenta;
|
|
457
|
-
choices.push({
|
|
458
|
-
value: `profile:${profile.id}`,
|
|
459
|
-
label: `${running ? pc.green("● ") : ""}${profile.label}`,
|
|
460
|
-
hint: `${c(backend.label)} · ${profile.modelAlias} · ${profile.baseUrl}`,
|
|
461
|
-
});
|
|
462
|
-
}
|
|
463
|
-
|
|
464
|
-
// New GGUF models
|
|
465
|
-
for (const model of newModels.slice(0, 20)) {
|
|
466
|
-
choices.push({
|
|
467
|
-
value: `new:${model.path}`,
|
|
468
|
-
label: model.label,
|
|
469
|
-
hint: `${model.quant ?? "GGUF"} · ${formatBytes(model.sizeBytes)}`,
|
|
470
|
-
});
|
|
471
|
-
}
|
|
472
|
-
|
|
473
|
-
// Managed models
|
|
474
|
-
for (const { model, backendId } of managedItems) {
|
|
475
|
-
const be = BACKENDS[backendId];
|
|
476
|
-
choices.push({
|
|
477
|
-
value: `managed:${backendId}:${model.id}`,
|
|
478
|
-
label: model.label,
|
|
479
|
-
hint: `${be.label}`,
|
|
480
|
-
});
|
|
481
|
-
}
|
|
482
|
-
|
|
483
|
-
if (choices.length === 0) {
|
|
484
|
-
console.log(pc.yellow("No models available."));
|
|
485
|
-
return;
|
|
486
|
-
}
|
|
487
|
-
|
|
488
|
-
const selected = await prompt.choice("Pick a model", choices, choices[0].value);
|
|
489
|
-
|
|
490
|
-
if (selected.startsWith("profile:")) {
|
|
491
|
-
const id = selected.slice("profile:".length);
|
|
492
|
-
const profile = await readProfile(id);
|
|
493
|
-
return await runProfile(profile);
|
|
494
|
-
}
|
|
495
|
-
|
|
496
|
-
if (selected.startsWith("new:")) {
|
|
497
|
-
const modelPath = selected.slice("new:".length);
|
|
498
|
-
const model = newModels.find((m) => m.path === modelPath);
|
|
499
|
-
if (!model) throw new Error("Model not found.");
|
|
500
|
-
const profile = await createProfileFromModel(model);
|
|
501
|
-
const configured = await configureLocalProfile(prompt, profile);
|
|
502
|
-
if (!configured) return;
|
|
503
|
-
await saveProfile(configured);
|
|
504
|
-
console.log(pc.green(`Saved profile: ${configured.label}`));
|
|
505
|
-
await syncPiConfig(configured);
|
|
506
|
-
return await runProfile(configured);
|
|
507
|
-
}
|
|
508
|
-
|
|
509
|
-
if (selected.startsWith("managed:")) {
|
|
510
|
-
const managedSelection = selected.slice("managed:".length);
|
|
511
|
-
const separator = managedSelection.indexOf(":");
|
|
512
|
-
const backendId = separator === -1 ? managedSelection : managedSelection.slice(0, separator);
|
|
513
|
-
const modelId = separator === -1 ? "" : managedSelection.slice(separator + 1);
|
|
514
|
-
const model = managedItems.find((m) => m.model.id === modelId && m.backendId === backendId)?.model;
|
|
515
|
-
if (!model) throw new Error("Model not found.");
|
|
516
|
-
const profile = normalizeProfile({
|
|
517
|
-
id: model.id.replace(/[^a-z0-9._-]+/gi, "-").toLowerCase(),
|
|
518
|
-
label: model.label,
|
|
519
|
-
backend: backendId,
|
|
520
|
-
modelAlias: model.aliasSuggestion,
|
|
521
|
-
...(backendId === "ollama" ? { ollamaModel: model.id } : {}),
|
|
522
|
-
...(backendId === "omlx" ? { omlxModel: model.id } : {}),
|
|
523
|
-
});
|
|
524
|
-
await saveProfile(profile);
|
|
525
|
-
await syncPiConfig(profile);
|
|
526
|
-
return await runProfile(profile);
|
|
527
|
-
}
|
|
528
|
-
}
|
|
529
|
-
|
|
530
436
|
async function runProfile(profile, options = {}) {
|
|
531
437
|
const backend = backendFor(profile.backend);
|
|
532
438
|
const withHarness = options.with ?? "pi";
|
|
@@ -608,56 +514,6 @@ async function runProfile(profile, options = {}) {
|
|
|
608
514
|
}
|
|
609
515
|
}
|
|
610
516
|
|
|
611
|
-
// ── Manage profiles ─────────────────────────────────────────────────────────
|
|
612
|
-
|
|
613
|
-
async function manageProfiles(prompt, profiles) {
|
|
614
|
-
const choices = profiles.map((p) => ({
|
|
615
|
-
value: p.id,
|
|
616
|
-
label: p.label,
|
|
617
|
-
hint: `${p.modelAlias} · ${p.baseUrl}`,
|
|
618
|
-
}));
|
|
619
|
-
|
|
620
|
-
const selected = await prompt.choice("Which profile?", choices, choices[0].value);
|
|
621
|
-
const profile = await readProfile(selected);
|
|
622
|
-
const backend = backendFor(profile.backend);
|
|
623
|
-
const isManaged = backend.type === "managed-server";
|
|
624
|
-
const piConfigured = await hasPiModel(profile);
|
|
625
|
-
|
|
626
|
-
// Show profile details
|
|
627
|
-
console.log("");
|
|
628
|
-
console.log(renderSection("Profile", renderRows([
|
|
629
|
-
["ID", pc.cyan(profile.id)],
|
|
630
|
-
["Label", pc.bold(profile.label)],
|
|
631
|
-
["Backend", backend.label],
|
|
632
|
-
["Endpoint", pc.green(profile.baseUrl)],
|
|
633
|
-
...(!isManaged ? [
|
|
634
|
-
["Model", profile.modelPath ?? "unknown"],
|
|
635
|
-
["MMProj", profile.mmprojPath ?? "none"],
|
|
636
|
-
["Memory", existsSync(profile.modelPath) ? formatBytes(statSync(profile.modelPath).size) : "unknown"],
|
|
637
|
-
] : []),
|
|
638
|
-
["Alias", pc.cyan(profile.modelAlias)],
|
|
639
|
-
["Pi", piConfigured ? pc.green("configured") : pc.yellow("not synced")],
|
|
640
|
-
])));
|
|
641
|
-
|
|
642
|
-
if (!isManaged && profile.commandArgv) {
|
|
643
|
-
console.log("");
|
|
644
|
-
console.log(pc.bold("llama-server command"));
|
|
645
|
-
console.log(pc.dim(buildPrettyCommand(profile)));
|
|
646
|
-
}
|
|
647
|
-
|
|
648
|
-
const action = await prompt.choice("Action", [
|
|
649
|
-
{ value: "sync", label: piConfigured ? `${pc.green("✓")} Pi config synced` : "Sync Pi config", hint: piConfigured ? "Already in ~/.pi/agent/models.json" : "Update ~/.pi/agent/models.json" },
|
|
650
|
-
{ value: "run", label: "Run", hint: "Start server + Pi" },
|
|
651
|
-
...(isManaged ? [] : [{ value: "server", label: "Server only", hint: "Start server, no harness" }]),
|
|
652
|
-
{ value: "remove", label: "Remove", hint: "Delete profile + Pi config" },
|
|
653
|
-
], "sync");
|
|
654
|
-
|
|
655
|
-
if (action === "sync") return await syncPiConfig(profile);
|
|
656
|
-
if (action === "run") return await runProfile(profile);
|
|
657
|
-
if (action === "server") return await runProfile(profile, { with: "server" });
|
|
658
|
-
if (action === "remove") return await removeProfileInteractive(profile.id);
|
|
659
|
-
}
|
|
660
|
-
|
|
661
517
|
async function removeProfileInteractive(id) {
|
|
662
518
|
const profile = await readProfile(id);
|
|
663
519
|
if (!process.stdin.isTTY) {
|
|
@@ -1207,9 +1063,7 @@ function printHelp() {
|
|
|
1207
1063
|
console.log(`${pc.bold("offgrid-ai")} — privacy-first local LLM runner
|
|
1208
1064
|
|
|
1209
1065
|
Usage:
|
|
1210
|
-
offgrid-ai
|
|
1211
|
-
offgrid-ai models List, inspect, set up, sync, or remove models
|
|
1212
|
-
offgrid-ai run Pick and run a model (or: offgrid-ai run <profile>)
|
|
1066
|
+
offgrid-ai Command center: inspect, set up, run, benchmark, or remove models
|
|
1213
1067
|
offgrid-ai status Show running local models
|
|
1214
1068
|
offgrid-ai stop Stop a running server (or: offgrid-ai stop <id>)
|
|
1215
1069
|
offgrid-ai uninstall Remove offgrid-ai, clean up PATH, optionally keep profiles
|
package/src/profile-setup.mjs
CHANGED
|
@@ -8,10 +8,27 @@ const CACHE_CHOICES = [
|
|
|
8
8
|
{ value: "q4_0", label: "q4_0", hint: "lowest memory, quality/speed tradeoff" },
|
|
9
9
|
];
|
|
10
10
|
|
|
11
|
+
const GENERAL_DEFAULTS = {
|
|
12
|
+
topK: 20,
|
|
13
|
+
presencePenalty: 1.5,
|
|
14
|
+
repeatPenalty: 1.0,
|
|
15
|
+
};
|
|
16
|
+
|
|
17
|
+
const THINKING_DEFAULTS = {
|
|
18
|
+
topK: 64,
|
|
19
|
+
presencePenalty: 0,
|
|
20
|
+
repeatPenalty: 1.1,
|
|
21
|
+
chatTemplateKwargs: { enable_thinking: true },
|
|
22
|
+
};
|
|
23
|
+
|
|
11
24
|
export async function configureLocalProfile(prompt, profile) {
|
|
25
|
+
let configured = profile;
|
|
26
|
+
const caps = profile.capabilities ?? {};
|
|
27
|
+
|
|
12
28
|
console.log("");
|
|
13
29
|
console.log(renderSection("Model setup", renderRows([
|
|
14
30
|
["Model", pc.bold(profile.label)],
|
|
31
|
+
["Detected", detectionSummary(caps)],
|
|
15
32
|
["Context", `${profile.flags.ctxSize.toLocaleString()} tokens`],
|
|
16
33
|
["KV cache", `${profile.flags.cacheTypeK}/${profile.flags.cacheTypeV}`],
|
|
17
34
|
["Sampling", samplingSummary(profile.flags)],
|
|
@@ -19,13 +36,36 @@ export async function configureLocalProfile(prompt, profile) {
|
|
|
19
36
|
console.log(pc.dim("Larger context windows use more memory. KV cache precision controls memory used by attention history."));
|
|
20
37
|
console.log(pc.dim("Sampling defaults are shown for transparency; you can edit command.json later if needed.\n"));
|
|
21
38
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
39
|
+
if (caps.mtp) {
|
|
40
|
+
console.log(renderSection("Detected MTP", renderRows([
|
|
41
|
+
["Backend", "llama.cpp MTP"],
|
|
42
|
+
["Port", "8081"],
|
|
43
|
+
["Flags", "--spec-type draft-mtp --spec-draft-n-max 2"],
|
|
44
|
+
])));
|
|
45
|
+
const useMtp = await prompt.yesNo("Use MTP speculative decoding flags?", true);
|
|
46
|
+
configured = useMtp ? applyMtpDefaults(configured) : removeMtpDefaults(configured);
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
if (caps.thinking || caps.qat) {
|
|
50
|
+
console.log("");
|
|
51
|
+
console.log(renderSection(caps.qat ? "Detected QAT / imatrix-style model" : "Detected thinking model", renderRows([
|
|
52
|
+
["Defaults", "thinking / loop-safe"],
|
|
53
|
+
["Flags", "--top-k 64 --presence-penalty 0 --repeat-penalty 1.1"],
|
|
54
|
+
["Template", "--chat-template-kwargs { enable_thinking: true }"],
|
|
55
|
+
])));
|
|
56
|
+
const useThinking = await prompt.yesNo("Use these thinking/QAT-safe defaults?", true);
|
|
57
|
+
configured = useThinking ? applyThinkingDefaults(configured) : removeThinkingDefaults(configured);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
const ctxSize = await prompt.number("Context window tokens", configured.flags.ctxSize, 1024, 1048576);
|
|
61
|
+
const cacheTypeK = await prompt.choice("K cache precision", CACHE_CHOICES, configured.flags.cacheTypeK);
|
|
62
|
+
const cacheTypeV = await prompt.choice("V cache precision", CACHE_CHOICES, configured.flags.cacheTypeV);
|
|
63
|
+
configured = applyRuntimeFlagOverrides(configured, { ctxSize, cacheTypeK, cacheTypeV });
|
|
26
64
|
|
|
27
65
|
console.log("");
|
|
28
66
|
console.log(renderSection("Defaults", renderRows([
|
|
67
|
+
["Backend", configured.backend],
|
|
68
|
+
["Endpoint", configured.baseUrl],
|
|
29
69
|
["Temperature", configured.flags.temperature],
|
|
30
70
|
["Top-p", configured.flags.topP],
|
|
31
71
|
["Top-k", configured.flags.topK],
|
|
@@ -41,21 +81,63 @@ export async function configureLocalProfile(prompt, profile) {
|
|
|
41
81
|
|
|
42
82
|
export function applyRuntimeFlagOverrides(profile, overrides) {
|
|
43
83
|
const flags = { ...profile.flags, ...overrides };
|
|
44
|
-
return
|
|
84
|
+
return applyProfileFlags(profile, flags);
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
function applyMtpDefaults(profile) {
|
|
88
|
+
const flags = { ...profile.flags, port: 8081 };
|
|
89
|
+
return applyProfileFlags({ ...profile, backend: "llama-cpp-mtp", providerId: "llama-cpp-mtp" }, flags, {
|
|
90
|
+
values: { "--spec-type": "draft-mtp", "--spec-draft-n-max": 2 },
|
|
91
|
+
});
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
function removeMtpDefaults(profile) {
|
|
95
|
+
const flags = { ...profile.flags, port: 8080 };
|
|
96
|
+
return applyProfileFlags({ ...profile, backend: "llama-cpp", providerId: "llama-cpp" }, flags, {
|
|
97
|
+
remove: ["--spec-type", "--spec-draft-n-max"],
|
|
98
|
+
});
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
function applyThinkingDefaults(profile) {
|
|
102
|
+
const flags = { ...profile.flags, ...THINKING_DEFAULTS };
|
|
103
|
+
return applyProfileFlags(profile, flags);
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
function removeThinkingDefaults(profile) {
|
|
107
|
+
const flags = { ...profile.flags, ...GENERAL_DEFAULTS };
|
|
108
|
+
delete flags.chatTemplateKwargs;
|
|
109
|
+
return applyProfileFlags(profile, flags, { remove: ["--chat-template-kwargs"] });
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
function applyProfileFlags(profile, flags, edits = {}) {
|
|
113
|
+
const next = {
|
|
45
114
|
...profile,
|
|
46
115
|
flags,
|
|
47
116
|
baseUrl: `http://${flags.host}:${flags.port}/v1`,
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
}),
|
|
117
|
+
harnesses: {
|
|
118
|
+
...(profile.harnesses ?? {}),
|
|
119
|
+
pi: { ...(profile.harnesses?.pi ?? {}), enabled: true, model: `${profile.providerId ?? profile.backend}/${profile.modelAlias ?? profile.id}` },
|
|
120
|
+
},
|
|
53
121
|
};
|
|
122
|
+
next.commandArgv = updateArgv(profile.commandArgv ?? [], {
|
|
123
|
+
"--host": flags.host,
|
|
124
|
+
"--port": flags.port,
|
|
125
|
+
"--ctx-size": flags.ctxSize,
|
|
126
|
+
"--cache-type-k": flags.cacheTypeK,
|
|
127
|
+
"--cache-type-v": flags.cacheTypeV,
|
|
128
|
+
"--top-k": flags.topK,
|
|
129
|
+
"--presence-penalty": flags.presencePenalty,
|
|
130
|
+
"--repeat-penalty": flags.repeatPenalty,
|
|
131
|
+
...(flags.chatTemplateKwargs ? { "--chat-template-kwargs": JSON.stringify(flags.chatTemplateKwargs) } : {}),
|
|
132
|
+
}, edits);
|
|
133
|
+
return next;
|
|
54
134
|
}
|
|
55
135
|
|
|
56
|
-
function updateArgv(argv, values) {
|
|
57
|
-
|
|
58
|
-
for (const
|
|
136
|
+
function updateArgv(argv, values, edits = {}) {
|
|
137
|
+
let next = [...argv];
|
|
138
|
+
for (const flag of edits.remove ?? []) next = removeOption(next, flag);
|
|
139
|
+
for (const [flag, value] of Object.entries({ ...values, ...(edits.values ?? {}) })) {
|
|
140
|
+
if (value === undefined) continue;
|
|
59
141
|
const index = next.indexOf(flag);
|
|
60
142
|
if (index === -1) next.push(flag, String(value));
|
|
61
143
|
else next[index + 1] = String(value);
|
|
@@ -63,6 +145,18 @@ function updateArgv(argv, values) {
|
|
|
63
145
|
return next;
|
|
64
146
|
}
|
|
65
147
|
|
|
148
|
+
function removeOption(argv, flag) {
|
|
149
|
+
const next = [];
|
|
150
|
+
for (let i = 0; i < argv.length; i++) {
|
|
151
|
+
if (argv[i] === flag) {
|
|
152
|
+
if (argv[i + 1] && !argv[i + 1].startsWith("--")) i += 1;
|
|
153
|
+
continue;
|
|
154
|
+
}
|
|
155
|
+
next.push(argv[i]);
|
|
156
|
+
}
|
|
157
|
+
return next;
|
|
158
|
+
}
|
|
159
|
+
|
|
66
160
|
function renderMemoryEstimate(profile) {
|
|
67
161
|
try {
|
|
68
162
|
const est = estimateMemory(profile.modelPath, profile.mmprojPath, null, profile.flags);
|
|
@@ -77,6 +171,17 @@ function renderMemoryEstimate(profile) {
|
|
|
77
171
|
}
|
|
78
172
|
}
|
|
79
173
|
|
|
174
|
+
function detectionSummary(caps) {
|
|
175
|
+
const parts = [];
|
|
176
|
+
if (caps.architecture) parts.push(caps.architecture);
|
|
177
|
+
if (caps.quant) parts.push(caps.quant);
|
|
178
|
+
if (caps.mtp) parts.push("MTP");
|
|
179
|
+
if (caps.qat) parts.push("QAT/imatrix");
|
|
180
|
+
if (caps.thinking) parts.push("thinking");
|
|
181
|
+
if (caps.vision) parts.push("vision");
|
|
182
|
+
return parts.length > 0 ? parts.join(" · ") : "standard GGUF";
|
|
183
|
+
}
|
|
184
|
+
|
|
80
185
|
function samplingSummary(flags) {
|
|
81
186
|
return `temp ${flags.temperature}, top-p ${flags.topP}, top-k ${flags.topK}`;
|
|
82
187
|
}
|
package/src/profiles.mjs
CHANGED
|
@@ -136,25 +136,41 @@ export function normalizeProfile(profile) {
|
|
|
136
136
|
|
|
137
137
|
// ── Auto-create profile from a discovered model ────────────────────────────
|
|
138
138
|
|
|
139
|
-
export async function createProfileFromModel(model, backendId
|
|
139
|
+
export async function createProfileFromModel(model, backendId) {
|
|
140
140
|
const { detectCapabilities } = await import("./autodetect.mjs");
|
|
141
141
|
const caps = detectCapabilities(model.path, model.mmprojPath);
|
|
142
|
+
const backend = backendId ?? (caps.mtp ? "llama-cpp-mtp" : "llama-cpp");
|
|
142
143
|
const id = slugFromLabel(model.label);
|
|
143
144
|
const { flags, argv } = computeFlags(caps, model.path, model.mmprojPath, null);
|
|
144
145
|
|
|
145
146
|
return normalizeProfile({
|
|
146
147
|
id,
|
|
147
148
|
label: model.label,
|
|
148
|
-
backend
|
|
149
|
+
backend,
|
|
150
|
+
providerId: backend,
|
|
149
151
|
modelAlias: model.aliasSuggestion,
|
|
150
152
|
modelPath: model.path,
|
|
151
153
|
mmprojPath: model.mmprojPath,
|
|
154
|
+
capabilities: summarizeCapabilities(caps),
|
|
152
155
|
preset: null, // no presets — auto-detected
|
|
153
156
|
flags,
|
|
154
157
|
commandArgv: argv,
|
|
155
158
|
});
|
|
156
159
|
}
|
|
157
160
|
|
|
161
|
+
function summarizeCapabilities(caps) {
|
|
162
|
+
return {
|
|
163
|
+
architecture: caps.architecture,
|
|
164
|
+
thinking: caps.thinking,
|
|
165
|
+
vision: caps.vision,
|
|
166
|
+
mtp: caps.mtp,
|
|
167
|
+
qat: caps.qat,
|
|
168
|
+
quant: caps.quant,
|
|
169
|
+
metaCtx: caps.metaCtx,
|
|
170
|
+
ctxSize: caps.ctxSize,
|
|
171
|
+
};
|
|
172
|
+
}
|
|
173
|
+
|
|
158
174
|
// ── State files (for running servers) ──────────────────────────────────────
|
|
159
175
|
|
|
160
176
|
export async function readState(id) {
|