offgrid-ai 0.3.16 → 0.3.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/autodetect.mjs +11 -5
- package/src/cli.mjs +76 -26
- package/src/profile-setup.mjs +118 -13
- package/src/profiles.mjs +18 -2
package/package.json
CHANGED
package/src/autodetect.mjs
CHANGED
|
@@ -7,20 +7,26 @@ import { readGgufMetadata } from "./gguf.mjs";
|
|
|
7
7
|
export function detectCapabilities(modelPath, mmprojPath) {
|
|
8
8
|
const meta = safeReadGgufMetadata(modelPath);
|
|
9
9
|
const name = basename(modelPath).toLowerCase();
|
|
10
|
+
const pathHints = String(modelPath).toLowerCase();
|
|
10
11
|
|
|
11
12
|
// Architecture
|
|
12
13
|
const architecture = meta["general.architecture"] ?? null;
|
|
13
14
|
|
|
14
15
|
// Thinking / reasoning mode
|
|
15
16
|
const hasThinkingKwargs = meta["chat_template_kwargs"] !== undefined;
|
|
16
|
-
const nameHintsThinking = /qwen3|gemma-4|gemma4|deepseek-r[12]/i.test(
|
|
17
|
+
const nameHintsThinking = /qwen3|qwen3\.\d|gemma-4|gemma4|deepseek-r[12]/i.test(pathHints);
|
|
17
18
|
const thinking = hasThinkingKwargs || nameHintsThinking;
|
|
18
19
|
|
|
20
|
+
// Quantization-aware / imatrix quantization hints. These mostly affect
|
|
21
|
+
// display and defaults transparency; llama-server does not need a QAT flag.
|
|
22
|
+
const qat = /qat|imatrix|i-?matrix/i.test(pathHints) || Object.keys(meta).some((key) => key.startsWith("quantize.imatrix."));
|
|
23
|
+
|
|
19
24
|
// Vision — mmproj present
|
|
20
25
|
const vision = Boolean(mmprojPath && existsSync(mmprojPath));
|
|
21
26
|
|
|
22
|
-
// MTP (multi-token prediction) — detect speculative decoding
|
|
23
|
-
|
|
27
|
+
// MTP (multi-token prediction) — detect speculative decoding.
|
|
28
|
+
// Do not treat all Qwen models as MTP; require an explicit filename or metadata hint.
|
|
29
|
+
const mtp = /\bmtp\b|draft-mtp|multi-token/i.test(pathHints) || Object.keys(meta).some((key) => /mtp|draft|speculative/i.test(key));
|
|
24
30
|
|
|
25
31
|
// Quantization
|
|
26
32
|
const quant = name.match(/(Q\d_K_[A-Z]+|UD-[A-Z0-9_]+)/i)?.[1] ?? null;
|
|
@@ -31,7 +37,7 @@ export function detectCapabilities(modelPath, mmprojPath) {
|
|
|
31
37
|
: undefined;
|
|
32
38
|
const ctxSize = metaCtx ?? (thinking ? 80000 : 32768);
|
|
33
39
|
|
|
34
|
-
return { architecture, thinking, vision, mtp, quant, metaCtx, ctxSize, meta };
|
|
40
|
+
return { architecture, thinking, vision, mtp, qat, quant, metaCtx, ctxSize, meta };
|
|
35
41
|
}
|
|
36
42
|
|
|
37
43
|
// ── Compute llama-server flags from capabilities ───────────────────────────
|
|
@@ -42,7 +48,7 @@ export function computeFlags(capabilities, modelPath, mmprojPath, draftModelPath
|
|
|
42
48
|
|
|
43
49
|
const flags = {
|
|
44
50
|
host: "127.0.0.1",
|
|
45
|
-
port: 8080,
|
|
51
|
+
port: mtp ? 8081 : 8080,
|
|
46
52
|
ctxSize: capabilities.ctxSize,
|
|
47
53
|
flashAttention: "on",
|
|
48
54
|
cacheTypeK: isLowMem ? "f16" : "bf16",
|
package/src/cli.mjs
CHANGED
|
@@ -14,6 +14,7 @@ import { checkForUpdate, currentPackageVersion, detectInvocation, updateCommand,
|
|
|
14
14
|
import { removeInstallerPathEntries } from "./shell-path.mjs";
|
|
15
15
|
import { configureLocalProfile } from "./profile-setup.mjs";
|
|
16
16
|
import { buildPrettyCommand } from "./command.mjs";
|
|
17
|
+
import { detectCapabilities } from "./autodetect.mjs";
|
|
17
18
|
|
|
18
19
|
// ── Entry point ────────────────────────────────────────────────────────────
|
|
19
20
|
|
|
@@ -176,10 +177,10 @@ async function modelsCommand(argv) {
|
|
|
176
177
|
|
|
177
178
|
async function modelCommandCenter(catalog) {
|
|
178
179
|
const normalized = normalizeCatalog(catalog);
|
|
179
|
-
|
|
180
|
+
const items = modelCatalogItems(normalized);
|
|
181
|
+
await printModelCatalog(normalized, items);
|
|
180
182
|
if (!process.stdin.isTTY) return;
|
|
181
183
|
|
|
182
|
-
const items = modelCatalogItems(normalized);
|
|
183
184
|
if (items.length === 0) return;
|
|
184
185
|
|
|
185
186
|
const prompt = createPrompt();
|
|
@@ -234,36 +235,54 @@ function normalizeCatalog(catalog) {
|
|
|
234
235
|
return { profiles, ggufModels, managedModels, newModels, managedItems };
|
|
235
236
|
}
|
|
236
237
|
|
|
237
|
-
async function printModelCatalog({ profiles, newModels,
|
|
238
|
-
|
|
239
|
-
|
|
238
|
+
async function printModelCatalog({ profiles, newModels, managedItems }, items = modelCatalogItems({ profiles, newModels, managedItems })) {
|
|
239
|
+
const itemNumber = (predicate) => {
|
|
240
|
+
const index = items.findIndex(predicate);
|
|
241
|
+
return index === -1 ? " " : String(index + 1).padStart(2, " ");
|
|
242
|
+
};
|
|
243
|
+
|
|
244
|
+
console.log(pc.bold("\nSaved profiles"));
|
|
245
|
+
if (profiles.length === 0) {
|
|
246
|
+
console.log(pc.dim(" None yet."));
|
|
247
|
+
} else {
|
|
240
248
|
for (const profile of profiles) {
|
|
241
249
|
const backend = backendFor(profile.backend);
|
|
242
250
|
const colorMap = { "llama-cpp": pc.yellow, "llama-cpp-mtp": pc.blue, "ollama": pc.magenta, "omlx": pc.cyan };
|
|
243
251
|
const running = await isProfileRunning(profile);
|
|
244
252
|
const piConfigured = await hasPiModel(profile);
|
|
245
253
|
const c = colorMap[profile.backend] ?? pc.magenta;
|
|
246
|
-
|
|
254
|
+
const num = itemNumber((item) => item.type === "profile" && item.profile.id === profile.id);
|
|
255
|
+
console.log(`${num}. ${running ? pc.green("●") : pc.dim("○")} ${pc.bold(profile.label)} ${c(`[${backend.label}]`)} · ${pc.cyan(profile.modelAlias)} ${piConfigured ? pc.green("· Pi synced") : pc.yellow("· Pi not synced")}`);
|
|
247
256
|
}
|
|
248
|
-
} else {
|
|
249
|
-
console.log(pc.bold("\nSaved profiles"));
|
|
250
|
-
console.log(pc.dim(" None yet."));
|
|
251
257
|
}
|
|
252
258
|
|
|
253
|
-
|
|
254
|
-
|
|
259
|
+
console.log("");
|
|
260
|
+
console.log(pc.bold("Downloaded models not set up yet"));
|
|
261
|
+
if (newModels.length === 0) {
|
|
262
|
+
console.log(pc.dim(" None. Every downloaded GGUF has a profile."));
|
|
263
|
+
} else {
|
|
255
264
|
for (const model of newModels.slice(0, 20)) {
|
|
256
|
-
|
|
265
|
+
const caps = detectCapabilities(model.path, model.mmprojPath);
|
|
266
|
+
const num = itemNumber((item) => item.type === "new" && item.model.path === model.path);
|
|
267
|
+
console.log(`${num}. ${pc.cyan(model.label)} ${capabilityBadges(caps)} ${pc.dim(model.quant ?? "")}`);
|
|
268
|
+
console.log(` alias: ${pc.cyan(model.aliasSuggestion)}`);
|
|
269
|
+
console.log(` size: ${formatBytes(model.sizeBytes)}`);
|
|
257
270
|
}
|
|
258
271
|
if (newModels.length > 20) console.log(pc.dim(` ... and ${newModels.length - 20} more`));
|
|
259
272
|
}
|
|
260
273
|
|
|
261
|
-
for (const
|
|
262
|
-
|
|
274
|
+
for (const backendId of ["ollama", "omlx"]) {
|
|
275
|
+
const backendItems = managedItems.filter((item) => item.backendId === backendId);
|
|
276
|
+
if (backendItems.length === 0) continue;
|
|
263
277
|
const be = BACKENDS[backendId];
|
|
264
|
-
console.log(
|
|
265
|
-
|
|
266
|
-
|
|
278
|
+
console.log("");
|
|
279
|
+
console.log(pc.bold(`${be.label} models`));
|
|
280
|
+
for (const { model } of backendItems.slice(0, 10)) {
|
|
281
|
+
const num = itemNumber((item) => item.type === "managed" && item.backendId === backendId && item.model.id === model.id);
|
|
282
|
+
console.log(`${num}. ${pc.cyan(model.label)} ${pc.dim(model.quant ?? "")}`);
|
|
283
|
+
console.log(` id: ${pc.cyan(model.id)}`);
|
|
284
|
+
}
|
|
285
|
+
if (backendItems.length > 10) console.log(pc.dim(` ... and ${backendItems.length - 10} more`));
|
|
267
286
|
}
|
|
268
287
|
}
|
|
269
288
|
|
|
@@ -276,17 +295,25 @@ function modelCatalogItems({ profiles, newModels, managedItems }) {
|
|
|
276
295
|
}
|
|
277
296
|
|
|
278
297
|
async function chooseCatalogItem(prompt, items, action) {
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
console.log(pc.yellow(action === "remove" ? "No saved profiles to remove." : "No models available."));
|
|
298
|
+
if (action === "remove" && !items.some((item) => item.type === "profile")) {
|
|
299
|
+
console.log(pc.yellow("No saved profiles to remove."));
|
|
282
300
|
return null;
|
|
283
301
|
}
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
302
|
+
|
|
303
|
+
const input = await prompt.text("Select a number", "");
|
|
304
|
+
if (!input) return null;
|
|
305
|
+
const index = Number(input) - 1;
|
|
306
|
+
if (!Number.isInteger(index) || index < 0 || index >= items.length) {
|
|
307
|
+
console.log(pc.yellow(`No item ${input}.`));
|
|
308
|
+
return null;
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
const item = items[index];
|
|
312
|
+
if (action === "remove" && item.type !== "profile") {
|
|
313
|
+
console.log(pc.yellow("Only saved profiles can be removed."));
|
|
314
|
+
return null;
|
|
315
|
+
}
|
|
316
|
+
return item;
|
|
290
317
|
}
|
|
291
318
|
|
|
292
319
|
async function handleCatalogAction(prompt, action, item) {
|
|
@@ -337,6 +364,7 @@ async function printProfileDetails(profile) {
|
|
|
337
364
|
["ID", pc.cyan(profile.id)],
|
|
338
365
|
["Label", pc.bold(profile.label)],
|
|
339
366
|
["Backend", backend.label],
|
|
367
|
+
...(profile.capabilities ? [["Detected", capabilitySummary(profile.capabilities)]] : []),
|
|
340
368
|
["Endpoint", pc.green(profile.baseUrl)],
|
|
341
369
|
...(!isManaged ? [
|
|
342
370
|
["Model", profile.modelPath ?? "unknown"],
|
|
@@ -354,8 +382,10 @@ async function printProfileDetails(profile) {
|
|
|
354
382
|
}
|
|
355
383
|
|
|
356
384
|
function printGgufModelDetails(model) {
|
|
385
|
+
const caps = detectCapabilities(model.path, model.mmprojPath);
|
|
357
386
|
console.log("\n" + renderSection("GGUF model", renderRows([
|
|
358
387
|
["Label", pc.bold(model.label)],
|
|
388
|
+
["Detected", capabilitySummary(caps)],
|
|
359
389
|
["Model", model.path],
|
|
360
390
|
["MMProj", model.mmprojPath ?? "none"],
|
|
361
391
|
["Quant", model.quant ?? "unknown"],
|
|
@@ -372,6 +402,26 @@ function printManagedModelDetails(model, backend) {
|
|
|
372
402
|
])));
|
|
373
403
|
}
|
|
374
404
|
|
|
405
|
+
/**
 * Build a one-line, human-readable summary of a capability record.
 *
 * @param {object} caps - Capability record; reads `architecture`, `quant`,
 *   `mtp`, `qat`, `thinking` and `vision`.
 * @returns {string} The detected entries joined by " · ", or "standard GGUF"
 *   when none of the fields is truthy.
 */
function capabilitySummary(caps) {
  // Fixed display order: architecture and quant values first, then feature labels.
  const labels = [
    caps.architecture,
    caps.quant,
    caps.mtp && "MTP",
    caps.qat && "QAT/imatrix",
    caps.thinking && "thinking",
    caps.vision && "vision",
  ].filter(Boolean);
  return labels.length === 0 ? "standard GGUF" : labels.join(" · ");
}
|
|
415
|
+
|
|
416
|
+
/**
 * Render coloured bracket badges for the capabilities that were detected.
 *
 * @param {object} caps - Capability record; reads `mtp`, `qat`, `thinking`, `vision`.
 * @returns {string} Badges separated by a single space, in the order
 *   MTP, QAT, thinking, vision; an empty string when none applies.
 */
function capabilityBadges(caps) {
  // Each slot is either the coloured badge or null; nulls are dropped before joining.
  const slots = [
    caps.mtp ? pc.blue("[MTP]") : null,
    caps.qat ? pc.green("[QAT]") : null,
    caps.thinking ? pc.magenta("[thinking]") : null,
    caps.vision ? pc.cyan("[vision]") : null,
  ];
  return slots.filter((badge) => badge !== null).join(" ");
}
|
|
424
|
+
|
|
375
425
|
function createManagedProfile(model, backendId) {
|
|
376
426
|
return normalizeProfile({
|
|
377
427
|
id: model.id.replace(/[^a-z0-9._-]+/gi, "-").toLowerCase(),
|
package/src/profile-setup.mjs
CHANGED
|
@@ -8,10 +8,27 @@ const CACHE_CHOICES = [
|
|
|
8
8
|
{ value: "q4_0", label: "q4_0", hint: "lowest memory, quality/speed tradeoff" },
|
|
9
9
|
];
|
|
10
10
|
|
|
11
|
+
const GENERAL_DEFAULTS = {
|
|
12
|
+
topK: 20,
|
|
13
|
+
presencePenalty: 1.5,
|
|
14
|
+
repeatPenalty: 1.0,
|
|
15
|
+
};
|
|
16
|
+
|
|
17
|
+
const THINKING_DEFAULTS = {
|
|
18
|
+
topK: 64,
|
|
19
|
+
presencePenalty: 0,
|
|
20
|
+
repeatPenalty: 1.1,
|
|
21
|
+
chatTemplateKwargs: { enable_thinking: true },
|
|
22
|
+
};
|
|
23
|
+
|
|
11
24
|
export async function configureLocalProfile(prompt, profile) {
|
|
25
|
+
let configured = profile;
|
|
26
|
+
const caps = profile.capabilities ?? {};
|
|
27
|
+
|
|
12
28
|
console.log("");
|
|
13
29
|
console.log(renderSection("Model setup", renderRows([
|
|
14
30
|
["Model", pc.bold(profile.label)],
|
|
31
|
+
["Detected", detectionSummary(caps)],
|
|
15
32
|
["Context", `${profile.flags.ctxSize.toLocaleString()} tokens`],
|
|
16
33
|
["KV cache", `${profile.flags.cacheTypeK}/${profile.flags.cacheTypeV}`],
|
|
17
34
|
["Sampling", samplingSummary(profile.flags)],
|
|
@@ -19,13 +36,36 @@ export async function configureLocalProfile(prompt, profile) {
|
|
|
19
36
|
console.log(pc.dim("Larger context windows use more memory. KV cache precision controls memory used by attention history."));
|
|
20
37
|
console.log(pc.dim("Sampling defaults are shown for transparency; you can edit command.json later if needed.\n"));
|
|
21
38
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
39
|
+
if (caps.mtp) {
|
|
40
|
+
console.log(renderSection("Detected MTP", renderRows([
|
|
41
|
+
["Backend", "llama.cpp MTP"],
|
|
42
|
+
["Port", "8081"],
|
|
43
|
+
["Flags", "--spec-type draft-mtp --spec-draft-n-max 2"],
|
|
44
|
+
])));
|
|
45
|
+
const useMtp = await prompt.yesNo("Use MTP speculative decoding flags?", true);
|
|
46
|
+
configured = useMtp ? applyMtpDefaults(configured) : removeMtpDefaults(configured);
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
if (caps.thinking || caps.qat) {
|
|
50
|
+
console.log("");
|
|
51
|
+
console.log(renderSection(caps.qat ? "Detected QAT / imatrix-style model" : "Detected thinking model", renderRows([
|
|
52
|
+
["Defaults", "thinking / loop-safe"],
|
|
53
|
+
["Flags", "--top-k 64 --presence-penalty 0 --repeat-penalty 1.1"],
|
|
54
|
+
["Template", "--chat-template-kwargs { enable_thinking: true }"],
|
|
55
|
+
])));
|
|
56
|
+
const useThinking = await prompt.yesNo("Use these thinking/QAT-safe defaults?", true);
|
|
57
|
+
configured = useThinking ? applyThinkingDefaults(configured) : removeThinkingDefaults(configured);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
const ctxSize = await prompt.number("Context window tokens", configured.flags.ctxSize, 1024, 1048576);
|
|
61
|
+
const cacheTypeK = await prompt.choice("K cache precision", CACHE_CHOICES, configured.flags.cacheTypeK);
|
|
62
|
+
const cacheTypeV = await prompt.choice("V cache precision", CACHE_CHOICES, configured.flags.cacheTypeV);
|
|
63
|
+
configured = applyRuntimeFlagOverrides(configured, { ctxSize, cacheTypeK, cacheTypeV });
|
|
26
64
|
|
|
27
65
|
console.log("");
|
|
28
66
|
console.log(renderSection("Defaults", renderRows([
|
|
67
|
+
["Backend", configured.backend],
|
|
68
|
+
["Endpoint", configured.baseUrl],
|
|
29
69
|
["Temperature", configured.flags.temperature],
|
|
30
70
|
["Top-p", configured.flags.topP],
|
|
31
71
|
["Top-k", configured.flags.topK],
|
|
@@ -41,21 +81,63 @@ export async function configureLocalProfile(prompt, profile) {
|
|
|
41
81
|
|
|
42
82
|
export function applyRuntimeFlagOverrides(profile, overrides) {
|
|
43
83
|
const flags = { ...profile.flags, ...overrides };
|
|
44
|
-
return
|
|
84
|
+
return applyProfileFlags(profile, flags);
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
/**
 * Switch a profile to the llama.cpp MTP backend.
 *
 * Sets `backend` and `providerId` to "llama-cpp-mtp", moves the port to 8081,
 * and asks `applyProfileFlags` to set the `--spec-type` / `--spec-draft-n-max`
 * options on the command line.
 *
 * @param {object} profile - Profile to derive from; it is copied, not mutated here.
 * @returns {object} Whatever `applyProfileFlags` returns for the derived profile.
 */
function applyMtpDefaults(profile) {
  const mtpBackend = "llama-cpp-mtp";
  const mtpProfile = { ...profile, backend: mtpBackend, providerId: mtpBackend };
  const mtpFlags = { ...profile.flags, port: 8081 };
  const argvEdits = {
    values: { "--spec-type": "draft-mtp", "--spec-draft-n-max": 2 },
  };
  return applyProfileFlags(mtpProfile, mtpFlags, argvEdits);
}
|
|
93
|
+
|
|
94
|
+
/**
 * Revert a profile to the plain llama.cpp backend.
 *
 * Sets `backend` and `providerId` to "llama-cpp", moves the port to 8080,
 * and asks `applyProfileFlags` to strip the `--spec-type` /
 * `--spec-draft-n-max` options from the command line.
 *
 * @param {object} profile - Profile to derive from; it is copied, not mutated here.
 * @returns {object} Whatever `applyProfileFlags` returns for the derived profile.
 */
function removeMtpDefaults(profile) {
  const plainBackend = "llama-cpp";
  const plainProfile = { ...profile, backend: plainBackend, providerId: plainBackend };
  const plainFlags = { ...profile.flags, port: 8080 };
  const argvEdits = { remove: ["--spec-type", "--spec-draft-n-max"] };
  return applyProfileFlags(plainProfile, plainFlags, argvEdits);
}
|
|
100
|
+
|
|
101
|
+
/**
 * Overlay the thinking-model sampling defaults onto a profile's flags.
 *
 * Every key of `THINKING_DEFAULTS` overrides the same key in `profile.flags`.
 * The nested `chatTemplateKwargs` object is copied rather than shared, so a
 * later in-place edit of one profile's kwargs cannot leak into the
 * module-level defaults or into other profiles built from them.
 *
 * @param {object} profile - Profile whose `flags` are used as the base.
 * @returns {object} Whatever `applyProfileFlags` returns for the merged flags.
 */
function applyThinkingDefaults(profile) {
  const flags = {
    ...profile.flags,
    ...THINKING_DEFAULTS,
    // Spread is shallow: give this profile its own kwargs object.
    chatTemplateKwargs: { ...THINKING_DEFAULTS.chatTemplateKwargs },
  };
  return applyProfileFlags(profile, flags);
}
|
|
105
|
+
|
|
106
|
+
/**
 * Reset a profile's sampling flags to the general (non-thinking) defaults.
 *
 * Every key of `GENERAL_DEFAULTS` overrides the same key in `profile.flags`,
 * `chatTemplateKwargs` is dropped from the flags, and `applyProfileFlags` is
 * asked to strip `--chat-template-kwargs` from the command line.
 *
 * @param {object} profile - Profile whose `flags` are used as the base.
 * @returns {object} Whatever `applyProfileFlags` returns for the reset flags.
 */
function removeThinkingDefaults(profile) {
  // Rest-destructuring omits the kwargs key without mutating an intermediate object.
  const { chatTemplateKwargs: _discarded, ...flags } = { ...profile.flags, ...GENERAL_DEFAULTS };
  return applyProfileFlags(profile, flags, { remove: ["--chat-template-kwargs"] });
}
|
|
111
|
+
|
|
112
|
+
function applyProfileFlags(profile, flags, edits = {}) {
|
|
113
|
+
const next = {
|
|
45
114
|
...profile,
|
|
46
115
|
flags,
|
|
47
116
|
baseUrl: `http://${flags.host}:${flags.port}/v1`,
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
}),
|
|
117
|
+
harnesses: {
|
|
118
|
+
...(profile.harnesses ?? {}),
|
|
119
|
+
pi: { ...(profile.harnesses?.pi ?? {}), enabled: true, model: `${profile.providerId ?? profile.backend}/${profile.modelAlias ?? profile.id}` },
|
|
120
|
+
},
|
|
53
121
|
};
|
|
122
|
+
next.commandArgv = updateArgv(profile.commandArgv ?? [], {
|
|
123
|
+
"--host": flags.host,
|
|
124
|
+
"--port": flags.port,
|
|
125
|
+
"--ctx-size": flags.ctxSize,
|
|
126
|
+
"--cache-type-k": flags.cacheTypeK,
|
|
127
|
+
"--cache-type-v": flags.cacheTypeV,
|
|
128
|
+
"--top-k": flags.topK,
|
|
129
|
+
"--presence-penalty": flags.presencePenalty,
|
|
130
|
+
"--repeat-penalty": flags.repeatPenalty,
|
|
131
|
+
...(flags.chatTemplateKwargs ? { "--chat-template-kwargs": JSON.stringify(flags.chatTemplateKwargs) } : {}),
|
|
132
|
+
}, edits);
|
|
133
|
+
return next;
|
|
54
134
|
}
|
|
55
135
|
|
|
56
|
-
function updateArgv(argv, values) {
|
|
57
|
-
|
|
58
|
-
for (const
|
|
136
|
+
function updateArgv(argv, values, edits = {}) {
|
|
137
|
+
let next = [...argv];
|
|
138
|
+
for (const flag of edits.remove ?? []) next = removeOption(next, flag);
|
|
139
|
+
for (const [flag, value] of Object.entries({ ...values, ...(edits.values ?? {}) })) {
|
|
140
|
+
if (value === undefined) continue;
|
|
59
141
|
const index = next.indexOf(flag);
|
|
60
142
|
if (index === -1) next.push(flag, String(value));
|
|
61
143
|
else next[index + 1] = String(value);
|
|
@@ -63,6 +145,18 @@ function updateArgv(argv, values) {
|
|
|
63
145
|
return next;
|
|
64
146
|
}
|
|
65
147
|
|
|
148
|
+
/**
 * Return a copy of `argv` with every occurrence of `flag` removed, together
 * with the value that follows it.
 *
 * The token after the flag is treated as its value, and dropped too, unless
 * it is missing or starts with "--"; in that case it is kept as a separate
 * option. An empty-string value is still a value and is removed with the
 * flag. Non-string entries are coerced to a string for the "--" check.
 *
 * @param {Array} argv - Argument vector; not mutated.
 * @param {string} flag - Option name to strip, e.g. "--spec-type".
 * @returns {Array} New array without the flag and its value.
 */
function removeOption(argv, flag) {
  const kept = [];
  for (let i = 0; i < argv.length; i++) {
    if (argv[i] !== flag) {
      kept.push(argv[i]);
      continue;
    }
    const following = argv[i + 1];
    // Explicit nullish check: "" is a legitimate value and must not be left orphaned.
    if (following != null && !String(following).startsWith("--")) i += 1;
  }
  return kept;
}
|
|
159
|
+
|
|
66
160
|
function renderMemoryEstimate(profile) {
|
|
67
161
|
try {
|
|
68
162
|
const est = estimateMemory(profile.modelPath, profile.mmprojPath, null, profile.flags);
|
|
@@ -77,6 +171,17 @@ function renderMemoryEstimate(profile) {
|
|
|
77
171
|
}
|
|
78
172
|
}
|
|
79
173
|
|
|
174
|
+
/**
 * Describe the detected capabilities of a model as a single display string.
 *
 * @param {object} caps - Capability record; reads `architecture`, `quant`,
 *   `mtp`, `qat`, `thinking` and `vision`.
 * @returns {string} Detected entries joined by " · ", or "standard GGUF"
 *   when none of the fields is truthy.
 */
function detectionSummary(caps) {
  // Feature flags and the label each contributes, in display order.
  const flagLabels = { mtp: "MTP", qat: "QAT/imatrix", thinking: "thinking", vision: "vision" };
  const summary = [];
  // These two fields contribute their own value rather than a fixed label.
  for (const key of ["architecture", "quant"]) {
    if (caps[key]) summary.push(caps[key]);
  }
  for (const [key, label] of Object.entries(flagLabels)) {
    if (caps[key]) summary.push(label);
  }
  if (summary.length > 0) return summary.join(" · ");
  return "standard GGUF";
}
|
|
184
|
+
|
|
80
185
|
function samplingSummary(flags) {
|
|
81
186
|
return `temp ${flags.temperature}, top-p ${flags.topP}, top-k ${flags.topK}`;
|
|
82
187
|
}
|
package/src/profiles.mjs
CHANGED
|
@@ -136,25 +136,41 @@ export function normalizeProfile(profile) {
|
|
|
136
136
|
|
|
137
137
|
// ── Auto-create profile from a discovered model ────────────────────────────
|
|
138
138
|
|
|
139
|
-
export async function createProfileFromModel(model, backendId
|
|
139
|
+
export async function createProfileFromModel(model, backendId) {
|
|
140
140
|
const { detectCapabilities } = await import("./autodetect.mjs");
|
|
141
141
|
const caps = detectCapabilities(model.path, model.mmprojPath);
|
|
142
|
+
const backend = backendId ?? (caps.mtp ? "llama-cpp-mtp" : "llama-cpp");
|
|
142
143
|
const id = slugFromLabel(model.label);
|
|
143
144
|
const { flags, argv } = computeFlags(caps, model.path, model.mmprojPath, null);
|
|
144
145
|
|
|
145
146
|
return normalizeProfile({
|
|
146
147
|
id,
|
|
147
148
|
label: model.label,
|
|
148
|
-
backend
|
|
149
|
+
backend,
|
|
150
|
+
providerId: backend,
|
|
149
151
|
modelAlias: model.aliasSuggestion,
|
|
150
152
|
modelPath: model.path,
|
|
151
153
|
mmprojPath: model.mmprojPath,
|
|
154
|
+
capabilities: summarizeCapabilities(caps),
|
|
152
155
|
preset: null, // no presets — auto-detected
|
|
153
156
|
flags,
|
|
154
157
|
commandArgv: argv,
|
|
155
158
|
});
|
|
156
159
|
}
|
|
157
160
|
|
|
161
|
+
/**
 * Pick the capability fields that are stored on a profile.
 *
 * Only the eight fields listed below are copied; any other property on
 * `caps` is not carried over. Fields missing from `caps` are still present
 * in the result with the value `undefined`.
 *
 * @param {object} caps - Capability record to summarise.
 * @returns {{architecture, thinking, vision, mtp, qat, quant, metaCtx, ctxSize}}
 *   A new object holding just those fields.
 */
function summarizeCapabilities(caps) {
  const { architecture, thinking, vision, mtp, qat, quant, metaCtx, ctxSize } = caps;
  return { architecture, thinking, vision, mtp, qat, quant, metaCtx, ctxSize };
}
|
|
173
|
+
|
|
158
174
|
// ── State files (for running servers) ──────────────────────────────────────
|
|
159
175
|
|
|
160
176
|
export async function readState(id) {
|