offgrid-ai 0.3.17 → 0.3.18
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/autodetect.mjs +6 -4
- package/src/cli.mjs +3 -1
- package/src/profile-setup.mjs +14 -4
- package/src/profiles.mjs +1 -0
package/package.json
CHANGED
package/src/autodetect.mjs
CHANGED
|
@@ -17,9 +17,11 @@ export function detectCapabilities(modelPath, mmprojPath) {
|
|
|
17
17
|
const nameHintsThinking = /qwen3|qwen3\.\d|gemma-4|gemma4|deepseek-r[12]/i.test(pathHints);
|
|
18
18
|
const thinking = hasThinkingKwargs || nameHintsThinking;
|
|
19
19
|
|
|
20
|
-
//
|
|
21
|
-
//
|
|
22
|
-
|
|
20
|
+
// QAT is explicit quantization-aware training lineage, mainly seen in
|
|
21
|
+
// Gemma QAT releases. imatrix is common GGUF quantization metadata and is
|
|
22
|
+
// intentionally tracked separately so we don't label every imatrix quant as QAT.
|
|
23
|
+
const qat = /\bqat\b|[-_]qat[-_]|qat[-_]?q\d/i.test(pathHints);
|
|
24
|
+
const imatrix = /imatrix|i-?matrix/i.test(pathHints) || Object.keys(meta).some((key) => key.startsWith("quantize.imatrix."));
|
|
23
25
|
|
|
24
26
|
// Vision — mmproj present
|
|
25
27
|
const vision = Boolean(mmprojPath && existsSync(mmprojPath));
|
|
@@ -37,7 +39,7 @@ export function detectCapabilities(modelPath, mmprojPath) {
|
|
|
37
39
|
: undefined;
|
|
38
40
|
const ctxSize = metaCtx ?? (thinking ? 80000 : 32768);
|
|
39
41
|
|
|
40
|
-
return { architecture, thinking, vision, mtp, qat, quant, metaCtx, ctxSize, meta };
|
|
42
|
+
return { architecture, thinking, vision, mtp, qat, imatrix, quant, metaCtx, ctxSize, meta };
|
|
41
43
|
}
|
|
42
44
|
|
|
43
45
|
// ── Compute llama-server flags from capabilities ───────────────────────────
|
package/src/cli.mjs
CHANGED
|
@@ -407,7 +407,8 @@ function capabilitySummary(caps) {
|
|
|
407
407
|
if (caps.architecture) parts.push(caps.architecture);
|
|
408
408
|
if (caps.quant) parts.push(caps.quant);
|
|
409
409
|
if (caps.mtp) parts.push("MTP");
|
|
410
|
-
if (caps.qat) parts.push("QAT
|
|
410
|
+
if (caps.qat) parts.push("QAT");
|
|
411
|
+
if (caps.imatrix) parts.push("imatrix");
|
|
411
412
|
if (caps.thinking) parts.push("thinking");
|
|
412
413
|
if (caps.vision) parts.push("vision");
|
|
413
414
|
return parts.length > 0 ? parts.join(" · ") : "standard GGUF";
|
|
@@ -417,6 +418,7 @@ function capabilityBadges(caps) {
|
|
|
417
418
|
const badges = [];
|
|
418
419
|
if (caps.mtp) badges.push(pc.blue("[MTP]"));
|
|
419
420
|
if (caps.qat) badges.push(pc.green("[QAT]"));
|
|
421
|
+
if (caps.imatrix) badges.push(pc.dim("[imatrix]"));
|
|
420
422
|
if (caps.thinking) badges.push(pc.magenta("[thinking]"));
|
|
421
423
|
if (caps.vision) badges.push(pc.cyan("[vision]"));
|
|
422
424
|
return badges.join(" ");
|
package/src/profile-setup.mjs
CHANGED
|
@@ -46,14 +46,23 @@ export async function configureLocalProfile(prompt, profile) {
|
|
|
46
46
|
configured = useMtp ? applyMtpDefaults(configured) : removeMtpDefaults(configured);
|
|
47
47
|
}
|
|
48
48
|
|
|
49
|
-
if (caps.
|
|
49
|
+
if (caps.qat || caps.imatrix) {
|
|
50
50
|
console.log("");
|
|
51
|
-
console.log(renderSection(
|
|
51
|
+
console.log(renderSection("Quantization note", renderRows([
|
|
52
|
+
["QAT", caps.qat ? "yes" : "no"],
|
|
53
|
+
["imatrix", caps.imatrix ? "yes" : "no"],
|
|
54
|
+
["Runtime flags", "none required"],
|
|
55
|
+
])));
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
if (caps.thinking) {
|
|
59
|
+
console.log("");
|
|
60
|
+
console.log(renderSection("Detected thinking model", renderRows([
|
|
52
61
|
["Defaults", "thinking / loop-safe"],
|
|
53
62
|
["Flags", "--top-k 64 --presence-penalty 0 --repeat-penalty 1.1"],
|
|
54
63
|
["Template", "--chat-template-kwargs { enable_thinking: true }"],
|
|
55
64
|
])));
|
|
56
|
-
const useThinking = await prompt.yesNo("Use these thinking/
|
|
65
|
+
const useThinking = await prompt.yesNo("Use these thinking/loop-safe defaults?", true);
|
|
57
66
|
configured = useThinking ? applyThinkingDefaults(configured) : removeThinkingDefaults(configured);
|
|
58
67
|
}
|
|
59
68
|
|
|
@@ -176,7 +185,8 @@ function detectionSummary(caps) {
|
|
|
176
185
|
if (caps.architecture) parts.push(caps.architecture);
|
|
177
186
|
if (caps.quant) parts.push(caps.quant);
|
|
178
187
|
if (caps.mtp) parts.push("MTP");
|
|
179
|
-
if (caps.qat) parts.push("QAT
|
|
188
|
+
if (caps.qat) parts.push("QAT");
|
|
189
|
+
if (caps.imatrix) parts.push("imatrix");
|
|
180
190
|
if (caps.thinking) parts.push("thinking");
|
|
181
191
|
if (caps.vision) parts.push("vision");
|
|
182
192
|
return parts.length > 0 ? parts.join(" · ") : "standard GGUF";
|