offgrid-ai 0.3.17 → 0.3.18

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "offgrid-ai",
3
- "version": "0.3.17",
3
+ "version": "0.3.18",
4
4
  "description": "Privacy-first CLI for running local LLMs — discover, configure, run, benchmark",
5
5
  "author": "Eeshan Srivastava (https://eeshans.com)",
6
6
  "type": "module",
@@ -17,9 +17,11 @@ export function detectCapabilities(modelPath, mmprojPath) {
17
17
  const nameHintsThinking = /qwen3|qwen3\.\d|gemma-4|gemma4|deepseek-r[12]/i.test(pathHints);
18
18
  const thinking = hasThinkingKwargs || nameHintsThinking;
19
19
 
20
- // Quantization-aware / imatrix quantization hints. These mostly affect
21
- // display and defaults transparency; llama-server does not need a QAT flag.
22
- const qat = /qat|imatrix|i-?matrix/i.test(pathHints) || Object.keys(meta).some((key) => key.startsWith("quantize.imatrix."));
20
+ // QAT is explicit quantization-aware training lineage, mainly seen in
21
+ // Gemma QAT releases. imatrix is common GGUF quantization metadata and is
22
+ // intentionally tracked separately so we don't label every imatrix quant as QAT.
23
+ const qat = /\bqat\b|[-_]qat[-_]|qat[-_]?q\d/i.test(pathHints);
24
+ const imatrix = /imatrix|i-?matrix/i.test(pathHints) || Object.keys(meta).some((key) => key.startsWith("quantize.imatrix."));
23
25
 
24
26
  // Vision — mmproj present
25
27
  const vision = Boolean(mmprojPath && existsSync(mmprojPath));
@@ -37,7 +39,7 @@ export function detectCapabilities(modelPath, mmprojPath) {
37
39
  : undefined;
38
40
  const ctxSize = metaCtx ?? (thinking ? 80000 : 32768);
39
41
 
40
- return { architecture, thinking, vision, mtp, qat, quant, metaCtx, ctxSize, meta };
42
+ return { architecture, thinking, vision, mtp, qat, imatrix, quant, metaCtx, ctxSize, meta };
41
43
  }
42
44
 
43
45
  // ── Compute llama-server flags from capabilities ───────────────────────────
package/src/cli.mjs CHANGED
@@ -407,7 +407,8 @@ function capabilitySummary(caps) {
407
407
  if (caps.architecture) parts.push(caps.architecture);
408
408
  if (caps.quant) parts.push(caps.quant);
409
409
  if (caps.mtp) parts.push("MTP");
410
- if (caps.qat) parts.push("QAT/imatrix");
410
+ if (caps.qat) parts.push("QAT");
411
+ if (caps.imatrix) parts.push("imatrix");
411
412
  if (caps.thinking) parts.push("thinking");
412
413
  if (caps.vision) parts.push("vision");
413
414
  return parts.length > 0 ? parts.join(" · ") : "standard GGUF";
@@ -417,6 +418,7 @@ function capabilityBadges(caps) {
417
418
  const badges = [];
418
419
  if (caps.mtp) badges.push(pc.blue("[MTP]"));
419
420
  if (caps.qat) badges.push(pc.green("[QAT]"));
421
+ if (caps.imatrix) badges.push(pc.dim("[imatrix]"));
420
422
  if (caps.thinking) badges.push(pc.magenta("[thinking]"));
421
423
  if (caps.vision) badges.push(pc.cyan("[vision]"));
422
424
  return badges.join(" ");
@@ -46,14 +46,23 @@ export async function configureLocalProfile(prompt, profile) {
46
46
  configured = useMtp ? applyMtpDefaults(configured) : removeMtpDefaults(configured);
47
47
  }
48
48
 
49
- if (caps.thinking || caps.qat) {
49
+ if (caps.qat || caps.imatrix) {
50
50
  console.log("");
51
- console.log(renderSection(caps.qat ? "Detected QAT / imatrix-style model" : "Detected thinking model", renderRows([
51
+ console.log(renderSection("Quantization note", renderRows([
52
+ ["QAT", caps.qat ? "yes" : "no"],
53
+ ["imatrix", caps.imatrix ? "yes" : "no"],
54
+ ["Runtime flags", "none required"],
55
+ ])));
56
+ }
57
+
58
+ if (caps.thinking) {
59
+ console.log("");
60
+ console.log(renderSection("Detected thinking model", renderRows([
52
61
  ["Defaults", "thinking / loop-safe"],
53
62
  ["Flags", "--top-k 64 --presence-penalty 0 --repeat-penalty 1.1"],
54
63
  ["Template", "--chat-template-kwargs { enable_thinking: true }"],
55
64
  ])));
56
- const useThinking = await prompt.yesNo("Use these thinking/QAT-safe defaults?", true);
65
+ const useThinking = await prompt.yesNo("Use these thinking/loop-safe defaults?", true);
57
66
  configured = useThinking ? applyThinkingDefaults(configured) : removeThinkingDefaults(configured);
58
67
  }
59
68
 
@@ -176,7 +185,8 @@ function detectionSummary(caps) {
176
185
  if (caps.architecture) parts.push(caps.architecture);
177
186
  if (caps.quant) parts.push(caps.quant);
178
187
  if (caps.mtp) parts.push("MTP");
179
- if (caps.qat) parts.push("QAT/imatrix");
188
+ if (caps.qat) parts.push("QAT");
189
+ if (caps.imatrix) parts.push("imatrix");
180
190
  if (caps.thinking) parts.push("thinking");
181
191
  if (caps.vision) parts.push("vision");
182
192
  return parts.length > 0 ? parts.join(" · ") : "standard GGUF";
package/src/profiles.mjs CHANGED
@@ -165,6 +165,7 @@ function summarizeCapabilities(caps) {
165
165
  vision: caps.vision,
166
166
  mtp: caps.mtp,
167
167
  qat: caps.qat,
168
+ imatrix: caps.imatrix,
168
169
  quant: caps.quant,
169
170
  metaCtx: caps.metaCtx,
170
171
  ctxSize: caps.ctxSize,