npm - offgrid-ai - Versions diffs - 0.3.17 → 0.3.18 - Mend

offgrid-ai 0.3.17 → 0.3.18

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "offgrid-ai",
-  "version": "0.3.17",
+  "version": "0.3.18",
   "description": "Privacy-first CLI for running local LLMs — discover, configure, run, benchmark",
   "author": "Eeshan Srivastava (https://eeshans.com)",
   "type": "module",

package/src/autodetect.mjs CHANGED Viewed

@@ -17,9 +17,11 @@ export function detectCapabilities(modelPath, mmprojPath) {
   const nameHintsThinking = /qwen3|qwen3\.\d|gemma-4|gemma4|deepseek-r[12]/i.test(pathHints);
   const thinking = hasThinkingKwargs || nameHintsThinking;
-  // Quantization-aware / imatrix quantization hints. These mostly affect
-  // display and defaults transparency; llama-server does not need a QAT flag.
-  const qat = /qat|imatrix|i-?matrix/i.test(pathHints) || Object.keys(meta).some((key) => key.startsWith("quantize.imatrix."));
+  // QAT is explicit quantization-aware training lineage, mainly seen in
+  // Gemma QAT releases. imatrix is common GGUF quantization metadata and is
+  // intentionally tracked separately so we don't label every imatrix quant as QAT.
+  const qat = /\bqat\b|[-_]qat[-_]|qat[-_]?q\d/i.test(pathHints);
+  const imatrix = /imatrix|i-?matrix/i.test(pathHints) || Object.keys(meta).some((key) => key.startsWith("quantize.imatrix."));
   // Vision — mmproj present
   const vision = Boolean(mmprojPath && existsSync(mmprojPath));
@@ -37,7 +39,7 @@ export function detectCapabilities(modelPath, mmprojPath) {
     : undefined;
   const ctxSize = metaCtx ?? (thinking ? 80000 : 32768);
-  return { architecture, thinking, vision, mtp, qat, quant, metaCtx, ctxSize, meta };
+  return { architecture, thinking, vision, mtp, qat, imatrix, quant, metaCtx, ctxSize, meta };
 }
 // ── Compute llama-server flags from capabilities ───────────────────────────

package/src/cli.mjs CHANGED Viewed

@@ -407,7 +407,8 @@ function capabilitySummary(caps) {
   if (caps.architecture) parts.push(caps.architecture);
   if (caps.quant) parts.push(caps.quant);
   if (caps.mtp) parts.push("MTP");
-  if (caps.qat) parts.push("QAT/imatrix");
+  if (caps.qat) parts.push("QAT");
+  if (caps.imatrix) parts.push("imatrix");
   if (caps.thinking) parts.push("thinking");
   if (caps.vision) parts.push("vision");
   return parts.length > 0 ? parts.join(" · ") : "standard GGUF";
@@ -417,6 +418,7 @@ function capabilityBadges(caps) {
   const badges = [];
   if (caps.mtp) badges.push(pc.blue("[MTP]"));
   if (caps.qat) badges.push(pc.green("[QAT]"));
+  if (caps.imatrix) badges.push(pc.dim("[imatrix]"));
   if (caps.thinking) badges.push(pc.magenta("[thinking]"));
   if (caps.vision) badges.push(pc.cyan("[vision]"));
   return badges.join(" ");

package/src/profile-setup.mjs CHANGED Viewed

@@ -46,14 +46,23 @@ export async function configureLocalProfile(prompt, profile) {
     configured = useMtp ? applyMtpDefaults(configured) : removeMtpDefaults(configured);
   }
-  if (caps.thinking || caps.qat) {
+  if (caps.qat || caps.imatrix) {
     console.log("");
-    console.log(renderSection(caps.qat ? "Detected QAT / imatrix-style model" : "Detected thinking model", renderRows([
+    console.log(renderSection("Quantization note", renderRows([
+      ["QAT", caps.qat ? "yes" : "no"],
+      ["imatrix", caps.imatrix ? "yes" : "no"],
+      ["Runtime flags", "none required"],
+    ])));
+  }
+  if (caps.thinking) {
+    console.log("");
+    console.log(renderSection("Detected thinking model", renderRows([
       ["Defaults", "thinking / loop-safe"],
       ["Flags", "--top-k 64 --presence-penalty 0 --repeat-penalty 1.1"],
       ["Template", "--chat-template-kwargs { enable_thinking: true }"],
     ])));
-    const useThinking = await prompt.yesNo("Use these thinking/QAT-safe defaults?", true);
+    const useThinking = await prompt.yesNo("Use these thinking/loop-safe defaults?", true);
     configured = useThinking ? applyThinkingDefaults(configured) : removeThinkingDefaults(configured);
   }
@@ -176,7 +185,8 @@ function detectionSummary(caps) {
   if (caps.architecture) parts.push(caps.architecture);
   if (caps.quant) parts.push(caps.quant);
   if (caps.mtp) parts.push("MTP");
-  if (caps.qat) parts.push("QAT/imatrix");
+  if (caps.qat) parts.push("QAT");
+  if (caps.imatrix) parts.push("imatrix");
   if (caps.thinking) parts.push("thinking");
   if (caps.vision) parts.push("vision");
   return parts.length > 0 ? parts.join(" · ") : "standard GGUF";

package/src/profiles.mjs CHANGED Viewed

@@ -165,6 +165,7 @@ function summarizeCapabilities(caps) {
     vision: caps.vision,
     mtp: caps.mtp,
     qat: caps.qat,
+    imatrix: caps.imatrix,
     quant: caps.quant,
     metaCtx: caps.metaCtx,
     ctxSize: caps.ctxSize,