npm - offgrid-ai - Versions diffs - 0.3.15 → 0.3.17 - Mend

offgrid-ai 0.3.15 → 0.3.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "offgrid-ai",
-  "version": "0.3.15",
+  "version": "0.3.17",
   "description": "Privacy-first CLI for running local LLMs — discover, configure, run, benchmark",
   "author": "Eeshan Srivastava (https://eeshans.com)",
   "type": "module",

package/src/autodetect.mjs CHANGED

@@ -7,20 +7,26 @@ import { readGgufMetadata } from "./gguf.mjs";
 export function detectCapabilities(modelPath, mmprojPath) {
   const meta = safeReadGgufMetadata(modelPath);
   const name = basename(modelPath).toLowerCase();
+  const pathHints = String(modelPath).toLowerCase();
   // Architecture
   const architecture = meta["general.architecture"] ?? null;
   // Thinking / reasoning mode
   const hasThinkingKwargs = meta["chat_template_kwargs"] !== undefined;
-  const nameHintsThinking = /qwen3|gemma-4|gemma4|deepseek-r[12]/i.test(name);
+  const nameHintsThinking = /qwen3|qwen3\.\d|gemma-4|gemma4|deepseek-r[12]/i.test(pathHints);
   const thinking = hasThinkingKwargs || nameHintsThinking;
+  // Quantization-aware / imatrix quantization hints. These mostly affect
+  // display and defaults transparency; llama-server does not need a QAT flag.
+  const qat = /qat|imatrix|i-?matrix/i.test(pathHints) || Object.keys(meta).some((key) => key.startsWith("quantize.imatrix."));
   // Vision — mmproj present
   const vision = Boolean(mmprojPath && existsSync(mmprojPath));
-  // MTP (multi-token prediction) — detect speculative decoding
-  const mtp = /mtp/i.test(name) || architecture === "qwen3";
+  // MTP (multi-token prediction) — detect speculative decoding.
+  // Do not treat all Qwen models as MTP; require an explicit filename or metadata hint.
+  const mtp = /\bmtp\b|draft-mtp|multi-token/i.test(pathHints) || Object.keys(meta).some((key) => /mtp|draft|speculative/i.test(key));
   // Quantization
   const quant = name.match(/(Q\d_K_[A-Z]+|UD-[A-Z0-9_]+)/i)?.[1] ?? null;
@@ -31,7 +37,7 @@ export function detectCapabilities(modelPath, mmprojPath) {
     : undefined;
   const ctxSize = metaCtx ?? (thinking ? 80000 : 32768);
-  return { architecture, thinking, vision, mtp, quant, metaCtx, ctxSize, meta };
+  return { architecture, thinking, vision, mtp, qat, quant, metaCtx, ctxSize, meta };
 }
 // ── Compute llama-server flags from capabilities ───────────────────────────
@@ -42,7 +48,7 @@ export function computeFlags(capabilities, modelPath, mmprojPath, draftModelPath
   const flags = {
     host: "127.0.0.1",
-    port: 8080,
+    port: mtp ? 8081 : 8080,
     ctxSize: capabilities.ctxSize,
     flashAttention: "on",
     cacheTypeK: isLowMem ? "f16" : "bf16",

package/src/cli.mjs CHANGED

@@ -14,6 +14,7 @@ import { checkForUpdate, currentPackageVersion, detectInvocation, updateCommand,
 import { removeInstallerPathEntries } from "./shell-path.mjs";
 import { configureLocalProfile } from "./profile-setup.mjs";
 import { buildPrettyCommand } from "./command.mjs";
+import { detectCapabilities } from "./autodetect.mjs";
 // ── Entry point ────────────────────────────────────────────────────────────
@@ -153,76 +154,15 @@ export async function mainFlow() {
     return;
   }
-  // 6. Interactive: pick an action
+  // 6. Interactive: one command center after onboarding.
   startInteractive("offgrid-ai");
-  const prompt = createPrompt();
-  try {
-    // Show what we found
-    const profiledPaths = new Set(profiles.map((p) => p.modelPath).filter(Boolean));
-    const newModels = ggufModels.filter((m) => !profiledPaths.has(m.path));
-    // Managed backend models
-    const managedItems = [];
-    for (const { backendId, models } of managedModels) {
-      const profiledAliases = new Set(
-        profiles.filter((p) => p.backend === backendId).map((p) => backendId === "ollama" ? `ollama:${p.ollamaModel ?? p.modelAlias}` : `omlx:${p.omlxModel ?? p.modelAlias}`)
-      );
-      for (const model of models) {
-        if (!profiledAliases.has(`${backendId}:${model.id}`)) {
-          managedItems.push({ model, backendId });
-        }
-      }
-    }
-    // Show what we found
-    if (profiles.length > 0) {
-      console.log(pc.bold("\nSaved profiles"));
-      for (const profile of profiles) {
-        const backend = backendFor(profile.backend);
-        const colorMap = { "llama-cpp": pc.yellow, "llama-cpp-mtp": pc.blue, "ollama": pc.magenta, "omlx": pc.cyan };
-        const running = await isProfileRunning(profile);
-        const c = colorMap[profile.backend] ?? pc.magenta;
-        console.log(`  ${running ? pc.green("●") : pc.dim("○")} ${pc.bold(profile.label)} ${c(`[${backend.label}]`)} · ${pc.cyan(profile.modelAlias)}`);
-      }
-    }
-    if (newModels.length > 0) {
-      console.log(pc.bold("\nNew models"));
-      for (const model of newModels.slice(0, 10)) {
-        console.log(`  ${pc.cyan(model.label)} ${pc.dim(model.quant ?? "")} · ${pc.dim(formatBytes(model.sizeBytes))}`);
-      }
-      if (newModels.length > 10) console.log(pc.dim(`  ... and ${newModels.length - 10} more`));
-    }
-    for (const { backendId, models } of managedModels) {
-      if (models.length > 0) {
-        const be = BACKENDS[backendId];
-        console.log(pc.bold(`\n${be.label} models`));
-        for (const model of models.slice(0, 5)) {
-          console.log(`  ${pc.cyan(model.label)}`);
-        }
-        if (models.length > 5) console.log(pc.dim(`  ... and ${models.length - 5} more`));
-      }
-    }
-    // Pick what to do
-    const action = await prompt.choice("What next?", [
-      { value: "run", label: "Run a model", hint: "Start server and launch Pi" },
-      ...(profiles.length > 0 ? [{ value: "manage", label: "Manage profiles", hint: "Sync, remove, or inspect" }] : []),
-      { value: "benchmark", label: "Benchmark", hint: "Run a benchmark prompt" },
-    ], "run");
-    if (action === "run") return await pickAndRun(prompt, profiles, newModels, managedItems);
-    if (action === "manage") return await manageProfiles(prompt, profiles);
-    if (action === "benchmark") return await benchmarkFlow(prompt, profiles);
-  } finally {
-    prompt.close();
-  }
+  return await modelCommandCenter({ profiles, ggufModels, managedModels });
 }
-// ── Explicit model/run commands ─────────────────────────────────────────────
+// ── Model command center ────────────────────────────────────────────────────
 async function modelsCommand(argv) {
   await ensureDirs();
-  if (process.stdin.isTTY) startInteractive("offgrid-ai models");
   const catalog = await loadModelCatalog();
   if (argv[0]) {
@@ -231,20 +171,28 @@ async function modelsCommand(argv) {
     return;
   }
-  await printModelCatalog(catalog);
+  if (process.stdin.isTTY) startInteractive("offgrid-ai");
+  return await modelCommandCenter(catalog);
+}
+async function modelCommandCenter(catalog) {
+  const normalized = normalizeCatalog(catalog);
+  const items = modelCatalogItems(normalized);
+  await printModelCatalog(normalized, items);
   if (!process.stdin.isTTY) return;
-  const items = modelCatalogItems(catalog);
   if (items.length === 0) return;
   const prompt = createPrompt();
   try {
-    const action = await prompt.choice("Action", [
-      { value: "inspect", label: "Inspect", hint: "View profile/model details" },
+    const action = await prompt.choice("What do you want to do?", [
+      { value: "inspect", label: "Inspect", hint: "View details" },
       { value: "setup", label: "Set up / sync", hint: "Create profile or sync Pi" },
       { value: "run", label: "Run", hint: "Start server and launch Pi" },
+      { value: "benchmark", label: "Benchmark", hint: "Coming soon: local benchmark project" },
       { value: "remove", label: "Remove", hint: "Delete a saved profile" },
-    ], "inspect");
+    ], "run");
+    if (action === "benchmark") return await benchmarkFlow();
     const item = await chooseCatalogItem(prompt, items, action);
     if (!item) return;
     return await handleCatalogAction(prompt, action, item);
@@ -256,21 +204,9 @@ async function modelsCommand(argv) {
 async function runCommand(argv) {
   await ensureDirs();
   const { positional } = parseOptions(argv);
-  if (positional[0]) {
-    const profile = await readProfile(positional[0]);
-    return await runProfile(profile);
-  }
-  const catalog = await loadModelCatalog();
-  if (!process.stdin.isTTY) throw new Error("Run requires a profile id in non-interactive mode: offgrid-ai run <profile>");
-  startInteractive("offgrid-ai run");
-  await printModelCatalog(catalog);
-  const prompt = createPrompt();
-  try {
-    return await pickAndRun(prompt, catalog.profiles, catalog.newModels, catalog.managedItems);
-  } finally {
-    prompt.close();
-  }
+  if (!positional[0]) return await mainFlow();
+  const profile = await readProfile(positional[0]);
+  return await runProfile(profile);
 }
 async function loadModelCatalog() {
@@ -279,6 +215,12 @@ async function loadModelCatalog() {
     scanGgufModels(),
     scanManagedModels(),
   ]);
+  return normalizeCatalog({ profiles, ggufModels, managedModels });
+}
+function normalizeCatalog(catalog) {
+  if (catalog.newModels && catalog.managedItems) return catalog;
+  const { profiles, ggufModels, managedModels } = catalog;
   const profiledPaths = new Set(profiles.map((p) => p.modelPath).filter(Boolean));
   const newModels = ggufModels.filter((m) => !profiledPaths.has(m.path));
   const managedItems = [];
@@ -293,36 +235,54 @@ async function loadModelCatalog() {
   return { profiles, ggufModels, managedModels, newModels, managedItems };
 }
-async function printModelCatalog({ profiles, newModels, managedModels }) {
-  if (profiles.length > 0) {
-    console.log(pc.bold("\nSaved profiles"));
+async function printModelCatalog({ profiles, newModels, managedItems }, items = modelCatalogItems({ profiles, newModels, managedItems })) {
+  const itemNumber = (predicate) => {
+    const index = items.findIndex(predicate);
+    return index === -1 ? "  " : String(index + 1).padStart(2, " ");
+  };
+  console.log(pc.bold("\nSaved profiles"));
+  if (profiles.length === 0) {
+    console.log(pc.dim("  None yet."));
+  } else {
     for (const profile of profiles) {
       const backend = backendFor(profile.backend);
       const colorMap = { "llama-cpp": pc.yellow, "llama-cpp-mtp": pc.blue, "ollama": pc.magenta, "omlx": pc.cyan };
       const running = await isProfileRunning(profile);
       const piConfigured = await hasPiModel(profile);
       const c = colorMap[profile.backend] ?? pc.magenta;
-      console.log(`  ${running ? pc.green("●") : pc.dim("○")} ${pc.bold(profile.label)} ${c(`[${backend.label}]`)} · ${pc.cyan(profile.modelAlias)} ${piConfigured ? pc.green("· Pi synced") : pc.yellow("· Pi not synced")}`);
+      const num = itemNumber((item) => item.type === "profile" && item.profile.id === profile.id);
+      console.log(`${num}. ${running ? pc.green("●") : pc.dim("○")} ${pc.bold(profile.label)} ${c(`[${backend.label}]`)} · ${pc.cyan(profile.modelAlias)} ${piConfigured ? pc.green("· Pi synced") : pc.yellow("· Pi not synced")}`);
     }
-  } else {
-    console.log(pc.bold("\nSaved profiles"));
-    console.log(pc.dim("  None yet."));
   }
-  if (newModels.length > 0) {
-    console.log(pc.bold("\nNew GGUF models"));
+  console.log("");
+  console.log(pc.bold("Downloaded models not set up yet"));
+  if (newModels.length === 0) {
+    console.log(pc.dim("  None. Every downloaded GGUF has a profile."));
+  } else {
     for (const model of newModels.slice(0, 20)) {
-      console.log(`  ${pc.cyan(model.label)} ${pc.dim(model.quant ?? "")} · ${pc.dim(formatBytes(model.sizeBytes))}`);
+      const caps = detectCapabilities(model.path, model.mmprojPath);
+      const num = itemNumber((item) => item.type === "new" && item.model.path === model.path);
+      console.log(`${num}. ${pc.cyan(model.label)} ${capabilityBadges(caps)} ${pc.dim(model.quant ?? "")}`);
+      console.log(`    alias:  ${pc.cyan(model.aliasSuggestion)}`);
+      console.log(`    size:   ${formatBytes(model.sizeBytes)}`);
     }
     if (newModels.length > 20) console.log(pc.dim(`  ... and ${newModels.length - 20} more`));
   }
-  for (const { backendId, models } of managedModels) {
-    if (models.length === 0) continue;
+  for (const backendId of ["ollama", "omlx"]) {
+    const backendItems = managedItems.filter((item) => item.backendId === backendId);
+    if (backendItems.length === 0) continue;
     const be = BACKENDS[backendId];
-    console.log(pc.bold(`\n${be.label} models`));
-    for (const model of models.slice(0, 10)) console.log(`  ${pc.cyan(model.label)}`);
-    if (models.length > 10) console.log(pc.dim(`  ... and ${models.length - 10} more`));
+    console.log("");
+    console.log(pc.bold(`${be.label} models`));
+    for (const { model } of backendItems.slice(0, 10)) {
+      const num = itemNumber((item) => item.type === "managed" && item.backendId === backendId && item.model.id === model.id);
+      console.log(`${num}. ${pc.cyan(model.label)} ${pc.dim(model.quant ?? "")}`);
+      console.log(`    id: ${pc.cyan(model.id)}`);
+    }
+    if (backendItems.length > 10) console.log(pc.dim(`  ... and ${backendItems.length - 10} more`));
   }
 }
@@ -335,17 +295,25 @@ function modelCatalogItems({ profiles, newModels, managedItems }) {
 }
 async function chooseCatalogItem(prompt, items, action) {
-  const allowed = action === "remove" ? items.filter((item) => item.type === "profile") : items;
-  if (allowed.length === 0) {
-    console.log(pc.yellow(action === "remove" ? "No saved profiles to remove." : "No models available."));
+  if (action === "remove" && !items.some((item) => item.type === "profile")) {
+    console.log(pc.yellow("No saved profiles to remove."));
     return null;
   }
-  const selected = await prompt.choice("Select", allowed.map((item, index) => ({
-    value: String(index),
-    label: item.label,
-    hint: item.hint,
-  })), "0");
-  return allowed[Number(selected)];
+  const input = await prompt.text("Select a number", "");
+  if (!input) return null;
+  const index = Number(input) - 1;
+  if (!Number.isInteger(index) || index < 0 || index >= items.length) {
+    console.log(pc.yellow(`No item ${input}.`));
+    return null;
+  }
+  const item = items[index];
+  if (action === "remove" && item.type !== "profile") {
+    console.log(pc.yellow("Only saved profiles can be removed."));
+    return null;
+  }
+  return item;
 }
 async function handleCatalogAction(prompt, action, item) {
@@ -396,6 +364,7 @@ async function printProfileDetails(profile) {
     ["ID", pc.cyan(profile.id)],
     ["Label", pc.bold(profile.label)],
     ["Backend", backend.label],
+    ...(profile.capabilities ? [["Detected", capabilitySummary(profile.capabilities)]] : []),
     ["Endpoint", pc.green(profile.baseUrl)],
     ...(!isManaged ? [
       ["Model", profile.modelPath ?? "unknown"],
@@ -413,8 +382,10 @@ async function printProfileDetails(profile) {
 }
 function printGgufModelDetails(model) {
+  const caps = detectCapabilities(model.path, model.mmprojPath);
   console.log("\n" + renderSection("GGUF model", renderRows([
     ["Label", pc.bold(model.label)],
+    ["Detected", capabilitySummary(caps)],
     ["Model", model.path],
     ["MMProj", model.mmprojPath ?? "none"],
     ["Quant", model.quant ?? "unknown"],
@@ -431,6 +402,26 @@ function printManagedModelDetails(model, backend) {
   ])));
 }
+function capabilitySummary(caps) {
+  const parts = [];
+  if (caps.architecture) parts.push(caps.architecture);
+  if (caps.quant) parts.push(caps.quant);
+  if (caps.mtp) parts.push("MTP");
+  if (caps.qat) parts.push("QAT/imatrix");
+  if (caps.thinking) parts.push("thinking");
+  if (caps.vision) parts.push("vision");
+  return parts.length > 0 ? parts.join(" · ") : "standard GGUF";
+}
+function capabilityBadges(caps) {
+  const badges = [];
+  if (caps.mtp) badges.push(pc.blue("[MTP]"));
+  if (caps.qat) badges.push(pc.green("[QAT]"));
+  if (caps.thinking) badges.push(pc.magenta("[thinking]"));
+  if (caps.vision) badges.push(pc.cyan("[vision]"));
+  return badges.join(" ");
+}
 function createManagedProfile(model, backendId) {
   return normalizeProfile({
     id: model.id.replace(/[^a-z0-9._-]+/gi, "-").toLowerCase(),
@@ -442,91 +433,6 @@ function createManagedProfile(model, backendId) {
   });
 }
-// ── Pick and run ────────────────────────────────────────────────────────────
-async function pickAndRun(prompt, profiles, newModels, managedItems) {
-  // If there's exactly one profile and it's already running, offer to connect or start fresh
-  const choices = [];
-  // Existing profiles
-  for (const profile of profiles) {
-    const running = await isProfileRunning(profile);
-    const backend = backendFor(profile.backend);
-    const colorMap = { "llama-cpp": pc.yellow, "llama-cpp-mtp": pc.blue, "ollama": pc.magenta, "omlx": pc.cyan };
-    const c = colorMap[profile.backend] ?? pc.magenta;
-    choices.push({
-      value: `profile:${profile.id}`,
-      label: `${running ? pc.green("● ") : ""}${profile.label}`,
-      hint: `${c(backend.label)} · ${profile.modelAlias} · ${profile.baseUrl}`,
-    });
-  }
-  // New GGUF models
-  for (const model of newModels.slice(0, 20)) {
-    choices.push({
-      value: `new:${model.path}`,
-      label: model.label,
-      hint: `${model.quant ?? "GGUF"} · ${formatBytes(model.sizeBytes)}`,
-    });
-  }
-  // Managed models
-  for (const { model, backendId } of managedItems) {
-    const be = BACKENDS[backendId];
-    choices.push({
-      value: `managed:${backendId}:${model.id}`,
-      label: model.label,
-      hint: `${be.label}`,
-    });
-  }
-  if (choices.length === 0) {
-    console.log(pc.yellow("No models available."));
-    return;
-  }
-  const selected = await prompt.choice("Pick a model", choices, choices[0].value);
-  if (selected.startsWith("profile:")) {
-    const id = selected.slice("profile:".length);
-    const profile = await readProfile(id);
-    return await runProfile(profile);
-  }
-  if (selected.startsWith("new:")) {
-    const modelPath = selected.slice("new:".length);
-    const model = newModels.find((m) => m.path === modelPath);
-    if (!model) throw new Error("Model not found.");
-    const profile = await createProfileFromModel(model);
-    const configured = await configureLocalProfile(prompt, profile);
-    if (!configured) return;
-    await saveProfile(configured);
-    console.log(pc.green(`Saved profile: ${configured.label}`));
-    await syncPiConfig(configured);
-    return await runProfile(configured);
-  }
-  if (selected.startsWith("managed:")) {
-    const managedSelection = selected.slice("managed:".length);
-    const separator = managedSelection.indexOf(":");
-    const backendId = separator === -1 ? managedSelection : managedSelection.slice(0, separator);
-    const modelId = separator === -1 ? "" : managedSelection.slice(separator + 1);
-    const model = managedItems.find((m) => m.model.id === modelId && m.backendId === backendId)?.model;
-    if (!model) throw new Error("Model not found.");
-    const profile = normalizeProfile({
-      id: model.id.replace(/[^a-z0-9._-]+/gi, "-").toLowerCase(),
-      label: model.label,
-      backend: backendId,
-      modelAlias: model.aliasSuggestion,
-      ...(backendId === "ollama" ? { ollamaModel: model.id } : {}),
-      ...(backendId === "omlx" ? { omlxModel: model.id } : {}),
-    });
-    await saveProfile(profile);
-    await syncPiConfig(profile);
-    return await runProfile(profile);
-  }
-}
 async function runProfile(profile, options = {}) {
   const backend = backendFor(profile.backend);
   const withHarness = options.with ?? "pi";
@@ -608,56 +514,6 @@ async function runProfile(profile, options = {}) {
   }
 }
-// ── Manage profiles ─────────────────────────────────────────────────────────
-async function manageProfiles(prompt, profiles) {
-  const choices = profiles.map((p) => ({
-    value: p.id,
-    label: p.label,
-    hint: `${p.modelAlias} · ${p.baseUrl}`,
-  }));
-  const selected = await prompt.choice("Which profile?", choices, choices[0].value);
-  const profile = await readProfile(selected);
-  const backend = backendFor(profile.backend);
-  const isManaged = backend.type === "managed-server";
-  const piConfigured = await hasPiModel(profile);
-  // Show profile details
-  console.log("");
-  console.log(renderSection("Profile", renderRows([
-    ["ID", pc.cyan(profile.id)],
-    ["Label", pc.bold(profile.label)],
-    ["Backend", backend.label],
-    ["Endpoint", pc.green(profile.baseUrl)],
-    ...(!isManaged ? [
-      ["Model", profile.modelPath ?? "unknown"],
-      ["MMProj", profile.mmprojPath ?? "none"],
-      ["Memory", existsSync(profile.modelPath) ? formatBytes(statSync(profile.modelPath).size) : "unknown"],
-    ] : []),
-    ["Alias", pc.cyan(profile.modelAlias)],
-    ["Pi", piConfigured ? pc.green("configured") : pc.yellow("not synced")],
-  ])));
-  if (!isManaged && profile.commandArgv) {
-    console.log("");
-    console.log(pc.bold("llama-server command"));
-    console.log(pc.dim(buildPrettyCommand(profile)));
-  }
-  const action = await prompt.choice("Action", [
-    { value: "sync", label: piConfigured ? `${pc.green("✓")} Pi config synced` : "Sync Pi config", hint: piConfigured ? "Already in ~/.pi/agent/models.json" : "Update ~/.pi/agent/models.json" },
-    { value: "run", label: "Run", hint: "Start server + Pi" },
-    ...(isManaged ? [] : [{ value: "server", label: "Server only", hint: "Start server, no harness" }]),
-    { value: "remove", label: "Remove", hint: "Delete profile + Pi config" },
-  ], "sync");
-  if (action === "sync") return await syncPiConfig(profile);
-  if (action === "run") return await runProfile(profile);
-  if (action === "server") return await runProfile(profile, { with: "server" });
-  if (action === "remove") return await removeProfileInteractive(profile.id);
-}
 async function removeProfileInteractive(id) {
   const profile = await readProfile(id);
   if (!process.stdin.isTTY) {
@@ -1207,9 +1063,7 @@ function printHelp() {
   console.log(`${pc.bold("offgrid-ai")} — privacy-first local LLM runner
 Usage:
-  offgrid-ai            Friendly shortcut: pick a model and run it
-  offgrid-ai models     List, inspect, set up, sync, or remove models
-  offgrid-ai run        Pick and run a model (or: offgrid-ai run <profile>)
+  offgrid-ai            Command center: inspect, set up, run, benchmark, or remove models
   offgrid-ai status     Show running local models
   offgrid-ai stop       Stop a running server (or: offgrid-ai stop <id>)
   offgrid-ai uninstall  Remove offgrid-ai, clean up PATH, optionally keep profiles

package/src/profile-setup.mjs CHANGED

@@ -8,10 +8,27 @@ const CACHE_CHOICES = [
   { value: "q4_0", label: "q4_0", hint: "lowest memory, quality/speed tradeoff" },
 ];
+const GENERAL_DEFAULTS = {
+  topK: 20,
+  presencePenalty: 1.5,
+  repeatPenalty: 1.0,
+};
+const THINKING_DEFAULTS = {
+  topK: 64,
+  presencePenalty: 0,
+  repeatPenalty: 1.1,
+  chatTemplateKwargs: { enable_thinking: true },
+};
 export async function configureLocalProfile(prompt, profile) {
+  let configured = profile;
+  const caps = profile.capabilities ?? {};
   console.log("");
   console.log(renderSection("Model setup", renderRows([
     ["Model", pc.bold(profile.label)],
+    ["Detected", detectionSummary(caps)],
     ["Context", `${profile.flags.ctxSize.toLocaleString()} tokens`],
     ["KV cache", `${profile.flags.cacheTypeK}/${profile.flags.cacheTypeV}`],
     ["Sampling", samplingSummary(profile.flags)],
@@ -19,13 +36,36 @@ export async function configureLocalProfile(prompt, profile) {
   console.log(pc.dim("Larger context windows use more memory. KV cache precision controls memory used by attention history."));
   console.log(pc.dim("Sampling defaults are shown for transparency; you can edit command.json later if needed.\n"));
-  const ctxSize = await prompt.number("Context window tokens", profile.flags.ctxSize, 1024, 1048576);
-  const cacheTypeK = await prompt.choice("K cache precision", CACHE_CHOICES, profile.flags.cacheTypeK);
-  const cacheTypeV = await prompt.choice("V cache precision", CACHE_CHOICES, profile.flags.cacheTypeV);
-  const configured = applyRuntimeFlagOverrides(profile, { ctxSize, cacheTypeK, cacheTypeV });
+  if (caps.mtp) {
+    console.log(renderSection("Detected MTP", renderRows([
+      ["Backend", "llama.cpp MTP"],
+      ["Port", "8081"],
+      ["Flags", "--spec-type draft-mtp --spec-draft-n-max 2"],
+    ])));
+    const useMtp = await prompt.yesNo("Use MTP speculative decoding flags?", true);
+    configured = useMtp ? applyMtpDefaults(configured) : removeMtpDefaults(configured);
+  }
+  if (caps.thinking || caps.qat) {
+    console.log("");
+    console.log(renderSection(caps.qat ? "Detected QAT / imatrix-style model" : "Detected thinking model", renderRows([
+      ["Defaults", "thinking / loop-safe"],
+      ["Flags", "--top-k 64 --presence-penalty 0 --repeat-penalty 1.1"],
+      ["Template", "--chat-template-kwargs { enable_thinking: true }"],
+    ])));
+    const useThinking = await prompt.yesNo("Use these thinking/QAT-safe defaults?", true);
+    configured = useThinking ? applyThinkingDefaults(configured) : removeThinkingDefaults(configured);
+  }
+  const ctxSize = await prompt.number("Context window tokens", configured.flags.ctxSize, 1024, 1048576);
+  const cacheTypeK = await prompt.choice("K cache precision", CACHE_CHOICES, configured.flags.cacheTypeK);
+  const cacheTypeV = await prompt.choice("V cache precision", CACHE_CHOICES, configured.flags.cacheTypeV);
+  configured = applyRuntimeFlagOverrides(configured, { ctxSize, cacheTypeK, cacheTypeV });
   console.log("");
   console.log(renderSection("Defaults", renderRows([
+    ["Backend", configured.backend],
+    ["Endpoint", configured.baseUrl],
     ["Temperature", configured.flags.temperature],
     ["Top-p", configured.flags.topP],
     ["Top-k", configured.flags.topK],
@@ -41,21 +81,63 @@ export async function configureLocalProfile(prompt, profile) {
 export function applyRuntimeFlagOverrides(profile, overrides) {
   const flags = { ...profile.flags, ...overrides };
-  return {
+  return applyProfileFlags(profile, flags);
+}
+function applyMtpDefaults(profile) {
+  const flags = { ...profile.flags, port: 8081 };
+  return applyProfileFlags({ ...profile, backend: "llama-cpp-mtp", providerId: "llama-cpp-mtp" }, flags, {
+    values: { "--spec-type": "draft-mtp", "--spec-draft-n-max": 2 },
+  });
+}
+function removeMtpDefaults(profile) {
+  const flags = { ...profile.flags, port: 8080 };
+  return applyProfileFlags({ ...profile, backend: "llama-cpp", providerId: "llama-cpp" }, flags, {
+    remove: ["--spec-type", "--spec-draft-n-max"],
+  });
+}
+function applyThinkingDefaults(profile) {
+  const flags = { ...profile.flags, ...THINKING_DEFAULTS };
+  return applyProfileFlags(profile, flags);
+}
+function removeThinkingDefaults(profile) {
+  const flags = { ...profile.flags, ...GENERAL_DEFAULTS };
+  delete flags.chatTemplateKwargs;
+  return applyProfileFlags(profile, flags, { remove: ["--chat-template-kwargs"] });
+}
+function applyProfileFlags(profile, flags, edits = {}) {
+  const next = {
     ...profile,
     flags,
     baseUrl: `http://${flags.host}:${flags.port}/v1`,
-    commandArgv: updateArgv(profile.commandArgv ?? [], {
-      "--ctx-size": flags.ctxSize,
-      "--cache-type-k": flags.cacheTypeK,
-      "--cache-type-v": flags.cacheTypeV,
-    }),
+    harnesses: {
+      ...(profile.harnesses ?? {}),
+      pi: { ...(profile.harnesses?.pi ?? {}), enabled: true, model: `${profile.providerId ?? profile.backend}/${profile.modelAlias ?? profile.id}` },
+    },
   };
+  next.commandArgv = updateArgv(profile.commandArgv ?? [], {
+    "--host": flags.host,
+    "--port": flags.port,
+    "--ctx-size": flags.ctxSize,
+    "--cache-type-k": flags.cacheTypeK,
+    "--cache-type-v": flags.cacheTypeV,
+    "--top-k": flags.topK,
+    "--presence-penalty": flags.presencePenalty,
+    "--repeat-penalty": flags.repeatPenalty,
+    ...(flags.chatTemplateKwargs ? { "--chat-template-kwargs": JSON.stringify(flags.chatTemplateKwargs) } : {}),
+  }, edits);
+  return next;
 }
-function updateArgv(argv, values) {
-  const next = [...argv];
-  for (const [flag, value] of Object.entries(values)) {
+function updateArgv(argv, values, edits = {}) {
+  let next = [...argv];
+  for (const flag of edits.remove ?? []) next = removeOption(next, flag);
+  for (const [flag, value] of Object.entries({ ...values, ...(edits.values ?? {}) })) {
+    if (value === undefined) continue;
     const index = next.indexOf(flag);
     if (index === -1) next.push(flag, String(value));
     else next[index + 1] = String(value);
@@ -63,6 +145,18 @@ function updateArgv(argv, values) {
   return next;
 }
+function removeOption(argv, flag) {
+  const next = [];
+  for (let i = 0; i < argv.length; i++) {
+    if (argv[i] === flag) {
+      if (argv[i + 1] && !argv[i + 1].startsWith("--")) i += 1;
+      continue;
+    }
+    next.push(argv[i]);
+  }
+  return next;
+}
 function renderMemoryEstimate(profile) {
   try {
     const est = estimateMemory(profile.modelPath, profile.mmprojPath, null, profile.flags);
@@ -77,6 +171,17 @@ function renderMemoryEstimate(profile) {
   }
 }
+function detectionSummary(caps) {
+  const parts = [];
+  if (caps.architecture) parts.push(caps.architecture);
+  if (caps.quant) parts.push(caps.quant);
+  if (caps.mtp) parts.push("MTP");
+  if (caps.qat) parts.push("QAT/imatrix");
+  if (caps.thinking) parts.push("thinking");
+  if (caps.vision) parts.push("vision");
+  return parts.length > 0 ? parts.join(" · ") : "standard GGUF";
+}
 function samplingSummary(flags) {
   return `temp ${flags.temperature}, top-p ${flags.topP}, top-k ${flags.topK}`;
 }

package/src/profiles.mjs CHANGED

@@ -136,25 +136,41 @@ export function normalizeProfile(profile) {
 // ── Auto-create profile from a discovered model ────────────────────────────
-export async function createProfileFromModel(model, backendId = "llama-cpp") {
+export async function createProfileFromModel(model, backendId) {
   const { detectCapabilities } = await import("./autodetect.mjs");
   const caps = detectCapabilities(model.path, model.mmprojPath);
+  const backend = backendId ?? (caps.mtp ? "llama-cpp-mtp" : "llama-cpp");
   const id = slugFromLabel(model.label);
   const { flags, argv } = computeFlags(caps, model.path, model.mmprojPath, null);
   return normalizeProfile({
     id,
     label: model.label,
-    backend: backendId,
+    backend,
+    providerId: backend,
     modelAlias: model.aliasSuggestion,
     modelPath: model.path,
     mmprojPath: model.mmprojPath,
+    capabilities: summarizeCapabilities(caps),
     preset: null, // no presets — auto-detected
     flags,
     commandArgv: argv,
   });
 }
+function summarizeCapabilities(caps) {
+  return {
+    architecture: caps.architecture,
+    thinking: caps.thinking,
+    vision: caps.vision,
+    mtp: caps.mtp,
+    qat: caps.qat,
+    quant: caps.quant,
+    metaCtx: caps.metaCtx,
+    ctxSize: caps.ctxSize,
+  };
+}
 // ── State files (for running servers) ──────────────────────────────────────
 export async function readState(id) {