npm - @gmickel/gno - Versions diffs - 0.24.0 → 0.25.0 - Mend

@gmickel/gno 0.24.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

package/README.md +5 -2
package/package.json +1 -1
package/src/cli/AGENTS.md +2 -1
package/src/cli/CLAUDE.md +2 -1
package/src/cli/commands/ask.ts +33 -14
package/src/cli/commands/models/clear.ts +10 -3
package/src/cli/commands/models/list.ts +17 -4
package/src/cli/commands/models/pull.ts +15 -7
package/src/cli/commands/query.ts +13 -10
package/src/cli/program.ts +47 -44
package/src/config/types.ts +8 -1
package/src/core/depth-policy.ts +78 -0
package/src/llm/errors.ts +1 -1
package/src/llm/nodeLlamaCpp/adapter.ts +39 -3
package/src/llm/registry.ts +21 -0
package/src/llm/types.ts +1 -1
package/src/mcp/tools/index.ts +2 -2
package/src/mcp/tools/query.ts +25 -37
package/src/pipeline/hybrid.ts +4 -4
package/src/sdk/client.ts +59 -19
package/src/sdk/types.ts +1 -0
package/src/serve/AGENTS.md +2 -1
package/src/serve/CLAUDE.md +2 -1
package/src/serve/context.ts +23 -9
package/src/serve/public/app.tsx +8 -0
package/src/serve/public/components/AIModelSelector.tsx +48 -10
package/src/serve/public/pages/Ask.tsx +94 -54
package/src/serve/public/pages/Browse.tsx +141 -5
package/src/serve/public/pages/Collections.tsx +135 -38
package/src/serve/public/pages/Dashboard.tsx +31 -4
package/src/serve/public/pages/GraphView.tsx +24 -0
package/src/serve/public/pages/Search.tsx +78 -29
package/src/serve/routes/api.ts +6 -6

package/README.md CHANGED Viewed

@@ -61,16 +61,18 @@ models:
   activePreset: slim-tuned
   presets:
     - id: slim-tuned
-      name: GNO Slim Retrieval v1
+      name: GNO Slim Tuned
       embed: hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf
       rerank: hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf
-      gen: hf:guiltylemon/gno-expansion-slim-retrieval-v1/gno-expansion-auto-entity-lock-default-mix-lr95-f16.gguf
+      expand: hf:guiltylemon/gno-expansion-slim-retrieval-v1/gno-expansion-auto-entity-lock-default-mix-lr95-f16.gguf
+      gen: hf:unsloth/Qwen3-4B-Instruct-2507-GGUF/Qwen3-4B-Instruct-2507-Q4_K_M.gguf
 ```
 Then:
 ```bash
 gno models use slim-tuned
+gno models pull --expand
 gno models pull --gen
 gno query "ECONNREFUSED 127.0.0.1:5432" --thorough
 ```
@@ -579,6 +581,7 @@ models:
       name: Remote GPU Server
       embed: "http://192.168.1.100:8081/v1/embeddings#bge-m3"
       rerank: "http://192.168.1.100:8082/v1/completions#reranker"
+      expand: "http://192.168.1.100:8083/v1/chat/completions#gno-expand"
       gen: "http://192.168.1.100:8083/v1/chat/completions#qwen3-4b"
 ```

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@gmickel/gno",
-  "version": "0.24.0",
+  "version": "0.25.0",
   "description": "Local semantic search for your documents. Index Markdown, PDF, and Office files with hybrid BM25 + vector search.",
   "keywords": [
     "embeddings",

package/src/cli/AGENTS.md CHANGED Viewed

@@ -81,7 +81,8 @@ interface CliContext {
   store: SqliteAdapter;
   config: Config;
   embedPort?: EmbeddingPort;
-  genPort?: GenerationPort;
+  expandPort?: GenerationPort;
+  answerPort?: GenerationPort;
   rerankPort?: RerankPort;
 }
 ```

package/src/cli/CLAUDE.md CHANGED Viewed

@@ -81,7 +81,8 @@ interface CliContext {
   store: SqliteAdapter;
   config: Config;
   embedPort?: EmbeddingPort;
-  genPort?: GenerationPort;
+  expandPort?: GenerationPort;
+  answerPort?: GenerationPort;
   rerankPort?: RerankPort;
 }
 ```

package/src/cli/commands/ask.ts CHANGED Viewed

@@ -40,7 +40,9 @@ export type AskCommandOptions = AskOptions & {
   configPath?: string;
   /** Override embedding model */
   embedModel?: string;
-  /** Override generation model */
+  /** Override expansion model */
+  expandModel?: string;
+  /** Override answer generation model */
   genModel?: string;
   /** Override rerank model */
   rerankModel?: string;
@@ -82,7 +84,8 @@ export async function ask(
   const { store, config } = initResult;
   let embedPort: EmbeddingPort | null = null;
-  let genPort: GenerationPort | null = null;
+  let expandPort: GenerationPort | null = null;
+  let answerPort: GenerationPort | null = null;
   let rerankPort: RerankPort | null = null;
   try {
@@ -113,10 +116,23 @@ export async function ask(
       embedPort = embedResult.value;
     }
-    // Create generation port (for expansion and/or answer)
-    // Need genPort if: expansion enabled (!noExpand) OR answer requested
-    const needsGen = !options.noExpand || options.answer;
-    if (needsGen) {
+    // Create expansion port when expansion is enabled.
+    if (!options.noExpand && !options.queryModes?.length) {
+      const expandUri =
+        options.expandModel ?? options.genModel ?? preset.expand;
+      const genResult = await llm.createExpansionPort(expandUri, {
+        policy,
+        onProgress: downloadProgress
+          ? (progress) => downloadProgress("expand", progress)
+          : undefined,
+      });
+      if (genResult.ok) {
+        expandPort = genResult.value;
+      }
+    }
+    // Create answer generation port when answers are requested.
+    if (options.answer) {
       const genUri = options.genModel ?? preset.gen;
       const genResult = await llm.createGenerationPort(genUri, {
         policy,
@@ -125,7 +141,7 @@ export async function ask(
           : undefined,
       });
       if (genResult.ok) {
-        genPort = genResult.value;
+        answerPort = genResult.value;
       }
     }
@@ -170,7 +186,7 @@ export async function ask(
       config,
       vectorIndex,
       embedPort,
-      genPort,
+      expandPort,
       rerankPort,
     };
@@ -178,7 +194,7 @@ export async function ask(
     const answerRequested = options.answer && !options.noAnswer;
     // Fail early if --answer is requested but no generation model available
-    if (answerRequested && genPort === null) {
+    if (answerRequested && answerPort === null) {
       return {
         success: false,
         error:
@@ -223,12 +239,12 @@ export async function ask(
     // 2. --no-answer was not set
     // 3. We have results to ground on (no point generating from nothing)
     const shouldGenerateAnswer =
-      answerRequested && genPort !== null && results.length > 0;
+      answerRequested && answerPort !== null && results.length > 0;
-    if (shouldGenerateAnswer && genPort) {
+    if (shouldGenerateAnswer && answerPort) {
       const maxTokens = options.maxAnswerTokens ?? 512;
       const rawResult = await generateGroundedAnswer(
-        { genPort, store },
+        { genPort: answerPort, store },
         query,
         results,
         maxTokens
@@ -277,8 +293,11 @@ export async function ask(
     if (embedPort) {
       await embedPort.dispose();
     }
-    if (genPort) {
-      await genPort.dispose();
+    if (expandPort) {
+      await expandPort.dispose();
+    }
+    if (answerPort) {
+      await answerPort.dispose();
     }
     if (rerankPort) {
       await rerankPort.dispose();

package/src/cli/commands/models/clear.ts CHANGED Viewed

@@ -21,6 +21,8 @@ export interface ModelsClearOptions {
   embed?: boolean;
   /** Clear reranker model */
   rerank?: boolean;
+  /** Clear expansion model */
+  expand?: boolean;
   /** Clear generation model */
   gen?: boolean;
   /** Skip confirmation */
@@ -50,7 +52,7 @@ export async function modelsClear(
   if (options.all) {
     types = undefined; // Clear all
-  } else if (options.embed || options.rerank || options.gen) {
+  } else if (options.embed || options.rerank || options.expand || options.gen) {
     types = [];
     if (options.embed) {
       types.push("embed");
@@ -58,6 +60,9 @@ export async function modelsClear(
     if (options.rerank) {
       types.push("rerank");
     }
+    if (options.expand) {
+      types.push("expand");
+    }
     if (options.gen) {
       types.push("gen");
     }
@@ -71,7 +76,7 @@ export async function modelsClear(
   const sizeAfter = await cache.totalSize();
   return {
-    cleared: types ?? ["embed", "rerank", "gen"],
+    cleared: types ?? ["embed", "rerank", "expand", "gen"],
     sizeBefore,
     sizeAfter,
   };
@@ -96,8 +101,10 @@ function formatBytes(bytes: number): string {
  */
 export function formatModelsClear(result: ModelsClearResult): string {
   const lines: string[] = [];
+  const label = (type: ModelType) =>
+    type === "gen" ? "answer" : type === "expand" ? "expand" : type;
-  lines.push(`Cleared: ${result.cleared.join(", ")}`);
+  lines.push(`Cleared: ${result.cleared.map(label).join(", ")}`);
   lines.push(`Freed: ${formatBytes(result.sizeBefore - result.sizeAfter)}`);
   return lines.join("\n");

package/src/cli/commands/models/list.ts CHANGED Viewed

@@ -36,6 +36,7 @@ export interface ModelsListResult {
   presets: PresetInfo[];
   embed: ModelStatus;
   rerank: ModelStatus;
+  expand: ModelStatus;
   gen: ModelStatus;
   cacheDir: string;
   totalSize: number;
@@ -84,9 +85,10 @@ export async function modelsList(
   const preset = getActivePreset(config);
   const cache = new ModelCache(getModelsCachePath());
-  const [embed, rerank, gen] = await Promise.all([
+  const [embed, rerank, expand, gen] = await Promise.all([
     getModelStatus(cache, preset.embed),
     getModelStatus(cache, preset.rerank),
+    getModelStatus(cache, preset.expand ?? preset.gen),
     getModelStatus(cache, preset.gen),
   ]);
@@ -99,6 +101,7 @@ export async function modelsList(
     })),
     embed,
     rerank,
+    expand,
     gen,
     cacheDir: cache.dir,
     totalSize: await cache.totalSize(),
@@ -147,7 +150,11 @@ function formatTerminal(result: ModelsListResult): string {
       (result.rerank.size ? ` (${formatBytes(result.rerank.size)})` : "")
   );
   lines.push(
-    `  gen:    ${statusIcon(result.gen)} ${result.gen.uri}` +
+    `  expand: ${statusIcon(result.expand)} ${result.expand.uri}` +
+      (result.expand.size ? ` (${formatBytes(result.expand.size)})` : "")
+  );
+  lines.push(
+    `  answer: ${statusIcon(result.gen)} ${result.gen.uri}` +
       (result.gen.size ? ` (${formatBytes(result.gen.size)})` : "")
   );
@@ -156,7 +163,10 @@ function formatTerminal(result: ModelsListResult): string {
   lines.push(`Total size: ${formatBytes(result.totalSize)}`);
   const allCached =
-    result.embed.cached && result.rerank.cached && result.gen.cached;
+    result.embed.cached &&
+    result.rerank.cached &&
+    result.expand.cached &&
+    result.gen.cached;
   if (!allCached) {
     lines.push("");
     lines.push("Run: gno models pull --all");
@@ -186,7 +196,10 @@ function formatMarkdown(result: ModelsListResult): string {
     `| rerank | ${result.rerank.uri} | ${status(result.rerank)} | ${size(result.rerank)} |`
   );
   lines.push(
-    `| gen | ${result.gen.uri} | ${status(result.gen)} | ${size(result.gen)} |`
+    `| expand | ${result.expand.uri} | ${status(result.expand)} | ${size(result.expand)} |`
+  );
+  lines.push(
+    `| answer | ${result.gen.uri} | ${status(result.gen)} | ${size(result.gen)} |`
   );
   lines.push("");

package/src/cli/commands/models/pull.ts CHANGED Viewed

@@ -27,6 +27,8 @@ export interface ModelsPullOptions {
   embed?: boolean;
   /** Pull reranker model */
   rerank?: boolean;
+  /** Pull expansion model */
+  expand?: boolean;
   /** Pull generation model */
   gen?: boolean;
   /** Force re-download */
@@ -59,9 +61,9 @@ export interface ModelsPullResult {
  */
 function getTypesToPull(options: ModelsPullOptions): ModelType[] {
   if (options.all) {
-    return ["embed", "rerank", "gen"];
+    return ["embed", "rerank", "expand", "gen"];
   }
-  if (options.embed || options.rerank || options.gen) {
+  if (options.embed || options.rerank || options.expand || options.gen) {
     const types: ModelType[] = [];
     if (options.embed) {
       types.push("embed");
@@ -69,13 +71,16 @@ function getTypesToPull(options: ModelsPullOptions): ModelType[] {
     if (options.rerank) {
       types.push("rerank");
     }
+    if (options.expand) {
+      types.push("expand");
+    }
     if (options.gen) {
       types.push("gen");
     }
     return types;
   }
   // Default: pull all
-  return ["embed", "rerank", "gen"];
+  return ["embed", "rerank", "expand", "gen"];
 }
 /**
@@ -101,7 +106,8 @@ export async function modelsPull(
   let skipped = 0;
   for (const type of types) {
-    const uri = preset[type];
+    const uri =
+      type === "expand" ? (preset.expand ?? preset.gen) : preset[type];
     // Check if already cached (skip unless --force)
     if (!options.force) {
@@ -160,16 +166,18 @@ export async function modelsPull(
  */
 export function formatModelsPull(result: ModelsPullResult): string {
   const lines: string[] = [];
+  const label = (type: ModelType) =>
+    type === "gen" ? "answer" : type === "expand" ? "expand" : type;
   for (const r of result.results) {
     if (r.ok) {
       if (r.skipped) {
-        lines.push(`${r.type}: skipped (already cached)`);
+        lines.push(`${label(r.type)}: skipped (already cached)`);
       } else {
-        lines.push(`${r.type}: downloaded`);
+        lines.push(`${label(r.type)}: downloaded`);
       }
     } else {
-      lines.push(`${r.type}: failed - ${r.error}`);
+      lines.push(`${label(r.type)}: failed - ${r.error}`);
     }
   }

package/src/cli/commands/query.ts CHANGED Viewed

@@ -36,7 +36,9 @@ export type QueryCommandOptions = HybridSearchOptions & {
   configPath?: string;
   /** Override embedding model */
   embedModel?: string;
-  /** Override generation model */
+  /** Override expansion model */
+  expandModel?: string;
+  /** Deprecated alias for expansion model */
   genModel?: string;
   /** Override rerank model */
   rerankModel?: string;
@@ -90,7 +92,7 @@ export async function query(
   const { store, config } = initResult;
   let embedPort: EmbeddingPort | null = null;
-  let genPort: GenerationPort | null = null;
+  let expandPort: GenerationPort | null = null;
   let rerankPort: RerankPort | null = null;
   try {
@@ -121,18 +123,19 @@ export async function query(
       embedPort = embedResult.value;
     }
-    // Create generation port (for expansion) - optional.
+    // Create expansion port - optional.
     // Skip when structured query modes are provided.
     if (!options.noExpand && !options.queryModes?.length) {
-      const genUri = options.genModel ?? preset.gen;
-      const genResult = await llm.createGenerationPort(genUri, {
+      const expandUri =
+        options.expandModel ?? options.genModel ?? preset.expand;
+      const genResult = await llm.createExpansionPort(expandUri, {
         policy,
         onProgress: downloadProgress
-          ? (progress) => downloadProgress("gen", progress)
+          ? (progress) => downloadProgress("expand", progress)
           : undefined,
       });
       if (genResult.ok) {
-        genPort = genResult.value;
+        expandPort = genResult.value;
       }
     }
@@ -177,7 +180,7 @@ export async function query(
       config,
       vectorIndex,
       embedPort,
-      genPort,
+      expandPort,
       rerankPort,
     };
@@ -195,8 +198,8 @@ export async function query(
     if (embedPort) {
       await embedPort.dispose();
     }
-    if (genPort) {
-      await genPort.dispose();
+    if (expandPort) {
+      await expandPort.dispose();
     }
     if (rerankPort) {
       await rerankPort.dispose();

package/src/cli/program.ts CHANGED Viewed

@@ -14,6 +14,7 @@ import {
   PRODUCT_NAME,
   VERSION,
 } from "../app/constants";
+import { resolveDepthPolicy } from "../core/depth-policy";
 import { parseAndValidateTagFilter } from "../core/tags";
 import { setColorsEnabled } from "./colors";
 import {
@@ -455,7 +456,10 @@ function wireSearchCommands(program: Command): void {
     .option("--full", "include full content")
     .option("--line-numbers", "include line numbers in output")
     .option("--fast", "skip expansion and reranking (fastest, ~0.7s)")
-    .option("--thorough", "enable query expansion (slower, ~5-8s)")
+    .option(
+      "--thorough",
+      "use expansion with a wider rerank pool (slowest, best recall)"
+    )
     .option("--no-expand", "disable query expansion")
     .option("--no-rerank", "disable reranking")
     .option(
@@ -474,6 +478,7 @@ function wireSearchCommands(program: Command): void {
     .action(async (queryText: string, cmdOpts: Record<string, unknown>) => {
       const format = getFormat(cmdOpts);
       assertFormatSupported(CMD.query, format);
+      const globals = getGlobals();
       // Validate empty query
       if (!queryText.trim()) {
@@ -532,35 +537,26 @@ function wireSearchCommands(program: Command): void {
       const limit = cmdOpts.limit
         ? parsePositiveInt("limit", cmdOpts.limit)
         : getDefaultLimit(format);
+      const { loadConfig } = await import("../config");
+      const { getActivePreset } = await import("../llm/registry");
+      const configResult = await loadConfig(globals.config);
+      const activePresetId = configResult.ok
+        ? getActivePreset(configResult.value).id
+        : "slim";
       const candidateLimit = cmdOpts.candidateLimit
         ? parsePositiveInt("candidate-limit", cmdOpts.candidateLimit)
         : undefined;
       const categories = parseCsvValues(cmdOpts.category);
       const exclude = parseCsvValues(cmdOpts.exclude);
-      // Determine expansion/rerank settings based on flags
-      // Priority: --fast > --thorough > --no-expand/--no-rerank > default
-      // Default: skip expansion (balanced mode ~2-3s)
-      let noExpand = true; // Default: skip expansion
-      let noRerank = false; // Default: with reranking
-      if (cmdOpts.fast) {
-        // --fast: skip both (~0.7s)
-        noExpand = true;
-        noRerank = true;
-      } else if (cmdOpts.thorough) {
-        // --thorough: full pipeline (~5-8s)
-        noExpand = false;
-        noRerank = false;
-      } else {
-        // Check individual flags (override defaults)
-        if (cmdOpts.expand === false) {
-          noExpand = true;
-        }
-        if (cmdOpts.rerank === false) {
-          noRerank = true;
-        }
-      }
+      const depthPolicy = resolveDepthPolicy({
+        presetId: activePresetId,
+        fast: Boolean(cmdOpts.fast),
+        thorough: Boolean(cmdOpts.thorough),
+        expand: cmdOpts.expand === false ? false : undefined,
+        rerank: cmdOpts.rerank === false ? false : undefined,
+        candidateLimit,
+      });
       const { query, formatQuery } = await import("./commands/query");
       const result = await query(queryText, {
@@ -578,9 +574,9 @@ function wireSearchCommands(program: Command): void {
         tagsAny,
         full: Boolean(cmdOpts.full),
         lineNumbers: Boolean(cmdOpts.lineNumbers),
-        noExpand,
-        noRerank,
-        candidateLimit,
+        noExpand: depthPolicy.noExpand,
+        noRerank: depthPolicy.noRerank,
+        candidateLimit: depthPolicy.candidateLimit,
         queryModes,
         explain: Boolean(cmdOpts.explain),
         json: format === "json",
@@ -630,7 +626,10 @@ function wireSearchCommands(program: Command): void {
       []
     )
     .option("--fast", "skip expansion and reranking (fastest)")
-    .option("--thorough", "enable query expansion (slower)")
+    .option(
+      "--thorough",
+      "use expansion with a wider rerank pool (slowest, best recall)"
+    )
     .option("-C, --candidate-limit <num>", "max candidates passed to reranking")
     .option("--answer", "generate short grounded answer")
     .option("--no-answer", "force retrieval-only output")
@@ -641,6 +640,7 @@ function wireSearchCommands(program: Command): void {
     .action(async (queryText: string, cmdOpts: Record<string, unknown>) => {
       const format = getFormat(cmdOpts);
       assertFormatSupported(CMD.ask, format);
+      const globals = getGlobals();
       // Validate empty query
       if (!queryText.trim()) {
@@ -650,6 +650,12 @@ function wireSearchCommands(program: Command): void {
       const limit = cmdOpts.limit
         ? parsePositiveInt("limit", cmdOpts.limit)
         : getDefaultLimit(format);
+      const { loadConfig } = await import("../config");
+      const { getActivePreset } = await import("../llm/registry");
+      const configResult = await loadConfig(globals.config);
+      const activePresetId = configResult.ok
+        ? getActivePreset(configResult.value).id
+        : "slim";
       const candidateLimit = cmdOpts.candidateLimit
         ? parsePositiveInt("candidate-limit", cmdOpts.candidateLimit)
         : undefined;
@@ -686,18 +692,13 @@ function wireSearchCommands(program: Command): void {
           ? normalizedInput.value.queryModes
           : undefined;
-      // Determine expansion/rerank settings based on flags
-      // Default: skip expansion (balanced mode)
-      let noExpand = true;
-      let noRerank = false;
-      if (cmdOpts.fast) {
-        noExpand = true;
-        noRerank = true;
-      } else if (cmdOpts.thorough) {
-        noExpand = false;
-        noRerank = false;
-      }
+      const depthPolicy = resolveDepthPolicy({
+        presetId: activePresetId,
+        fast: Boolean(cmdOpts.fast),
+        thorough: Boolean(cmdOpts.thorough),
+        candidateLimit,
+        hasStructuredModes: Boolean(queryModes?.length),
+      });
       const { ask, formatAsk } = await import("./commands/ask");
       const showSources = Boolean(cmdOpts.showSources);
@@ -712,9 +713,9 @@ function wireSearchCommands(program: Command): void {
         intent: cmdOpts.intent as string | undefined,
         exclude,
         queryModes,
-        noExpand,
-        noRerank,
-        candidateLimit,
+        noExpand: depthPolicy.noExpand,
+        noRerank: depthPolicy.noRerank,
+        candidateLimit: depthPolicy.candidateLimit,
         // Per spec: --answer defaults to false, --no-answer forces retrieval-only
         // Commander creates separate cmdOpts.noAnswer for --no-answer flag
         answer: Boolean(cmdOpts.answer),
@@ -1358,7 +1359,8 @@ function wireManagementCommands(program: Command): void {
     .option("--all", "download all configured models")
     .option("--embed", "download embedding model")
     .option("--rerank", "download reranker model")
-    .option("--gen", "download generation model")
+    .option("--expand", "download expansion model")
+    .option("--gen", "download answer generation model")
     .option("--force", "force re-download")
     .option("--no-progress", "disable download progress")
     .action(async (cmdOpts: Record<string, unknown>) => {
@@ -1377,6 +1379,7 @@ function wireManagementCommands(program: Command): void {
         all: Boolean(cmdOpts.all),
         embed: Boolean(cmdOpts.embed),
         rerank: Boolean(cmdOpts.rerank),
+        expand: Boolean(cmdOpts.expand),
         gen: Boolean(cmdOpts.gen),
         force: Boolean(cmdOpts.force),
         onProgress: showProgress ? createProgressRenderer() : undefined,

package/src/config/types.ts CHANGED Viewed

@@ -166,7 +166,9 @@ export const ModelPresetSchema = z.object({
   embed: z.string().min(1),
   /** Reranker model URI */
   rerank: z.string().min(1),
-  /** Generation model URI */
+  /** Query expansion model URI (defaults to gen for older configs) */
+  expand: z.string().min(1).optional(),
+  /** Answer generation model URI */
   gen: z.string().min(1),
 });
@@ -180,6 +182,7 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
     embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
     rerank:
       "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
+    expand: "hf:unsloth/Qwen3-1.7B-GGUF/Qwen3-1.7B-Q4_K_M.gguf",
     gen: "hf:unsloth/Qwen3-1.7B-GGUF/Qwen3-1.7B-Q4_K_M.gguf",
   },
   {
@@ -188,6 +191,8 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
     embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
     rerank:
       "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
+    expand:
+      "hf:bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-Q4_K_M.gguf",
     gen: "hf:bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-Q4_K_M.gguf",
   },
   {
@@ -196,6 +201,8 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
     embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
     rerank:
       "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
+    expand:
+      "hf:unsloth/Qwen3-4B-Instruct-2507-GGUF/Qwen3-4B-Instruct-2507-Q4_K_M.gguf",
     gen: "hf:unsloth/Qwen3-4B-Instruct-2507-GGUF/Qwen3-4B-Instruct-2507-Q4_K_M.gguf",
   },
 ];