npm - @gmickel/gno - Versions diffs - 0.24.0 → 0.25.2 - Mend

@gmickel/gno 0.24.0 → 0.25.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34)

package/README.md +5 -2
package/assets/skill/SKILL.md +75 -1
package/package.json +1 -1
package/src/cli/AGENTS.md +2 -1
package/src/cli/CLAUDE.md +2 -1
package/src/cli/commands/ask.ts +33 -14
package/src/cli/commands/models/clear.ts +10 -3
package/src/cli/commands/models/list.ts +17 -4
package/src/cli/commands/models/pull.ts +15 -7
package/src/cli/commands/query.ts +13 -10
package/src/cli/program.ts +47 -44
package/src/config/types.ts +8 -1
package/src/core/depth-policy.ts +78 -0
package/src/llm/errors.ts +1 -1
package/src/llm/nodeLlamaCpp/adapter.ts +39 -3
package/src/llm/registry.ts +21 -0
package/src/llm/types.ts +1 -1
package/src/mcp/tools/index.ts +2 -2
package/src/mcp/tools/query.ts +25 -37
package/src/pipeline/hybrid.ts +4 -4
package/src/sdk/client.ts +59 -19
package/src/sdk/types.ts +1 -0
package/src/serve/AGENTS.md +2 -1
package/src/serve/CLAUDE.md +2 -1
package/src/serve/context.ts +23 -9
package/src/serve/public/app.tsx +8 -0
package/src/serve/public/components/AIModelSelector.tsx +48 -10
package/src/serve/public/pages/Ask.tsx +94 -54
package/src/serve/public/pages/Browse.tsx +141 -5
package/src/serve/public/pages/Collections.tsx +135 -38
package/src/serve/public/pages/Dashboard.tsx +31 -4
package/src/serve/public/pages/GraphView.tsx +24 -0
package/src/serve/public/pages/Search.tsx +78 -29
package/src/serve/routes/api.ts +6 -6

package/src/config/types.ts CHANGED Viewed

@@ -166,7 +166,9 @@ export const ModelPresetSchema = z.object({
   embed: z.string().min(1),
   /** Reranker model URI */
   rerank: z.string().min(1),
-  /** Generation model URI */
+  /** Query expansion model URI (defaults to gen for older configs) */
+  expand: z.string().min(1).optional(),
+  /** Answer generation model URI */
   gen: z.string().min(1),
 });
@@ -180,6 +182,7 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
     embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
     rerank:
       "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
+    expand: "hf:unsloth/Qwen3-1.7B-GGUF/Qwen3-1.7B-Q4_K_M.gguf",
     gen: "hf:unsloth/Qwen3-1.7B-GGUF/Qwen3-1.7B-Q4_K_M.gguf",
   },
   {
@@ -188,6 +191,8 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
     embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
     rerank:
       "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
+    expand:
+      "hf:bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-Q4_K_M.gguf",
     gen: "hf:bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-Q4_K_M.gguf",
   },
   {
@@ -196,6 +201,8 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
     embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
     rerank:
       "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
+    expand:
+      "hf:unsloth/Qwen3-4B-Instruct-2507-GGUF/Qwen3-4B-Instruct-2507-Q4_K_M.gguf",
     gen: "hf:unsloth/Qwen3-4B-Instruct-2507-GGUF/Qwen3-4B-Instruct-2507-Q4_K_M.gguf",
   },
 ];

package/src/core/depth-policy.ts ADDED Viewed

@@ -0,0 +1,78 @@
+export type RetrievalDepth = "fast" | "balanced" | "thorough";
+export interface ResolveDepthPolicyInput {
+  presetId?: string;
+  fast?: boolean;
+  thorough?: boolean;
+  expand?: boolean;
+  rerank?: boolean;
+  candidateLimit?: number;
+  hasStructuredModes?: boolean;
+}
+export interface ResolvedDepthPolicy {
+  depth: RetrievalDepth;
+  noExpand: boolean;
+  noRerank: boolean;
+  candidateLimit?: number;
+  balancedExpansionEnabled: boolean;
+}
+export const DEFAULT_THOROUGH_CANDIDATE_LIMIT = 40;
+function normalizePresetId(presetId?: string): string {
+  return presetId?.trim().toLowerCase() || "slim";
+}
+export function balancedUsesExpansion(presetId?: string): boolean {
+  const normalized = normalizePresetId(presetId);
+  return normalized === "slim" || normalized === "slim-tuned";
+}
+export function resolveDepthPolicy(
+  input: ResolveDepthPolicyInput
+): ResolvedDepthPolicy {
+  const balancedExpansionEnabled = balancedUsesExpansion(input.presetId);
+  let depth: RetrievalDepth = "balanced";
+  let noExpand = !balancedExpansionEnabled;
+  let noRerank = false;
+  let candidateLimit = input.candidateLimit;
+  if (input.fast) {
+    depth = "fast";
+    noExpand = true;
+    noRerank = true;
+  } else if (input.thorough) {
+    depth = "thorough";
+    noExpand = false;
+    noRerank = false;
+    candidateLimit ??= DEFAULT_THOROUGH_CANDIDATE_LIMIT;
+  } else {
+    if (input.expand === true) {
+      noExpand = false;
+    }
+    if (input.expand === false) {
+      noExpand = true;
+    }
+    if (input.rerank === true) {
+      noRerank = false;
+    }
+    if (input.rerank === false) {
+      noRerank = true;
+    }
+  }
+  // Structured query modes supply explicit expansions and should not trigger
+  // an additional generated expansion step.
+  if (input.hasStructuredModes) {
+    noExpand = true;
+  }
+  return {
+    depth,
+    noExpand,
+    noRerank,
+    candidateLimit,
+    balancedExpansionEnabled,
+  };
+}

package/src/llm/errors.ts CHANGED Viewed

@@ -121,7 +121,7 @@ export function modelNotFoundError(uri: string, details?: string): LlmError {
 export function modelNotCachedError(
   uri: string,
-  modelType: "embed" | "rerank" | "gen"
+  modelType: "embed" | "rerank" | "expand" | "gen"
 ): LlmError {
   return llmError("MODEL_NOT_CACHED", {
     message: `${modelType} model not cached`,

package/src/llm/nodeLlamaCpp/adapter.ts CHANGED Viewed

@@ -19,7 +19,12 @@ import { ModelCache } from "../cache";
 import { HttpEmbedding, isHttpModelUri } from "../httpEmbedding";
 import { HttpGeneration, isHttpGenUri } from "../httpGeneration";
 import { HttpRerank, isHttpRerankUri } from "../httpRerank";
-import { getActivePreset, getModelConfig } from "../registry";
+import {
+  getActivePreset,
+  getAnswerModelUri,
+  getExpandModelUri,
+  getModelConfig,
+} from "../registry";
 import { NodeLlamaCppEmbedding } from "./embedding";
 import { NodeLlamaCppGeneration } from "./generation";
 import { getModelManager, type ModelManager } from "./lifecycle";
@@ -105,8 +110,7 @@ export class LlmAdapter {
     modelUri?: string,
     options?: CreatePortOptions
   ): Promise<LlmResult<GenerationPort>> {
-    const preset = getActivePreset(this.config);
-    const uri = modelUri ?? preset.gen;
+    const uri = getAnswerModelUri(this.config, modelUri);
     const policy = options?.policy ?? DEFAULT_POLICY;
     // Use HTTP generation for remote endpoints
@@ -132,6 +136,38 @@ export class LlmAdapter {
     };
   }
+  /**
+   * Create a generation port dedicated to query expansion.
+   * Uses preset.expand when configured, else falls back to preset.gen.
+   */
+  async createExpansionPort(
+    modelUri?: string,
+    options?: CreatePortOptions
+  ): Promise<LlmResult<GenerationPort>> {
+    const uri = getExpandModelUri(this.config, modelUri);
+    const policy = options?.policy ?? DEFAULT_POLICY;
+    if (isHttpGenUri(uri)) {
+      const httpGen = new HttpGeneration(uri);
+      return { ok: true, value: httpGen };
+    }
+    const resolved = await this.cache.ensureModel(
+      uri,
+      "expand",
+      policy,
+      options?.onProgress
+    );
+    if (!resolved.ok) {
+      return resolved;
+    }
+    return {
+      ok: true,
+      value: new NodeLlamaCppGeneration(this.manager, uri, resolved.value),
+    };
+  }
   /**
    * Create a rerank port.
    * Supports HTTP endpoints for remote reranking models.

package/src/llm/registry.ts CHANGED Viewed

@@ -57,6 +57,24 @@ export function getActivePreset(config: Config): ModelPreset {
   return builtIn;
 }
+export function getExpandModelUri(config: Config, override?: string): string {
+  if (override) {
+    return override;
+  }
+  const preset = getActivePreset(config);
+  return preset.expand ?? preset.gen;
+}
+export function getAnswerModelUri(config: Config, override?: string): string {
+  if (override) {
+    return override;
+  }
+  const preset = getActivePreset(config);
+  return preset.gen;
+}
 /**
  * Resolve a model URI for a given type.
  * Uses override if provided, otherwise from active preset.
@@ -70,6 +88,9 @@ export function resolveModelUri(
     return override;
   }
   const preset = getActivePreset(config);
+  if (type === "expand") {
+    return preset.expand ?? preset.gen;
+  }
   return preset[type];
 }

package/src/llm/types.ts CHANGED Viewed

@@ -19,7 +19,7 @@ export type LlmResult<T> =
 // Model Types
 // ─────────────────────────────────────────────────────────────────────────────
-export type ModelType = "embed" | "rerank" | "gen";
+export type ModelType = "embed" | "rerank" | "expand" | "gen";
 /** Model URI format: hf:org/repo/file.gguf or file:/path */
 export type ModelUri = string;

package/src/mcp/tools/index.ts CHANGED Viewed

@@ -149,8 +149,8 @@ export const queryInputSchema = z.object({
     .optional(),
   fast: z.boolean().default(false),
   thorough: z.boolean().default(false),
-  expand: z.boolean().default(false), // Default: skip expansion
-  rerank: z.boolean().default(true),
+  expand: z.boolean().optional(),
+  rerank: z.boolean().optional(),
   tagsAll: z.array(z.string()).optional(),
   tagsAny: z.array(z.string()).optional(),
 });

package/src/mcp/tools/query.ts CHANGED Viewed

@@ -20,6 +20,7 @@ import type { ToolContext } from "../server";
 import { parseUri } from "../../app/constants";
 import { createNonTtyProgressRenderer } from "../../cli/progress";
+import { resolveDepthPolicy } from "../../core/depth-policy";
 import { normalizeStructuredQueryInput } from "../../core/structured-query";
 import { LlmAdapter } from "../../llm/nodeLlamaCpp/adapter";
 import { resolveDownloadPolicy } from "../../llm/policy";
@@ -167,7 +168,7 @@ export function handleQuery(
       const downloadProgress = createNonTtyProgressRenderer();
       let embedPort: EmbeddingPort | null = null;
-      let genPort: GenerationPort | null = null;
+      let expandPort: GenerationPort | null = null;
       let rerankPort: RerankPort | null = null;
       let vectorIndex: VectorIndexPort | null = null;
@@ -181,42 +182,29 @@ export function handleQuery(
           embedPort = embedResult.value;
         }
-        // Determine noExpand/noRerank based on mode flags
-        // Priority: fast > thorough > expand/rerank params > defaults
-        // Default: noExpand=true (skip expansion), noRerank=false (with reranking)
         const hasStructuredModes = Boolean(queryModes?.length);
-        let noExpand = true;
-        let noRerank = false;
-        if (args.fast) {
-          noExpand = true;
-          noRerank = true;
-        } else if (args.thorough) {
-          noExpand = false;
-          noRerank = false;
-        } else {
-          // Use explicit expand/rerank params if provided
-          if (args.expand === true) {
-            noExpand = false;
-          }
-          if (args.rerank === false) {
-            noRerank = true;
-          }
-        }
-        // Structured query modes replace generated expansion.
-        if (hasStructuredModes) {
-          noExpand = true;
-        }
+        const depthPolicy = resolveDepthPolicy({
+          presetId: preset.id,
+          fast: args.fast,
+          thorough: args.thorough,
+          expand: args.expand,
+          rerank: args.rerank,
+          candidateLimit: args.candidateLimit,
+          hasStructuredModes,
+        });
+        const { noExpand, noRerank } = depthPolicy;
-        // Create generation port (for expansion) - optional
+        // Create expansion port - optional
         if (!noExpand && !hasStructuredModes) {
-          const genResult = await llm.createGenerationPort(preset.gen, {
-            policy,
-            onProgress: (progress) => downloadProgress("gen", progress),
-          });
+          const genResult = await llm.createExpansionPort(
+            preset.expand ?? preset.gen,
+            {
+              policy,
+              onProgress: (progress) => downloadProgress("expand", progress),
+            }
+          );
           if (genResult.ok) {
-            genPort = genResult.value;
+            expandPort = genResult.value;
           }
         }
@@ -252,7 +240,7 @@ export function handleQuery(
           config: ctx.config,
           vectorIndex,
           embedPort,
-          genPort,
+          expandPort,
           rerankPort,
         };
@@ -265,7 +253,7 @@ export function handleQuery(
           collection: args.collection,
           queryLanguageHint: args.lang, // Affects expansion prompt, not retrieval
           intent: args.intent,
-          candidateLimit: args.candidateLimit,
+          candidateLimit: depthPolicy.candidateLimit,
           exclude: args.exclude,
           since: args.since,
           until: args.until,
@@ -298,8 +286,8 @@ export function handleQuery(
         if (embedPort) {
           await embedPort.dispose();
         }
-        if (genPort) {
-          await genPort.dispose();
+        if (expandPort) {
+          await expandPort.dispose();
         }
         if (rerankPort) {
           await rerankPort.dispose();

package/src/pipeline/hybrid.ts CHANGED Viewed

@@ -60,7 +60,7 @@ export interface HybridSearchDeps {
   config: Config;
   vectorIndex: VectorIndexPort | null;
   embedPort: EmbeddingPort | null;
-  genPort: GenerationPort | null;
+  expandPort: GenerationPort | null;
   rerankPort: RerankPort | null;
   pipelineConfig?: PipelineConfig;
 }
@@ -249,7 +249,7 @@ export async function searchHybrid(
   options: HybridSearchOptions = {}
 ): Promise<ReturnType<typeof ok<SearchResults>>> {
   const runStartedAt = performance.now();
-  const { store, vectorIndex, embedPort, genPort, rerankPort } = deps;
+  const { store, vectorIndex, embedPort, expandPort, rerankPort } = deps;
   const pipelineConfig = deps.pipelineConfig ?? DEFAULT_PIPELINE_CONFIG;
   const limit = options.limit ?? 20;
@@ -318,7 +318,7 @@ export async function searchHybrid(
   // 1. Check if expansion needed
   // ─────────────────────────────────────────────────────────────────────────
   const expansionStartedAt = performance.now();
-  const shouldExpand = !options.noExpand && genPort !== null;
+  const shouldExpand = !options.noExpand && expandPort !== null;
   let expansionStatus: ExpansionStatus = "disabled";
   let queryModeSummary: ReturnType<typeof summarizeQueryModes> | undefined =
     undefined;
@@ -349,7 +349,7 @@ export async function searchHybrid(
       counters.fallbackEvents.push("expansion_skipped_strong");
     } else {
       expansionStatus = "attempted";
-      const expandResult = await expandQuery(genPort, query, {
+      const expandResult = await expandQuery(expandPort, query, {
         // Use queryLanguage for prompt selection, NOT options.lang (retrieval filter)
         lang: queryLanguage,
         timeout: pipelineConfig.expansionTimeout,

package/src/sdk/client.ts CHANGED Viewed

@@ -65,7 +65,8 @@ interface OpenedClientState {
 interface RuntimePorts {
   embedPort: EmbeddingPort | null;
-  genPort: GenerationPort | null;
+  expandPort: GenerationPort | null;
+  answerPort: GenerationPort | null;
   rerankPort: RerankPort | null;
   vectorIndex: VectorIndexPort | null;
 }
@@ -179,19 +180,23 @@ class GnoClientImpl implements GnoClient {
   private async createRuntimePorts(options: {
     embed?: boolean;
-    gen?: boolean;
+    expand?: boolean;
+    answer?: boolean;
     rerank?: boolean;
     requiredEmbed?: boolean;
-    requiredGen?: boolean;
+    requiredExpand?: boolean;
+    requiredAnswer?: boolean;
     requiredRerank?: boolean;
     embedModel?: string;
+    expandModel?: string;
     genModel?: string;
     rerankModel?: string;
   }): Promise<RuntimePorts> {
     this.assertOpen();
     let embedPort: EmbeddingPort | null = null;
-    let genPort: GenerationPort | null = null;
+    let expandPort: GenerationPort | null = null;
+    let answerPort: GenerationPort | null = null;
     let rerankPort: RerankPort | null = null;
     let vectorIndex: VectorIndexPort | null = null;
@@ -234,16 +239,38 @@ class GnoClientImpl implements GnoClient {
       }
     }
-    if (options.gen) {
+    if (options.expand) {
+      const genResult = await this.llm.createExpansionPort(
+        options.expandModel ?? options.genModel,
+        {
+          policy: this.downloadPolicy,
+        }
+      );
+      if (genResult.ok) {
+        expandPort = genResult.value;
+      } else if (options.requiredExpand) {
+        if (embedPort) {
+          await embedPort.dispose();
+        }
+        throw sdkError("MODEL", genResult.error.message, {
+          cause: genResult.error.cause,
+        });
+      }
+    }
+    if (options.answer) {
       const genResult = await this.llm.createGenerationPort(options.genModel, {
         policy: this.downloadPolicy,
       });
       if (genResult.ok) {
-        genPort = genResult.value;
-      } else if (options.requiredGen) {
+        answerPort = genResult.value;
+      } else if (options.requiredAnswer) {
         if (embedPort) {
           await embedPort.dispose();
         }
+        if (expandPort) {
+          await expandPort.dispose();
+        }
         throw sdkError("MODEL", genResult.error.message, {
           cause: genResult.error.cause,
         });
@@ -263,8 +290,11 @@ class GnoClientImpl implements GnoClient {
         if (embedPort) {
           await embedPort.dispose();
         }
-        if (genPort) {
-          await genPort.dispose();
+        if (expandPort) {
+          await expandPort.dispose();
+        }
+        if (answerPort) {
+          await answerPort.dispose();
         }
         throw sdkError("MODEL", rerankResult.error.message, {
           cause: rerankResult.error.cause,
@@ -272,15 +302,18 @@ class GnoClientImpl implements GnoClient {
       }
     }
-    return { embedPort, genPort, rerankPort, vectorIndex };
+    return { embedPort, expandPort, answerPort, rerankPort, vectorIndex };
   }
   private async disposeRuntimePorts(ports: RuntimePorts): Promise<void> {
     if (ports.embedPort) {
       await ports.embedPort.dispose();
     }
-    if (ports.genPort) {
-      await ports.genPort.dispose();
+    if (ports.expandPort) {
+      await ports.expandPort.dispose();
+    }
+    if (ports.answerPort) {
+      await ports.answerPort.dispose();
     }
     if (ports.rerankPort) {
       await ports.rerankPort.dispose();
@@ -366,9 +399,10 @@ class GnoClientImpl implements GnoClient {
     const ports = await this.createRuntimePorts({
       embed: true,
-      gen: !options.noExpand && !options.queryModes?.length,
+      expand: !options.noExpand && !options.queryModes?.length,
       rerank: !options.noRerank,
       embedModel: options.embedModel,
+      expandModel: options.expandModel,
       genModel: options.genModel,
       rerankModel: options.rerankModel,
     });
@@ -381,7 +415,7 @@ class GnoClientImpl implements GnoClient {
             config: this.config,
             vectorIndex: ports.vectorIndex,
             embedPort: ports.embedPort,
-            genPort: ports.genPort,
+            expandPort: ports.expandPort,
             rerankPort: ports.rerankPort,
           },
           query,
@@ -416,15 +450,17 @@ class GnoClientImpl implements GnoClient {
     const needsExpansionGen = !options.noExpand && !options.queryModes?.length;
     const ports = await this.createRuntimePorts({
       embed: true,
-      gen: needsExpansionGen || answerRequested,
+      expand: needsExpansionGen,
+      answer: answerRequested,
       rerank: !options.noRerank,
+      expandModel: options.expandModel,
       genModel: options.genModel,
       embedModel: options.embedModel,
       rerankModel: options.rerankModel,
     });
     try {
-      if (answerRequested && !ports.genPort) {
+      if (answerRequested && !ports.answerPort) {
         throw sdkError(
           "MODEL",
           "Answer generation requested but no generation model is available"
@@ -438,7 +474,7 @@ class GnoClientImpl implements GnoClient {
             config: this.config,
             vectorIndex: ports.vectorIndex,
             embedPort: ports.embedPort,
-            genPort: ports.genPort,
+            expandPort: ports.expandPort,
             rerankPort: ports.rerankPort,
           },
           query,
@@ -468,9 +504,13 @@ class GnoClientImpl implements GnoClient {
       let answerContext: AskResult["meta"]["answerContext"];
       let answerGenerated = false;
-      if (answerRequested && ports.genPort && searchResult.results.length > 0) {
+      if (
+        answerRequested &&
+        ports.answerPort &&
+        searchResult.results.length > 0
+      ) {
         const rawAnswer = await generateGroundedAnswer(
-          { genPort: ports.genPort, store: this.store },
+          { genPort: ports.answerPort, store: this.store },
           query,
           searchResult.results,
           options.maxAnswerTokens ?? 512

package/src/sdk/types.ts CHANGED Viewed

@@ -49,6 +49,7 @@ export interface GnoClientInitOptions {
 export interface GnoModelOverrides {
   embedModel?: string;
+  expandModel?: string;
   genModel?: string;
   rerankModel?: string;
 }

package/src/serve/AGENTS.md CHANGED Viewed

@@ -50,7 +50,8 @@ interface ServerContext {
   config: Config;
   vectorIndex: VectorIndexPort | null;
   embedPort: EmbeddingPort | null;
-  genPort: GenerationPort | null;
+  expandPort: GenerationPort | null;
+  answerPort: GenerationPort | null;
   rerankPort: RerankPort | null;
   capabilities: { bm25; vector; hybrid; answer };
 }

package/src/serve/CLAUDE.md CHANGED Viewed

@@ -50,7 +50,8 @@ interface ServerContext {
   config: Config;
   vectorIndex: VectorIndexPort | null;
   embedPort: EmbeddingPort | null;
-  genPort: GenerationPort | null;
+  expandPort: GenerationPort | null;
+  answerPort: GenerationPort | null;
   rerankPort: RerankPort | null;
   capabilities: { bm25; vector; hybrid; answer };
 }

package/src/serve/context.ts CHANGED Viewed

@@ -63,7 +63,8 @@ export interface ServerContext {
   config: Config;
   vectorIndex: VectorIndexPort | null;
   embedPort: EmbeddingPort | null;
-  genPort: GenerationPort | null;
+  expandPort: GenerationPort | null;
+  answerPort: GenerationPort | null;
   rerankPort: RerankPort | null;
   capabilities: {
     bm25: boolean;
@@ -82,7 +83,8 @@ export async function createServerContext(
   config: Config
 ): Promise<ServerContext> {
   let embedPort: EmbeddingPort | null = null;
-  let genPort: GenerationPort | null = null;
+  let expandPort: GenerationPort | null = null;
+  let answerPort: GenerationPort | null = null;
   let rerankPort: RerankPort | null = null;
   let vectorIndex: VectorIndexPort | null = null;
@@ -129,13 +131,23 @@ export async function createServerContext(
       }
     }
-    // Try to create generation port
-    const genResult = await llm.createGenerationPort(
+    // Try to create expansion port
+    const expandResult = await llm.createExpansionPort(
+      preset.expand ?? preset.gen,
+      createPortOptions("expand")
+    );
+    if (expandResult.ok) {
+      expandPort = expandResult.value;
+      console.log("Query expansion enabled");
+    }
+    // Try to create answer generation port
+    const answerResult = await llm.createGenerationPort(
       preset.gen,
       createPortOptions("gen")
     );
-    if (genResult.ok) {
-      genPort = genResult.value;
+    if (answerResult.ok) {
+      answerPort = answerResult.value;
       console.log("AI answer generation enabled");
     }
@@ -166,7 +178,7 @@ export async function createServerContext(
     bm25: true, // Always available
     vector: vectorIndex?.searchAvailable ?? false,
     hybrid: (vectorIndex?.searchAvailable ?? false) && embedPort !== null,
-    answer: genPort !== null,
+    answer: answerPort !== null,
   };
   return {
@@ -174,7 +186,8 @@ export async function createServerContext(
     config,
     vectorIndex,
     embedPort,
-    genPort,
+    expandPort,
+    answerPort,
     rerankPort,
     capabilities,
   };
@@ -187,7 +200,8 @@ export async function createServerContext(
 export async function disposeServerContext(ctx: ServerContext): Promise<void> {
   const ports = [
     { name: "embed", port: ctx.embedPort },
-    { name: "gen", port: ctx.genPort },
+    { name: "expand", port: ctx.expandPort },
+    { name: "answer", port: ctx.answerPort },
     { name: "rerank", port: ctx.rerankPort },
   ];