@gmickel/gno 0.24.0 → 0.25.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -166,7 +166,9 @@ export const ModelPresetSchema = z.object({
166
166
  embed: z.string().min(1),
167
167
  /** Reranker model URI */
168
168
  rerank: z.string().min(1),
169
- /** Generation model URI */
169
+ /** Query expansion model URI (defaults to gen for older configs) */
170
+ expand: z.string().min(1).optional(),
171
+ /** Answer generation model URI */
170
172
  gen: z.string().min(1),
171
173
  });
172
174
 
@@ -180,6 +182,7 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
180
182
  embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
181
183
  rerank:
182
184
  "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
185
+ expand: "hf:unsloth/Qwen3-1.7B-GGUF/Qwen3-1.7B-Q4_K_M.gguf",
183
186
  gen: "hf:unsloth/Qwen3-1.7B-GGUF/Qwen3-1.7B-Q4_K_M.gguf",
184
187
  },
185
188
  {
@@ -188,6 +191,8 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
188
191
  embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
189
192
  rerank:
190
193
  "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
194
+ expand:
195
+ "hf:bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-Q4_K_M.gguf",
191
196
  gen: "hf:bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-Q4_K_M.gguf",
192
197
  },
193
198
  {
@@ -196,6 +201,8 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
196
201
  embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
197
202
  rerank:
198
203
  "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
204
+ expand:
205
+ "hf:unsloth/Qwen3-4B-Instruct-2507-GGUF/Qwen3-4B-Instruct-2507-Q4_K_M.gguf",
199
206
  gen: "hf:unsloth/Qwen3-4B-Instruct-2507-GGUF/Qwen3-4B-Instruct-2507-Q4_K_M.gguf",
200
207
  },
201
208
  ];
@@ -0,0 +1,78 @@
/** Retrieval depth tiers a query can run at. */
export type RetrievalDepth = "fast" | "balanced" | "thorough";

/** Caller-supplied flags from which the effective retrieval policy is derived. */
export interface ResolveDepthPolicyInput {
  /** Active model preset id; missing or blank is treated as "slim". */
  presetId?: string;
  /** Request the "fast" depth (takes priority over every other flag). */
  fast?: boolean;
  /** Request the "thorough" depth (ignored when `fast` is set). */
  thorough?: boolean;
  /** Explicit expansion toggle; only honoured at "balanced" depth. */
  expand?: boolean;
  /** Explicit rerank toggle; only honoured at "balanced" depth. */
  rerank?: boolean;
  /** Explicit candidate limit; passed through unchanged when provided. */
  candidateLimit?: number;
  /** True when the query carries structured modes, which suppress generated expansion. */
  hasStructuredModes?: boolean;
}

/** Effective retrieval settings produced by {@link resolveDepthPolicy}. */
export interface ResolvedDepthPolicy {
  depth: RetrievalDepth;
  /** When true, the generated query-expansion step is skipped. */
  noExpand: boolean;
  /** When true, the rerank step is skipped. */
  noRerank: boolean;
  /** Candidate limit to use; undefined when neither the caller nor the depth supplies one. */
  candidateLimit?: number;
  /** Whether the preset enables expansion by default at "balanced" depth. */
  balancedExpansionEnabled: boolean;
}

/** Candidate limit applied at "thorough" depth when the caller gives none. */
export const DEFAULT_THOROUGH_CANDIDATE_LIMIT = 40;

/**
 * Canonicalise a preset id: trim, lower-case, and fall back to "slim" when
 * the id is missing or blank.
 */
function normalizePresetId(presetId?: string): string {
  // `||` (not `??`) is deliberate: a whitespace-only id trims to "" and must
  // also fall back to the default.
  return presetId?.trim().toLowerCase() || "slim";
}

/**
 * Report whether "balanced" depth runs query expansion by default for the
 * given preset.
 *
 * @param presetId - Preset id; compared after trimming and lower-casing.
 * @returns true for "slim" and "slim-tuned" (and for a missing/blank id,
 *   which is treated as "slim"); false for every other preset.
 */
export function balancedUsesExpansion(presetId?: string): boolean {
  const normalized = normalizePresetId(presetId);
  return normalized === "slim" || normalized === "slim-tuned";
}

/**
 * Resolve the effective retrieval policy from the caller's flags.
 *
 * Priority: `fast` > `thorough` > explicit `expand`/`rerank` > preset default.
 * Structured query modes always disable generated expansion, whatever the
 * depth.
 *
 * @param input - Depth flags, optional overrides and the active preset id.
 * @returns The depth plus the derived noExpand / noRerank / candidateLimit
 *   settings.
 */
export function resolveDepthPolicy(
  input: ResolveDepthPolicyInput
): ResolvedDepthPolicy {
  const balancedExpansionEnabled = balancedUsesExpansion(input.presetId);

  // Baseline is "balanced": rerank on, expansion following the preset default.
  let depth: RetrievalDepth = "balanced";
  let noExpand = !balancedExpansionEnabled;
  let noRerank = false;
  let candidateLimit = input.candidateLimit;

  if (input.fast) {
    depth = "fast";
    noExpand = true;
    noRerank = true;
  } else if (input.thorough) {
    depth = "thorough";
    noExpand = false;
    noRerank = false;
    // Only "thorough" supplies a default limit; an explicit value wins.
    candidateLimit = candidateLimit ?? DEFAULT_THOROUGH_CANDIDATE_LIMIT;
  } else {
    // Explicit toggles override the baseline; anything that is not a real
    // boolean (e.g. undefined) leaves the baseline untouched.
    if (typeof input.expand === "boolean") {
      noExpand = !input.expand;
    }
    if (typeof input.rerank === "boolean") {
      noRerank = !input.rerank;
    }
  }

  // Structured query modes supply explicit expansions and should not trigger
  // an additional generated expansion step.
  if (input.hasStructuredModes) {
    noExpand = true;
  }

  return {
    depth,
    noExpand,
    noRerank,
    candidateLimit,
    balancedExpansionEnabled,
  };
}
package/src/llm/errors.ts CHANGED
@@ -121,7 +121,7 @@ export function modelNotFoundError(uri: string, details?: string): LlmError {
121
121
 
122
122
  export function modelNotCachedError(
123
123
  uri: string,
124
- modelType: "embed" | "rerank" | "gen"
124
+ modelType: "embed" | "rerank" | "expand" | "gen"
125
125
  ): LlmError {
126
126
  return llmError("MODEL_NOT_CACHED", {
127
127
  message: `${modelType} model not cached`,
@@ -19,7 +19,12 @@ import { ModelCache } from "../cache";
19
19
  import { HttpEmbedding, isHttpModelUri } from "../httpEmbedding";
20
20
  import { HttpGeneration, isHttpGenUri } from "../httpGeneration";
21
21
  import { HttpRerank, isHttpRerankUri } from "../httpRerank";
22
- import { getActivePreset, getModelConfig } from "../registry";
22
+ import {
23
+ getActivePreset,
24
+ getAnswerModelUri,
25
+ getExpandModelUri,
26
+ getModelConfig,
27
+ } from "../registry";
23
28
  import { NodeLlamaCppEmbedding } from "./embedding";
24
29
  import { NodeLlamaCppGeneration } from "./generation";
25
30
  import { getModelManager, type ModelManager } from "./lifecycle";
@@ -105,8 +110,7 @@ export class LlmAdapter {
105
110
  modelUri?: string,
106
111
  options?: CreatePortOptions
107
112
  ): Promise<LlmResult<GenerationPort>> {
108
- const preset = getActivePreset(this.config);
109
- const uri = modelUri ?? preset.gen;
113
+ const uri = getAnswerModelUri(this.config, modelUri);
110
114
  const policy = options?.policy ?? DEFAULT_POLICY;
111
115
 
112
116
  // Use HTTP generation for remote endpoints
@@ -132,6 +136,38 @@ export class LlmAdapter {
132
136
  };
133
137
  }
134
138
 
/**
 * Create a generation port dedicated to query expansion.
 *
 * The model URI is resolved by getExpandModelUri: the explicit `modelUri`
 * when given, else the active preset's `expand` entry, else its `gen` entry.
 *
 * @param modelUri - Optional explicit model URI.
 * @param options - Optional download policy (defaults to DEFAULT_POLICY) and
 *   progress callback.
 * @returns An ok result wrapping the port, or the failed result from the
 *   model cache when the model could not be resolved.
 */
async createExpansionPort(
  modelUri?: string,
  options?: CreatePortOptions
): Promise<LlmResult<GenerationPort>> {
  const uri = getExpandModelUri(this.config, modelUri);
  const policy = options?.policy ?? DEFAULT_POLICY;

  // HTTP generation URIs are served by an HttpGeneration port directly and
  // skip the model cache.
  if (isHttpGenUri(uri)) {
    return { ok: true, value: new HttpGeneration(uri) };
  }

  // Resolve through the cache under the "expand" model type so progress and
  // errors are attributed to expansion rather than answer generation.
  const resolved = await this.cache.ensureModel(
    uri,
    "expand",
    policy,
    options?.onProgress
  );
  if (!resolved.ok) {
    return resolved;
  }

  return {
    ok: true,
    value: new NodeLlamaCppGeneration(this.manager, uri, resolved.value),
  };
}
170
+
135
171
  /**
136
172
  * Create a rerank port.
137
173
  * Supports HTTP endpoints for remote reranking models.
@@ -57,6 +57,24 @@ export function getActivePreset(config: Config): ModelPreset {
57
57
  return builtIn;
58
58
  }
59
59
 
/**
 * Pick the model URI used for query expansion.
 *
 * @param config - Configuration handed to getActivePreset to find the active
 *   preset.
 * @param override - Explicit URI; returned as-is when truthy (an empty string
 *   counts as "not provided").
 * @returns The override, else the preset's `expand` URI, else its `gen` URI.
 */
export function getExpandModelUri(config: Config, override?: string): string {
  if (override) {
    return override;
  }

  const preset = getActivePreset(config);
  // Fall back to `gen` when the preset does not define `expand`.
  return preset.expand ?? preset.gen;
}
68
+
/**
 * Pick the model URI used for answer generation.
 *
 * @param config - Configuration handed to getActivePreset to find the active
 *   preset.
 * @param override - Explicit URI; returned as-is when truthy (an empty string
 *   counts as "not provided").
 * @returns The override, else the active preset's `gen` URI.
 */
export function getAnswerModelUri(config: Config, override?: string): string {
  if (override) {
    return override;
  }

  const preset = getActivePreset(config);
  return preset.gen;
}
77
+
60
78
  /**
61
79
  * Resolve a model URI for a given type.
62
80
  * Uses override if provided, otherwise from active preset.
@@ -70,6 +88,9 @@ export function resolveModelUri(
70
88
  return override;
71
89
  }
72
90
  const preset = getActivePreset(config);
91
+ if (type === "expand") {
92
+ return preset.expand ?? preset.gen;
93
+ }
73
94
  return preset[type];
74
95
  }
75
96
 
package/src/llm/types.ts CHANGED
@@ -19,7 +19,7 @@ export type LlmResult<T> =
19
19
  // Model Types
20
20
  // ─────────────────────────────────────────────────────────────────────────────
21
21
 
22
- export type ModelType = "embed" | "rerank" | "gen";
22
+ export type ModelType = "embed" | "rerank" | "expand" | "gen";
23
23
 
24
24
  /** Model URI format: hf:org/repo/file.gguf or file:/path */
25
25
  export type ModelUri = string;
@@ -149,8 +149,8 @@ export const queryInputSchema = z.object({
149
149
  .optional(),
150
150
  fast: z.boolean().default(false),
151
151
  thorough: z.boolean().default(false),
152
- expand: z.boolean().default(false), // Default: skip expansion
153
- rerank: z.boolean().default(true),
152
+ expand: z.boolean().optional(),
153
+ rerank: z.boolean().optional(),
154
154
  tagsAll: z.array(z.string()).optional(),
155
155
  tagsAny: z.array(z.string()).optional(),
156
156
  });
@@ -20,6 +20,7 @@ import type { ToolContext } from "../server";
20
20
 
21
21
  import { parseUri } from "../../app/constants";
22
22
  import { createNonTtyProgressRenderer } from "../../cli/progress";
23
+ import { resolveDepthPolicy } from "../../core/depth-policy";
23
24
  import { normalizeStructuredQueryInput } from "../../core/structured-query";
24
25
  import { LlmAdapter } from "../../llm/nodeLlamaCpp/adapter";
25
26
  import { resolveDownloadPolicy } from "../../llm/policy";
@@ -167,7 +168,7 @@ export function handleQuery(
167
168
  const downloadProgress = createNonTtyProgressRenderer();
168
169
 
169
170
  let embedPort: EmbeddingPort | null = null;
170
- let genPort: GenerationPort | null = null;
171
+ let expandPort: GenerationPort | null = null;
171
172
  let rerankPort: RerankPort | null = null;
172
173
  let vectorIndex: VectorIndexPort | null = null;
173
174
 
@@ -181,42 +182,29 @@ export function handleQuery(
181
182
  embedPort = embedResult.value;
182
183
  }
183
184
 
184
- // Determine noExpand/noRerank based on mode flags
185
- // Priority: fast > thorough > expand/rerank params > defaults
186
- // Default: noExpand=true (skip expansion), noRerank=false (with reranking)
187
185
  const hasStructuredModes = Boolean(queryModes?.length);
188
- let noExpand = true;
189
- let noRerank = false;
190
-
191
- if (args.fast) {
192
- noExpand = true;
193
- noRerank = true;
194
- } else if (args.thorough) {
195
- noExpand = false;
196
- noRerank = false;
197
- } else {
198
- // Use explicit expand/rerank params if provided
199
- if (args.expand === true) {
200
- noExpand = false;
201
- }
202
- if (args.rerank === false) {
203
- noRerank = true;
204
- }
205
- }
206
-
207
- // Structured query modes replace generated expansion.
208
- if (hasStructuredModes) {
209
- noExpand = true;
210
- }
186
+ const depthPolicy = resolveDepthPolicy({
187
+ presetId: preset.id,
188
+ fast: args.fast,
189
+ thorough: args.thorough,
190
+ expand: args.expand,
191
+ rerank: args.rerank,
192
+ candidateLimit: args.candidateLimit,
193
+ hasStructuredModes,
194
+ });
195
+ const { noExpand, noRerank } = depthPolicy;
211
196
 
212
- // Create generation port (for expansion) - optional
197
+ // Create expansion port - optional
213
198
  if (!noExpand && !hasStructuredModes) {
214
- const genResult = await llm.createGenerationPort(preset.gen, {
215
- policy,
216
- onProgress: (progress) => downloadProgress("gen", progress),
217
- });
199
+ const genResult = await llm.createExpansionPort(
200
+ preset.expand ?? preset.gen,
201
+ {
202
+ policy,
203
+ onProgress: (progress) => downloadProgress("expand", progress),
204
+ }
205
+ );
218
206
  if (genResult.ok) {
219
- genPort = genResult.value;
207
+ expandPort = genResult.value;
220
208
  }
221
209
  }
222
210
 
@@ -252,7 +240,7 @@ export function handleQuery(
252
240
  config: ctx.config,
253
241
  vectorIndex,
254
242
  embedPort,
255
- genPort,
243
+ expandPort,
256
244
  rerankPort,
257
245
  };
258
246
 
@@ -265,7 +253,7 @@ export function handleQuery(
265
253
  collection: args.collection,
266
254
  queryLanguageHint: args.lang, // Affects expansion prompt, not retrieval
267
255
  intent: args.intent,
268
- candidateLimit: args.candidateLimit,
256
+ candidateLimit: depthPolicy.candidateLimit,
269
257
  exclude: args.exclude,
270
258
  since: args.since,
271
259
  until: args.until,
@@ -298,8 +286,8 @@ export function handleQuery(
298
286
  if (embedPort) {
299
287
  await embedPort.dispose();
300
288
  }
301
- if (genPort) {
302
- await genPort.dispose();
289
+ if (expandPort) {
290
+ await expandPort.dispose();
303
291
  }
304
292
  if (rerankPort) {
305
293
  await rerankPort.dispose();
@@ -60,7 +60,7 @@ export interface HybridSearchDeps {
60
60
  config: Config;
61
61
  vectorIndex: VectorIndexPort | null;
62
62
  embedPort: EmbeddingPort | null;
63
- genPort: GenerationPort | null;
63
+ expandPort: GenerationPort | null;
64
64
  rerankPort: RerankPort | null;
65
65
  pipelineConfig?: PipelineConfig;
66
66
  }
@@ -249,7 +249,7 @@ export async function searchHybrid(
249
249
  options: HybridSearchOptions = {}
250
250
  ): Promise<ReturnType<typeof ok<SearchResults>>> {
251
251
  const runStartedAt = performance.now();
252
- const { store, vectorIndex, embedPort, genPort, rerankPort } = deps;
252
+ const { store, vectorIndex, embedPort, expandPort, rerankPort } = deps;
253
253
  const pipelineConfig = deps.pipelineConfig ?? DEFAULT_PIPELINE_CONFIG;
254
254
 
255
255
  const limit = options.limit ?? 20;
@@ -318,7 +318,7 @@ export async function searchHybrid(
318
318
  // 1. Check if expansion needed
319
319
  // ─────────────────────────────────────────────────────────────────────────
320
320
  const expansionStartedAt = performance.now();
321
- const shouldExpand = !options.noExpand && genPort !== null;
321
+ const shouldExpand = !options.noExpand && expandPort !== null;
322
322
  let expansionStatus: ExpansionStatus = "disabled";
323
323
  let queryModeSummary: ReturnType<typeof summarizeQueryModes> | undefined =
324
324
  undefined;
@@ -349,7 +349,7 @@ export async function searchHybrid(
349
349
  counters.fallbackEvents.push("expansion_skipped_strong");
350
350
  } else {
351
351
  expansionStatus = "attempted";
352
- const expandResult = await expandQuery(genPort, query, {
352
+ const expandResult = await expandQuery(expandPort, query, {
353
353
  // Use queryLanguage for prompt selection, NOT options.lang (retrieval filter)
354
354
  lang: queryLanguage,
355
355
  timeout: pipelineConfig.expansionTimeout,
package/src/sdk/client.ts CHANGED
@@ -65,7 +65,8 @@ interface OpenedClientState {
65
65
 
66
66
  interface RuntimePorts {
67
67
  embedPort: EmbeddingPort | null;
68
- genPort: GenerationPort | null;
68
+ expandPort: GenerationPort | null;
69
+ answerPort: GenerationPort | null;
69
70
  rerankPort: RerankPort | null;
70
71
  vectorIndex: VectorIndexPort | null;
71
72
  }
@@ -179,19 +180,23 @@ class GnoClientImpl implements GnoClient {
179
180
 
180
181
  private async createRuntimePorts(options: {
181
182
  embed?: boolean;
182
- gen?: boolean;
183
+ expand?: boolean;
184
+ answer?: boolean;
183
185
  rerank?: boolean;
184
186
  requiredEmbed?: boolean;
185
- requiredGen?: boolean;
187
+ requiredExpand?: boolean;
188
+ requiredAnswer?: boolean;
186
189
  requiredRerank?: boolean;
187
190
  embedModel?: string;
191
+ expandModel?: string;
188
192
  genModel?: string;
189
193
  rerankModel?: string;
190
194
  }): Promise<RuntimePorts> {
191
195
  this.assertOpen();
192
196
 
193
197
  let embedPort: EmbeddingPort | null = null;
194
- let genPort: GenerationPort | null = null;
198
+ let expandPort: GenerationPort | null = null;
199
+ let answerPort: GenerationPort | null = null;
195
200
  let rerankPort: RerankPort | null = null;
196
201
  let vectorIndex: VectorIndexPort | null = null;
197
202
 
@@ -234,16 +239,38 @@ class GnoClientImpl implements GnoClient {
234
239
  }
235
240
  }
236
241
 
237
- if (options.gen) {
242
+ if (options.expand) {
243
+ const genResult = await this.llm.createExpansionPort(
244
+ options.expandModel ?? options.genModel,
245
+ {
246
+ policy: this.downloadPolicy,
247
+ }
248
+ );
249
+ if (genResult.ok) {
250
+ expandPort = genResult.value;
251
+ } else if (options.requiredExpand) {
252
+ if (embedPort) {
253
+ await embedPort.dispose();
254
+ }
255
+ throw sdkError("MODEL", genResult.error.message, {
256
+ cause: genResult.error.cause,
257
+ });
258
+ }
259
+ }
260
+
261
+ if (options.answer) {
238
262
  const genResult = await this.llm.createGenerationPort(options.genModel, {
239
263
  policy: this.downloadPolicy,
240
264
  });
241
265
  if (genResult.ok) {
242
- genPort = genResult.value;
243
- } else if (options.requiredGen) {
266
+ answerPort = genResult.value;
267
+ } else if (options.requiredAnswer) {
244
268
  if (embedPort) {
245
269
  await embedPort.dispose();
246
270
  }
271
+ if (expandPort) {
272
+ await expandPort.dispose();
273
+ }
247
274
  throw sdkError("MODEL", genResult.error.message, {
248
275
  cause: genResult.error.cause,
249
276
  });
@@ -263,8 +290,11 @@ class GnoClientImpl implements GnoClient {
263
290
  if (embedPort) {
264
291
  await embedPort.dispose();
265
292
  }
266
- if (genPort) {
267
- await genPort.dispose();
293
+ if (expandPort) {
294
+ await expandPort.dispose();
295
+ }
296
+ if (answerPort) {
297
+ await answerPort.dispose();
268
298
  }
269
299
  throw sdkError("MODEL", rerankResult.error.message, {
270
300
  cause: rerankResult.error.cause,
@@ -272,15 +302,18 @@ class GnoClientImpl implements GnoClient {
272
302
  }
273
303
  }
274
304
 
275
- return { embedPort, genPort, rerankPort, vectorIndex };
305
+ return { embedPort, expandPort, answerPort, rerankPort, vectorIndex };
276
306
  }
277
307
 
278
308
  private async disposeRuntimePorts(ports: RuntimePorts): Promise<void> {
279
309
  if (ports.embedPort) {
280
310
  await ports.embedPort.dispose();
281
311
  }
282
- if (ports.genPort) {
283
- await ports.genPort.dispose();
312
+ if (ports.expandPort) {
313
+ await ports.expandPort.dispose();
314
+ }
315
+ if (ports.answerPort) {
316
+ await ports.answerPort.dispose();
284
317
  }
285
318
  if (ports.rerankPort) {
286
319
  await ports.rerankPort.dispose();
@@ -366,9 +399,10 @@ class GnoClientImpl implements GnoClient {
366
399
 
367
400
  const ports = await this.createRuntimePorts({
368
401
  embed: true,
369
- gen: !options.noExpand && !options.queryModes?.length,
402
+ expand: !options.noExpand && !options.queryModes?.length,
370
403
  rerank: !options.noRerank,
371
404
  embedModel: options.embedModel,
405
+ expandModel: options.expandModel,
372
406
  genModel: options.genModel,
373
407
  rerankModel: options.rerankModel,
374
408
  });
@@ -381,7 +415,7 @@ class GnoClientImpl implements GnoClient {
381
415
  config: this.config,
382
416
  vectorIndex: ports.vectorIndex,
383
417
  embedPort: ports.embedPort,
384
- genPort: ports.genPort,
418
+ expandPort: ports.expandPort,
385
419
  rerankPort: ports.rerankPort,
386
420
  },
387
421
  query,
@@ -416,15 +450,17 @@ class GnoClientImpl implements GnoClient {
416
450
  const needsExpansionGen = !options.noExpand && !options.queryModes?.length;
417
451
  const ports = await this.createRuntimePorts({
418
452
  embed: true,
419
- gen: needsExpansionGen || answerRequested,
453
+ expand: needsExpansionGen,
454
+ answer: answerRequested,
420
455
  rerank: !options.noRerank,
456
+ expandModel: options.expandModel,
421
457
  genModel: options.genModel,
422
458
  embedModel: options.embedModel,
423
459
  rerankModel: options.rerankModel,
424
460
  });
425
461
 
426
462
  try {
427
- if (answerRequested && !ports.genPort) {
463
+ if (answerRequested && !ports.answerPort) {
428
464
  throw sdkError(
429
465
  "MODEL",
430
466
  "Answer generation requested but no generation model is available"
@@ -438,7 +474,7 @@ class GnoClientImpl implements GnoClient {
438
474
  config: this.config,
439
475
  vectorIndex: ports.vectorIndex,
440
476
  embedPort: ports.embedPort,
441
- genPort: ports.genPort,
477
+ expandPort: ports.expandPort,
442
478
  rerankPort: ports.rerankPort,
443
479
  },
444
480
  query,
@@ -468,9 +504,13 @@ class GnoClientImpl implements GnoClient {
468
504
  let answerContext: AskResult["meta"]["answerContext"];
469
505
  let answerGenerated = false;
470
506
 
471
- if (answerRequested && ports.genPort && searchResult.results.length > 0) {
507
+ if (
508
+ answerRequested &&
509
+ ports.answerPort &&
510
+ searchResult.results.length > 0
511
+ ) {
472
512
  const rawAnswer = await generateGroundedAnswer(
473
- { genPort: ports.genPort, store: this.store },
513
+ { genPort: ports.answerPort, store: this.store },
474
514
  query,
475
515
  searchResult.results,
476
516
  options.maxAnswerTokens ?? 512
package/src/sdk/types.ts CHANGED
@@ -49,6 +49,7 @@ export interface GnoClientInitOptions {
49
49
 
50
50
  export interface GnoModelOverrides {
51
51
  embedModel?: string;
52
+ expandModel?: string;
52
53
  genModel?: string;
53
54
  rerankModel?: string;
54
55
  }
@@ -50,7 +50,8 @@ interface ServerContext {
50
50
  config: Config;
51
51
  vectorIndex: VectorIndexPort | null;
52
52
  embedPort: EmbeddingPort | null;
53
- genPort: GenerationPort | null;
53
+ expandPort: GenerationPort | null;
54
+ answerPort: GenerationPort | null;
54
55
  rerankPort: RerankPort | null;
55
56
  capabilities: { bm25; vector; hybrid; answer };
56
57
  }
@@ -50,7 +50,8 @@ interface ServerContext {
50
50
  config: Config;
51
51
  vectorIndex: VectorIndexPort | null;
52
52
  embedPort: EmbeddingPort | null;
53
- genPort: GenerationPort | null;
53
+ expandPort: GenerationPort | null;
54
+ answerPort: GenerationPort | null;
54
55
  rerankPort: RerankPort | null;
55
56
  capabilities: { bm25; vector; hybrid; answer };
56
57
  }
@@ -63,7 +63,8 @@ export interface ServerContext {
63
63
  config: Config;
64
64
  vectorIndex: VectorIndexPort | null;
65
65
  embedPort: EmbeddingPort | null;
66
- genPort: GenerationPort | null;
66
+ expandPort: GenerationPort | null;
67
+ answerPort: GenerationPort | null;
67
68
  rerankPort: RerankPort | null;
68
69
  capabilities: {
69
70
  bm25: boolean;
@@ -82,7 +83,8 @@ export async function createServerContext(
82
83
  config: Config
83
84
  ): Promise<ServerContext> {
84
85
  let embedPort: EmbeddingPort | null = null;
85
- let genPort: GenerationPort | null = null;
86
+ let expandPort: GenerationPort | null = null;
87
+ let answerPort: GenerationPort | null = null;
86
88
  let rerankPort: RerankPort | null = null;
87
89
  let vectorIndex: VectorIndexPort | null = null;
88
90
 
@@ -129,13 +131,23 @@ export async function createServerContext(
129
131
  }
130
132
  }
131
133
 
132
- // Try to create generation port
133
- const genResult = await llm.createGenerationPort(
134
+ // Try to create expansion port
135
+ const expandResult = await llm.createExpansionPort(
136
+ preset.expand ?? preset.gen,
137
+ createPortOptions("expand")
138
+ );
139
+ if (expandResult.ok) {
140
+ expandPort = expandResult.value;
141
+ console.log("Query expansion enabled");
142
+ }
143
+
144
+ // Try to create answer generation port
145
+ const answerResult = await llm.createGenerationPort(
134
146
  preset.gen,
135
147
  createPortOptions("gen")
136
148
  );
137
- if (genResult.ok) {
138
- genPort = genResult.value;
149
+ if (answerResult.ok) {
150
+ answerPort = answerResult.value;
139
151
  console.log("AI answer generation enabled");
140
152
  }
141
153
 
@@ -166,7 +178,7 @@ export async function createServerContext(
166
178
  bm25: true, // Always available
167
179
  vector: vectorIndex?.searchAvailable ?? false,
168
180
  hybrid: (vectorIndex?.searchAvailable ?? false) && embedPort !== null,
169
- answer: genPort !== null,
181
+ answer: answerPort !== null,
170
182
  };
171
183
 
172
184
  return {
@@ -174,7 +186,8 @@ export async function createServerContext(
174
186
  config,
175
187
  vectorIndex,
176
188
  embedPort,
177
- genPort,
189
+ expandPort,
190
+ answerPort,
178
191
  rerankPort,
179
192
  capabilities,
180
193
  };
@@ -187,7 +200,8 @@ export async function createServerContext(
187
200
  export async function disposeServerContext(ctx: ServerContext): Promise<void> {
188
201
  const ports = [
189
202
  { name: "embed", port: ctx.embedPort },
190
- { name: "gen", port: ctx.genPort },
203
+ { name: "expand", port: ctx.expandPort },
204
+ { name: "answer", port: ctx.answerPort },
191
205
  { name: "rerank", port: ctx.rerankPort },
192
206
  ];
193
207