@gmickel/gno 0.24.0 → 0.25.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -61,16 +61,18 @@ models:
61
61
  activePreset: slim-tuned
62
62
  presets:
63
63
  - id: slim-tuned
64
- name: GNO Slim Retrieval v1
64
+ name: GNO Slim Tuned
65
65
  embed: hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf
66
66
  rerank: hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf
67
- gen: hf:guiltylemon/gno-expansion-slim-retrieval-v1/gno-expansion-auto-entity-lock-default-mix-lr95-f16.gguf
67
+ expand: hf:guiltylemon/gno-expansion-slim-retrieval-v1/gno-expansion-auto-entity-lock-default-mix-lr95-f16.gguf
68
+ gen: hf:unsloth/Qwen3-4B-Instruct-2507-GGUF/Qwen3-4B-Instruct-2507-Q4_K_M.gguf
68
69
  ```
69
70
 
70
71
  Then:
71
72
 
72
73
  ```bash
73
74
  gno models use slim-tuned
75
+ gno models pull --expand
74
76
  gno models pull --gen
75
77
  gno query "ECONNREFUSED 127.0.0.1:5432" --thorough
76
78
  ```
@@ -579,6 +581,7 @@ models:
579
581
  name: Remote GPU Server
580
582
  embed: "http://192.168.1.100:8081/v1/embeddings#bge-m3"
581
583
  rerank: "http://192.168.1.100:8082/v1/completions#reranker"
584
+ expand: "http://192.168.1.100:8083/v1/chat/completions#gno-expand"
582
585
  gen: "http://192.168.1.100:8083/v1/chat/completions#qwen3-4b"
583
586
  ```
584
587
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@gmickel/gno",
3
- "version": "0.24.0",
3
+ "version": "0.25.0",
4
4
  "description": "Local semantic search for your documents. Index Markdown, PDF, and Office files with hybrid BM25 + vector search.",
5
5
  "keywords": [
6
6
  "embeddings",
package/src/cli/AGENTS.md CHANGED
@@ -81,7 +81,8 @@ interface CliContext {
81
81
  store: SqliteAdapter;
82
82
  config: Config;
83
83
  embedPort?: EmbeddingPort;
84
- genPort?: GenerationPort;
84
+ expandPort?: GenerationPort;
85
+ answerPort?: GenerationPort;
85
86
  rerankPort?: RerankPort;
86
87
  }
87
88
  ```
package/src/cli/CLAUDE.md CHANGED
@@ -81,7 +81,8 @@ interface CliContext {
81
81
  store: SqliteAdapter;
82
82
  config: Config;
83
83
  embedPort?: EmbeddingPort;
84
- genPort?: GenerationPort;
84
+ expandPort?: GenerationPort;
85
+ answerPort?: GenerationPort;
85
86
  rerankPort?: RerankPort;
86
87
  }
87
88
  ```
@@ -40,7 +40,9 @@ export type AskCommandOptions = AskOptions & {
40
40
  configPath?: string;
41
41
  /** Override embedding model */
42
42
  embedModel?: string;
43
- /** Override generation model */
43
+ /** Override expansion model */
44
+ expandModel?: string;
45
+ /** Override answer generation model */
44
46
  genModel?: string;
45
47
  /** Override rerank model */
46
48
  rerankModel?: string;
@@ -82,7 +84,8 @@ export async function ask(
82
84
  const { store, config } = initResult;
83
85
 
84
86
  let embedPort: EmbeddingPort | null = null;
85
- let genPort: GenerationPort | null = null;
87
+ let expandPort: GenerationPort | null = null;
88
+ let answerPort: GenerationPort | null = null;
86
89
  let rerankPort: RerankPort | null = null;
87
90
 
88
91
  try {
@@ -113,10 +116,23 @@ export async function ask(
113
116
  embedPort = embedResult.value;
114
117
  }
115
118
 
116
- // Create generation port (for expansion and/or answer)
117
- // Need genPort if: expansion enabled (!noExpand) OR answer requested
118
- const needsGen = !options.noExpand || options.answer;
119
- if (needsGen) {
119
+ // Create expansion port when expansion is enabled.
120
+ if (!options.noExpand && !options.queryModes?.length) {
121
+ const expandUri =
122
+ options.expandModel ?? options.genModel ?? preset.expand;
123
+ const genResult = await llm.createExpansionPort(expandUri, {
124
+ policy,
125
+ onProgress: downloadProgress
126
+ ? (progress) => downloadProgress("expand", progress)
127
+ : undefined,
128
+ });
129
+ if (genResult.ok) {
130
+ expandPort = genResult.value;
131
+ }
132
+ }
133
+
134
+ // Create answer generation port when answers are requested.
135
+ if (options.answer) {
120
136
  const genUri = options.genModel ?? preset.gen;
121
137
  const genResult = await llm.createGenerationPort(genUri, {
122
138
  policy,
@@ -125,7 +141,7 @@ export async function ask(
125
141
  : undefined,
126
142
  });
127
143
  if (genResult.ok) {
128
- genPort = genResult.value;
144
+ answerPort = genResult.value;
129
145
  }
130
146
  }
131
147
 
@@ -170,7 +186,7 @@ export async function ask(
170
186
  config,
171
187
  vectorIndex,
172
188
  embedPort,
173
- genPort,
189
+ expandPort,
174
190
  rerankPort,
175
191
  };
176
192
 
@@ -178,7 +194,7 @@ export async function ask(
178
194
  const answerRequested = options.answer && !options.noAnswer;
179
195
 
180
196
  // Fail early if --answer is requested but no generation model available
181
- if (answerRequested && genPort === null) {
197
+ if (answerRequested && answerPort === null) {
182
198
  return {
183
199
  success: false,
184
200
  error:
@@ -223,12 +239,12 @@ export async function ask(
223
239
  // 2. --no-answer was not set
224
240
  // 3. We have results to ground on (no point generating from nothing)
225
241
  const shouldGenerateAnswer =
226
- answerRequested && genPort !== null && results.length > 0;
242
+ answerRequested && answerPort !== null && results.length > 0;
227
243
 
228
- if (shouldGenerateAnswer && genPort) {
244
+ if (shouldGenerateAnswer && answerPort) {
229
245
  const maxTokens = options.maxAnswerTokens ?? 512;
230
246
  const rawResult = await generateGroundedAnswer(
231
- { genPort, store },
247
+ { genPort: answerPort, store },
232
248
  query,
233
249
  results,
234
250
  maxTokens
@@ -277,8 +293,11 @@ export async function ask(
277
293
  if (embedPort) {
278
294
  await embedPort.dispose();
279
295
  }
280
- if (genPort) {
281
- await genPort.dispose();
296
+ if (expandPort) {
297
+ await expandPort.dispose();
298
+ }
299
+ if (answerPort) {
300
+ await answerPort.dispose();
282
301
  }
283
302
  if (rerankPort) {
284
303
  await rerankPort.dispose();
@@ -21,6 +21,8 @@ export interface ModelsClearOptions {
21
21
  embed?: boolean;
22
22
  /** Clear reranker model */
23
23
  rerank?: boolean;
24
+ /** Clear expansion model */
25
+ expand?: boolean;
24
26
  /** Clear generation model */
25
27
  gen?: boolean;
26
28
  /** Skip confirmation */
@@ -50,7 +52,7 @@ export async function modelsClear(
50
52
 
51
53
  if (options.all) {
52
54
  types = undefined; // Clear all
53
- } else if (options.embed || options.rerank || options.gen) {
55
+ } else if (options.embed || options.rerank || options.expand || options.gen) {
54
56
  types = [];
55
57
  if (options.embed) {
56
58
  types.push("embed");
@@ -58,6 +60,9 @@ export async function modelsClear(
58
60
  if (options.rerank) {
59
61
  types.push("rerank");
60
62
  }
63
+ if (options.expand) {
64
+ types.push("expand");
65
+ }
61
66
  if (options.gen) {
62
67
  types.push("gen");
63
68
  }
@@ -71,7 +76,7 @@ export async function modelsClear(
71
76
  const sizeAfter = await cache.totalSize();
72
77
 
73
78
  return {
74
- cleared: types ?? ["embed", "rerank", "gen"],
79
+ cleared: types ?? ["embed", "rerank", "expand", "gen"],
75
80
  sizeBefore,
76
81
  sizeAfter,
77
82
  };
@@ -96,8 +101,10 @@ function formatBytes(bytes: number): string {
96
101
  */
97
102
  export function formatModelsClear(result: ModelsClearResult): string {
98
103
  const lines: string[] = [];
104
+ const label = (type: ModelType) =>
105
+ type === "gen" ? "answer" : type === "expand" ? "expand" : type;
99
106
 
100
- lines.push(`Cleared: ${result.cleared.join(", ")}`);
107
+ lines.push(`Cleared: ${result.cleared.map(label).join(", ")}`);
101
108
  lines.push(`Freed: ${formatBytes(result.sizeBefore - result.sizeAfter)}`);
102
109
 
103
110
  return lines.join("\n");
@@ -36,6 +36,7 @@ export interface ModelsListResult {
36
36
  presets: PresetInfo[];
37
37
  embed: ModelStatus;
38
38
  rerank: ModelStatus;
39
+ expand: ModelStatus;
39
40
  gen: ModelStatus;
40
41
  cacheDir: string;
41
42
  totalSize: number;
@@ -84,9 +85,10 @@ export async function modelsList(
84
85
  const preset = getActivePreset(config);
85
86
  const cache = new ModelCache(getModelsCachePath());
86
87
 
87
- const [embed, rerank, gen] = await Promise.all([
88
+ const [embed, rerank, expand, gen] = await Promise.all([
88
89
  getModelStatus(cache, preset.embed),
89
90
  getModelStatus(cache, preset.rerank),
91
+ getModelStatus(cache, preset.expand ?? preset.gen),
90
92
  getModelStatus(cache, preset.gen),
91
93
  ]);
92
94
 
@@ -99,6 +101,7 @@ export async function modelsList(
99
101
  })),
100
102
  embed,
101
103
  rerank,
104
+ expand,
102
105
  gen,
103
106
  cacheDir: cache.dir,
104
107
  totalSize: await cache.totalSize(),
@@ -147,7 +150,11 @@ function formatTerminal(result: ModelsListResult): string {
147
150
  (result.rerank.size ? ` (${formatBytes(result.rerank.size)})` : "")
148
151
  );
149
152
  lines.push(
150
- ` gen: ${statusIcon(result.gen)} ${result.gen.uri}` +
153
+ ` expand: ${statusIcon(result.expand)} ${result.expand.uri}` +
154
+ (result.expand.size ? ` (${formatBytes(result.expand.size)})` : "")
155
+ );
156
+ lines.push(
157
+ ` answer: ${statusIcon(result.gen)} ${result.gen.uri}` +
151
158
  (result.gen.size ? ` (${formatBytes(result.gen.size)})` : "")
152
159
  );
153
160
 
@@ -156,7 +163,10 @@ function formatTerminal(result: ModelsListResult): string {
156
163
  lines.push(`Total size: ${formatBytes(result.totalSize)}`);
157
164
 
158
165
  const allCached =
159
- result.embed.cached && result.rerank.cached && result.gen.cached;
166
+ result.embed.cached &&
167
+ result.rerank.cached &&
168
+ result.expand.cached &&
169
+ result.gen.cached;
160
170
  if (!allCached) {
161
171
  lines.push("");
162
172
  lines.push("Run: gno models pull --all");
@@ -186,7 +196,10 @@ function formatMarkdown(result: ModelsListResult): string {
186
196
  `| rerank | ${result.rerank.uri} | ${status(result.rerank)} | ${size(result.rerank)} |`
187
197
  );
188
198
  lines.push(
189
- `| gen | ${result.gen.uri} | ${status(result.gen)} | ${size(result.gen)} |`
199
+ `| expand | ${result.expand.uri} | ${status(result.expand)} | ${size(result.expand)} |`
200
+ );
201
+ lines.push(
202
+ `| answer | ${result.gen.uri} | ${status(result.gen)} | ${size(result.gen)} |`
190
203
  );
191
204
 
192
205
  lines.push("");
@@ -27,6 +27,8 @@ export interface ModelsPullOptions {
27
27
  embed?: boolean;
28
28
  /** Pull reranker model */
29
29
  rerank?: boolean;
30
+ /** Pull expansion model */
31
+ expand?: boolean;
30
32
  /** Pull generation model */
31
33
  gen?: boolean;
32
34
  /** Force re-download */
@@ -59,9 +61,9 @@ export interface ModelsPullResult {
59
61
  */
60
62
  function getTypesToPull(options: ModelsPullOptions): ModelType[] {
61
63
  if (options.all) {
62
- return ["embed", "rerank", "gen"];
64
+ return ["embed", "rerank", "expand", "gen"];
63
65
  }
64
- if (options.embed || options.rerank || options.gen) {
66
+ if (options.embed || options.rerank || options.expand || options.gen) {
65
67
  const types: ModelType[] = [];
66
68
  if (options.embed) {
67
69
  types.push("embed");
@@ -69,13 +71,16 @@ function getTypesToPull(options: ModelsPullOptions): ModelType[] {
69
71
  if (options.rerank) {
70
72
  types.push("rerank");
71
73
  }
74
+ if (options.expand) {
75
+ types.push("expand");
76
+ }
72
77
  if (options.gen) {
73
78
  types.push("gen");
74
79
  }
75
80
  return types;
76
81
  }
77
82
  // Default: pull all
78
- return ["embed", "rerank", "gen"];
83
+ return ["embed", "rerank", "expand", "gen"];
79
84
  }
80
85
 
81
86
  /**
@@ -101,7 +106,8 @@ export async function modelsPull(
101
106
  let skipped = 0;
102
107
 
103
108
  for (const type of types) {
104
- const uri = preset[type];
109
+ const uri =
110
+ type === "expand" ? (preset.expand ?? preset.gen) : preset[type];
105
111
 
106
112
  // Check if already cached (skip unless --force)
107
113
  if (!options.force) {
@@ -160,16 +166,18 @@ export async function modelsPull(
160
166
  */
161
167
  export function formatModelsPull(result: ModelsPullResult): string {
162
168
  const lines: string[] = [];
169
+ const label = (type: ModelType) =>
170
+ type === "gen" ? "answer" : type === "expand" ? "expand" : type;
163
171
 
164
172
  for (const r of result.results) {
165
173
  if (r.ok) {
166
174
  if (r.skipped) {
167
- lines.push(`${r.type}: skipped (already cached)`);
175
+ lines.push(`${label(r.type)}: skipped (already cached)`);
168
176
  } else {
169
- lines.push(`${r.type}: downloaded`);
177
+ lines.push(`${label(r.type)}: downloaded`);
170
178
  }
171
179
  } else {
172
- lines.push(`${r.type}: failed - ${r.error}`);
180
+ lines.push(`${label(r.type)}: failed - ${r.error}`);
173
181
  }
174
182
  }
175
183
 
@@ -36,7 +36,9 @@ export type QueryCommandOptions = HybridSearchOptions & {
36
36
  configPath?: string;
37
37
  /** Override embedding model */
38
38
  embedModel?: string;
39
- /** Override generation model */
39
+ /** Override expansion model */
40
+ expandModel?: string;
41
+ /** Deprecated alias for expansion model */
40
42
  genModel?: string;
41
43
  /** Override rerank model */
42
44
  rerankModel?: string;
@@ -90,7 +92,7 @@ export async function query(
90
92
  const { store, config } = initResult;
91
93
 
92
94
  let embedPort: EmbeddingPort | null = null;
93
- let genPort: GenerationPort | null = null;
95
+ let expandPort: GenerationPort | null = null;
94
96
  let rerankPort: RerankPort | null = null;
95
97
 
96
98
  try {
@@ -121,18 +123,19 @@ export async function query(
121
123
  embedPort = embedResult.value;
122
124
  }
123
125
 
124
- // Create generation port (for expansion) - optional.
126
+ // Create expansion port - optional.
125
127
  // Skip when structured query modes are provided.
126
128
  if (!options.noExpand && !options.queryModes?.length) {
127
- const genUri = options.genModel ?? preset.gen;
128
- const genResult = await llm.createGenerationPort(genUri, {
129
+ const expandUri =
130
+ options.expandModel ?? options.genModel ?? preset.expand;
131
+ const genResult = await llm.createExpansionPort(expandUri, {
129
132
  policy,
130
133
  onProgress: downloadProgress
131
- ? (progress) => downloadProgress("gen", progress)
134
+ ? (progress) => downloadProgress("expand", progress)
132
135
  : undefined,
133
136
  });
134
137
  if (genResult.ok) {
135
- genPort = genResult.value;
138
+ expandPort = genResult.value;
136
139
  }
137
140
  }
138
141
 
@@ -177,7 +180,7 @@ export async function query(
177
180
  config,
178
181
  vectorIndex,
179
182
  embedPort,
180
- genPort,
183
+ expandPort,
181
184
  rerankPort,
182
185
  };
183
186
 
@@ -195,8 +198,8 @@ export async function query(
195
198
  if (embedPort) {
196
199
  await embedPort.dispose();
197
200
  }
198
- if (genPort) {
199
- await genPort.dispose();
201
+ if (expandPort) {
202
+ await expandPort.dispose();
200
203
  }
201
204
  if (rerankPort) {
202
205
  await rerankPort.dispose();
@@ -14,6 +14,7 @@ import {
14
14
  PRODUCT_NAME,
15
15
  VERSION,
16
16
  } from "../app/constants";
17
+ import { resolveDepthPolicy } from "../core/depth-policy";
17
18
  import { parseAndValidateTagFilter } from "../core/tags";
18
19
  import { setColorsEnabled } from "./colors";
19
20
  import {
@@ -455,7 +456,10 @@ function wireSearchCommands(program: Command): void {
455
456
  .option("--full", "include full content")
456
457
  .option("--line-numbers", "include line numbers in output")
457
458
  .option("--fast", "skip expansion and reranking (fastest, ~0.7s)")
458
- .option("--thorough", "enable query expansion (slower, ~5-8s)")
459
+ .option(
460
+ "--thorough",
461
+ "use expansion with a wider rerank pool (slowest, best recall)"
462
+ )
459
463
  .option("--no-expand", "disable query expansion")
460
464
  .option("--no-rerank", "disable reranking")
461
465
  .option(
@@ -474,6 +478,7 @@ function wireSearchCommands(program: Command): void {
474
478
  .action(async (queryText: string, cmdOpts: Record<string, unknown>) => {
475
479
  const format = getFormat(cmdOpts);
476
480
  assertFormatSupported(CMD.query, format);
481
+ const globals = getGlobals();
477
482
 
478
483
  // Validate empty query
479
484
  if (!queryText.trim()) {
@@ -532,35 +537,26 @@ function wireSearchCommands(program: Command): void {
532
537
  const limit = cmdOpts.limit
533
538
  ? parsePositiveInt("limit", cmdOpts.limit)
534
539
  : getDefaultLimit(format);
540
+ const { loadConfig } = await import("../config");
541
+ const { getActivePreset } = await import("../llm/registry");
542
+ const configResult = await loadConfig(globals.config);
543
+ const activePresetId = configResult.ok
544
+ ? getActivePreset(configResult.value).id
545
+ : "slim";
535
546
  const candidateLimit = cmdOpts.candidateLimit
536
547
  ? parsePositiveInt("candidate-limit", cmdOpts.candidateLimit)
537
548
  : undefined;
538
549
  const categories = parseCsvValues(cmdOpts.category);
539
550
  const exclude = parseCsvValues(cmdOpts.exclude);
540
551
 
541
- // Determine expansion/rerank settings based on flags
542
- // Priority: --fast > --thorough > --no-expand/--no-rerank > default
543
- // Default: skip expansion (balanced mode ~2-3s)
544
- let noExpand = true; // Default: skip expansion
545
- let noRerank = false; // Default: with reranking
546
-
547
- if (cmdOpts.fast) {
548
- // --fast: skip both (~0.7s)
549
- noExpand = true;
550
- noRerank = true;
551
- } else if (cmdOpts.thorough) {
552
- // --thorough: full pipeline (~5-8s)
553
- noExpand = false;
554
- noRerank = false;
555
- } else {
556
- // Check individual flags (override defaults)
557
- if (cmdOpts.expand === false) {
558
- noExpand = true;
559
- }
560
- if (cmdOpts.rerank === false) {
561
- noRerank = true;
562
- }
563
- }
552
+ const depthPolicy = resolveDepthPolicy({
553
+ presetId: activePresetId,
554
+ fast: Boolean(cmdOpts.fast),
555
+ thorough: Boolean(cmdOpts.thorough),
556
+ expand: cmdOpts.expand === false ? false : undefined,
557
+ rerank: cmdOpts.rerank === false ? false : undefined,
558
+ candidateLimit,
559
+ });
564
560
 
565
561
  const { query, formatQuery } = await import("./commands/query");
566
562
  const result = await query(queryText, {
@@ -578,9 +574,9 @@ function wireSearchCommands(program: Command): void {
578
574
  tagsAny,
579
575
  full: Boolean(cmdOpts.full),
580
576
  lineNumbers: Boolean(cmdOpts.lineNumbers),
581
- noExpand,
582
- noRerank,
583
- candidateLimit,
577
+ noExpand: depthPolicy.noExpand,
578
+ noRerank: depthPolicy.noRerank,
579
+ candidateLimit: depthPolicy.candidateLimit,
584
580
  queryModes,
585
581
  explain: Boolean(cmdOpts.explain),
586
582
  json: format === "json",
@@ -630,7 +626,10 @@ function wireSearchCommands(program: Command): void {
630
626
  []
631
627
  )
632
628
  .option("--fast", "skip expansion and reranking (fastest)")
633
- .option("--thorough", "enable query expansion (slower)")
629
+ .option(
630
+ "--thorough",
631
+ "use expansion with a wider rerank pool (slowest, best recall)"
632
+ )
634
633
  .option("-C, --candidate-limit <num>", "max candidates passed to reranking")
635
634
  .option("--answer", "generate short grounded answer")
636
635
  .option("--no-answer", "force retrieval-only output")
@@ -641,6 +640,7 @@ function wireSearchCommands(program: Command): void {
641
640
  .action(async (queryText: string, cmdOpts: Record<string, unknown>) => {
642
641
  const format = getFormat(cmdOpts);
643
642
  assertFormatSupported(CMD.ask, format);
643
+ const globals = getGlobals();
644
644
 
645
645
  // Validate empty query
646
646
  if (!queryText.trim()) {
@@ -650,6 +650,12 @@ function wireSearchCommands(program: Command): void {
650
650
  const limit = cmdOpts.limit
651
651
  ? parsePositiveInt("limit", cmdOpts.limit)
652
652
  : getDefaultLimit(format);
653
+ const { loadConfig } = await import("../config");
654
+ const { getActivePreset } = await import("../llm/registry");
655
+ const configResult = await loadConfig(globals.config);
656
+ const activePresetId = configResult.ok
657
+ ? getActivePreset(configResult.value).id
658
+ : "slim";
653
659
  const candidateLimit = cmdOpts.candidateLimit
654
660
  ? parsePositiveInt("candidate-limit", cmdOpts.candidateLimit)
655
661
  : undefined;
@@ -686,18 +692,13 @@ function wireSearchCommands(program: Command): void {
686
692
  ? normalizedInput.value.queryModes
687
693
  : undefined;
688
694
 
689
- // Determine expansion/rerank settings based on flags
690
- // Default: skip expansion (balanced mode)
691
- let noExpand = true;
692
- let noRerank = false;
693
-
694
- if (cmdOpts.fast) {
695
- noExpand = true;
696
- noRerank = true;
697
- } else if (cmdOpts.thorough) {
698
- noExpand = false;
699
- noRerank = false;
700
- }
695
+ const depthPolicy = resolveDepthPolicy({
696
+ presetId: activePresetId,
697
+ fast: Boolean(cmdOpts.fast),
698
+ thorough: Boolean(cmdOpts.thorough),
699
+ candidateLimit,
700
+ hasStructuredModes: Boolean(queryModes?.length),
701
+ });
701
702
 
702
703
  const { ask, formatAsk } = await import("./commands/ask");
703
704
  const showSources = Boolean(cmdOpts.showSources);
@@ -712,9 +713,9 @@ function wireSearchCommands(program: Command): void {
712
713
  intent: cmdOpts.intent as string | undefined,
713
714
  exclude,
714
715
  queryModes,
715
- noExpand,
716
- noRerank,
717
- candidateLimit,
716
+ noExpand: depthPolicy.noExpand,
717
+ noRerank: depthPolicy.noRerank,
718
+ candidateLimit: depthPolicy.candidateLimit,
718
719
  // Per spec: --answer defaults to false, --no-answer forces retrieval-only
719
720
  // Commander creates separate cmdOpts.noAnswer for --no-answer flag
720
721
  answer: Boolean(cmdOpts.answer),
@@ -1358,7 +1359,8 @@ function wireManagementCommands(program: Command): void {
1358
1359
  .option("--all", "download all configured models")
1359
1360
  .option("--embed", "download embedding model")
1360
1361
  .option("--rerank", "download reranker model")
1361
- .option("--gen", "download generation model")
1362
+ .option("--expand", "download expansion model")
1363
+ .option("--gen", "download answer generation model")
1362
1364
  .option("--force", "force re-download")
1363
1365
  .option("--no-progress", "disable download progress")
1364
1366
  .action(async (cmdOpts: Record<string, unknown>) => {
@@ -1377,6 +1379,7 @@ function wireManagementCommands(program: Command): void {
1377
1379
  all: Boolean(cmdOpts.all),
1378
1380
  embed: Boolean(cmdOpts.embed),
1379
1381
  rerank: Boolean(cmdOpts.rerank),
1382
+ expand: Boolean(cmdOpts.expand),
1380
1383
  gen: Boolean(cmdOpts.gen),
1381
1384
  force: Boolean(cmdOpts.force),
1382
1385
  onProgress: showProgress ? createProgressRenderer() : undefined,
@@ -166,7 +166,9 @@ export const ModelPresetSchema = z.object({
166
166
  embed: z.string().min(1),
167
167
  /** Reranker model URI */
168
168
  rerank: z.string().min(1),
169
- /** Generation model URI */
169
+ /** Query expansion model URI (defaults to gen for older configs) */
170
+ expand: z.string().min(1).optional(),
171
+ /** Answer generation model URI */
170
172
  gen: z.string().min(1),
171
173
  });
172
174
 
@@ -180,6 +182,7 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
180
182
  embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
181
183
  rerank:
182
184
  "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
185
+ expand: "hf:unsloth/Qwen3-1.7B-GGUF/Qwen3-1.7B-Q4_K_M.gguf",
183
186
  gen: "hf:unsloth/Qwen3-1.7B-GGUF/Qwen3-1.7B-Q4_K_M.gguf",
184
187
  },
185
188
  {
@@ -188,6 +191,8 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
188
191
  embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
189
192
  rerank:
190
193
  "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
194
+ expand:
195
+ "hf:bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-Q4_K_M.gguf",
191
196
  gen: "hf:bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-Q4_K_M.gguf",
192
197
  },
193
198
  {
@@ -196,6 +201,8 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
196
201
  embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
197
202
  rerank:
198
203
  "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
204
+ expand:
205
+ "hf:unsloth/Qwen3-4B-Instruct-2507-GGUF/Qwen3-4B-Instruct-2507-Q4_K_M.gguf",
199
206
  gen: "hf:unsloth/Qwen3-4B-Instruct-2507-GGUF/Qwen3-4B-Instruct-2507-Q4_K_M.gguf",
200
207
  },
201
208
  ];