@gmickel/gno 0.24.0 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -2
- package/package.json +1 -1
- package/src/cli/AGENTS.md +2 -1
- package/src/cli/CLAUDE.md +2 -1
- package/src/cli/commands/ask.ts +33 -14
- package/src/cli/commands/models/clear.ts +10 -3
- package/src/cli/commands/models/list.ts +17 -4
- package/src/cli/commands/models/pull.ts +15 -7
- package/src/cli/commands/query.ts +13 -10
- package/src/cli/program.ts +47 -44
- package/src/config/types.ts +8 -1
- package/src/core/depth-policy.ts +78 -0
- package/src/llm/errors.ts +1 -1
- package/src/llm/nodeLlamaCpp/adapter.ts +39 -3
- package/src/llm/registry.ts +21 -0
- package/src/llm/types.ts +1 -1
- package/src/mcp/tools/index.ts +2 -2
- package/src/mcp/tools/query.ts +25 -37
- package/src/pipeline/hybrid.ts +4 -4
- package/src/sdk/client.ts +59 -19
- package/src/sdk/types.ts +1 -0
- package/src/serve/AGENTS.md +2 -1
- package/src/serve/CLAUDE.md +2 -1
- package/src/serve/context.ts +23 -9
- package/src/serve/public/app.tsx +8 -0
- package/src/serve/public/components/AIModelSelector.tsx +48 -10
- package/src/serve/public/pages/Ask.tsx +94 -54
- package/src/serve/public/pages/Browse.tsx +141 -5
- package/src/serve/public/pages/Collections.tsx +135 -38
- package/src/serve/public/pages/Dashboard.tsx +31 -4
- package/src/serve/public/pages/GraphView.tsx +24 -0
- package/src/serve/public/pages/Search.tsx +78 -29
- package/src/serve/routes/api.ts +6 -6
package/README.md
CHANGED
|
@@ -61,16 +61,18 @@ models:
|
|
|
61
61
|
activePreset: slim-tuned
|
|
62
62
|
presets:
|
|
63
63
|
- id: slim-tuned
|
|
64
|
-
name: GNO Slim
|
|
64
|
+
name: GNO Slim Tuned
|
|
65
65
|
embed: hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf
|
|
66
66
|
rerank: hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf
|
|
67
|
-
|
|
67
|
+
expand: hf:guiltylemon/gno-expansion-slim-retrieval-v1/gno-expansion-auto-entity-lock-default-mix-lr95-f16.gguf
|
|
68
|
+
gen: hf:unsloth/Qwen3-4B-Instruct-2507-GGUF/Qwen3-4B-Instruct-2507-Q4_K_M.gguf
|
|
68
69
|
```
|
|
69
70
|
|
|
70
71
|
Then:
|
|
71
72
|
|
|
72
73
|
```bash
|
|
73
74
|
gno models use slim-tuned
|
|
75
|
+
gno models pull --expand
|
|
74
76
|
gno models pull --gen
|
|
75
77
|
gno query "ECONNREFUSED 127.0.0.1:5432" --thorough
|
|
76
78
|
```
|
|
@@ -579,6 +581,7 @@ models:
|
|
|
579
581
|
name: Remote GPU Server
|
|
580
582
|
embed: "http://192.168.1.100:8081/v1/embeddings#bge-m3"
|
|
581
583
|
rerank: "http://192.168.1.100:8082/v1/completions#reranker"
|
|
584
|
+
expand: "http://192.168.1.100:8083/v1/chat/completions#gno-expand"
|
|
582
585
|
gen: "http://192.168.1.100:8083/v1/chat/completions#qwen3-4b"
|
|
583
586
|
```
|
|
584
587
|
|
package/package.json
CHANGED
package/src/cli/AGENTS.md
CHANGED
package/src/cli/CLAUDE.md
CHANGED
package/src/cli/commands/ask.ts
CHANGED
|
@@ -40,7 +40,9 @@ export type AskCommandOptions = AskOptions & {
|
|
|
40
40
|
configPath?: string;
|
|
41
41
|
/** Override embedding model */
|
|
42
42
|
embedModel?: string;
|
|
43
|
-
/** Override
|
|
43
|
+
/** Override expansion model */
|
|
44
|
+
expandModel?: string;
|
|
45
|
+
/** Override answer generation model */
|
|
44
46
|
genModel?: string;
|
|
45
47
|
/** Override rerank model */
|
|
46
48
|
rerankModel?: string;
|
|
@@ -82,7 +84,8 @@ export async function ask(
|
|
|
82
84
|
const { store, config } = initResult;
|
|
83
85
|
|
|
84
86
|
let embedPort: EmbeddingPort | null = null;
|
|
85
|
-
let
|
|
87
|
+
let expandPort: GenerationPort | null = null;
|
|
88
|
+
let answerPort: GenerationPort | null = null;
|
|
86
89
|
let rerankPort: RerankPort | null = null;
|
|
87
90
|
|
|
88
91
|
try {
|
|
@@ -113,10 +116,23 @@ export async function ask(
|
|
|
113
116
|
embedPort = embedResult.value;
|
|
114
117
|
}
|
|
115
118
|
|
|
116
|
-
// Create
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
119
|
+
// Create expansion port when expansion is enabled.
|
|
120
|
+
if (!options.noExpand && !options.queryModes?.length) {
|
|
121
|
+
const expandUri =
|
|
122
|
+
options.expandModel ?? options.genModel ?? preset.expand;
|
|
123
|
+
const genResult = await llm.createExpansionPort(expandUri, {
|
|
124
|
+
policy,
|
|
125
|
+
onProgress: downloadProgress
|
|
126
|
+
? (progress) => downloadProgress("expand", progress)
|
|
127
|
+
: undefined,
|
|
128
|
+
});
|
|
129
|
+
if (genResult.ok) {
|
|
130
|
+
expandPort = genResult.value;
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
// Create answer generation port when answers are requested.
|
|
135
|
+
if (options.answer) {
|
|
120
136
|
const genUri = options.genModel ?? preset.gen;
|
|
121
137
|
const genResult = await llm.createGenerationPort(genUri, {
|
|
122
138
|
policy,
|
|
@@ -125,7 +141,7 @@ export async function ask(
|
|
|
125
141
|
: undefined,
|
|
126
142
|
});
|
|
127
143
|
if (genResult.ok) {
|
|
128
|
-
|
|
144
|
+
answerPort = genResult.value;
|
|
129
145
|
}
|
|
130
146
|
}
|
|
131
147
|
|
|
@@ -170,7 +186,7 @@ export async function ask(
|
|
|
170
186
|
config,
|
|
171
187
|
vectorIndex,
|
|
172
188
|
embedPort,
|
|
173
|
-
|
|
189
|
+
expandPort,
|
|
174
190
|
rerankPort,
|
|
175
191
|
};
|
|
176
192
|
|
|
@@ -178,7 +194,7 @@ export async function ask(
|
|
|
178
194
|
const answerRequested = options.answer && !options.noAnswer;
|
|
179
195
|
|
|
180
196
|
// Fail early if --answer is requested but no generation model available
|
|
181
|
-
if (answerRequested &&
|
|
197
|
+
if (answerRequested && answerPort === null) {
|
|
182
198
|
return {
|
|
183
199
|
success: false,
|
|
184
200
|
error:
|
|
@@ -223,12 +239,12 @@ export async function ask(
|
|
|
223
239
|
// 2. --no-answer was not set
|
|
224
240
|
// 3. We have results to ground on (no point generating from nothing)
|
|
225
241
|
const shouldGenerateAnswer =
|
|
226
|
-
answerRequested &&
|
|
242
|
+
answerRequested && answerPort !== null && results.length > 0;
|
|
227
243
|
|
|
228
|
-
if (shouldGenerateAnswer &&
|
|
244
|
+
if (shouldGenerateAnswer && answerPort) {
|
|
229
245
|
const maxTokens = options.maxAnswerTokens ?? 512;
|
|
230
246
|
const rawResult = await generateGroundedAnswer(
|
|
231
|
-
{ genPort, store },
|
|
247
|
+
{ genPort: answerPort, store },
|
|
232
248
|
query,
|
|
233
249
|
results,
|
|
234
250
|
maxTokens
|
|
@@ -277,8 +293,11 @@ export async function ask(
|
|
|
277
293
|
if (embedPort) {
|
|
278
294
|
await embedPort.dispose();
|
|
279
295
|
}
|
|
280
|
-
if (
|
|
281
|
-
await
|
|
296
|
+
if (expandPort) {
|
|
297
|
+
await expandPort.dispose();
|
|
298
|
+
}
|
|
299
|
+
if (answerPort) {
|
|
300
|
+
await answerPort.dispose();
|
|
282
301
|
}
|
|
283
302
|
if (rerankPort) {
|
|
284
303
|
await rerankPort.dispose();
|
|
@@ -21,6 +21,8 @@ export interface ModelsClearOptions {
|
|
|
21
21
|
embed?: boolean;
|
|
22
22
|
/** Clear reranker model */
|
|
23
23
|
rerank?: boolean;
|
|
24
|
+
/** Clear expansion model */
|
|
25
|
+
expand?: boolean;
|
|
24
26
|
/** Clear generation model */
|
|
25
27
|
gen?: boolean;
|
|
26
28
|
/** Skip confirmation */
|
|
@@ -50,7 +52,7 @@ export async function modelsClear(
|
|
|
50
52
|
|
|
51
53
|
if (options.all) {
|
|
52
54
|
types = undefined; // Clear all
|
|
53
|
-
} else if (options.embed || options.rerank || options.gen) {
|
|
55
|
+
} else if (options.embed || options.rerank || options.expand || options.gen) {
|
|
54
56
|
types = [];
|
|
55
57
|
if (options.embed) {
|
|
56
58
|
types.push("embed");
|
|
@@ -58,6 +60,9 @@ export async function modelsClear(
|
|
|
58
60
|
if (options.rerank) {
|
|
59
61
|
types.push("rerank");
|
|
60
62
|
}
|
|
63
|
+
if (options.expand) {
|
|
64
|
+
types.push("expand");
|
|
65
|
+
}
|
|
61
66
|
if (options.gen) {
|
|
62
67
|
types.push("gen");
|
|
63
68
|
}
|
|
@@ -71,7 +76,7 @@ export async function modelsClear(
|
|
|
71
76
|
const sizeAfter = await cache.totalSize();
|
|
72
77
|
|
|
73
78
|
return {
|
|
74
|
-
cleared: types ?? ["embed", "rerank", "gen"],
|
|
79
|
+
cleared: types ?? ["embed", "rerank", "expand", "gen"],
|
|
75
80
|
sizeBefore,
|
|
76
81
|
sizeAfter,
|
|
77
82
|
};
|
|
@@ -96,8 +101,10 @@ function formatBytes(bytes: number): string {
|
|
|
96
101
|
*/
|
|
97
102
|
export function formatModelsClear(result: ModelsClearResult): string {
|
|
98
103
|
const lines: string[] = [];
|
|
104
|
+
const label = (type: ModelType) =>
|
|
105
|
+
type === "gen" ? "answer" : type === "expand" ? "expand" : type;
|
|
99
106
|
|
|
100
|
-
lines.push(`Cleared: ${result.cleared.join(", ")}`);
|
|
107
|
+
lines.push(`Cleared: ${result.cleared.map(label).join(", ")}`);
|
|
101
108
|
lines.push(`Freed: ${formatBytes(result.sizeBefore - result.sizeAfter)}`);
|
|
102
109
|
|
|
103
110
|
return lines.join("\n");
|
|
@@ -36,6 +36,7 @@ export interface ModelsListResult {
|
|
|
36
36
|
presets: PresetInfo[];
|
|
37
37
|
embed: ModelStatus;
|
|
38
38
|
rerank: ModelStatus;
|
|
39
|
+
expand: ModelStatus;
|
|
39
40
|
gen: ModelStatus;
|
|
40
41
|
cacheDir: string;
|
|
41
42
|
totalSize: number;
|
|
@@ -84,9 +85,10 @@ export async function modelsList(
|
|
|
84
85
|
const preset = getActivePreset(config);
|
|
85
86
|
const cache = new ModelCache(getModelsCachePath());
|
|
86
87
|
|
|
87
|
-
const [embed, rerank, gen] = await Promise.all([
|
|
88
|
+
const [embed, rerank, expand, gen] = await Promise.all([
|
|
88
89
|
getModelStatus(cache, preset.embed),
|
|
89
90
|
getModelStatus(cache, preset.rerank),
|
|
91
|
+
getModelStatus(cache, preset.expand ?? preset.gen),
|
|
90
92
|
getModelStatus(cache, preset.gen),
|
|
91
93
|
]);
|
|
92
94
|
|
|
@@ -99,6 +101,7 @@ export async function modelsList(
|
|
|
99
101
|
})),
|
|
100
102
|
embed,
|
|
101
103
|
rerank,
|
|
104
|
+
expand,
|
|
102
105
|
gen,
|
|
103
106
|
cacheDir: cache.dir,
|
|
104
107
|
totalSize: await cache.totalSize(),
|
|
@@ -147,7 +150,11 @@ function formatTerminal(result: ModelsListResult): string {
|
|
|
147
150
|
(result.rerank.size ? ` (${formatBytes(result.rerank.size)})` : "")
|
|
148
151
|
);
|
|
149
152
|
lines.push(
|
|
150
|
-
`
|
|
153
|
+
` expand: ${statusIcon(result.expand)} ${result.expand.uri}` +
|
|
154
|
+
(result.expand.size ? ` (${formatBytes(result.expand.size)})` : "")
|
|
155
|
+
);
|
|
156
|
+
lines.push(
|
|
157
|
+
` answer: ${statusIcon(result.gen)} ${result.gen.uri}` +
|
|
151
158
|
(result.gen.size ? ` (${formatBytes(result.gen.size)})` : "")
|
|
152
159
|
);
|
|
153
160
|
|
|
@@ -156,7 +163,10 @@ function formatTerminal(result: ModelsListResult): string {
|
|
|
156
163
|
lines.push(`Total size: ${formatBytes(result.totalSize)}`);
|
|
157
164
|
|
|
158
165
|
const allCached =
|
|
159
|
-
result.embed.cached &&
|
|
166
|
+
result.embed.cached &&
|
|
167
|
+
result.rerank.cached &&
|
|
168
|
+
result.expand.cached &&
|
|
169
|
+
result.gen.cached;
|
|
160
170
|
if (!allCached) {
|
|
161
171
|
lines.push("");
|
|
162
172
|
lines.push("Run: gno models pull --all");
|
|
@@ -186,7 +196,10 @@ function formatMarkdown(result: ModelsListResult): string {
|
|
|
186
196
|
`| rerank | ${result.rerank.uri} | ${status(result.rerank)} | ${size(result.rerank)} |`
|
|
187
197
|
);
|
|
188
198
|
lines.push(
|
|
189
|
-
`|
|
|
199
|
+
`| expand | ${result.expand.uri} | ${status(result.expand)} | ${size(result.expand)} |`
|
|
200
|
+
);
|
|
201
|
+
lines.push(
|
|
202
|
+
`| answer | ${result.gen.uri} | ${status(result.gen)} | ${size(result.gen)} |`
|
|
190
203
|
);
|
|
191
204
|
|
|
192
205
|
lines.push("");
|
|
@@ -27,6 +27,8 @@ export interface ModelsPullOptions {
|
|
|
27
27
|
embed?: boolean;
|
|
28
28
|
/** Pull reranker model */
|
|
29
29
|
rerank?: boolean;
|
|
30
|
+
/** Pull expansion model */
|
|
31
|
+
expand?: boolean;
|
|
30
32
|
/** Pull generation model */
|
|
31
33
|
gen?: boolean;
|
|
32
34
|
/** Force re-download */
|
|
@@ -59,9 +61,9 @@ export interface ModelsPullResult {
|
|
|
59
61
|
*/
|
|
60
62
|
function getTypesToPull(options: ModelsPullOptions): ModelType[] {
|
|
61
63
|
if (options.all) {
|
|
62
|
-
return ["embed", "rerank", "gen"];
|
|
64
|
+
return ["embed", "rerank", "expand", "gen"];
|
|
63
65
|
}
|
|
64
|
-
if (options.embed || options.rerank || options.gen) {
|
|
66
|
+
if (options.embed || options.rerank || options.expand || options.gen) {
|
|
65
67
|
const types: ModelType[] = [];
|
|
66
68
|
if (options.embed) {
|
|
67
69
|
types.push("embed");
|
|
@@ -69,13 +71,16 @@ function getTypesToPull(options: ModelsPullOptions): ModelType[] {
|
|
|
69
71
|
if (options.rerank) {
|
|
70
72
|
types.push("rerank");
|
|
71
73
|
}
|
|
74
|
+
if (options.expand) {
|
|
75
|
+
types.push("expand");
|
|
76
|
+
}
|
|
72
77
|
if (options.gen) {
|
|
73
78
|
types.push("gen");
|
|
74
79
|
}
|
|
75
80
|
return types;
|
|
76
81
|
}
|
|
77
82
|
// Default: pull all
|
|
78
|
-
return ["embed", "rerank", "gen"];
|
|
83
|
+
return ["embed", "rerank", "expand", "gen"];
|
|
79
84
|
}
|
|
80
85
|
|
|
81
86
|
/**
|
|
@@ -101,7 +106,8 @@ export async function modelsPull(
|
|
|
101
106
|
let skipped = 0;
|
|
102
107
|
|
|
103
108
|
for (const type of types) {
|
|
104
|
-
const uri =
|
|
109
|
+
const uri =
|
|
110
|
+
type === "expand" ? (preset.expand ?? preset.gen) : preset[type];
|
|
105
111
|
|
|
106
112
|
// Check if already cached (skip unless --force)
|
|
107
113
|
if (!options.force) {
|
|
@@ -160,16 +166,18 @@ export async function modelsPull(
|
|
|
160
166
|
*/
|
|
161
167
|
export function formatModelsPull(result: ModelsPullResult): string {
|
|
162
168
|
const lines: string[] = [];
|
|
169
|
+
const label = (type: ModelType) =>
|
|
170
|
+
type === "gen" ? "answer" : type === "expand" ? "expand" : type;
|
|
163
171
|
|
|
164
172
|
for (const r of result.results) {
|
|
165
173
|
if (r.ok) {
|
|
166
174
|
if (r.skipped) {
|
|
167
|
-
lines.push(`${r.type}: skipped (already cached)`);
|
|
175
|
+
lines.push(`${label(r.type)}: skipped (already cached)`);
|
|
168
176
|
} else {
|
|
169
|
-
lines.push(`${r.type}: downloaded`);
|
|
177
|
+
lines.push(`${label(r.type)}: downloaded`);
|
|
170
178
|
}
|
|
171
179
|
} else {
|
|
172
|
-
lines.push(`${r.type}: failed - ${r.error}`);
|
|
180
|
+
lines.push(`${label(r.type)}: failed - ${r.error}`);
|
|
173
181
|
}
|
|
174
182
|
}
|
|
175
183
|
|
|
@@ -36,7 +36,9 @@ export type QueryCommandOptions = HybridSearchOptions & {
|
|
|
36
36
|
configPath?: string;
|
|
37
37
|
/** Override embedding model */
|
|
38
38
|
embedModel?: string;
|
|
39
|
-
/** Override
|
|
39
|
+
/** Override expansion model */
|
|
40
|
+
expandModel?: string;
|
|
41
|
+
/** Deprecated alias for expansion model */
|
|
40
42
|
genModel?: string;
|
|
41
43
|
/** Override rerank model */
|
|
42
44
|
rerankModel?: string;
|
|
@@ -90,7 +92,7 @@ export async function query(
|
|
|
90
92
|
const { store, config } = initResult;
|
|
91
93
|
|
|
92
94
|
let embedPort: EmbeddingPort | null = null;
|
|
93
|
-
let
|
|
95
|
+
let expandPort: GenerationPort | null = null;
|
|
94
96
|
let rerankPort: RerankPort | null = null;
|
|
95
97
|
|
|
96
98
|
try {
|
|
@@ -121,18 +123,19 @@ export async function query(
|
|
|
121
123
|
embedPort = embedResult.value;
|
|
122
124
|
}
|
|
123
125
|
|
|
124
|
-
// Create
|
|
126
|
+
// Create expansion port - optional.
|
|
125
127
|
// Skip when structured query modes are provided.
|
|
126
128
|
if (!options.noExpand && !options.queryModes?.length) {
|
|
127
|
-
const
|
|
128
|
-
|
|
129
|
+
const expandUri =
|
|
130
|
+
options.expandModel ?? options.genModel ?? preset.expand;
|
|
131
|
+
const genResult = await llm.createExpansionPort(expandUri, {
|
|
129
132
|
policy,
|
|
130
133
|
onProgress: downloadProgress
|
|
131
|
-
? (progress) => downloadProgress("
|
|
134
|
+
? (progress) => downloadProgress("expand", progress)
|
|
132
135
|
: undefined,
|
|
133
136
|
});
|
|
134
137
|
if (genResult.ok) {
|
|
135
|
-
|
|
138
|
+
expandPort = genResult.value;
|
|
136
139
|
}
|
|
137
140
|
}
|
|
138
141
|
|
|
@@ -177,7 +180,7 @@ export async function query(
|
|
|
177
180
|
config,
|
|
178
181
|
vectorIndex,
|
|
179
182
|
embedPort,
|
|
180
|
-
|
|
183
|
+
expandPort,
|
|
181
184
|
rerankPort,
|
|
182
185
|
};
|
|
183
186
|
|
|
@@ -195,8 +198,8 @@ export async function query(
|
|
|
195
198
|
if (embedPort) {
|
|
196
199
|
await embedPort.dispose();
|
|
197
200
|
}
|
|
198
|
-
if (
|
|
199
|
-
await
|
|
201
|
+
if (expandPort) {
|
|
202
|
+
await expandPort.dispose();
|
|
200
203
|
}
|
|
201
204
|
if (rerankPort) {
|
|
202
205
|
await rerankPort.dispose();
|
package/src/cli/program.ts
CHANGED
|
@@ -14,6 +14,7 @@ import {
|
|
|
14
14
|
PRODUCT_NAME,
|
|
15
15
|
VERSION,
|
|
16
16
|
} from "../app/constants";
|
|
17
|
+
import { resolveDepthPolicy } from "../core/depth-policy";
|
|
17
18
|
import { parseAndValidateTagFilter } from "../core/tags";
|
|
18
19
|
import { setColorsEnabled } from "./colors";
|
|
19
20
|
import {
|
|
@@ -455,7 +456,10 @@ function wireSearchCommands(program: Command): void {
|
|
|
455
456
|
.option("--full", "include full content")
|
|
456
457
|
.option("--line-numbers", "include line numbers in output")
|
|
457
458
|
.option("--fast", "skip expansion and reranking (fastest, ~0.7s)")
|
|
458
|
-
.option(
|
|
459
|
+
.option(
|
|
460
|
+
"--thorough",
|
|
461
|
+
"use expansion with a wider rerank pool (slowest, best recall)"
|
|
462
|
+
)
|
|
459
463
|
.option("--no-expand", "disable query expansion")
|
|
460
464
|
.option("--no-rerank", "disable reranking")
|
|
461
465
|
.option(
|
|
@@ -474,6 +478,7 @@ function wireSearchCommands(program: Command): void {
|
|
|
474
478
|
.action(async (queryText: string, cmdOpts: Record<string, unknown>) => {
|
|
475
479
|
const format = getFormat(cmdOpts);
|
|
476
480
|
assertFormatSupported(CMD.query, format);
|
|
481
|
+
const globals = getGlobals();
|
|
477
482
|
|
|
478
483
|
// Validate empty query
|
|
479
484
|
if (!queryText.trim()) {
|
|
@@ -532,35 +537,26 @@ function wireSearchCommands(program: Command): void {
|
|
|
532
537
|
const limit = cmdOpts.limit
|
|
533
538
|
? parsePositiveInt("limit", cmdOpts.limit)
|
|
534
539
|
: getDefaultLimit(format);
|
|
540
|
+
const { loadConfig } = await import("../config");
|
|
541
|
+
const { getActivePreset } = await import("../llm/registry");
|
|
542
|
+
const configResult = await loadConfig(globals.config);
|
|
543
|
+
const activePresetId = configResult.ok
|
|
544
|
+
? getActivePreset(configResult.value).id
|
|
545
|
+
: "slim";
|
|
535
546
|
const candidateLimit = cmdOpts.candidateLimit
|
|
536
547
|
? parsePositiveInt("candidate-limit", cmdOpts.candidateLimit)
|
|
537
548
|
: undefined;
|
|
538
549
|
const categories = parseCsvValues(cmdOpts.category);
|
|
539
550
|
const exclude = parseCsvValues(cmdOpts.exclude);
|
|
540
551
|
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
noExpand = true;
|
|
550
|
-
noRerank = true;
|
|
551
|
-
} else if (cmdOpts.thorough) {
|
|
552
|
-
// --thorough: full pipeline (~5-8s)
|
|
553
|
-
noExpand = false;
|
|
554
|
-
noRerank = false;
|
|
555
|
-
} else {
|
|
556
|
-
// Check individual flags (override defaults)
|
|
557
|
-
if (cmdOpts.expand === false) {
|
|
558
|
-
noExpand = true;
|
|
559
|
-
}
|
|
560
|
-
if (cmdOpts.rerank === false) {
|
|
561
|
-
noRerank = true;
|
|
562
|
-
}
|
|
563
|
-
}
|
|
552
|
+
const depthPolicy = resolveDepthPolicy({
|
|
553
|
+
presetId: activePresetId,
|
|
554
|
+
fast: Boolean(cmdOpts.fast),
|
|
555
|
+
thorough: Boolean(cmdOpts.thorough),
|
|
556
|
+
expand: cmdOpts.expand === false ? false : undefined,
|
|
557
|
+
rerank: cmdOpts.rerank === false ? false : undefined,
|
|
558
|
+
candidateLimit,
|
|
559
|
+
});
|
|
564
560
|
|
|
565
561
|
const { query, formatQuery } = await import("./commands/query");
|
|
566
562
|
const result = await query(queryText, {
|
|
@@ -578,9 +574,9 @@ function wireSearchCommands(program: Command): void {
|
|
|
578
574
|
tagsAny,
|
|
579
575
|
full: Boolean(cmdOpts.full),
|
|
580
576
|
lineNumbers: Boolean(cmdOpts.lineNumbers),
|
|
581
|
-
noExpand,
|
|
582
|
-
noRerank,
|
|
583
|
-
candidateLimit,
|
|
577
|
+
noExpand: depthPolicy.noExpand,
|
|
578
|
+
noRerank: depthPolicy.noRerank,
|
|
579
|
+
candidateLimit: depthPolicy.candidateLimit,
|
|
584
580
|
queryModes,
|
|
585
581
|
explain: Boolean(cmdOpts.explain),
|
|
586
582
|
json: format === "json",
|
|
@@ -630,7 +626,10 @@ function wireSearchCommands(program: Command): void {
|
|
|
630
626
|
[]
|
|
631
627
|
)
|
|
632
628
|
.option("--fast", "skip expansion and reranking (fastest)")
|
|
633
|
-
.option(
|
|
629
|
+
.option(
|
|
630
|
+
"--thorough",
|
|
631
|
+
"use expansion with a wider rerank pool (slowest, best recall)"
|
|
632
|
+
)
|
|
634
633
|
.option("-C, --candidate-limit <num>", "max candidates passed to reranking")
|
|
635
634
|
.option("--answer", "generate short grounded answer")
|
|
636
635
|
.option("--no-answer", "force retrieval-only output")
|
|
@@ -641,6 +640,7 @@ function wireSearchCommands(program: Command): void {
|
|
|
641
640
|
.action(async (queryText: string, cmdOpts: Record<string, unknown>) => {
|
|
642
641
|
const format = getFormat(cmdOpts);
|
|
643
642
|
assertFormatSupported(CMD.ask, format);
|
|
643
|
+
const globals = getGlobals();
|
|
644
644
|
|
|
645
645
|
// Validate empty query
|
|
646
646
|
if (!queryText.trim()) {
|
|
@@ -650,6 +650,12 @@ function wireSearchCommands(program: Command): void {
|
|
|
650
650
|
const limit = cmdOpts.limit
|
|
651
651
|
? parsePositiveInt("limit", cmdOpts.limit)
|
|
652
652
|
: getDefaultLimit(format);
|
|
653
|
+
const { loadConfig } = await import("../config");
|
|
654
|
+
const { getActivePreset } = await import("../llm/registry");
|
|
655
|
+
const configResult = await loadConfig(globals.config);
|
|
656
|
+
const activePresetId = configResult.ok
|
|
657
|
+
? getActivePreset(configResult.value).id
|
|
658
|
+
: "slim";
|
|
653
659
|
const candidateLimit = cmdOpts.candidateLimit
|
|
654
660
|
? parsePositiveInt("candidate-limit", cmdOpts.candidateLimit)
|
|
655
661
|
: undefined;
|
|
@@ -686,18 +692,13 @@ function wireSearchCommands(program: Command): void {
|
|
|
686
692
|
? normalizedInput.value.queryModes
|
|
687
693
|
: undefined;
|
|
688
694
|
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
noRerank = true;
|
|
697
|
-
} else if (cmdOpts.thorough) {
|
|
698
|
-
noExpand = false;
|
|
699
|
-
noRerank = false;
|
|
700
|
-
}
|
|
695
|
+
const depthPolicy = resolveDepthPolicy({
|
|
696
|
+
presetId: activePresetId,
|
|
697
|
+
fast: Boolean(cmdOpts.fast),
|
|
698
|
+
thorough: Boolean(cmdOpts.thorough),
|
|
699
|
+
candidateLimit,
|
|
700
|
+
hasStructuredModes: Boolean(queryModes?.length),
|
|
701
|
+
});
|
|
701
702
|
|
|
702
703
|
const { ask, formatAsk } = await import("./commands/ask");
|
|
703
704
|
const showSources = Boolean(cmdOpts.showSources);
|
|
@@ -712,9 +713,9 @@ function wireSearchCommands(program: Command): void {
|
|
|
712
713
|
intent: cmdOpts.intent as string | undefined,
|
|
713
714
|
exclude,
|
|
714
715
|
queryModes,
|
|
715
|
-
noExpand,
|
|
716
|
-
noRerank,
|
|
717
|
-
candidateLimit,
|
|
716
|
+
noExpand: depthPolicy.noExpand,
|
|
717
|
+
noRerank: depthPolicy.noRerank,
|
|
718
|
+
candidateLimit: depthPolicy.candidateLimit,
|
|
718
719
|
// Per spec: --answer defaults to false, --no-answer forces retrieval-only
|
|
719
720
|
// Commander creates separate cmdOpts.noAnswer for --no-answer flag
|
|
720
721
|
answer: Boolean(cmdOpts.answer),
|
|
@@ -1358,7 +1359,8 @@ function wireManagementCommands(program: Command): void {
|
|
|
1358
1359
|
.option("--all", "download all configured models")
|
|
1359
1360
|
.option("--embed", "download embedding model")
|
|
1360
1361
|
.option("--rerank", "download reranker model")
|
|
1361
|
-
.option("--
|
|
1362
|
+
.option("--expand", "download expansion model")
|
|
1363
|
+
.option("--gen", "download answer generation model")
|
|
1362
1364
|
.option("--force", "force re-download")
|
|
1363
1365
|
.option("--no-progress", "disable download progress")
|
|
1364
1366
|
.action(async (cmdOpts: Record<string, unknown>) => {
|
|
@@ -1377,6 +1379,7 @@ function wireManagementCommands(program: Command): void {
|
|
|
1377
1379
|
all: Boolean(cmdOpts.all),
|
|
1378
1380
|
embed: Boolean(cmdOpts.embed),
|
|
1379
1381
|
rerank: Boolean(cmdOpts.rerank),
|
|
1382
|
+
expand: Boolean(cmdOpts.expand),
|
|
1380
1383
|
gen: Boolean(cmdOpts.gen),
|
|
1381
1384
|
force: Boolean(cmdOpts.force),
|
|
1382
1385
|
onProgress: showProgress ? createProgressRenderer() : undefined,
|
package/src/config/types.ts
CHANGED
|
@@ -166,7 +166,9 @@ export const ModelPresetSchema = z.object({
|
|
|
166
166
|
embed: z.string().min(1),
|
|
167
167
|
/** Reranker model URI */
|
|
168
168
|
rerank: z.string().min(1),
|
|
169
|
-
/**
|
|
169
|
+
/** Query expansion model URI (defaults to gen for older configs) */
|
|
170
|
+
expand: z.string().min(1).optional(),
|
|
171
|
+
/** Answer generation model URI */
|
|
170
172
|
gen: z.string().min(1),
|
|
171
173
|
});
|
|
172
174
|
|
|
@@ -180,6 +182,7 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
|
|
|
180
182
|
embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
|
|
181
183
|
rerank:
|
|
182
184
|
"hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
|
|
185
|
+
expand: "hf:unsloth/Qwen3-1.7B-GGUF/Qwen3-1.7B-Q4_K_M.gguf",
|
|
183
186
|
gen: "hf:unsloth/Qwen3-1.7B-GGUF/Qwen3-1.7B-Q4_K_M.gguf",
|
|
184
187
|
},
|
|
185
188
|
{
|
|
@@ -188,6 +191,8 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
|
|
|
188
191
|
embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
|
|
189
192
|
rerank:
|
|
190
193
|
"hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
|
|
194
|
+
expand:
|
|
195
|
+
"hf:bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-Q4_K_M.gguf",
|
|
191
196
|
gen: "hf:bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-Q4_K_M.gguf",
|
|
192
197
|
},
|
|
193
198
|
{
|
|
@@ -196,6 +201,8 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
|
|
|
196
201
|
embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
|
|
197
202
|
rerank:
|
|
198
203
|
"hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
|
|
204
|
+
expand:
|
|
205
|
+
"hf:unsloth/Qwen3-4B-Instruct-2507-GGUF/Qwen3-4B-Instruct-2507-Q4_K_M.gguf",
|
|
199
206
|
gen: "hf:unsloth/Qwen3-4B-Instruct-2507-GGUF/Qwen3-4B-Instruct-2507-Q4_K_M.gguf",
|
|
200
207
|
},
|
|
201
208
|
];
|