@gmickel/gno 0.22.6 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +76 -3
- package/package.json +14 -2
- package/src/cli/AGENTS.md +2 -1
- package/src/cli/CLAUDE.md +2 -1
- package/src/cli/commands/ask.ts +33 -14
- package/src/cli/commands/models/clear.ts +10 -3
- package/src/cli/commands/models/list.ts +17 -4
- package/src/cli/commands/models/pull.ts +15 -7
- package/src/cli/commands/query.ts +13 -10
- package/src/cli/program.ts +76 -43
- package/src/config/types.ts +8 -1
- package/src/core/depth-policy.ts +78 -0
- package/src/core/structured-query.ts +198 -0
- package/src/llm/errors.ts +1 -1
- package/src/llm/nodeLlamaCpp/adapter.ts +39 -3
- package/src/llm/registry.ts +21 -0
- package/src/llm/types.ts +1 -1
- package/src/mcp/tools/index.ts +2 -2
- package/src/mcp/tools/query.ts +42 -40
- package/src/pipeline/hybrid.ts +4 -4
- package/src/pipeline/query-modes.ts +17 -12
- package/src/sdk/client.ts +624 -0
- package/src/sdk/documents.ts +348 -0
- package/src/sdk/embed.ts +287 -0
- package/src/sdk/errors.ts +42 -0
- package/src/sdk/index.ts +51 -0
- package/src/sdk/types.ts +138 -0
- package/src/serve/AGENTS.md +2 -1
- package/src/serve/CLAUDE.md +2 -1
- package/src/serve/context.ts +23 -9
- package/src/serve/public/app.tsx +8 -0
- package/src/serve/public/components/AIModelSelector.tsx +48 -10
- package/src/serve/public/globals.built.css +1 -1
- package/src/serve/public/pages/Ask.tsx +109 -41
- package/src/serve/public/pages/Browse.tsx +141 -5
- package/src/serve/public/pages/Collections.tsx +135 -38
- package/src/serve/public/pages/Dashboard.tsx +31 -4
- package/src/serve/public/pages/GraphView.tsx +24 -0
- package/src/serve/public/pages/Search.tsx +125 -36
- package/src/serve/routes/api.ts +73 -20
package/README.md
CHANGED
|
@@ -24,6 +24,7 @@ GNO is a local knowledge engine that turns your documents into a searchable, con
|
|
|
24
24
|
- [Agent Integration](#agent-integration)
|
|
25
25
|
- [Web UI](#web-ui)
|
|
26
26
|
- [REST API](#rest-api)
|
|
27
|
+
- [SDK](#sdk)
|
|
27
28
|
- [How It Works](#how-it-works)
|
|
28
29
|
- [Features](#features)
|
|
29
30
|
- [Local Models](#local-models)
|
|
@@ -33,7 +34,20 @@ GNO is a local knowledge engine that turns your documents into a searchable, con
|
|
|
33
34
|
|
|
34
35
|
---
|
|
35
36
|
|
|
36
|
-
## What's New in v0.22
|
|
37
|
+
## What's New in v0.24
|
|
38
|
+
|
|
39
|
+
- **Structured Query Documents**: first-class multi-line query syntax using `term:`, `intent:`, and `hyde:`
|
|
40
|
+
- **Cross-Surface Rollout**: works across CLI, API, MCP, SDK, and Web Search/Ask
|
|
41
|
+
- **Portable Retrieval Prompts**: save/share advanced retrieval intent as one text payload instead of repeated flags or JSON arrays
|
|
42
|
+
|
|
43
|
+
### v0.23
|
|
44
|
+
|
|
45
|
+
- **SDK / Library Mode**: package-root importable SDK with `createGnoClient(...)` for direct retrieval, document access, and indexing flows
|
|
46
|
+
- **Inline Config Support**: embed GNO in another app without writing YAML config files
|
|
47
|
+
- **Programmatic Indexing**: call `update`, `embed`, and `index` directly from Bun/TypeScript
|
|
48
|
+
- **Docs & Website**: dedicated SDK guide, feature page, homepage section, and architecture docs
|
|
49
|
+
|
|
50
|
+
### v0.22
|
|
37
51
|
|
|
38
52
|
- **Promoted Slim Retrieval Model**: published `slim-retrieval-v1` on Hugging Face for direct `hf:` installation in GNO
|
|
39
53
|
- **Fine-Tuning Workflow**: local MLX LoRA training, portable GGUF export, automatic checkpoint selection, promotion bundles, and repeatable benchmark comparisons
|
|
@@ -47,16 +61,18 @@ models:
|
|
|
47
61
|
activePreset: slim-tuned
|
|
48
62
|
presets:
|
|
49
63
|
- id: slim-tuned
|
|
50
|
-
name: GNO Slim
|
|
64
|
+
name: GNO Slim Tuned
|
|
51
65
|
embed: hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf
|
|
52
66
|
rerank: hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf
|
|
53
|
-
|
|
67
|
+
expand: hf:guiltylemon/gno-expansion-slim-retrieval-v1/gno-expansion-auto-entity-lock-default-mix-lr95-f16.gguf
|
|
68
|
+
gen: hf:unsloth/Qwen3-4B-Instruct-2507-GGUF/Qwen3-4B-Instruct-2507-Q4_K_M.gguf
|
|
54
69
|
```
|
|
55
70
|
|
|
56
71
|
Then:
|
|
57
72
|
|
|
58
73
|
```bash
|
|
59
74
|
gno models use slim-tuned
|
|
75
|
+
gno models pull --expand
|
|
60
76
|
gno models pull --gen
|
|
61
77
|
gno query "ECONNREFUSED 127.0.0.1:5432" --thorough
|
|
62
78
|
```
|
|
@@ -187,6 +203,58 @@ gno skill install --target all # Both Claude + Codex
|
|
|
187
203
|
|
|
188
204
|
---
|
|
189
205
|
|
|
206
|
+
## SDK
|
|
207
|
+
|
|
208
|
+
Embed GNO directly in another Bun or TypeScript app. No CLI subprocesses. No local server required.
|
|
209
|
+
|
|
210
|
+
```ts
|
|
211
|
+
import { createDefaultConfig, createGnoClient } from "@gmickel/gno";
|
|
212
|
+
|
|
213
|
+
const config = createDefaultConfig();
|
|
214
|
+
config.collections = [
|
|
215
|
+
{
|
|
216
|
+
name: "notes",
|
|
217
|
+
path: "/Users/me/notes",
|
|
218
|
+
pattern: "**/*",
|
|
219
|
+
include: [],
|
|
220
|
+
exclude: [],
|
|
221
|
+
},
|
|
222
|
+
];
|
|
223
|
+
|
|
224
|
+
const client = await createGnoClient({
|
|
225
|
+
config,
|
|
226
|
+
dbPath: "/tmp/gno-sdk.sqlite",
|
|
227
|
+
});
|
|
228
|
+
|
|
229
|
+
await client.index({ noEmbed: true });
|
|
230
|
+
|
|
231
|
+
const results = await client.query("JWT token flow", {
|
|
232
|
+
noExpand: true,
|
|
233
|
+
noRerank: true,
|
|
234
|
+
});
|
|
235
|
+
|
|
236
|
+
console.log(results.results[0]?.uri);
|
|
237
|
+
await client.close();
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
Core SDK surface:
|
|
241
|
+
|
|
242
|
+
- `createGnoClient({ config | configPath, dbPath? })`
|
|
243
|
+
- `search`, `vsearch`, `query`, `ask`
|
|
244
|
+
- `get`, `multiGet`, `list`, `status`
|
|
245
|
+
- `update`, `embed`, `index`
|
|
246
|
+
- `close`
|
|
247
|
+
|
|
248
|
+
Install in an app:
|
|
249
|
+
|
|
250
|
+
```bash
|
|
251
|
+
bun add @gmickel/gno
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
Full guide: [SDK docs](https://gno.sh/docs/SDK/)
|
|
255
|
+
|
|
256
|
+
---
|
|
257
|
+
|
|
190
258
|
## Search Modes
|
|
191
259
|
|
|
192
260
|
| Command | Mode | Best For |
|
|
@@ -228,11 +296,15 @@ gno query "auth flow" \
|
|
|
228
296
|
--query-mode intent:"how refresh token rotation works" \
|
|
229
297
|
--query-mode hyde:"Refresh tokens rotate on each use and previous tokens are revoked." \
|
|
230
298
|
--explain
|
|
299
|
+
|
|
300
|
+
# Multi-line structured query document
|
|
301
|
+
gno query $'auth flow\nterm: "refresh token" -oauth1\nintent: how refresh token rotation works\nhyde: Refresh tokens rotate on each use and previous tokens are revoked.' --fast
|
|
231
302
|
```
|
|
232
303
|
|
|
233
304
|
- Modes: `term` (BM25-focused), `intent` (semantic-focused), `hyde` (single hypothetical passage)
|
|
234
305
|
- Explain includes stage timings, fallback/cache counters, and per-result score components
|
|
235
306
|
- `gno ask --json` includes `meta.answerContext` for adaptive source selection traces
|
|
307
|
+
- Search and Ask web text boxes also accept multi-line structured query documents with `Shift+Enter`
|
|
236
308
|
|
|
237
309
|
---
|
|
238
310
|
|
|
@@ -509,6 +581,7 @@ models:
|
|
|
509
581
|
name: Remote GPU Server
|
|
510
582
|
embed: "http://192.168.1.100:8081/v1/embeddings#bge-m3"
|
|
511
583
|
rerank: "http://192.168.1.100:8082/v1/completions#reranker"
|
|
584
|
+
expand: "http://192.168.1.100:8083/v1/chat/completions#gno-expand"
|
|
512
585
|
gen: "http://192.168.1.100:8083/v1/chat/completions#qwen3-4b"
|
|
513
586
|
```
|
|
514
587
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gmickel/gno",
|
|
3
|
-
"version": "0.22.6",
|
|
3
|
+
"version": "0.25.0",
|
|
4
4
|
"description": "Local semantic search for your documents. Index Markdown, PDF, and Office files with hybrid BM25 + vector search.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"embeddings",
|
|
@@ -33,7 +33,19 @@
|
|
|
33
33
|
"vendor"
|
|
34
34
|
],
|
|
35
35
|
"type": "module",
|
|
36
|
-
"
|
|
36
|
+
"main": "src/sdk/index.ts",
|
|
37
|
+
"module": "src/sdk/index.ts",
|
|
38
|
+
"types": "src/sdk/index.ts",
|
|
39
|
+
"exports": {
|
|
40
|
+
".": {
|
|
41
|
+
"types": "./src/sdk/index.ts",
|
|
42
|
+
"default": "./src/sdk/index.ts"
|
|
43
|
+
},
|
|
44
|
+
"./cli": {
|
|
45
|
+
"default": "./src/index.ts"
|
|
46
|
+
},
|
|
47
|
+
"./package.json": "./package.json"
|
|
48
|
+
},
|
|
37
49
|
"publishConfig": {
|
|
38
50
|
"access": "public"
|
|
39
51
|
},
|
package/src/cli/AGENTS.md
CHANGED
package/src/cli/CLAUDE.md
CHANGED
package/src/cli/commands/ask.ts
CHANGED
|
@@ -40,7 +40,9 @@ export type AskCommandOptions = AskOptions & {
|
|
|
40
40
|
configPath?: string;
|
|
41
41
|
/** Override embedding model */
|
|
42
42
|
embedModel?: string;
|
|
43
|
-
/** Override
|
|
43
|
+
/** Override expansion model */
|
|
44
|
+
expandModel?: string;
|
|
45
|
+
/** Override answer generation model */
|
|
44
46
|
genModel?: string;
|
|
45
47
|
/** Override rerank model */
|
|
46
48
|
rerankModel?: string;
|
|
@@ -82,7 +84,8 @@ export async function ask(
|
|
|
82
84
|
const { store, config } = initResult;
|
|
83
85
|
|
|
84
86
|
let embedPort: EmbeddingPort | null = null;
|
|
85
|
-
let
|
|
87
|
+
let expandPort: GenerationPort | null = null;
|
|
88
|
+
let answerPort: GenerationPort | null = null;
|
|
86
89
|
let rerankPort: RerankPort | null = null;
|
|
87
90
|
|
|
88
91
|
try {
|
|
@@ -113,10 +116,23 @@ export async function ask(
|
|
|
113
116
|
embedPort = embedResult.value;
|
|
114
117
|
}
|
|
115
118
|
|
|
116
|
-
// Create
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
119
|
+
// Create expansion port when expansion is enabled.
|
|
120
|
+
if (!options.noExpand && !options.queryModes?.length) {
|
|
121
|
+
const expandUri =
|
|
122
|
+
options.expandModel ?? options.genModel ?? preset.expand;
|
|
123
|
+
const genResult = await llm.createExpansionPort(expandUri, {
|
|
124
|
+
policy,
|
|
125
|
+
onProgress: downloadProgress
|
|
126
|
+
? (progress) => downloadProgress("expand", progress)
|
|
127
|
+
: undefined,
|
|
128
|
+
});
|
|
129
|
+
if (genResult.ok) {
|
|
130
|
+
expandPort = genResult.value;
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
// Create answer generation port when answers are requested.
|
|
135
|
+
if (options.answer) {
|
|
120
136
|
const genUri = options.genModel ?? preset.gen;
|
|
121
137
|
const genResult = await llm.createGenerationPort(genUri, {
|
|
122
138
|
policy,
|
|
@@ -125,7 +141,7 @@ export async function ask(
|
|
|
125
141
|
: undefined,
|
|
126
142
|
});
|
|
127
143
|
if (genResult.ok) {
|
|
128
|
-
|
|
144
|
+
answerPort = genResult.value;
|
|
129
145
|
}
|
|
130
146
|
}
|
|
131
147
|
|
|
@@ -170,7 +186,7 @@ export async function ask(
|
|
|
170
186
|
config,
|
|
171
187
|
vectorIndex,
|
|
172
188
|
embedPort,
|
|
173
|
-
|
|
189
|
+
expandPort,
|
|
174
190
|
rerankPort,
|
|
175
191
|
};
|
|
176
192
|
|
|
@@ -178,7 +194,7 @@ export async function ask(
|
|
|
178
194
|
const answerRequested = options.answer && !options.noAnswer;
|
|
179
195
|
|
|
180
196
|
// Fail early if --answer is requested but no generation model available
|
|
181
|
-
if (answerRequested &&
|
|
197
|
+
if (answerRequested && answerPort === null) {
|
|
182
198
|
return {
|
|
183
199
|
success: false,
|
|
184
200
|
error:
|
|
@@ -223,12 +239,12 @@ export async function ask(
|
|
|
223
239
|
// 2. --no-answer was not set
|
|
224
240
|
// 3. We have results to ground on (no point generating from nothing)
|
|
225
241
|
const shouldGenerateAnswer =
|
|
226
|
-
answerRequested &&
|
|
242
|
+
answerRequested && answerPort !== null && results.length > 0;
|
|
227
243
|
|
|
228
|
-
if (shouldGenerateAnswer &&
|
|
244
|
+
if (shouldGenerateAnswer && answerPort) {
|
|
229
245
|
const maxTokens = options.maxAnswerTokens ?? 512;
|
|
230
246
|
const rawResult = await generateGroundedAnswer(
|
|
231
|
-
{ genPort, store },
|
|
247
|
+
{ genPort: answerPort, store },
|
|
232
248
|
query,
|
|
233
249
|
results,
|
|
234
250
|
maxTokens
|
|
@@ -277,8 +293,11 @@ export async function ask(
|
|
|
277
293
|
if (embedPort) {
|
|
278
294
|
await embedPort.dispose();
|
|
279
295
|
}
|
|
280
|
-
if (
|
|
281
|
-
await
|
|
296
|
+
if (expandPort) {
|
|
297
|
+
await expandPort.dispose();
|
|
298
|
+
}
|
|
299
|
+
if (answerPort) {
|
|
300
|
+
await answerPort.dispose();
|
|
282
301
|
}
|
|
283
302
|
if (rerankPort) {
|
|
284
303
|
await rerankPort.dispose();
|
|
@@ -21,6 +21,8 @@ export interface ModelsClearOptions {
|
|
|
21
21
|
embed?: boolean;
|
|
22
22
|
/** Clear reranker model */
|
|
23
23
|
rerank?: boolean;
|
|
24
|
+
/** Clear expansion model */
|
|
25
|
+
expand?: boolean;
|
|
24
26
|
/** Clear generation model */
|
|
25
27
|
gen?: boolean;
|
|
26
28
|
/** Skip confirmation */
|
|
@@ -50,7 +52,7 @@ export async function modelsClear(
|
|
|
50
52
|
|
|
51
53
|
if (options.all) {
|
|
52
54
|
types = undefined; // Clear all
|
|
53
|
-
} else if (options.embed || options.rerank || options.gen) {
|
|
55
|
+
} else if (options.embed || options.rerank || options.expand || options.gen) {
|
|
54
56
|
types = [];
|
|
55
57
|
if (options.embed) {
|
|
56
58
|
types.push("embed");
|
|
@@ -58,6 +60,9 @@ export async function modelsClear(
|
|
|
58
60
|
if (options.rerank) {
|
|
59
61
|
types.push("rerank");
|
|
60
62
|
}
|
|
63
|
+
if (options.expand) {
|
|
64
|
+
types.push("expand");
|
|
65
|
+
}
|
|
61
66
|
if (options.gen) {
|
|
62
67
|
types.push("gen");
|
|
63
68
|
}
|
|
@@ -71,7 +76,7 @@ export async function modelsClear(
|
|
|
71
76
|
const sizeAfter = await cache.totalSize();
|
|
72
77
|
|
|
73
78
|
return {
|
|
74
|
-
cleared: types ?? ["embed", "rerank", "gen"],
|
|
79
|
+
cleared: types ?? ["embed", "rerank", "expand", "gen"],
|
|
75
80
|
sizeBefore,
|
|
76
81
|
sizeAfter,
|
|
77
82
|
};
|
|
@@ -96,8 +101,10 @@ function formatBytes(bytes: number): string {
|
|
|
96
101
|
*/
|
|
97
102
|
export function formatModelsClear(result: ModelsClearResult): string {
|
|
98
103
|
const lines: string[] = [];
|
|
104
|
+
const label = (type: ModelType) =>
|
|
105
|
+
type === "gen" ? "answer" : type === "expand" ? "expand" : type;
|
|
99
106
|
|
|
100
|
-
lines.push(`Cleared: ${result.cleared.join(", ")}`);
|
|
107
|
+
lines.push(`Cleared: ${result.cleared.map(label).join(", ")}`);
|
|
101
108
|
lines.push(`Freed: ${formatBytes(result.sizeBefore - result.sizeAfter)}`);
|
|
102
109
|
|
|
103
110
|
return lines.join("\n");
|
|
@@ -36,6 +36,7 @@ export interface ModelsListResult {
|
|
|
36
36
|
presets: PresetInfo[];
|
|
37
37
|
embed: ModelStatus;
|
|
38
38
|
rerank: ModelStatus;
|
|
39
|
+
expand: ModelStatus;
|
|
39
40
|
gen: ModelStatus;
|
|
40
41
|
cacheDir: string;
|
|
41
42
|
totalSize: number;
|
|
@@ -84,9 +85,10 @@ export async function modelsList(
|
|
|
84
85
|
const preset = getActivePreset(config);
|
|
85
86
|
const cache = new ModelCache(getModelsCachePath());
|
|
86
87
|
|
|
87
|
-
const [embed, rerank, gen] = await Promise.all([
|
|
88
|
+
const [embed, rerank, expand, gen] = await Promise.all([
|
|
88
89
|
getModelStatus(cache, preset.embed),
|
|
89
90
|
getModelStatus(cache, preset.rerank),
|
|
91
|
+
getModelStatus(cache, preset.expand ?? preset.gen),
|
|
90
92
|
getModelStatus(cache, preset.gen),
|
|
91
93
|
]);
|
|
92
94
|
|
|
@@ -99,6 +101,7 @@ export async function modelsList(
|
|
|
99
101
|
})),
|
|
100
102
|
embed,
|
|
101
103
|
rerank,
|
|
104
|
+
expand,
|
|
102
105
|
gen,
|
|
103
106
|
cacheDir: cache.dir,
|
|
104
107
|
totalSize: await cache.totalSize(),
|
|
@@ -147,7 +150,11 @@ function formatTerminal(result: ModelsListResult): string {
|
|
|
147
150
|
(result.rerank.size ? ` (${formatBytes(result.rerank.size)})` : "")
|
|
148
151
|
);
|
|
149
152
|
lines.push(
|
|
150
|
-
`
|
|
153
|
+
` expand: ${statusIcon(result.expand)} ${result.expand.uri}` +
|
|
154
|
+
(result.expand.size ? ` (${formatBytes(result.expand.size)})` : "")
|
|
155
|
+
);
|
|
156
|
+
lines.push(
|
|
157
|
+
` answer: ${statusIcon(result.gen)} ${result.gen.uri}` +
|
|
151
158
|
(result.gen.size ? ` (${formatBytes(result.gen.size)})` : "")
|
|
152
159
|
);
|
|
153
160
|
|
|
@@ -156,7 +163,10 @@ function formatTerminal(result: ModelsListResult): string {
|
|
|
156
163
|
lines.push(`Total size: ${formatBytes(result.totalSize)}`);
|
|
157
164
|
|
|
158
165
|
const allCached =
|
|
159
|
-
result.embed.cached &&
|
|
166
|
+
result.embed.cached &&
|
|
167
|
+
result.rerank.cached &&
|
|
168
|
+
result.expand.cached &&
|
|
169
|
+
result.gen.cached;
|
|
160
170
|
if (!allCached) {
|
|
161
171
|
lines.push("");
|
|
162
172
|
lines.push("Run: gno models pull --all");
|
|
@@ -186,7 +196,10 @@ function formatMarkdown(result: ModelsListResult): string {
|
|
|
186
196
|
`| rerank | ${result.rerank.uri} | ${status(result.rerank)} | ${size(result.rerank)} |`
|
|
187
197
|
);
|
|
188
198
|
lines.push(
|
|
189
|
-
`|
|
|
199
|
+
`| expand | ${result.expand.uri} | ${status(result.expand)} | ${size(result.expand)} |`
|
|
200
|
+
);
|
|
201
|
+
lines.push(
|
|
202
|
+
`| answer | ${result.gen.uri} | ${status(result.gen)} | ${size(result.gen)} |`
|
|
190
203
|
);
|
|
191
204
|
|
|
192
205
|
lines.push("");
|
|
@@ -27,6 +27,8 @@ export interface ModelsPullOptions {
|
|
|
27
27
|
embed?: boolean;
|
|
28
28
|
/** Pull reranker model */
|
|
29
29
|
rerank?: boolean;
|
|
30
|
+
/** Pull expansion model */
|
|
31
|
+
expand?: boolean;
|
|
30
32
|
/** Pull generation model */
|
|
31
33
|
gen?: boolean;
|
|
32
34
|
/** Force re-download */
|
|
@@ -59,9 +61,9 @@ export interface ModelsPullResult {
|
|
|
59
61
|
*/
|
|
60
62
|
function getTypesToPull(options: ModelsPullOptions): ModelType[] {
|
|
61
63
|
if (options.all) {
|
|
62
|
-
return ["embed", "rerank", "gen"];
|
|
64
|
+
return ["embed", "rerank", "expand", "gen"];
|
|
63
65
|
}
|
|
64
|
-
if (options.embed || options.rerank || options.gen) {
|
|
66
|
+
if (options.embed || options.rerank || options.expand || options.gen) {
|
|
65
67
|
const types: ModelType[] = [];
|
|
66
68
|
if (options.embed) {
|
|
67
69
|
types.push("embed");
|
|
@@ -69,13 +71,16 @@ function getTypesToPull(options: ModelsPullOptions): ModelType[] {
|
|
|
69
71
|
if (options.rerank) {
|
|
70
72
|
types.push("rerank");
|
|
71
73
|
}
|
|
74
|
+
if (options.expand) {
|
|
75
|
+
types.push("expand");
|
|
76
|
+
}
|
|
72
77
|
if (options.gen) {
|
|
73
78
|
types.push("gen");
|
|
74
79
|
}
|
|
75
80
|
return types;
|
|
76
81
|
}
|
|
77
82
|
// Default: pull all
|
|
78
|
-
return ["embed", "rerank", "gen"];
|
|
83
|
+
return ["embed", "rerank", "expand", "gen"];
|
|
79
84
|
}
|
|
80
85
|
|
|
81
86
|
/**
|
|
@@ -101,7 +106,8 @@ export async function modelsPull(
|
|
|
101
106
|
let skipped = 0;
|
|
102
107
|
|
|
103
108
|
for (const type of types) {
|
|
104
|
-
const uri = preset[type];
|
|
109
|
+
const uri =
|
|
110
|
+
type === "expand" ? (preset.expand ?? preset.gen) : preset[type];
|
|
105
111
|
|
|
106
112
|
// Check if already cached (skip unless --force)
|
|
107
113
|
if (!options.force) {
|
|
@@ -160,16 +166,18 @@ export async function modelsPull(
|
|
|
160
166
|
*/
|
|
161
167
|
export function formatModelsPull(result: ModelsPullResult): string {
|
|
162
168
|
const lines: string[] = [];
|
|
169
|
+
const label = (type: ModelType) =>
|
|
170
|
+
type === "gen" ? "answer" : type === "expand" ? "expand" : type;
|
|
163
171
|
|
|
164
172
|
for (const r of result.results) {
|
|
165
173
|
if (r.ok) {
|
|
166
174
|
if (r.skipped) {
|
|
167
|
-
lines.push(`${r.type}: skipped (already cached)`);
|
|
175
|
+
lines.push(`${label(r.type)}: skipped (already cached)`);
|
|
168
176
|
} else {
|
|
169
|
-
lines.push(`${r.type}: downloaded`);
|
|
177
|
+
lines.push(`${label(r.type)}: downloaded`);
|
|
170
178
|
}
|
|
171
179
|
} else {
|
|
172
|
-
lines.push(`${r.type}: failed - ${r.error}`);
|
|
180
|
+
lines.push(`${label(r.type)}: failed - ${r.error}`);
|
|
173
181
|
}
|
|
174
182
|
}
|
|
175
183
|
|
|
@@ -36,7 +36,9 @@ export type QueryCommandOptions = HybridSearchOptions & {
|
|
|
36
36
|
configPath?: string;
|
|
37
37
|
/** Override embedding model */
|
|
38
38
|
embedModel?: string;
|
|
39
|
-
/** Override
|
|
39
|
+
/** Override expansion model */
|
|
40
|
+
expandModel?: string;
|
|
41
|
+
/** Deprecated alias for expansion model */
|
|
40
42
|
genModel?: string;
|
|
41
43
|
/** Override rerank model */
|
|
42
44
|
rerankModel?: string;
|
|
@@ -90,7 +92,7 @@ export async function query(
|
|
|
90
92
|
const { store, config } = initResult;
|
|
91
93
|
|
|
92
94
|
let embedPort: EmbeddingPort | null = null;
|
|
93
|
-
let
|
|
95
|
+
let expandPort: GenerationPort | null = null;
|
|
94
96
|
let rerankPort: RerankPort | null = null;
|
|
95
97
|
|
|
96
98
|
try {
|
|
@@ -121,18 +123,19 @@ export async function query(
|
|
|
121
123
|
embedPort = embedResult.value;
|
|
122
124
|
}
|
|
123
125
|
|
|
124
|
-
// Create
|
|
126
|
+
// Create expansion port - optional.
|
|
125
127
|
// Skip when structured query modes are provided.
|
|
126
128
|
if (!options.noExpand && !options.queryModes?.length) {
|
|
127
|
-
const
|
|
128
|
-
|
|
129
|
+
const expandUri =
|
|
130
|
+
options.expandModel ?? options.genModel ?? preset.expand;
|
|
131
|
+
const genResult = await llm.createExpansionPort(expandUri, {
|
|
129
132
|
policy,
|
|
130
133
|
onProgress: downloadProgress
|
|
131
|
-
? (progress) => downloadProgress("
|
|
134
|
+
? (progress) => downloadProgress("expand", progress)
|
|
132
135
|
: undefined,
|
|
133
136
|
});
|
|
134
137
|
if (genResult.ok) {
|
|
135
|
-
|
|
138
|
+
expandPort = genResult.value;
|
|
136
139
|
}
|
|
137
140
|
}
|
|
138
141
|
|
|
@@ -177,7 +180,7 @@ export async function query(
|
|
|
177
180
|
config,
|
|
178
181
|
vectorIndex,
|
|
179
182
|
embedPort,
|
|
180
|
-
|
|
183
|
+
expandPort,
|
|
181
184
|
rerankPort,
|
|
182
185
|
};
|
|
183
186
|
|
|
@@ -195,8 +198,8 @@ export async function query(
|
|
|
195
198
|
if (embedPort) {
|
|
196
199
|
await embedPort.dispose();
|
|
197
200
|
}
|
|
198
|
-
if (
|
|
199
|
-
await
|
|
201
|
+
if (expandPort) {
|
|
202
|
+
await expandPort.dispose();
|
|
200
203
|
}
|
|
201
204
|
if (rerankPort) {
|
|
202
205
|
await rerankPort.dispose();
|