@gmickel/gno 0.24.0 → 0.25.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -2
- package/assets/skill/SKILL.md +75 -1
- package/package.json +1 -1
- package/src/cli/AGENTS.md +2 -1
- package/src/cli/CLAUDE.md +2 -1
- package/src/cli/commands/ask.ts +33 -14
- package/src/cli/commands/models/clear.ts +10 -3
- package/src/cli/commands/models/list.ts +17 -4
- package/src/cli/commands/models/pull.ts +15 -7
- package/src/cli/commands/query.ts +13 -10
- package/src/cli/program.ts +47 -44
- package/src/config/types.ts +8 -1
- package/src/core/depth-policy.ts +78 -0
- package/src/llm/errors.ts +1 -1
- package/src/llm/nodeLlamaCpp/adapter.ts +39 -3
- package/src/llm/registry.ts +21 -0
- package/src/llm/types.ts +1 -1
- package/src/mcp/tools/index.ts +2 -2
- package/src/mcp/tools/query.ts +25 -37
- package/src/pipeline/hybrid.ts +4 -4
- package/src/sdk/client.ts +59 -19
- package/src/sdk/types.ts +1 -0
- package/src/serve/AGENTS.md +2 -1
- package/src/serve/CLAUDE.md +2 -1
- package/src/serve/context.ts +23 -9
- package/src/serve/public/app.tsx +8 -0
- package/src/serve/public/components/AIModelSelector.tsx +48 -10
- package/src/serve/public/pages/Ask.tsx +94 -54
- package/src/serve/public/pages/Browse.tsx +141 -5
- package/src/serve/public/pages/Collections.tsx +135 -38
- package/src/serve/public/pages/Dashboard.tsx +31 -4
- package/src/serve/public/pages/GraphView.tsx +24 -0
- package/src/serve/public/pages/Search.tsx +78 -29
- package/src/serve/routes/api.ts +6 -6
package/src/config/types.ts
CHANGED
|
@@ -166,7 +166,9 @@ export const ModelPresetSchema = z.object({
|
|
|
166
166
|
embed: z.string().min(1),
|
|
167
167
|
/** Reranker model URI */
|
|
168
168
|
rerank: z.string().min(1),
|
|
169
|
-
/**
|
|
169
|
+
/** Query expansion model URI (defaults to gen for older configs) */
|
|
170
|
+
expand: z.string().min(1).optional(),
|
|
171
|
+
/** Answer generation model URI */
|
|
170
172
|
gen: z.string().min(1),
|
|
171
173
|
});
|
|
172
174
|
|
|
@@ -180,6 +182,7 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
|
|
|
180
182
|
embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
|
|
181
183
|
rerank:
|
|
182
184
|
"hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
|
|
185
|
+
expand: "hf:unsloth/Qwen3-1.7B-GGUF/Qwen3-1.7B-Q4_K_M.gguf",
|
|
183
186
|
gen: "hf:unsloth/Qwen3-1.7B-GGUF/Qwen3-1.7B-Q4_K_M.gguf",
|
|
184
187
|
},
|
|
185
188
|
{
|
|
@@ -188,6 +191,8 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
|
|
|
188
191
|
embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
|
|
189
192
|
rerank:
|
|
190
193
|
"hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
|
|
194
|
+
expand:
|
|
195
|
+
"hf:bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-Q4_K_M.gguf",
|
|
191
196
|
gen: "hf:bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-Q4_K_M.gguf",
|
|
192
197
|
},
|
|
193
198
|
{
|
|
@@ -196,6 +201,8 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
|
|
|
196
201
|
embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
|
|
197
202
|
rerank:
|
|
198
203
|
"hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
|
|
204
|
+
expand:
|
|
205
|
+
"hf:unsloth/Qwen3-4B-Instruct-2507-GGUF/Qwen3-4B-Instruct-2507-Q4_K_M.gguf",
|
|
199
206
|
gen: "hf:unsloth/Qwen3-4B-Instruct-2507-GGUF/Qwen3-4B-Instruct-2507-Q4_K_M.gguf",
|
|
200
207
|
},
|
|
201
208
|
];
|
package/src/core/depth-policy.ts
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
export type RetrievalDepth = "fast" | "balanced" | "thorough";
|
|
2
|
+
|
|
3
|
+
export interface ResolveDepthPolicyInput {
|
|
4
|
+
presetId?: string;
|
|
5
|
+
fast?: boolean;
|
|
6
|
+
thorough?: boolean;
|
|
7
|
+
expand?: boolean;
|
|
8
|
+
rerank?: boolean;
|
|
9
|
+
candidateLimit?: number;
|
|
10
|
+
hasStructuredModes?: boolean;
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
export interface ResolvedDepthPolicy {
|
|
14
|
+
depth: RetrievalDepth;
|
|
15
|
+
noExpand: boolean;
|
|
16
|
+
noRerank: boolean;
|
|
17
|
+
candidateLimit?: number;
|
|
18
|
+
balancedExpansionEnabled: boolean;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
export const DEFAULT_THOROUGH_CANDIDATE_LIMIT = 40;
|
|
22
|
+
|
|
23
|
+
function normalizePresetId(presetId?: string): string {
|
|
24
|
+
return presetId?.trim().toLowerCase() || "slim";
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
export function balancedUsesExpansion(presetId?: string): boolean {
|
|
28
|
+
const normalized = normalizePresetId(presetId);
|
|
29
|
+
return normalized === "slim" || normalized === "slim-tuned";
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
export function resolveDepthPolicy(
|
|
33
|
+
input: ResolveDepthPolicyInput
|
|
34
|
+
): ResolvedDepthPolicy {
|
|
35
|
+
const balancedExpansionEnabled = balancedUsesExpansion(input.presetId);
|
|
36
|
+
let depth: RetrievalDepth = "balanced";
|
|
37
|
+
let noExpand = !balancedExpansionEnabled;
|
|
38
|
+
let noRerank = false;
|
|
39
|
+
let candidateLimit = input.candidateLimit;
|
|
40
|
+
|
|
41
|
+
if (input.fast) {
|
|
42
|
+
depth = "fast";
|
|
43
|
+
noExpand = true;
|
|
44
|
+
noRerank = true;
|
|
45
|
+
} else if (input.thorough) {
|
|
46
|
+
depth = "thorough";
|
|
47
|
+
noExpand = false;
|
|
48
|
+
noRerank = false;
|
|
49
|
+
candidateLimit ??= DEFAULT_THOROUGH_CANDIDATE_LIMIT;
|
|
50
|
+
} else {
|
|
51
|
+
if (input.expand === true) {
|
|
52
|
+
noExpand = false;
|
|
53
|
+
}
|
|
54
|
+
if (input.expand === false) {
|
|
55
|
+
noExpand = true;
|
|
56
|
+
}
|
|
57
|
+
if (input.rerank === true) {
|
|
58
|
+
noRerank = false;
|
|
59
|
+
}
|
|
60
|
+
if (input.rerank === false) {
|
|
61
|
+
noRerank = true;
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
// Structured query modes supply explicit expansions and should not trigger
|
|
66
|
+
// an additional generated expansion step.
|
|
67
|
+
if (input.hasStructuredModes) {
|
|
68
|
+
noExpand = true;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
return {
|
|
72
|
+
depth,
|
|
73
|
+
noExpand,
|
|
74
|
+
noRerank,
|
|
75
|
+
candidateLimit,
|
|
76
|
+
balancedExpansionEnabled,
|
|
77
|
+
};
|
|
78
|
+
}
|
package/src/llm/errors.ts
CHANGED
|
@@ -121,7 +121,7 @@ export function modelNotFoundError(uri: string, details?: string): LlmError {
|
|
|
121
121
|
|
|
122
122
|
export function modelNotCachedError(
|
|
123
123
|
uri: string,
|
|
124
|
-
modelType: "embed" | "rerank" | "gen"
|
|
124
|
+
modelType: "embed" | "rerank" | "expand" | "gen"
|
|
125
125
|
): LlmError {
|
|
126
126
|
return llmError("MODEL_NOT_CACHED", {
|
|
127
127
|
message: `${modelType} model not cached`,
|
package/src/llm/nodeLlamaCpp/adapter.ts
CHANGED
|
@@ -19,7 +19,12 @@ import { ModelCache } from "../cache";
|
|
|
19
19
|
import { HttpEmbedding, isHttpModelUri } from "../httpEmbedding";
|
|
20
20
|
import { HttpGeneration, isHttpGenUri } from "../httpGeneration";
|
|
21
21
|
import { HttpRerank, isHttpRerankUri } from "../httpRerank";
|
|
22
|
-
import {
|
|
22
|
+
import {
|
|
23
|
+
getActivePreset,
|
|
24
|
+
getAnswerModelUri,
|
|
25
|
+
getExpandModelUri,
|
|
26
|
+
getModelConfig,
|
|
27
|
+
} from "../registry";
|
|
23
28
|
import { NodeLlamaCppEmbedding } from "./embedding";
|
|
24
29
|
import { NodeLlamaCppGeneration } from "./generation";
|
|
25
30
|
import { getModelManager, type ModelManager } from "./lifecycle";
|
|
@@ -105,8 +110,7 @@ export class LlmAdapter {
|
|
|
105
110
|
modelUri?: string,
|
|
106
111
|
options?: CreatePortOptions
|
|
107
112
|
): Promise<LlmResult<GenerationPort>> {
|
|
108
|
-
const
|
|
109
|
-
const uri = modelUri ?? preset.gen;
|
|
113
|
+
const uri = getAnswerModelUri(this.config, modelUri);
|
|
110
114
|
const policy = options?.policy ?? DEFAULT_POLICY;
|
|
111
115
|
|
|
112
116
|
// Use HTTP generation for remote endpoints
|
|
@@ -132,6 +136,38 @@ export class LlmAdapter {
|
|
|
132
136
|
};
|
|
133
137
|
}
|
|
134
138
|
|
|
139
|
+
/**
|
|
140
|
+
* Create a generation port dedicated to query expansion.
|
|
141
|
+
* Uses preset.expand when configured, else falls back to preset.gen.
|
|
142
|
+
*/
|
|
143
|
+
async createExpansionPort(
|
|
144
|
+
modelUri?: string,
|
|
145
|
+
options?: CreatePortOptions
|
|
146
|
+
): Promise<LlmResult<GenerationPort>> {
|
|
147
|
+
const uri = getExpandModelUri(this.config, modelUri);
|
|
148
|
+
const policy = options?.policy ?? DEFAULT_POLICY;
|
|
149
|
+
|
|
150
|
+
if (isHttpGenUri(uri)) {
|
|
151
|
+
const httpGen = new HttpGeneration(uri);
|
|
152
|
+
return { ok: true, value: httpGen };
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
const resolved = await this.cache.ensureModel(
|
|
156
|
+
uri,
|
|
157
|
+
"expand",
|
|
158
|
+
policy,
|
|
159
|
+
options?.onProgress
|
|
160
|
+
);
|
|
161
|
+
if (!resolved.ok) {
|
|
162
|
+
return resolved;
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
return {
|
|
166
|
+
ok: true,
|
|
167
|
+
value: new NodeLlamaCppGeneration(this.manager, uri, resolved.value),
|
|
168
|
+
};
|
|
169
|
+
}
|
|
170
|
+
|
|
135
171
|
/**
|
|
136
172
|
* Create a rerank port.
|
|
137
173
|
* Supports HTTP endpoints for remote reranking models.
|
package/src/llm/registry.ts
CHANGED
|
@@ -57,6 +57,24 @@ export function getActivePreset(config: Config): ModelPreset {
|
|
|
57
57
|
return builtIn;
|
|
58
58
|
}
|
|
59
59
|
|
|
60
|
+
export function getExpandModelUri(config: Config, override?: string): string {
|
|
61
|
+
if (override) {
|
|
62
|
+
return override;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
const preset = getActivePreset(config);
|
|
66
|
+
return preset.expand ?? preset.gen;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
export function getAnswerModelUri(config: Config, override?: string): string {
|
|
70
|
+
if (override) {
|
|
71
|
+
return override;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
const preset = getActivePreset(config);
|
|
75
|
+
return preset.gen;
|
|
76
|
+
}
|
|
77
|
+
|
|
60
78
|
/**
|
|
61
79
|
* Resolve a model URI for a given type.
|
|
62
80
|
* Uses override if provided, otherwise from active preset.
|
|
@@ -70,6 +88,9 @@ export function resolveModelUri(
|
|
|
70
88
|
return override;
|
|
71
89
|
}
|
|
72
90
|
const preset = getActivePreset(config);
|
|
91
|
+
if (type === "expand") {
|
|
92
|
+
return preset.expand ?? preset.gen;
|
|
93
|
+
}
|
|
73
94
|
return preset[type];
|
|
74
95
|
}
|
|
75
96
|
|
package/src/llm/types.ts
CHANGED
|
@@ -19,7 +19,7 @@ export type LlmResult<T> =
|
|
|
19
19
|
// Model Types
|
|
20
20
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
21
21
|
|
|
22
|
-
export type ModelType = "embed" | "rerank" | "gen";
|
|
22
|
+
export type ModelType = "embed" | "rerank" | "expand" | "gen";
|
|
23
23
|
|
|
24
24
|
/** Model URI format: hf:org/repo/file.gguf or file:/path */
|
|
25
25
|
export type ModelUri = string;
|
package/src/mcp/tools/index.ts
CHANGED
|
@@ -149,8 +149,8 @@ export const queryInputSchema = z.object({
|
|
|
149
149
|
.optional(),
|
|
150
150
|
fast: z.boolean().default(false),
|
|
151
151
|
thorough: z.boolean().default(false),
|
|
152
|
-
expand: z.boolean().
|
|
153
|
-
rerank: z.boolean().
|
|
152
|
+
expand: z.boolean().optional(),
|
|
153
|
+
rerank: z.boolean().optional(),
|
|
154
154
|
tagsAll: z.array(z.string()).optional(),
|
|
155
155
|
tagsAny: z.array(z.string()).optional(),
|
|
156
156
|
});
|
package/src/mcp/tools/query.ts
CHANGED
|
@@ -20,6 +20,7 @@ import type { ToolContext } from "../server";
|
|
|
20
20
|
|
|
21
21
|
import { parseUri } from "../../app/constants";
|
|
22
22
|
import { createNonTtyProgressRenderer } from "../../cli/progress";
|
|
23
|
+
import { resolveDepthPolicy } from "../../core/depth-policy";
|
|
23
24
|
import { normalizeStructuredQueryInput } from "../../core/structured-query";
|
|
24
25
|
import { LlmAdapter } from "../../llm/nodeLlamaCpp/adapter";
|
|
25
26
|
import { resolveDownloadPolicy } from "../../llm/policy";
|
|
@@ -167,7 +168,7 @@ export function handleQuery(
|
|
|
167
168
|
const downloadProgress = createNonTtyProgressRenderer();
|
|
168
169
|
|
|
169
170
|
let embedPort: EmbeddingPort | null = null;
|
|
170
|
-
let
|
|
171
|
+
let expandPort: GenerationPort | null = null;
|
|
171
172
|
let rerankPort: RerankPort | null = null;
|
|
172
173
|
let vectorIndex: VectorIndexPort | null = null;
|
|
173
174
|
|
|
@@ -181,42 +182,29 @@ export function handleQuery(
|
|
|
181
182
|
embedPort = embedResult.value;
|
|
182
183
|
}
|
|
183
184
|
|
|
184
|
-
// Determine noExpand/noRerank based on mode flags
|
|
185
|
-
// Priority: fast > thorough > expand/rerank params > defaults
|
|
186
|
-
// Default: noExpand=true (skip expansion), noRerank=false (with reranking)
|
|
187
185
|
const hasStructuredModes = Boolean(queryModes?.length);
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
}
|
|
198
|
-
// Use explicit expand/rerank params if provided
|
|
199
|
-
if (args.expand === true) {
|
|
200
|
-
noExpand = false;
|
|
201
|
-
}
|
|
202
|
-
if (args.rerank === false) {
|
|
203
|
-
noRerank = true;
|
|
204
|
-
}
|
|
205
|
-
}
|
|
206
|
-
|
|
207
|
-
// Structured query modes replace generated expansion.
|
|
208
|
-
if (hasStructuredModes) {
|
|
209
|
-
noExpand = true;
|
|
210
|
-
}
|
|
186
|
+
const depthPolicy = resolveDepthPolicy({
|
|
187
|
+
presetId: preset.id,
|
|
188
|
+
fast: args.fast,
|
|
189
|
+
thorough: args.thorough,
|
|
190
|
+
expand: args.expand,
|
|
191
|
+
rerank: args.rerank,
|
|
192
|
+
candidateLimit: args.candidateLimit,
|
|
193
|
+
hasStructuredModes,
|
|
194
|
+
});
|
|
195
|
+
const { noExpand, noRerank } = depthPolicy;
|
|
211
196
|
|
|
212
|
-
// Create
|
|
197
|
+
// Create expansion port - optional
|
|
213
198
|
if (!noExpand && !hasStructuredModes) {
|
|
214
|
-
const genResult = await llm.
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
199
|
+
const genResult = await llm.createExpansionPort(
|
|
200
|
+
preset.expand ?? preset.gen,
|
|
201
|
+
{
|
|
202
|
+
policy,
|
|
203
|
+
onProgress: (progress) => downloadProgress("expand", progress),
|
|
204
|
+
}
|
|
205
|
+
);
|
|
218
206
|
if (genResult.ok) {
|
|
219
|
-
|
|
207
|
+
expandPort = genResult.value;
|
|
220
208
|
}
|
|
221
209
|
}
|
|
222
210
|
|
|
@@ -252,7 +240,7 @@ export function handleQuery(
|
|
|
252
240
|
config: ctx.config,
|
|
253
241
|
vectorIndex,
|
|
254
242
|
embedPort,
|
|
255
|
-
|
|
243
|
+
expandPort,
|
|
256
244
|
rerankPort,
|
|
257
245
|
};
|
|
258
246
|
|
|
@@ -265,7 +253,7 @@ export function handleQuery(
|
|
|
265
253
|
collection: args.collection,
|
|
266
254
|
queryLanguageHint: args.lang, // Affects expansion prompt, not retrieval
|
|
267
255
|
intent: args.intent,
|
|
268
|
-
candidateLimit:
|
|
256
|
+
candidateLimit: depthPolicy.candidateLimit,
|
|
269
257
|
exclude: args.exclude,
|
|
270
258
|
since: args.since,
|
|
271
259
|
until: args.until,
|
|
@@ -298,8 +286,8 @@ export function handleQuery(
|
|
|
298
286
|
if (embedPort) {
|
|
299
287
|
await embedPort.dispose();
|
|
300
288
|
}
|
|
301
|
-
if (
|
|
302
|
-
await
|
|
289
|
+
if (expandPort) {
|
|
290
|
+
await expandPort.dispose();
|
|
303
291
|
}
|
|
304
292
|
if (rerankPort) {
|
|
305
293
|
await rerankPort.dispose();
|
package/src/pipeline/hybrid.ts
CHANGED
|
@@ -60,7 +60,7 @@ export interface HybridSearchDeps {
|
|
|
60
60
|
config: Config;
|
|
61
61
|
vectorIndex: VectorIndexPort | null;
|
|
62
62
|
embedPort: EmbeddingPort | null;
|
|
63
|
-
|
|
63
|
+
expandPort: GenerationPort | null;
|
|
64
64
|
rerankPort: RerankPort | null;
|
|
65
65
|
pipelineConfig?: PipelineConfig;
|
|
66
66
|
}
|
|
@@ -249,7 +249,7 @@ export async function searchHybrid(
|
|
|
249
249
|
options: HybridSearchOptions = {}
|
|
250
250
|
): Promise<ReturnType<typeof ok<SearchResults>>> {
|
|
251
251
|
const runStartedAt = performance.now();
|
|
252
|
-
const { store, vectorIndex, embedPort,
|
|
252
|
+
const { store, vectorIndex, embedPort, expandPort, rerankPort } = deps;
|
|
253
253
|
const pipelineConfig = deps.pipelineConfig ?? DEFAULT_PIPELINE_CONFIG;
|
|
254
254
|
|
|
255
255
|
const limit = options.limit ?? 20;
|
|
@@ -318,7 +318,7 @@ export async function searchHybrid(
|
|
|
318
318
|
// 1. Check if expansion needed
|
|
319
319
|
// ─────────────────────────────────────────────────────────────────────────
|
|
320
320
|
const expansionStartedAt = performance.now();
|
|
321
|
-
const shouldExpand = !options.noExpand &&
|
|
321
|
+
const shouldExpand = !options.noExpand && expandPort !== null;
|
|
322
322
|
let expansionStatus: ExpansionStatus = "disabled";
|
|
323
323
|
let queryModeSummary: ReturnType<typeof summarizeQueryModes> | undefined =
|
|
324
324
|
undefined;
|
|
@@ -349,7 +349,7 @@ export async function searchHybrid(
|
|
|
349
349
|
counters.fallbackEvents.push("expansion_skipped_strong");
|
|
350
350
|
} else {
|
|
351
351
|
expansionStatus = "attempted";
|
|
352
|
-
const expandResult = await expandQuery(
|
|
352
|
+
const expandResult = await expandQuery(expandPort, query, {
|
|
353
353
|
// Use queryLanguage for prompt selection, NOT options.lang (retrieval filter)
|
|
354
354
|
lang: queryLanguage,
|
|
355
355
|
timeout: pipelineConfig.expansionTimeout,
|
package/src/sdk/client.ts
CHANGED
|
@@ -65,7 +65,8 @@ interface OpenedClientState {
|
|
|
65
65
|
|
|
66
66
|
interface RuntimePorts {
|
|
67
67
|
embedPort: EmbeddingPort | null;
|
|
68
|
-
|
|
68
|
+
expandPort: GenerationPort | null;
|
|
69
|
+
answerPort: GenerationPort | null;
|
|
69
70
|
rerankPort: RerankPort | null;
|
|
70
71
|
vectorIndex: VectorIndexPort | null;
|
|
71
72
|
}
|
|
@@ -179,19 +180,23 @@ class GnoClientImpl implements GnoClient {
|
|
|
179
180
|
|
|
180
181
|
private async createRuntimePorts(options: {
|
|
181
182
|
embed?: boolean;
|
|
182
|
-
|
|
183
|
+
expand?: boolean;
|
|
184
|
+
answer?: boolean;
|
|
183
185
|
rerank?: boolean;
|
|
184
186
|
requiredEmbed?: boolean;
|
|
185
|
-
|
|
187
|
+
requiredExpand?: boolean;
|
|
188
|
+
requiredAnswer?: boolean;
|
|
186
189
|
requiredRerank?: boolean;
|
|
187
190
|
embedModel?: string;
|
|
191
|
+
expandModel?: string;
|
|
188
192
|
genModel?: string;
|
|
189
193
|
rerankModel?: string;
|
|
190
194
|
}): Promise<RuntimePorts> {
|
|
191
195
|
this.assertOpen();
|
|
192
196
|
|
|
193
197
|
let embedPort: EmbeddingPort | null = null;
|
|
194
|
-
let
|
|
198
|
+
let expandPort: GenerationPort | null = null;
|
|
199
|
+
let answerPort: GenerationPort | null = null;
|
|
195
200
|
let rerankPort: RerankPort | null = null;
|
|
196
201
|
let vectorIndex: VectorIndexPort | null = null;
|
|
197
202
|
|
|
@@ -234,16 +239,38 @@ class GnoClientImpl implements GnoClient {
|
|
|
234
239
|
}
|
|
235
240
|
}
|
|
236
241
|
|
|
237
|
-
if (options.
|
|
242
|
+
if (options.expand) {
|
|
243
|
+
const genResult = await this.llm.createExpansionPort(
|
|
244
|
+
options.expandModel ?? options.genModel,
|
|
245
|
+
{
|
|
246
|
+
policy: this.downloadPolicy,
|
|
247
|
+
}
|
|
248
|
+
);
|
|
249
|
+
if (genResult.ok) {
|
|
250
|
+
expandPort = genResult.value;
|
|
251
|
+
} else if (options.requiredExpand) {
|
|
252
|
+
if (embedPort) {
|
|
253
|
+
await embedPort.dispose();
|
|
254
|
+
}
|
|
255
|
+
throw sdkError("MODEL", genResult.error.message, {
|
|
256
|
+
cause: genResult.error.cause,
|
|
257
|
+
});
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
if (options.answer) {
|
|
238
262
|
const genResult = await this.llm.createGenerationPort(options.genModel, {
|
|
239
263
|
policy: this.downloadPolicy,
|
|
240
264
|
});
|
|
241
265
|
if (genResult.ok) {
|
|
242
|
-
|
|
243
|
-
} else if (options.
|
|
266
|
+
answerPort = genResult.value;
|
|
267
|
+
} else if (options.requiredAnswer) {
|
|
244
268
|
if (embedPort) {
|
|
245
269
|
await embedPort.dispose();
|
|
246
270
|
}
|
|
271
|
+
if (expandPort) {
|
|
272
|
+
await expandPort.dispose();
|
|
273
|
+
}
|
|
247
274
|
throw sdkError("MODEL", genResult.error.message, {
|
|
248
275
|
cause: genResult.error.cause,
|
|
249
276
|
});
|
|
@@ -263,8 +290,11 @@ class GnoClientImpl implements GnoClient {
|
|
|
263
290
|
if (embedPort) {
|
|
264
291
|
await embedPort.dispose();
|
|
265
292
|
}
|
|
266
|
-
if (
|
|
267
|
-
await
|
|
293
|
+
if (expandPort) {
|
|
294
|
+
await expandPort.dispose();
|
|
295
|
+
}
|
|
296
|
+
if (answerPort) {
|
|
297
|
+
await answerPort.dispose();
|
|
268
298
|
}
|
|
269
299
|
throw sdkError("MODEL", rerankResult.error.message, {
|
|
270
300
|
cause: rerankResult.error.cause,
|
|
@@ -272,15 +302,18 @@ class GnoClientImpl implements GnoClient {
|
|
|
272
302
|
}
|
|
273
303
|
}
|
|
274
304
|
|
|
275
|
-
return { embedPort,
|
|
305
|
+
return { embedPort, expandPort, answerPort, rerankPort, vectorIndex };
|
|
276
306
|
}
|
|
277
307
|
|
|
278
308
|
private async disposeRuntimePorts(ports: RuntimePorts): Promise<void> {
|
|
279
309
|
if (ports.embedPort) {
|
|
280
310
|
await ports.embedPort.dispose();
|
|
281
311
|
}
|
|
282
|
-
if (ports.
|
|
283
|
-
await ports.
|
|
312
|
+
if (ports.expandPort) {
|
|
313
|
+
await ports.expandPort.dispose();
|
|
314
|
+
}
|
|
315
|
+
if (ports.answerPort) {
|
|
316
|
+
await ports.answerPort.dispose();
|
|
284
317
|
}
|
|
285
318
|
if (ports.rerankPort) {
|
|
286
319
|
await ports.rerankPort.dispose();
|
|
@@ -366,9 +399,10 @@ class GnoClientImpl implements GnoClient {
|
|
|
366
399
|
|
|
367
400
|
const ports = await this.createRuntimePorts({
|
|
368
401
|
embed: true,
|
|
369
|
-
|
|
402
|
+
expand: !options.noExpand && !options.queryModes?.length,
|
|
370
403
|
rerank: !options.noRerank,
|
|
371
404
|
embedModel: options.embedModel,
|
|
405
|
+
expandModel: options.expandModel,
|
|
372
406
|
genModel: options.genModel,
|
|
373
407
|
rerankModel: options.rerankModel,
|
|
374
408
|
});
|
|
@@ -381,7 +415,7 @@ class GnoClientImpl implements GnoClient {
|
|
|
381
415
|
config: this.config,
|
|
382
416
|
vectorIndex: ports.vectorIndex,
|
|
383
417
|
embedPort: ports.embedPort,
|
|
384
|
-
|
|
418
|
+
expandPort: ports.expandPort,
|
|
385
419
|
rerankPort: ports.rerankPort,
|
|
386
420
|
},
|
|
387
421
|
query,
|
|
@@ -416,15 +450,17 @@ class GnoClientImpl implements GnoClient {
|
|
|
416
450
|
const needsExpansionGen = !options.noExpand && !options.queryModes?.length;
|
|
417
451
|
const ports = await this.createRuntimePorts({
|
|
418
452
|
embed: true,
|
|
419
|
-
|
|
453
|
+
expand: needsExpansionGen,
|
|
454
|
+
answer: answerRequested,
|
|
420
455
|
rerank: !options.noRerank,
|
|
456
|
+
expandModel: options.expandModel,
|
|
421
457
|
genModel: options.genModel,
|
|
422
458
|
embedModel: options.embedModel,
|
|
423
459
|
rerankModel: options.rerankModel,
|
|
424
460
|
});
|
|
425
461
|
|
|
426
462
|
try {
|
|
427
|
-
if (answerRequested && !ports.
|
|
463
|
+
if (answerRequested && !ports.answerPort) {
|
|
428
464
|
throw sdkError(
|
|
429
465
|
"MODEL",
|
|
430
466
|
"Answer generation requested but no generation model is available"
|
|
@@ -438,7 +474,7 @@ class GnoClientImpl implements GnoClient {
|
|
|
438
474
|
config: this.config,
|
|
439
475
|
vectorIndex: ports.vectorIndex,
|
|
440
476
|
embedPort: ports.embedPort,
|
|
441
|
-
|
|
477
|
+
expandPort: ports.expandPort,
|
|
442
478
|
rerankPort: ports.rerankPort,
|
|
443
479
|
},
|
|
444
480
|
query,
|
|
@@ -468,9 +504,13 @@ class GnoClientImpl implements GnoClient {
|
|
|
468
504
|
let answerContext: AskResult["meta"]["answerContext"];
|
|
469
505
|
let answerGenerated = false;
|
|
470
506
|
|
|
471
|
-
if (
|
|
507
|
+
if (
|
|
508
|
+
answerRequested &&
|
|
509
|
+
ports.answerPort &&
|
|
510
|
+
searchResult.results.length > 0
|
|
511
|
+
) {
|
|
472
512
|
const rawAnswer = await generateGroundedAnswer(
|
|
473
|
-
{ genPort: ports.
|
|
513
|
+
{ genPort: ports.answerPort, store: this.store },
|
|
474
514
|
query,
|
|
475
515
|
searchResult.results,
|
|
476
516
|
options.maxAnswerTokens ?? 512
|
package/src/sdk/types.ts
CHANGED
package/src/serve/AGENTS.md
CHANGED
|
@@ -50,7 +50,8 @@ interface ServerContext {
|
|
|
50
50
|
config: Config;
|
|
51
51
|
vectorIndex: VectorIndexPort | null;
|
|
52
52
|
embedPort: EmbeddingPort | null;
|
|
53
|
-
|
|
53
|
+
expandPort: GenerationPort | null;
|
|
54
|
+
answerPort: GenerationPort | null;
|
|
54
55
|
rerankPort: RerankPort | null;
|
|
55
56
|
capabilities: { bm25; vector; hybrid; answer };
|
|
56
57
|
}
|
package/src/serve/CLAUDE.md
CHANGED
|
@@ -50,7 +50,8 @@ interface ServerContext {
|
|
|
50
50
|
config: Config;
|
|
51
51
|
vectorIndex: VectorIndexPort | null;
|
|
52
52
|
embedPort: EmbeddingPort | null;
|
|
53
|
-
|
|
53
|
+
expandPort: GenerationPort | null;
|
|
54
|
+
answerPort: GenerationPort | null;
|
|
54
55
|
rerankPort: RerankPort | null;
|
|
55
56
|
capabilities: { bm25; vector; hybrid; answer };
|
|
56
57
|
}
|
package/src/serve/context.ts
CHANGED
|
@@ -63,7 +63,8 @@ export interface ServerContext {
|
|
|
63
63
|
config: Config;
|
|
64
64
|
vectorIndex: VectorIndexPort | null;
|
|
65
65
|
embedPort: EmbeddingPort | null;
|
|
66
|
-
|
|
66
|
+
expandPort: GenerationPort | null;
|
|
67
|
+
answerPort: GenerationPort | null;
|
|
67
68
|
rerankPort: RerankPort | null;
|
|
68
69
|
capabilities: {
|
|
69
70
|
bm25: boolean;
|
|
@@ -82,7 +83,8 @@ export async function createServerContext(
|
|
|
82
83
|
config: Config
|
|
83
84
|
): Promise<ServerContext> {
|
|
84
85
|
let embedPort: EmbeddingPort | null = null;
|
|
85
|
-
let
|
|
86
|
+
let expandPort: GenerationPort | null = null;
|
|
87
|
+
let answerPort: GenerationPort | null = null;
|
|
86
88
|
let rerankPort: RerankPort | null = null;
|
|
87
89
|
let vectorIndex: VectorIndexPort | null = null;
|
|
88
90
|
|
|
@@ -129,13 +131,23 @@ export async function createServerContext(
|
|
|
129
131
|
}
|
|
130
132
|
}
|
|
131
133
|
|
|
132
|
-
// Try to create
|
|
133
|
-
const
|
|
134
|
+
// Try to create expansion port
|
|
135
|
+
const expandResult = await llm.createExpansionPort(
|
|
136
|
+
preset.expand ?? preset.gen,
|
|
137
|
+
createPortOptions("expand")
|
|
138
|
+
);
|
|
139
|
+
if (expandResult.ok) {
|
|
140
|
+
expandPort = expandResult.value;
|
|
141
|
+
console.log("Query expansion enabled");
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
// Try to create answer generation port
|
|
145
|
+
const answerResult = await llm.createGenerationPort(
|
|
134
146
|
preset.gen,
|
|
135
147
|
createPortOptions("gen")
|
|
136
148
|
);
|
|
137
|
-
if (
|
|
138
|
-
|
|
149
|
+
if (answerResult.ok) {
|
|
150
|
+
answerPort = answerResult.value;
|
|
139
151
|
console.log("AI answer generation enabled");
|
|
140
152
|
}
|
|
141
153
|
|
|
@@ -166,7 +178,7 @@ export async function createServerContext(
|
|
|
166
178
|
bm25: true, // Always available
|
|
167
179
|
vector: vectorIndex?.searchAvailable ?? false,
|
|
168
180
|
hybrid: (vectorIndex?.searchAvailable ?? false) && embedPort !== null,
|
|
169
|
-
answer:
|
|
181
|
+
answer: answerPort !== null,
|
|
170
182
|
};
|
|
171
183
|
|
|
172
184
|
return {
|
|
@@ -174,7 +186,8 @@ export async function createServerContext(
|
|
|
174
186
|
config,
|
|
175
187
|
vectorIndex,
|
|
176
188
|
embedPort,
|
|
177
|
-
|
|
189
|
+
expandPort,
|
|
190
|
+
answerPort,
|
|
178
191
|
rerankPort,
|
|
179
192
|
capabilities,
|
|
180
193
|
};
|
|
@@ -187,7 +200,8 @@ export async function createServerContext(
|
|
|
187
200
|
export async function disposeServerContext(ctx: ServerContext): Promise<void> {
|
|
188
201
|
const ports = [
|
|
189
202
|
{ name: "embed", port: ctx.embedPort },
|
|
190
|
-
{ name: "
|
|
203
|
+
{ name: "expand", port: ctx.expandPort },
|
|
204
|
+
{ name: "answer", port: ctx.answerPort },
|
|
191
205
|
{ name: "rerank", port: ctx.rerankPort },
|
|
192
206
|
];
|
|
193
207
|
|