@gmickel/gno 0.22.6 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +76 -3
- package/package.json +14 -2
- package/src/cli/AGENTS.md +2 -1
- package/src/cli/CLAUDE.md +2 -1
- package/src/cli/commands/ask.ts +33 -14
- package/src/cli/commands/models/clear.ts +10 -3
- package/src/cli/commands/models/list.ts +17 -4
- package/src/cli/commands/models/pull.ts +15 -7
- package/src/cli/commands/query.ts +13 -10
- package/src/cli/program.ts +76 -43
- package/src/config/types.ts +8 -1
- package/src/core/depth-policy.ts +78 -0
- package/src/core/structured-query.ts +198 -0
- package/src/llm/errors.ts +1 -1
- package/src/llm/nodeLlamaCpp/adapter.ts +39 -3
- package/src/llm/registry.ts +21 -0
- package/src/llm/types.ts +1 -1
- package/src/mcp/tools/index.ts +2 -2
- package/src/mcp/tools/query.ts +42 -40
- package/src/pipeline/hybrid.ts +4 -4
- package/src/pipeline/query-modes.ts +17 -12
- package/src/sdk/client.ts +624 -0
- package/src/sdk/documents.ts +348 -0
- package/src/sdk/embed.ts +287 -0
- package/src/sdk/errors.ts +42 -0
- package/src/sdk/index.ts +51 -0
- package/src/sdk/types.ts +138 -0
- package/src/serve/AGENTS.md +2 -1
- package/src/serve/CLAUDE.md +2 -1
- package/src/serve/context.ts +23 -9
- package/src/serve/public/app.tsx +8 -0
- package/src/serve/public/components/AIModelSelector.tsx +48 -10
- package/src/serve/public/globals.built.css +1 -1
- package/src/serve/public/pages/Ask.tsx +109 -41
- package/src/serve/public/pages/Browse.tsx +141 -5
- package/src/serve/public/pages/Collections.tsx +135 -38
- package/src/serve/public/pages/Dashboard.tsx +31 -4
- package/src/serve/public/pages/GraphView.tsx +24 -0
- package/src/serve/public/pages/Search.tsx +125 -36
- package/src/serve/routes/api.ts +73 -20
package/src/cli/program.ts
CHANGED
|
@@ -14,6 +14,7 @@ import {
|
|
|
14
14
|
PRODUCT_NAME,
|
|
15
15
|
VERSION,
|
|
16
16
|
} from "../app/constants";
|
|
17
|
+
import { resolveDepthPolicy } from "../core/depth-policy";
|
|
17
18
|
import { parseAndValidateTagFilter } from "../core/tags";
|
|
18
19
|
import { setColorsEnabled } from "./colors";
|
|
19
20
|
import {
|
|
@@ -455,7 +456,10 @@ function wireSearchCommands(program: Command): void {
|
|
|
455
456
|
.option("--full", "include full content")
|
|
456
457
|
.option("--line-numbers", "include line numbers in output")
|
|
457
458
|
.option("--fast", "skip expansion and reranking (fastest, ~0.7s)")
|
|
458
|
-
.option(
|
|
459
|
+
.option(
|
|
460
|
+
"--thorough",
|
|
461
|
+
"use expansion with a wider rerank pool (slowest, best recall)"
|
|
462
|
+
)
|
|
459
463
|
.option("--no-expand", "disable query expansion")
|
|
460
464
|
.option("--no-rerank", "disable reranking")
|
|
461
465
|
.option(
|
|
@@ -474,6 +478,7 @@ function wireSearchCommands(program: Command): void {
|
|
|
474
478
|
.action(async (queryText: string, cmdOpts: Record<string, unknown>) => {
|
|
475
479
|
const format = getFormat(cmdOpts);
|
|
476
480
|
assertFormatSupported(CMD.query, format);
|
|
481
|
+
const globals = getGlobals();
|
|
477
482
|
|
|
478
483
|
// Validate empty query
|
|
479
484
|
if (!queryText.trim()) {
|
|
@@ -497,6 +502,21 @@ function wireSearchCommands(program: Command): void {
|
|
|
497
502
|
queryModes = parsed.value;
|
|
498
503
|
}
|
|
499
504
|
|
|
505
|
+
const { normalizeStructuredQueryInput } =
|
|
506
|
+
await import("../core/structured-query");
|
|
507
|
+
const normalizedInput = normalizeStructuredQueryInput(
|
|
508
|
+
queryText,
|
|
509
|
+
queryModes ?? []
|
|
510
|
+
);
|
|
511
|
+
if (!normalizedInput.ok) {
|
|
512
|
+
throw new CliError("VALIDATION", normalizedInput.error.message);
|
|
513
|
+
}
|
|
514
|
+
queryText = normalizedInput.value.query;
|
|
515
|
+
queryModes =
|
|
516
|
+
normalizedInput.value.queryModes.length > 0
|
|
517
|
+
? normalizedInput.value.queryModes
|
|
518
|
+
: undefined;
|
|
519
|
+
|
|
500
520
|
// Parse and validate tag filters
|
|
501
521
|
let tagsAll: string[] | undefined;
|
|
502
522
|
let tagsAny: string[] | undefined;
|
|
@@ -517,35 +537,26 @@ function wireSearchCommands(program: Command): void {
|
|
|
517
537
|
const limit = cmdOpts.limit
|
|
518
538
|
? parsePositiveInt("limit", cmdOpts.limit)
|
|
519
539
|
: getDefaultLimit(format);
|
|
540
|
+
const { loadConfig } = await import("../config");
|
|
541
|
+
const { getActivePreset } = await import("../llm/registry");
|
|
542
|
+
const configResult = await loadConfig(globals.config);
|
|
543
|
+
const activePresetId = configResult.ok
|
|
544
|
+
? getActivePreset(configResult.value).id
|
|
545
|
+
: "slim";
|
|
520
546
|
const candidateLimit = cmdOpts.candidateLimit
|
|
521
547
|
? parsePositiveInt("candidate-limit", cmdOpts.candidateLimit)
|
|
522
548
|
: undefined;
|
|
523
549
|
const categories = parseCsvValues(cmdOpts.category);
|
|
524
550
|
const exclude = parseCsvValues(cmdOpts.exclude);
|
|
525
551
|
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
noExpand = true;
|
|
535
|
-
noRerank = true;
|
|
536
|
-
} else if (cmdOpts.thorough) {
|
|
537
|
-
// --thorough: full pipeline (~5-8s)
|
|
538
|
-
noExpand = false;
|
|
539
|
-
noRerank = false;
|
|
540
|
-
} else {
|
|
541
|
-
// Check individual flags (override defaults)
|
|
542
|
-
if (cmdOpts.expand === false) {
|
|
543
|
-
noExpand = true;
|
|
544
|
-
}
|
|
545
|
-
if (cmdOpts.rerank === false) {
|
|
546
|
-
noRerank = true;
|
|
547
|
-
}
|
|
548
|
-
}
|
|
552
|
+
const depthPolicy = resolveDepthPolicy({
|
|
553
|
+
presetId: activePresetId,
|
|
554
|
+
fast: Boolean(cmdOpts.fast),
|
|
555
|
+
thorough: Boolean(cmdOpts.thorough),
|
|
556
|
+
expand: cmdOpts.expand === false ? false : undefined,
|
|
557
|
+
rerank: cmdOpts.rerank === false ? false : undefined,
|
|
558
|
+
candidateLimit,
|
|
559
|
+
});
|
|
549
560
|
|
|
550
561
|
const { query, formatQuery } = await import("./commands/query");
|
|
551
562
|
const result = await query(queryText, {
|
|
@@ -563,9 +574,9 @@ function wireSearchCommands(program: Command): void {
|
|
|
563
574
|
tagsAny,
|
|
564
575
|
full: Boolean(cmdOpts.full),
|
|
565
576
|
lineNumbers: Boolean(cmdOpts.lineNumbers),
|
|
566
|
-
noExpand,
|
|
567
|
-
noRerank,
|
|
568
|
-
candidateLimit,
|
|
577
|
+
noExpand: depthPolicy.noExpand,
|
|
578
|
+
noRerank: depthPolicy.noRerank,
|
|
579
|
+
candidateLimit: depthPolicy.candidateLimit,
|
|
569
580
|
queryModes,
|
|
570
581
|
explain: Boolean(cmdOpts.explain),
|
|
571
582
|
json: format === "json",
|
|
@@ -615,7 +626,10 @@ function wireSearchCommands(program: Command): void {
|
|
|
615
626
|
[]
|
|
616
627
|
)
|
|
617
628
|
.option("--fast", "skip expansion and reranking (fastest)")
|
|
618
|
-
.option(
|
|
629
|
+
.option(
|
|
630
|
+
"--thorough",
|
|
631
|
+
"use expansion with a wider rerank pool (slowest, best recall)"
|
|
632
|
+
)
|
|
619
633
|
.option("-C, --candidate-limit <num>", "max candidates passed to reranking")
|
|
620
634
|
.option("--answer", "generate short grounded answer")
|
|
621
635
|
.option("--no-answer", "force retrieval-only output")
|
|
@@ -626,6 +640,7 @@ function wireSearchCommands(program: Command): void {
|
|
|
626
640
|
.action(async (queryText: string, cmdOpts: Record<string, unknown>) => {
|
|
627
641
|
const format = getFormat(cmdOpts);
|
|
628
642
|
assertFormatSupported(CMD.ask, format);
|
|
643
|
+
const globals = getGlobals();
|
|
629
644
|
|
|
630
645
|
// Validate empty query
|
|
631
646
|
if (!queryText.trim()) {
|
|
@@ -635,6 +650,12 @@ function wireSearchCommands(program: Command): void {
|
|
|
635
650
|
const limit = cmdOpts.limit
|
|
636
651
|
? parsePositiveInt("limit", cmdOpts.limit)
|
|
637
652
|
: getDefaultLimit(format);
|
|
653
|
+
const { loadConfig } = await import("../config");
|
|
654
|
+
const { getActivePreset } = await import("../llm/registry");
|
|
655
|
+
const configResult = await loadConfig(globals.config);
|
|
656
|
+
const activePresetId = configResult.ok
|
|
657
|
+
? getActivePreset(configResult.value).id
|
|
658
|
+
: "slim";
|
|
638
659
|
const candidateLimit = cmdOpts.candidateLimit
|
|
639
660
|
? parsePositiveInt("candidate-limit", cmdOpts.candidateLimit)
|
|
640
661
|
: undefined;
|
|
@@ -656,18 +677,28 @@ function wireSearchCommands(program: Command): void {
|
|
|
656
677
|
queryModes = parsed.value;
|
|
657
678
|
}
|
|
658
679
|
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
} else if (cmdOpts.thorough) {
|
|
668
|
-
noExpand = false;
|
|
669
|
-
noRerank = false;
|
|
680
|
+
const { normalizeStructuredQueryInput } =
|
|
681
|
+
await import("../core/structured-query");
|
|
682
|
+
const normalizedInput = normalizeStructuredQueryInput(
|
|
683
|
+
queryText,
|
|
684
|
+
queryModes ?? []
|
|
685
|
+
);
|
|
686
|
+
if (!normalizedInput.ok) {
|
|
687
|
+
throw new CliError("VALIDATION", normalizedInput.error.message);
|
|
670
688
|
}
|
|
689
|
+
queryText = normalizedInput.value.query;
|
|
690
|
+
queryModes =
|
|
691
|
+
normalizedInput.value.queryModes.length > 0
|
|
692
|
+
? normalizedInput.value.queryModes
|
|
693
|
+
: undefined;
|
|
694
|
+
|
|
695
|
+
const depthPolicy = resolveDepthPolicy({
|
|
696
|
+
presetId: activePresetId,
|
|
697
|
+
fast: Boolean(cmdOpts.fast),
|
|
698
|
+
thorough: Boolean(cmdOpts.thorough),
|
|
699
|
+
candidateLimit,
|
|
700
|
+
hasStructuredModes: Boolean(queryModes?.length),
|
|
701
|
+
});
|
|
671
702
|
|
|
672
703
|
const { ask, formatAsk } = await import("./commands/ask");
|
|
673
704
|
const showSources = Boolean(cmdOpts.showSources);
|
|
@@ -682,9 +713,9 @@ function wireSearchCommands(program: Command): void {
|
|
|
682
713
|
intent: cmdOpts.intent as string | undefined,
|
|
683
714
|
exclude,
|
|
684
715
|
queryModes,
|
|
685
|
-
noExpand,
|
|
686
|
-
noRerank,
|
|
687
|
-
candidateLimit,
|
|
716
|
+
noExpand: depthPolicy.noExpand,
|
|
717
|
+
noRerank: depthPolicy.noRerank,
|
|
718
|
+
candidateLimit: depthPolicy.candidateLimit,
|
|
688
719
|
// Per spec: --answer defaults to false, --no-answer forces retrieval-only
|
|
689
720
|
// Commander creates separate cmdOpts.noAnswer for --no-answer flag
|
|
690
721
|
answer: Boolean(cmdOpts.answer),
|
|
@@ -1328,7 +1359,8 @@ function wireManagementCommands(program: Command): void {
|
|
|
1328
1359
|
.option("--all", "download all configured models")
|
|
1329
1360
|
.option("--embed", "download embedding model")
|
|
1330
1361
|
.option("--rerank", "download reranker model")
|
|
1331
|
-
.option("--
|
|
1362
|
+
.option("--expand", "download expansion model")
|
|
1363
|
+
.option("--gen", "download answer generation model")
|
|
1332
1364
|
.option("--force", "force re-download")
|
|
1333
1365
|
.option("--no-progress", "disable download progress")
|
|
1334
1366
|
.action(async (cmdOpts: Record<string, unknown>) => {
|
|
@@ -1347,6 +1379,7 @@ function wireManagementCommands(program: Command): void {
|
|
|
1347
1379
|
all: Boolean(cmdOpts.all),
|
|
1348
1380
|
embed: Boolean(cmdOpts.embed),
|
|
1349
1381
|
rerank: Boolean(cmdOpts.rerank),
|
|
1382
|
+
expand: Boolean(cmdOpts.expand),
|
|
1350
1383
|
gen: Boolean(cmdOpts.gen),
|
|
1351
1384
|
force: Boolean(cmdOpts.force),
|
|
1352
1385
|
onProgress: showProgress ? createProgressRenderer() : undefined,
|
package/src/config/types.ts
CHANGED
|
@@ -166,7 +166,9 @@ export const ModelPresetSchema = z.object({
|
|
|
166
166
|
embed: z.string().min(1),
|
|
167
167
|
/** Reranker model URI */
|
|
168
168
|
rerank: z.string().min(1),
|
|
169
|
-
/**
|
|
169
|
+
/** Query expansion model URI (defaults to gen for older configs) */
|
|
170
|
+
expand: z.string().min(1).optional(),
|
|
171
|
+
/** Answer generation model URI */
|
|
170
172
|
gen: z.string().min(1),
|
|
171
173
|
});
|
|
172
174
|
|
|
@@ -180,6 +182,7 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
|
|
|
180
182
|
embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
|
|
181
183
|
rerank:
|
|
182
184
|
"hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
|
|
185
|
+
expand: "hf:unsloth/Qwen3-1.7B-GGUF/Qwen3-1.7B-Q4_K_M.gguf",
|
|
183
186
|
gen: "hf:unsloth/Qwen3-1.7B-GGUF/Qwen3-1.7B-Q4_K_M.gguf",
|
|
184
187
|
},
|
|
185
188
|
{
|
|
@@ -188,6 +191,8 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
|
|
|
188
191
|
embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
|
|
189
192
|
rerank:
|
|
190
193
|
"hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
|
|
194
|
+
expand:
|
|
195
|
+
"hf:bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-Q4_K_M.gguf",
|
|
191
196
|
gen: "hf:bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-Q4_K_M.gguf",
|
|
192
197
|
},
|
|
193
198
|
{
|
|
@@ -196,6 +201,8 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
|
|
|
196
201
|
embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
|
|
197
202
|
rerank:
|
|
198
203
|
"hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
|
|
204
|
+
expand:
|
|
205
|
+
"hf:unsloth/Qwen3-4B-Instruct-2507-GGUF/Qwen3-4B-Instruct-2507-Q4_K_M.gguf",
|
|
199
206
|
gen: "hf:unsloth/Qwen3-4B-Instruct-2507-GGUF/Qwen3-4B-Instruct-2507-Q4_K_M.gguf",
|
|
200
207
|
},
|
|
201
208
|
];
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/** Retrieval depth tiers selectable by callers. */
export type RetrievalDepth = "fast" | "balanced" | "thorough";

/** Caller-supplied knobs consumed by {@link resolveDepthPolicy}. */
export interface ResolveDepthPolicyInput {
  /** Active model preset id; missing or blank values are treated as "slim". */
  presetId?: string;
  /** Select the fast tier. Takes precedence over `thorough` when both are set. */
  fast?: boolean;
  /** Select the thorough tier (ignored when `fast` is set). */
  thorough?: boolean;
  /** Explicit expansion toggle; only consulted in the balanced tier. */
  expand?: boolean;
  /** Explicit rerank toggle; only consulted in the balanced tier. */
  rerank?: boolean;
  /** Explicit candidate limit; returned unchanged whenever it is provided. */
  candidateLimit?: number;
  /** True when the request already carries structured query modes. */
  hasStructuredModes?: boolean;
}

/** Effective pipeline switches produced by {@link resolveDepthPolicy}. */
export interface ResolvedDepthPolicy {
  depth: RetrievalDepth;
  noExpand: boolean;
  noRerank: boolean;
  candidateLimit?: number;
  /** Whether the balanced tier expands by default for the given preset. */
  balancedExpansionEnabled: boolean;
}

/** Candidate limit applied in the thorough tier when the caller gives none. */
export const DEFAULT_THOROUGH_CANDIDATE_LIMIT = 40;

/** Trim and lower-case a preset id, defaulting to "slim" when empty. */
function normalizePresetId(presetId?: string): string {
  return presetId?.trim().toLowerCase() || "slim";
}

/**
 * Report whether the balanced tier runs query expansion for a preset.
 *
 * @param presetId - Preset id in any casing; missing or blank means "slim".
 * @returns `true` only for the "slim" and "slim-tuned" presets.
 */
export function balancedUsesExpansion(presetId?: string): boolean {
  const normalized = normalizePresetId(presetId);
  return normalized === "slim" || normalized === "slim-tuned";
}

/**
 * Resolve depth flags and overrides into concrete pipeline switches.
 *
 * Precedence, highest first:
 * 1. `hasStructuredModes` always disables generated expansion.
 * 2. `fast` disables both expansion and reranking.
 * 3. `thorough` enables both and defaults the candidate limit to
 *    {@link DEFAULT_THOROUGH_CANDIDATE_LIMIT}.
 * 4. Otherwise (balanced) the preset default applies, adjusted by the explicit
 *    `expand` / `rerank` booleans when they are provided.
 *
 * @param input - Depth flags, preset id, and optional overrides.
 * @returns The resolved depth tier and pipeline switches.
 */
export function resolveDepthPolicy(
  input: ResolveDepthPolicyInput
): ResolvedDepthPolicy {
  const balancedExpansionEnabled = balancedUsesExpansion(input.presetId);
  let depth: RetrievalDepth = "balanced";
  let noExpand = !balancedExpansionEnabled;
  let noRerank = false;
  let candidateLimit = input.candidateLimit;

  if (input.fast) {
    depth = "fast";
    noExpand = true;
    noRerank = true;
  } else if (input.thorough) {
    depth = "thorough";
    noExpand = false;
    noRerank = false;
    candidateLimit ??= DEFAULT_THOROUGH_CANDIDATE_LIMIT;
  } else {
    // Only genuine booleans count as overrides; undefined (or any other
    // non-boolean value) leaves the preset default untouched.
    if (typeof input.expand === "boolean") {
      noExpand = !input.expand;
    }
    if (typeof input.rerank === "boolean") {
      noRerank = !input.rerank;
    }
  }

  // Structured query modes supply explicit expansions and should not trigger
  // an additional generated expansion step.
  if (input.hasStructuredModes) {
    noExpand = true;
  }

  return {
    depth,
    noExpand,
    noRerank,
    candidateLimit,
    balancedExpansionEnabled,
  };
}
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structured multi-line query document parsing.
|
|
3
|
+
*
|
|
4
|
+
* Pure parser used across CLI, API, MCP, SDK, and Web.
|
|
5
|
+
*
|
|
6
|
+
* @module src/core/structured-query
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import type { QueryModeInput } from "../pipeline/types";
|
|
10
|
+
|
|
11
|
+
/** Validation failure raised while parsing a structured query document. */
export interface StructuredQueryError {
  /** 1-based line number of the offending line, or null for document-level errors. */
  line: number | null;
  message: string;
}

/** Outcome of normalizing a (possibly structured) query string. */
export interface StructuredQueryNormalization {
  /** Base query text to run. */
  query: string;
  /** Modes parsed from the document followed by the explicitly supplied ones. */
  queryModes: QueryModeInput[];
  /** True when at least one term:/intent:/hyde: line was parsed. */
  usedStructuredQuerySyntax: boolean;
  /** True when `query` was derived from term/intent lines instead of plain lines. */
  derivedQuery: boolean;
}

export type StructuredQueryResult =
  | { ok: true; value: StructuredQueryNormalization }
  | { ok: false; error: StructuredQueryError };

const RECOGNIZED_MODE_PREFIXES = new Set(["term", "intent", "hyde"]);
const ANY_PREFIX_PATTERN = /^\s*([a-z][a-z0-9_-]*)\s*:\s*(.*)$/i;
const RECOGNIZED_PREFIX_PATTERN = /^\s*(term|intent|hyde)\s*:\s*(.*)$/i;
const LINE_BREAK_PATTERN = /\r?\n/;

/** Build a failed result carrying `message` and the 1-based `line` (or null). */
function buildError(
  message: string,
  line: number | null
): StructuredQueryResult {
  return { ok: false, error: { message, line } };
}

/** Build the "not structured" result: query and explicit modes are returned untouched. */
function passthrough(
  query: string,
  explicitQueryModes: QueryModeInput[]
): StructuredQueryResult {
  return {
    ok: true,
    value: {
      query,
      queryModes: explicitQueryModes,
      usedStructuredQuerySyntax: false,
      derivedQuery: false,
    },
  };
}

/** Join the text of every entry of the given mode with single spaces. */
function joinModeText(
  queryModes: QueryModeInput[],
  mode: QueryModeInput["mode"]
): string {
  return queryModes
    .filter((entry) => entry.mode === mode)
    .map((entry) => entry.text)
    .join(" ")
    .trim();
}

/**
 * Parse multi-line structured query syntax.
 *
 * Rules:
 * - single-line queries remain unchanged
 * - blank lines are ignored
 * - recognized typed lines: term:, intent:, hyde:
 * - if structured syntax is used, unknown prefix lines like foo:bar are rejected
 * - untyped lines contribute to the base query text
 * - if no untyped lines exist, base query is derived from term lines first, then intent lines
 * - hyde-only documents are rejected
 *
 * @param query - Raw query text, possibly spanning several lines.
 * @param explicitQueryModes - Modes supplied separately; appended after the
 *   modes parsed from the document.
 * @returns The normalized query and modes, or a validation error. Line numbers
 *   in errors are 1-based and count blank lines.
 */
export function normalizeStructuredQueryInput(
  query: string,
  explicitQueryModes: QueryModeInput[] = []
): StructuredQueryResult {
  if (!query.includes("\n")) {
    return passthrough(query, explicitQueryModes);
  }

  // Split once and reuse. Blank lines can never match the recognized-prefix
  // pattern, so an all-blank document also takes the pass-through path.
  const rawLines = query.split(LINE_BREAK_PATTERN);
  const hasRecognizedTypedLine = rawLines.some((line) =>
    RECOGNIZED_PREFIX_PATTERN.test(line)
  );
  if (!hasRecognizedTypedLine) {
    return passthrough(query, explicitQueryModes);
  }

  const queryModes: QueryModeInput[] = [];
  const bodyLines: string[] = [];
  let hydeCount = 0;

  for (const [index, line] of rawLines.entries()) {
    const trimmed = line.trim();
    if (trimmed.length === 0) {
      continue;
    }

    const recognized = trimmed.match(RECOGNIZED_PREFIX_PATTERN);
    if (recognized) {
      const mode = recognized[1]?.toLowerCase() as QueryModeInput["mode"];
      const text = recognized[2]?.trim() ?? "";
      if (text.length === 0) {
        return buildError(
          `Structured query line ${index + 1} must contain non-empty text after ${mode}:`,
          index + 1
        );
      }
      if (mode === "hyde") {
        hydeCount += 1;
        if (hydeCount > 1) {
          return buildError(
            "Only one hyde line is allowed in a structured query document.",
            index + 1
          );
        }
      }
      queryModes.push({ mode, text });
      continue;
    }

    // Any other "word:" prefix is treated as a mistyped mode and rejected.
    const prefixed = trimmed.match(ANY_PREFIX_PATTERN);
    if (prefixed?.[1]) {
      const prefix = prefixed[1].toLowerCase();
      if (!RECOGNIZED_MODE_PREFIXES.has(prefix)) {
        return buildError(
          `Unknown structured query line prefix "${prefix}:" on line ${index + 1}. Expected term:, intent:, or hyde:.`,
          index + 1
        );
      }
    }

    bodyLines.push(trimmed);
  }

  const combinedQueryModes = [...queryModes, ...explicitQueryModes];
  const totalHydeCount = combinedQueryModes.filter(
    (entry) => entry.mode === "hyde"
  ).length;
  if (totalHydeCount > 1) {
    return buildError(
      "Only one hyde entry is allowed across structured query syntax and explicit query modes.",
      null
    );
  }

  let normalizedQuery = bodyLines.join(" ").trim();
  let derivedQuery = false;

  if (!normalizedQuery) {
    // Derivation looks only at modes parsed from the document itself,
    // never at explicitly supplied modes.
    normalizedQuery =
      joinModeText(queryModes, "term") || joinModeText(queryModes, "intent");
    derivedQuery = normalizedQuery.length > 0;
  }

  if (!normalizedQuery) {
    return buildError(
      "Structured query documents must include at least one plain query line, term line, or intent line. hyde-only documents are not allowed.",
      null
    );
  }

  return {
    ok: true,
    value: {
      query: normalizedQuery,
      queryModes: combinedQueryModes,
      usedStructuredQuerySyntax: true,
      derivedQuery,
    },
  };
}

/**
 * Report whether `query` parses successfully as a structured query document.
 *
 * @returns `false` for single-line queries, documents without typed lines,
 *   and documents that fail validation.
 */
export function hasStructuredQuerySyntax(query: string): boolean {
  const result = normalizeStructuredQueryInput(query);
  return result.ok && result.value.usedStructuredQuerySyntax;
}
|
package/src/llm/errors.ts
CHANGED
|
@@ -121,7 +121,7 @@ export function modelNotFoundError(uri: string, details?: string): LlmError {
|
|
|
121
121
|
|
|
122
122
|
export function modelNotCachedError(
|
|
123
123
|
uri: string,
|
|
124
|
-
modelType: "embed" | "rerank" | "gen"
|
|
124
|
+
modelType: "embed" | "rerank" | "expand" | "gen"
|
|
125
125
|
): LlmError {
|
|
126
126
|
return llmError("MODEL_NOT_CACHED", {
|
|
127
127
|
message: `${modelType} model not cached`,
|
|
@@ -19,7 +19,12 @@ import { ModelCache } from "../cache";
|
|
|
19
19
|
import { HttpEmbedding, isHttpModelUri } from "../httpEmbedding";
|
|
20
20
|
import { HttpGeneration, isHttpGenUri } from "../httpGeneration";
|
|
21
21
|
import { HttpRerank, isHttpRerankUri } from "../httpRerank";
|
|
22
|
-
import {
|
|
22
|
+
import {
|
|
23
|
+
getActivePreset,
|
|
24
|
+
getAnswerModelUri,
|
|
25
|
+
getExpandModelUri,
|
|
26
|
+
getModelConfig,
|
|
27
|
+
} from "../registry";
|
|
23
28
|
import { NodeLlamaCppEmbedding } from "./embedding";
|
|
24
29
|
import { NodeLlamaCppGeneration } from "./generation";
|
|
25
30
|
import { getModelManager, type ModelManager } from "./lifecycle";
|
|
@@ -105,8 +110,7 @@ export class LlmAdapter {
|
|
|
105
110
|
modelUri?: string,
|
|
106
111
|
options?: CreatePortOptions
|
|
107
112
|
): Promise<LlmResult<GenerationPort>> {
|
|
108
|
-
const
|
|
109
|
-
const uri = modelUri ?? preset.gen;
|
|
113
|
+
const uri = getAnswerModelUri(this.config, modelUri);
|
|
110
114
|
const policy = options?.policy ?? DEFAULT_POLICY;
|
|
111
115
|
|
|
112
116
|
// Use HTTP generation for remote endpoints
|
|
@@ -132,6 +136,38 @@ export class LlmAdapter {
|
|
|
132
136
|
};
|
|
133
137
|
}
|
|
134
138
|
|
|
139
|
+
/**
 * Build the generation port used for query expansion.
 *
 * The model URI comes from `getExpandModelUri(this.config, modelUri)`.
 * HTTP generation URIs are served by `HttpGeneration`; any other URI is
 * resolved through the model cache under the "expand" model type and wrapped
 * in a `NodeLlamaCppGeneration`.
 *
 * @param modelUri - Optional URI override passed to `getExpandModelUri`.
 * @param options - Optional download policy and progress callback.
 * @returns The generation port, or the cache failure when the model cannot be resolved.
 */
async createExpansionPort(
  modelUri?: string,
  options?: CreatePortOptions
): Promise<LlmResult<GenerationPort>> {
  const expandUri = getExpandModelUri(this.config, modelUri);

  // Remote endpoints need no local model file.
  if (isHttpGenUri(expandUri)) {
    return { ok: true, value: new HttpGeneration(expandUri) };
  }

  const downloadPolicy = options?.policy ?? DEFAULT_POLICY;
  const ensured = await this.cache.ensureModel(
    expandUri,
    "expand",
    downloadPolicy,
    options?.onProgress
  );

  if (ensured.ok) {
    const port = new NodeLlamaCppGeneration(
      this.manager,
      expandUri,
      ensured.value
    );
    return { ok: true, value: port };
  }

  return ensured;
}
|
|
170
|
+
|
|
135
171
|
/**
|
|
136
172
|
* Create a rerank port.
|
|
137
173
|
* Supports HTTP endpoints for remote reranking models.
|
package/src/llm/registry.ts
CHANGED
|
@@ -57,6 +57,24 @@ export function getActivePreset(config: Config): ModelPreset {
|
|
|
57
57
|
return builtIn;
|
|
58
58
|
}
|
|
59
59
|
|
|
60
|
+
/**
 * Pick the model URI used for query expansion.
 *
 * @param config - Configuration from which the active preset is read.
 * @param override - Optional URI that wins over the preset when non-empty.
 * @returns `override` when truthy; otherwise the active preset's `expand`
 *   URI, falling back to its `gen` URI when `expand` is not set.
 */
export function getExpandModelUri(config: Config, override?: string): string {
  // Truthiness check: an empty-string override falls through to the preset.
  if (!override) {
    const { expand, gen } = getActivePreset(config);
    return expand ?? gen;
  }

  return override;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Pick the model URI used for answer generation.
 *
 * @param config - Configuration from which the active preset is read.
 * @param override - Optional URI that wins over the preset when non-empty.
 * @returns `override` when truthy; otherwise the active preset's `gen` URI.
 */
export function getAnswerModelUri(config: Config, override?: string): string {
  // Truthiness check: an empty-string override falls through to the preset.
  if (!override) {
    return getActivePreset(config).gen;
  }

  return override;
}
|
|
77
|
+
|
|
60
78
|
/**
|
|
61
79
|
* Resolve a model URI for a given type.
|
|
62
80
|
* Uses override if provided, otherwise from active preset.
|
|
@@ -70,6 +88,9 @@ export function resolveModelUri(
|
|
|
70
88
|
return override;
|
|
71
89
|
}
|
|
72
90
|
const preset = getActivePreset(config);
|
|
91
|
+
if (type === "expand") {
|
|
92
|
+
return preset.expand ?? preset.gen;
|
|
93
|
+
}
|
|
73
94
|
return preset[type];
|
|
74
95
|
}
|
|
75
96
|
|
package/src/llm/types.ts
CHANGED
|
@@ -19,7 +19,7 @@ export type LlmResult<T> =
|
|
|
19
19
|
// Model Types
|
|
20
20
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
21
21
|
|
|
22
|
-
export type ModelType = "embed" | "rerank" | "gen";
|
|
22
|
+
export type ModelType = "embed" | "rerank" | "expand" | "gen";
|
|
23
23
|
|
|
24
24
|
/** Model URI format: hf:org/repo/file.gguf or file:/path */
|
|
25
25
|
export type ModelUri = string;
|
package/src/mcp/tools/index.ts
CHANGED
|
@@ -149,8 +149,8 @@ export const queryInputSchema = z.object({
|
|
|
149
149
|
.optional(),
|
|
150
150
|
fast: z.boolean().default(false),
|
|
151
151
|
thorough: z.boolean().default(false),
|
|
152
|
-
expand: z.boolean().
|
|
153
|
-
rerank: z.boolean().
|
|
152
|
+
expand: z.boolean().optional(),
|
|
153
|
+
rerank: z.boolean().optional(),
|
|
154
154
|
tagsAll: z.array(z.string()).optional(),
|
|
155
155
|
tagsAny: z.array(z.string()).optional(),
|
|
156
156
|
});
|