@gmickel/gno 0.22.6 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/README.md +76 -3
  2. package/package.json +14 -2
  3. package/src/cli/AGENTS.md +2 -1
  4. package/src/cli/CLAUDE.md +2 -1
  5. package/src/cli/commands/ask.ts +33 -14
  6. package/src/cli/commands/models/clear.ts +10 -3
  7. package/src/cli/commands/models/list.ts +17 -4
  8. package/src/cli/commands/models/pull.ts +15 -7
  9. package/src/cli/commands/query.ts +13 -10
  10. package/src/cli/program.ts +76 -43
  11. package/src/config/types.ts +8 -1
  12. package/src/core/depth-policy.ts +78 -0
  13. package/src/core/structured-query.ts +198 -0
  14. package/src/llm/errors.ts +1 -1
  15. package/src/llm/nodeLlamaCpp/adapter.ts +39 -3
  16. package/src/llm/registry.ts +21 -0
  17. package/src/llm/types.ts +1 -1
  18. package/src/mcp/tools/index.ts +2 -2
  19. package/src/mcp/tools/query.ts +42 -40
  20. package/src/pipeline/hybrid.ts +4 -4
  21. package/src/pipeline/query-modes.ts +17 -12
  22. package/src/sdk/client.ts +624 -0
  23. package/src/sdk/documents.ts +348 -0
  24. package/src/sdk/embed.ts +287 -0
  25. package/src/sdk/errors.ts +42 -0
  26. package/src/sdk/index.ts +51 -0
  27. package/src/sdk/types.ts +138 -0
  28. package/src/serve/AGENTS.md +2 -1
  29. package/src/serve/CLAUDE.md +2 -1
  30. package/src/serve/context.ts +23 -9
  31. package/src/serve/public/app.tsx +8 -0
  32. package/src/serve/public/components/AIModelSelector.tsx +48 -10
  33. package/src/serve/public/globals.built.css +1 -1
  34. package/src/serve/public/pages/Ask.tsx +109 -41
  35. package/src/serve/public/pages/Browse.tsx +141 -5
  36. package/src/serve/public/pages/Collections.tsx +135 -38
  37. package/src/serve/public/pages/Dashboard.tsx +31 -4
  38. package/src/serve/public/pages/GraphView.tsx +24 -0
  39. package/src/serve/public/pages/Search.tsx +125 -36
  40. package/src/serve/routes/api.ts +73 -20
@@ -14,6 +14,7 @@ import {
14
14
  PRODUCT_NAME,
15
15
  VERSION,
16
16
  } from "../app/constants";
17
+ import { resolveDepthPolicy } from "../core/depth-policy";
17
18
  import { parseAndValidateTagFilter } from "../core/tags";
18
19
  import { setColorsEnabled } from "./colors";
19
20
  import {
@@ -455,7 +456,10 @@ function wireSearchCommands(program: Command): void {
455
456
  .option("--full", "include full content")
456
457
  .option("--line-numbers", "include line numbers in output")
457
458
  .option("--fast", "skip expansion and reranking (fastest, ~0.7s)")
458
- .option("--thorough", "enable query expansion (slower, ~5-8s)")
459
+ .option(
460
+ "--thorough",
461
+ "use expansion with a wider rerank pool (slowest, best recall)"
462
+ )
459
463
  .option("--no-expand", "disable query expansion")
460
464
  .option("--no-rerank", "disable reranking")
461
465
  .option(
@@ -474,6 +478,7 @@ function wireSearchCommands(program: Command): void {
474
478
  .action(async (queryText: string, cmdOpts: Record<string, unknown>) => {
475
479
  const format = getFormat(cmdOpts);
476
480
  assertFormatSupported(CMD.query, format);
481
+ const globals = getGlobals();
477
482
 
478
483
  // Validate empty query
479
484
  if (!queryText.trim()) {
@@ -497,6 +502,21 @@ function wireSearchCommands(program: Command): void {
497
502
  queryModes = parsed.value;
498
503
  }
499
504
 
505
+ const { normalizeStructuredQueryInput } =
506
+ await import("../core/structured-query");
507
+ const normalizedInput = normalizeStructuredQueryInput(
508
+ queryText,
509
+ queryModes ?? []
510
+ );
511
+ if (!normalizedInput.ok) {
512
+ throw new CliError("VALIDATION", normalizedInput.error.message);
513
+ }
514
+ queryText = normalizedInput.value.query;
515
+ queryModes =
516
+ normalizedInput.value.queryModes.length > 0
517
+ ? normalizedInput.value.queryModes
518
+ : undefined;
519
+
500
520
  // Parse and validate tag filters
501
521
  let tagsAll: string[] | undefined;
502
522
  let tagsAny: string[] | undefined;
@@ -517,35 +537,26 @@ function wireSearchCommands(program: Command): void {
517
537
  const limit = cmdOpts.limit
518
538
  ? parsePositiveInt("limit", cmdOpts.limit)
519
539
  : getDefaultLimit(format);
540
+ const { loadConfig } = await import("../config");
541
+ const { getActivePreset } = await import("../llm/registry");
542
+ const configResult = await loadConfig(globals.config);
543
+ const activePresetId = configResult.ok
544
+ ? getActivePreset(configResult.value).id
545
+ : "slim";
520
546
  const candidateLimit = cmdOpts.candidateLimit
521
547
  ? parsePositiveInt("candidate-limit", cmdOpts.candidateLimit)
522
548
  : undefined;
523
549
  const categories = parseCsvValues(cmdOpts.category);
524
550
  const exclude = parseCsvValues(cmdOpts.exclude);
525
551
 
526
- // Determine expansion/rerank settings based on flags
527
- // Priority: --fast > --thorough > --no-expand/--no-rerank > default
528
- // Default: skip expansion (balanced mode ~2-3s)
529
- let noExpand = true; // Default: skip expansion
530
- let noRerank = false; // Default: with reranking
531
-
532
- if (cmdOpts.fast) {
533
- // --fast: skip both (~0.7s)
534
- noExpand = true;
535
- noRerank = true;
536
- } else if (cmdOpts.thorough) {
537
- // --thorough: full pipeline (~5-8s)
538
- noExpand = false;
539
- noRerank = false;
540
- } else {
541
- // Check individual flags (override defaults)
542
- if (cmdOpts.expand === false) {
543
- noExpand = true;
544
- }
545
- if (cmdOpts.rerank === false) {
546
- noRerank = true;
547
- }
548
- }
552
+ const depthPolicy = resolveDepthPolicy({
553
+ presetId: activePresetId,
554
+ fast: Boolean(cmdOpts.fast),
555
+ thorough: Boolean(cmdOpts.thorough),
556
+ expand: cmdOpts.expand === false ? false : undefined,
557
+ rerank: cmdOpts.rerank === false ? false : undefined,
558
+ candidateLimit,
559
+ });
549
560
 
550
561
  const { query, formatQuery } = await import("./commands/query");
551
562
  const result = await query(queryText, {
@@ -563,9 +574,9 @@ function wireSearchCommands(program: Command): void {
563
574
  tagsAny,
564
575
  full: Boolean(cmdOpts.full),
565
576
  lineNumbers: Boolean(cmdOpts.lineNumbers),
566
- noExpand,
567
- noRerank,
568
- candidateLimit,
577
+ noExpand: depthPolicy.noExpand,
578
+ noRerank: depthPolicy.noRerank,
579
+ candidateLimit: depthPolicy.candidateLimit,
569
580
  queryModes,
570
581
  explain: Boolean(cmdOpts.explain),
571
582
  json: format === "json",
@@ -615,7 +626,10 @@ function wireSearchCommands(program: Command): void {
615
626
  []
616
627
  )
617
628
  .option("--fast", "skip expansion and reranking (fastest)")
618
- .option("--thorough", "enable query expansion (slower)")
629
+ .option(
630
+ "--thorough",
631
+ "use expansion with a wider rerank pool (slowest, best recall)"
632
+ )
619
633
  .option("-C, --candidate-limit <num>", "max candidates passed to reranking")
620
634
  .option("--answer", "generate short grounded answer")
621
635
  .option("--no-answer", "force retrieval-only output")
@@ -626,6 +640,7 @@ function wireSearchCommands(program: Command): void {
626
640
  .action(async (queryText: string, cmdOpts: Record<string, unknown>) => {
627
641
  const format = getFormat(cmdOpts);
628
642
  assertFormatSupported(CMD.ask, format);
643
+ const globals = getGlobals();
629
644
 
630
645
  // Validate empty query
631
646
  if (!queryText.trim()) {
@@ -635,6 +650,12 @@ function wireSearchCommands(program: Command): void {
635
650
  const limit = cmdOpts.limit
636
651
  ? parsePositiveInt("limit", cmdOpts.limit)
637
652
  : getDefaultLimit(format);
653
+ const { loadConfig } = await import("../config");
654
+ const { getActivePreset } = await import("../llm/registry");
655
+ const configResult = await loadConfig(globals.config);
656
+ const activePresetId = configResult.ok
657
+ ? getActivePreset(configResult.value).id
658
+ : "slim";
638
659
  const candidateLimit = cmdOpts.candidateLimit
639
660
  ? parsePositiveInt("candidate-limit", cmdOpts.candidateLimit)
640
661
  : undefined;
@@ -656,18 +677,28 @@ function wireSearchCommands(program: Command): void {
656
677
  queryModes = parsed.value;
657
678
  }
658
679
 
659
- // Determine expansion/rerank settings based on flags
660
- // Default: skip expansion (balanced mode)
661
- let noExpand = true;
662
- let noRerank = false;
663
-
664
- if (cmdOpts.fast) {
665
- noExpand = true;
666
- noRerank = true;
667
- } else if (cmdOpts.thorough) {
668
- noExpand = false;
669
- noRerank = false;
680
+ const { normalizeStructuredQueryInput } =
681
+ await import("../core/structured-query");
682
+ const normalizedInput = normalizeStructuredQueryInput(
683
+ queryText,
684
+ queryModes ?? []
685
+ );
686
+ if (!normalizedInput.ok) {
687
+ throw new CliError("VALIDATION", normalizedInput.error.message);
670
688
  }
689
+ queryText = normalizedInput.value.query;
690
+ queryModes =
691
+ normalizedInput.value.queryModes.length > 0
692
+ ? normalizedInput.value.queryModes
693
+ : undefined;
694
+
695
+ const depthPolicy = resolveDepthPolicy({
696
+ presetId: activePresetId,
697
+ fast: Boolean(cmdOpts.fast),
698
+ thorough: Boolean(cmdOpts.thorough),
699
+ candidateLimit,
700
+ hasStructuredModes: Boolean(queryModes?.length),
701
+ });
671
702
 
672
703
  const { ask, formatAsk } = await import("./commands/ask");
673
704
  const showSources = Boolean(cmdOpts.showSources);
@@ -682,9 +713,9 @@ function wireSearchCommands(program: Command): void {
682
713
  intent: cmdOpts.intent as string | undefined,
683
714
  exclude,
684
715
  queryModes,
685
- noExpand,
686
- noRerank,
687
- candidateLimit,
716
+ noExpand: depthPolicy.noExpand,
717
+ noRerank: depthPolicy.noRerank,
718
+ candidateLimit: depthPolicy.candidateLimit,
688
719
  // Per spec: --answer defaults to false, --no-answer forces retrieval-only
689
720
  // Commander creates separate cmdOpts.noAnswer for --no-answer flag
690
721
  answer: Boolean(cmdOpts.answer),
@@ -1328,7 +1359,8 @@ function wireManagementCommands(program: Command): void {
1328
1359
  .option("--all", "download all configured models")
1329
1360
  .option("--embed", "download embedding model")
1330
1361
  .option("--rerank", "download reranker model")
1331
- .option("--gen", "download generation model")
1362
+ .option("--expand", "download expansion model")
1363
+ .option("--gen", "download answer generation model")
1332
1364
  .option("--force", "force re-download")
1333
1365
  .option("--no-progress", "disable download progress")
1334
1366
  .action(async (cmdOpts: Record<string, unknown>) => {
@@ -1347,6 +1379,7 @@ function wireManagementCommands(program: Command): void {
1347
1379
  all: Boolean(cmdOpts.all),
1348
1380
  embed: Boolean(cmdOpts.embed),
1349
1381
  rerank: Boolean(cmdOpts.rerank),
1382
+ expand: Boolean(cmdOpts.expand),
1350
1383
  gen: Boolean(cmdOpts.gen),
1351
1384
  force: Boolean(cmdOpts.force),
1352
1385
  onProgress: showProgress ? createProgressRenderer() : undefined,
@@ -166,7 +166,9 @@ export const ModelPresetSchema = z.object({
166
166
  embed: z.string().min(1),
167
167
  /** Reranker model URI */
168
168
  rerank: z.string().min(1),
169
- /** Generation model URI */
169
+ /** Query expansion model URI (defaults to gen for older configs) */
170
+ expand: z.string().min(1).optional(),
171
+ /** Answer generation model URI */
170
172
  gen: z.string().min(1),
171
173
  });
172
174
 
@@ -180,6 +182,7 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
180
182
  embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
181
183
  rerank:
182
184
  "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
185
+ expand: "hf:unsloth/Qwen3-1.7B-GGUF/Qwen3-1.7B-Q4_K_M.gguf",
183
186
  gen: "hf:unsloth/Qwen3-1.7B-GGUF/Qwen3-1.7B-Q4_K_M.gguf",
184
187
  },
185
188
  {
@@ -188,6 +191,8 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
188
191
  embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
189
192
  rerank:
190
193
  "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
194
+ expand:
195
+ "hf:bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-Q4_K_M.gguf",
191
196
  gen: "hf:bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-Q4_K_M.gguf",
192
197
  },
193
198
  {
@@ -196,6 +201,8 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
196
201
  embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
197
202
  rerank:
198
203
  "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
204
+ expand:
205
+ "hf:unsloth/Qwen3-4B-Instruct-2507-GGUF/Qwen3-4B-Instruct-2507-Q4_K_M.gguf",
199
206
  gen: "hf:unsloth/Qwen3-4B-Instruct-2507-GGUF/Qwen3-4B-Instruct-2507-Q4_K_M.gguf",
200
207
  },
201
208
  ];
@@ -0,0 +1,78 @@
1
/** Retrieval depth tiers a search request can resolve to. */
export type RetrievalDepth = "fast" | "balanced" | "thorough";

/** Caller-supplied knobs consumed by {@link resolveDepthPolicy}. */
export interface ResolveDepthPolicyInput {
  /** Active model preset id; a missing or blank id is treated as "slim". */
  presetId?: string;
  /** Select the fast tier. Takes precedence over `thorough`. */
  fast?: boolean;
  /** Select the thorough tier (only honoured when `fast` is not set). */
  thorough?: boolean;
  /** Explicit expansion toggle; only consulted in the balanced tier. */
  expand?: boolean;
  /** Explicit rerank toggle; only consulted in the balanced tier. */
  rerank?: boolean;
  /** Explicit candidate limit; returned unchanged whenever it is provided. */
  candidateLimit?: number;
  /** When truthy, expansion is always switched off in the resolved policy. */
  hasStructuredModes?: boolean;
}

/** Concrete pipeline switches produced by {@link resolveDepthPolicy}. */
export interface ResolvedDepthPolicy {
  depth: RetrievalDepth;
  noExpand: boolean;
  noRerank: boolean;
  candidateLimit?: number;
  /** Whether the balanced tier expands by default for the given preset. */
  balancedExpansionEnabled: boolean;
}

/** Candidate limit applied in the thorough tier when the caller gives none. */
export const DEFAULT_THOROUGH_CANDIDATE_LIMIT = 40;

/** Trim and lower-case a preset id, substituting "slim" for a missing/blank id. */
function normalizePresetId(presetId?: string): string {
  const cleaned = presetId?.trim().toLowerCase();
  return cleaned ? cleaned : "slim";
}

/**
 * Report whether the balanced tier runs query expansion for a preset.
 *
 * @param presetId - Preset id (case and surrounding whitespace are ignored).
 * @returns true for the "slim" and "slim-tuned" presets, false otherwise.
 */
export function balancedUsesExpansion(presetId?: string): boolean {
  return ["slim", "slim-tuned"].includes(normalizePresetId(presetId));
}

/**
 * Translate depth flags into expansion / rerank / candidate-limit switches.
 *
 * Precedence: `fast` beats `thorough`, which beats the individual
 * `expand` / `rerank` toggles. `hasStructuredModes` disables expansion in
 * every tier.
 *
 * @param input - Flags and preset information supplied by the caller.
 * @returns The resolved policy, including the tier that was selected.
 */
export function resolveDepthPolicy(
  input: ResolveDepthPolicyInput
): ResolvedDepthPolicy {
  const balancedExpansionEnabled = balancedUsesExpansion(input.presetId);
  const depth: RetrievalDepth = input.fast
    ? "fast"
    : input.thorough
      ? "thorough"
      : "balanced";

  let noExpand: boolean;
  let noRerank: boolean;
  let candidateLimit = input.candidateLimit;

  switch (depth) {
    case "fast":
      noExpand = true;
      noRerank = true;
      break;
    case "thorough":
      noExpand = false;
      noRerank = false;
      candidateLimit = candidateLimit ?? DEFAULT_THOROUGH_CANDIDATE_LIMIT;
      break;
    default:
      // Balanced: start from the preset default, then apply explicit toggles.
      if (input.expand === true) {
        noExpand = false;
      } else if (input.expand === false) {
        noExpand = true;
      } else {
        noExpand = !balancedExpansionEnabled;
      }
      noRerank = input.rerank === false;
      break;
  }

  // Structured query modes supply explicit expansions and should not trigger
  // an additional generated expansion step.
  if (input.hasStructuredModes) {
    noExpand = true;
  }

  return {
    depth,
    noExpand,
    noRerank,
    candidateLimit,
    balancedExpansionEnabled,
  };
}
@@ -0,0 +1,198 @@
1
+ /**
2
+ * Structured multi-line query document parsing.
3
+ *
4
+ * Pure parser used across CLI, API, MCP, SDK, and Web.
5
+ *
6
+ * @module src/core/structured-query
7
+ */
8
+
9
+ import type { QueryModeInput } from "../pipeline/types";
10
+
11
/** Describes why a structured query document was rejected. */
export interface StructuredQueryError {
  /** 1-based line number of the offending line; null for document-level errors. */
  line: number | null;
  /** Human-readable explanation of the failure. */
  message: string;
}

/** Outcome of normalizing a (possibly structured) query string. */
export interface StructuredQueryNormalization {
  /** Base query: the input as-is, the joined plain lines, or text derived from term/intent lines. */
  query: string;
  /** Modes parsed from typed lines (if any) followed by the explicitly supplied modes. */
  queryModes: QueryModeInput[];
  /** True only when at least one term:/intent:/hyde: line was parsed. */
  usedStructuredQuerySyntax: boolean;
  /** True when `query` was synthesized from term or intent lines. */
  derivedQuery: boolean;
}

export type StructuredQueryResult =
  | { ok: true; value: StructuredQueryNormalization }
  | { ok: false; error: StructuredQueryError };

/** Matches a typed line (`term:`, `intent:`, `hyde:`; case-insensitive) and captures its text. */
const RECOGNIZED_PREFIX_PATTERN = /^\s*(term|intent|hyde)\s*:\s*(.*)$/i;

/**
 * Matches any `word:` line prefix. The negative lookahead excludes
 * `scheme://…` so that a plain line starting with a URL is not mistaken for
 * an unknown typed line.
 */
const ANY_PREFIX_PATTERN = /^\s*([a-z][a-z0-9_-]*)\s*:(?!\/\/)\s*(.*)$/i;

/** Build a failed result carrying the message and optional 1-based line. */
function buildError(
  message: string,
  line: number | null
): StructuredQueryResult {
  return { ok: false, error: { message, line } };
}

/** Build the "no structured syntax" result: input query and modes are returned untouched. */
function passthrough(
  query: string,
  queryModes: QueryModeInput[]
): StructuredQueryResult {
  return {
    ok: true,
    value: {
      query,
      queryModes,
      usedStructuredQuerySyntax: false,
      derivedQuery: false,
    },
  };
}

/**
 * Parse multi-line structured query syntax.
 *
 * Rules:
 * - single-line queries remain unchanged
 * - multi-line queries without any term:/intent:/hyde: line remain unchanged
 * - blank lines are ignored
 * - recognized typed lines: term:, intent:, hyde: (text must be non-empty)
 * - if structured syntax is used, unknown prefix lines like foo:bar are rejected
 * - lines that start with a URL scheme (e.g. https://…) count as plain text
 * - untyped lines contribute to the base query text
 * - if no untyped lines exist, base query is derived from term lines first, then intent lines
 * - hyde-only documents are rejected
 * - at most one hyde entry is allowed across typed lines and explicit modes
 *
 * @param query - Raw query text, possibly spanning several lines.
 * @param explicitQueryModes - Modes supplied out of band; appended after parsed ones.
 * @returns The normalized query and modes, or an error with the offending line.
 */
export function normalizeStructuredQueryInput(
  query: string,
  explicitQueryModes: QueryModeInput[] = []
): StructuredQueryResult {
  if (!query.includes("\n")) {
    return passthrough(query, explicitQueryModes);
  }

  // Split once; indices double as (0-based) line numbers for error reporting.
  const rawLines = query.split(/\r?\n/);
  const usesStructuredSyntax = rawLines.some((line) =>
    RECOGNIZED_PREFIX_PATTERN.test(line)
  );
  if (!usesStructuredSyntax) {
    return passthrough(query, explicitQueryModes);
  }

  const queryModes: QueryModeInput[] = [];
  const bodyLines: string[] = [];
  let hydeCount = 0;

  for (const [index, rawLine] of rawLines.entries()) {
    const lineNumber = index + 1;
    const trimmed = rawLine.trim();
    if (trimmed.length === 0) {
      continue;
    }

    const recognized = RECOGNIZED_PREFIX_PATTERN.exec(trimmed);
    if (recognized) {
      const mode = (
        recognized[1] ?? ""
      ).toLowerCase() as QueryModeInput["mode"];
      const text = (recognized[2] ?? "").trim();
      if (text.length === 0) {
        return buildError(
          `Structured query line ${lineNumber} must contain non-empty text after ${mode}:`,
          lineNumber
        );
      }
      if (mode === "hyde") {
        hydeCount += 1;
        if (hydeCount > 1) {
          return buildError(
            "Only one hyde line is allowed in a structured query document.",
            lineNumber
          );
        }
      }
      queryModes.push({ mode, text });
      continue;
    }

    // Recognized prefixes were handled above, so any prefix found here is unknown.
    const unknownPrefix = ANY_PREFIX_PATTERN.exec(trimmed)?.[1];
    if (unknownPrefix) {
      return buildError(
        `Unknown structured query line prefix "${unknownPrefix.toLowerCase()}:" on line ${lineNumber}. Expected term:, intent:, or hyde:.`,
        lineNumber
      );
    }

    bodyLines.push(trimmed);
  }

  const combinedQueryModes = [...queryModes, ...explicitQueryModes];
  const totalHydeCount = combinedQueryModes.filter(
    (entry) => entry.mode === "hyde"
  ).length;
  if (totalHydeCount > 1) {
    return buildError(
      "Only one hyde entry is allowed across structured query syntax and explicit query modes.",
      null
    );
  }

  /** Join the text of every parsed line of the given mode. */
  const joinModeText = (mode: QueryModeInput["mode"]): string =>
    queryModes
      .filter((entry) => entry.mode === mode)
      .map((entry) => entry.text)
      .join(" ")
      .trim();

  let normalizedQuery = bodyLines.join(" ").trim();
  let derivedQuery = false;

  if (!normalizedQuery) {
    // No plain lines: fall back to term text, then intent text.
    normalizedQuery = joinModeText("term") || joinModeText("intent");
    derivedQuery = normalizedQuery.length > 0;
  }

  if (!normalizedQuery) {
    return buildError(
      "Structured query documents must include at least one plain query line, term line, or intent line. hyde-only documents are not allowed.",
      null
    );
  }

  return {
    ok: true,
    value: {
      query: normalizedQuery,
      queryModes: combinedQueryModes,
      usedStructuredQuerySyntax: true,
      derivedQuery,
    },
  };
}

/**
 * Report whether a query parses successfully AND uses structured syntax.
 * Documents that use typed lines but fail validation yield false.
 */
export function hasStructuredQuerySyntax(query: string): boolean {
  const result = normalizeStructuredQueryInput(query);
  return result.ok && result.value.usedStructuredQuerySyntax;
}
package/src/llm/errors.ts CHANGED
@@ -121,7 +121,7 @@ export function modelNotFoundError(uri: string, details?: string): LlmError {
121
121
 
122
122
  export function modelNotCachedError(
123
123
  uri: string,
124
- modelType: "embed" | "rerank" | "gen"
124
+ modelType: "embed" | "rerank" | "expand" | "gen"
125
125
  ): LlmError {
126
126
  return llmError("MODEL_NOT_CACHED", {
127
127
  message: `${modelType} model not cached`,
@@ -19,7 +19,12 @@ import { ModelCache } from "../cache";
19
19
  import { HttpEmbedding, isHttpModelUri } from "../httpEmbedding";
20
20
  import { HttpGeneration, isHttpGenUri } from "../httpGeneration";
21
21
  import { HttpRerank, isHttpRerankUri } from "../httpRerank";
22
- import { getActivePreset, getModelConfig } from "../registry";
22
+ import {
23
+ getActivePreset,
24
+ getAnswerModelUri,
25
+ getExpandModelUri,
26
+ getModelConfig,
27
+ } from "../registry";
23
28
  import { NodeLlamaCppEmbedding } from "./embedding";
24
29
  import { NodeLlamaCppGeneration } from "./generation";
25
30
  import { getModelManager, type ModelManager } from "./lifecycle";
@@ -105,8 +110,7 @@ export class LlmAdapter {
105
110
  modelUri?: string,
106
111
  options?: CreatePortOptions
107
112
  ): Promise<LlmResult<GenerationPort>> {
108
- const preset = getActivePreset(this.config);
109
- const uri = modelUri ?? preset.gen;
113
+ const uri = getAnswerModelUri(this.config, modelUri);
110
114
  const policy = options?.policy ?? DEFAULT_POLICY;
111
115
 
112
116
  // Use HTTP generation for remote endpoints
@@ -132,6 +136,38 @@ export class LlmAdapter {
132
136
  };
133
137
  }
134
138
 
139
/**
 * Build the generation port used for query expansion.
 *
 * The model URI is resolved via getExpandModelUri: an explicit `modelUri`
 * wins, otherwise preset.expand is used, falling back to preset.gen.
 *
 * @param modelUri - Optional URI overriding the preset's expansion model.
 * @param options - Optional download policy and progress callback.
 * @returns A generation port, or the failure reported by the model cache.
 */
async createExpansionPort(
  modelUri?: string,
  options?: CreatePortOptions
): Promise<LlmResult<GenerationPort>> {
  const expandUri = getExpandModelUri(this.config, modelUri);

  // HTTP generation URIs bypass the local model cache entirely.
  if (isHttpGenUri(expandUri)) {
    return { ok: true, value: new HttpGeneration(expandUri) };
  }

  const cached = await this.cache.ensureModel(
    expandUri,
    "expand",
    options?.policy ?? DEFAULT_POLICY,
    options?.onProgress
  );
  if (!cached.ok) {
    return cached;
  }

  const port = new NodeLlamaCppGeneration(
    this.manager,
    expandUri,
    cached.value
  );
  return { ok: true, value: port };
}
170
+
135
171
  /**
136
172
  * Create a rerank port.
137
173
  * Supports HTTP endpoints for remote reranking models.
@@ -57,6 +57,24 @@ export function getActivePreset(config: Config): ModelPreset {
57
57
  return builtIn;
58
58
  }
59
59
 
60
/**
 * Pick the model URI used for query expansion.
 *
 * @param config - Configuration from which the active preset is read.
 * @param override - Explicit URI; when non-empty it is returned as-is.
 * @returns `override` if given, else the active preset's `expand` URI,
 *   falling back to its `gen` URI when `expand` is not set.
 */
export function getExpandModelUri(config: Config, override?: string): string {
  if (!override) {
    const activePreset = getActivePreset(config);
    return activePreset.expand ?? activePreset.gen;
  }
  return override;
}
68
+
69
/**
 * Pick the model URI used for answer generation.
 *
 * @param config - Configuration from which the active preset is read.
 * @param override - Explicit URI; when non-empty it is returned as-is.
 * @returns `override` if given, else the active preset's `gen` URI.
 */
export function getAnswerModelUri(config: Config, override?: string): string {
  return override ? override : getActivePreset(config).gen;
}
77
+
60
78
  /**
61
79
  * Resolve a model URI for a given type.
62
80
  * Uses override if provided, otherwise from active preset.
@@ -70,6 +88,9 @@ export function resolveModelUri(
70
88
  return override;
71
89
  }
72
90
  const preset = getActivePreset(config);
91
+ if (type === "expand") {
92
+ return preset.expand ?? preset.gen;
93
+ }
73
94
  return preset[type];
74
95
  }
75
96
 
package/src/llm/types.ts CHANGED
@@ -19,7 +19,7 @@ export type LlmResult<T> =
19
19
  // Model Types
20
20
  // ─────────────────────────────────────────────────────────────────────────────
21
21
 
22
- export type ModelType = "embed" | "rerank" | "gen";
22
+ export type ModelType = "embed" | "rerank" | "expand" | "gen";
23
23
 
24
24
  /** Model URI format: hf:org/repo/file.gguf or file:/path */
25
25
  export type ModelUri = string;
@@ -149,8 +149,8 @@ export const queryInputSchema = z.object({
149
149
  .optional(),
150
150
  fast: z.boolean().default(false),
151
151
  thorough: z.boolean().default(false),
152
- expand: z.boolean().default(false), // Default: skip expansion
153
- rerank: z.boolean().default(true),
152
+ expand: z.boolean().optional(),
153
+ rerank: z.boolean().optional(),
154
154
  tagsAll: z.array(z.string()).optional(),
155
155
  tagsAny: z.array(z.string()).optional(),
156
156
  });