@gmickel/gno 0.15.1 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/README.md +36 -1
  2. package/package.json +7 -4
  3. package/src/cli/commands/ask.ts +9 -0
  4. package/src/cli/commands/query.ts +3 -2
  5. package/src/cli/pager.ts +1 -1
  6. package/src/cli/program.ts +89 -0
  7. package/src/core/links.ts +92 -20
  8. package/src/ingestion/sync.ts +267 -23
  9. package/src/ingestion/types.ts +2 -0
  10. package/src/ingestion/walker.ts +2 -1
  11. package/src/llm/nodeLlamaCpp/embedding.ts +53 -10
  12. package/src/mcp/tools/index.ts +30 -1
  13. package/src/mcp/tools/query.ts +22 -2
  14. package/src/mcp/tools/search.ts +8 -0
  15. package/src/mcp/tools/vsearch.ts +8 -0
  16. package/src/pipeline/answer.ts +324 -7
  17. package/src/pipeline/expansion.ts +243 -7
  18. package/src/pipeline/explain.ts +93 -5
  19. package/src/pipeline/hybrid.ts +240 -57
  20. package/src/pipeline/query-modes.ts +125 -0
  21. package/src/pipeline/rerank.ts +34 -13
  22. package/src/pipeline/search.ts +41 -3
  23. package/src/pipeline/temporal.ts +257 -0
  24. package/src/pipeline/types.ts +58 -0
  25. package/src/pipeline/vsearch.ts +107 -9
  26. package/src/serve/public/app.tsx +1 -3
  27. package/src/serve/public/globals.built.css +2 -2
  28. package/src/serve/public/lib/retrieval-filters.ts +167 -0
  29. package/src/serve/public/pages/Ask.tsx +339 -109
  30. package/src/serve/public/pages/Browse.tsx +71 -5
  31. package/src/serve/public/pages/DocView.tsx +2 -21
  32. package/src/serve/public/pages/Search.tsx +507 -120
  33. package/src/serve/routes/api.ts +202 -2
  34. package/src/store/migrations/006-document-metadata.ts +104 -0
  35. package/src/store/migrations/007-document-date-fields.ts +24 -0
  36. package/src/store/migrations/index.ts +3 -1
  37. package/src/store/sqlite/adapter.ts +218 -5
  38. package/src/store/types.ts +46 -0
@@ -17,10 +17,44 @@ import { ok } from "../store/types";
17
17
  // Constants
18
18
  // ─────────────────────────────────────────────────────────────────────────────
19
19
 
20
- const EXPANSION_PROMPT_VERSION = "v2";
20
+ const EXPANSION_PROMPT_VERSION = "v3";
21
21
  const DEFAULT_TIMEOUT_MS = 5000;
22
22
  // Non-greedy to avoid matching from first { to last } across multiple objects
23
23
  const JSON_EXTRACT_PATTERN = /\{[\s\S]*?\}/;
24
// Matches a double-quoted phrase; capture group 1 is the text between the quotes.
const QUOTED_PHRASE_PATTERN = /"([^"]+)"/g;
// Matches an exclusion term: `-word` (group 2) or `-"quoted phrase"` (group 1).
// The lookbehind requires the "-" to start the query or follow whitespace, so
// hyphens inside ordinary tokens ("state-of-the-art", "2024-01-15", "GPT-4")
// are not mistaken for negations.
const NEGATION_PATTERN = /(?<!\S)-(?:"([^"]+)"|([^\s]+))/g;
// A token starts with an ASCII letter or digit and may continue with letters,
// digits, or any of `.`, `+`, `#`, `_`, `-` (keeps "C++", "C#", "Node.js" whole).
const TOKEN_PATTERN = /[A-Za-z0-9][A-Za-z0-9.+#_-]*/g;
// Upper bound on the number of lexical / vector variants kept per expansion.
const MAX_VARIANTS = 5;
// Lower-cased English function words that carry no signal for overlap checks.
const STOPWORDS = new Set([
  "a",
  "an",
  "and",
  "are",
  "as",
  "at",
  "be",
  "by",
  "for",
  "from",
  "how",
  "in",
  "is",
  "it",
  "of",
  "on",
  "or",
  "that",
  "the",
  "this",
  "to",
  "what",
  "when",
  "where",
  "which",
  "who",
  "why",
  "with",
]);
24
58
 
25
59
  // ─────────────────────────────────────────────────────────────────────────────
26
60
  // Cache Key Generation
@@ -54,6 +88,8 @@ Generate JSON with:
54
88
 
55
89
  Rules:
56
90
  - Keep proper nouns exactly as written
91
+ - Preserve quoted phrases and negated terms from the query in lexicalQueries
92
+ - Keep symbol-heavy technical entities exactly (for example: C++, C#, Node.js)
57
93
  - Be concise - each variation 3-8 words
58
94
  - HyDE should read like actual documentation, not a question
59
95
 
@@ -70,6 +106,8 @@ Generiere JSON mit:
70
106
 
71
107
  Regeln:
72
108
  - Eigennamen exakt beibehalten
109
+ - Zitierte Phrasen und negierte Begriffe in lexicalQueries beibehalten
110
+ - Technische Begriffe mit Symbolen exakt halten (z. B. C++, C#, Node.js)
73
111
  - Kurz halten - jede Variation 3-8 Wörter
74
112
  - HyDE soll wie echte Dokumentation klingen, nicht wie eine Frage
75
113
 
@@ -86,6 +124,8 @@ Generate JSON with:
86
124
 
87
125
  Rules:
88
126
  - Keep proper nouns exactly as written
127
+ - Preserve quoted phrases and negated terms from the query in lexicalQueries
128
+ - Keep symbol-heavy technical entities exactly (for example: C++, C#, Node.js)
89
129
  - Be concise - each variation 3-8 words
90
130
  - HyDE should read like actual documentation, not a question
91
131
 
@@ -110,6 +150,199 @@ function getPromptTemplate(lang?: string): string {
110
150
  }
111
151
  }
112
152
 
153
/**
 * Signals pulled out of the user's original query. They are used to anchor
 * lexical variants and to detect expansion variants that drifted away from
 * what the user asked for.
 */
interface QuerySignals {
  /** Text of double-quoted phrases from the query, without the quotes. */
  quotedPhrases: string[];
  /** Exclusion terms, formatted as `-term` or `-"phrase"`. */
  negations: string[];
  /** Entity-like query tokens (uppercase, symbol-heavy, or mixed letter/digit). */
  criticalEntities: string[];
  /** Normalized, non-stopword query tokens used for loose overlap checks. */
  overlapTokens: Set<string>;
}
159
+
160
/**
 * Canonical form of a token for comparisons: lower-cased, with surrounding
 * whitespace removed.
 */
function normalizeToken(token: string): string {
  const lowered = token.toLowerCase();
  return lowered.trim();
}
163
+
164
/**
 * Collect the distinct, normalized tokens of `text` that are meaningful for
 * overlap comparison.
 *
 * Tokens are matched with TOKEN_PATTERN, normalized with `normalizeToken`,
 * and dropped when shorter than two characters or listed in STOPWORDS.
 *
 * @param text - Query or candidate text to tokenize.
 * @returns Set of normalized tokens (empty when nothing qualifies).
 */
function extractOverlapTokens(text: string): Set<string> {
  // TOKEN_PATTERN allows `.`, `-` and `_` after the first character, so a
  // sentence-final word is matched as "sort." and would never overlap with
  // "sort". Strip that trailing punctuation; `+` and `#` are kept so that
  // "c++" and "c#" survive intact.
  const trailingPunctuation = /[._-]+$/;
  const tokens = new Set<string>();
  for (const rawToken of text.match(TOKEN_PATTERN) ?? []) {
    const token = normalizeToken(rawToken).replace(trailingPunctuation, "");
    if (token.length < 2 || STOPWORDS.has(token)) {
      continue;
    }
    tokens.add(token);
  }
  return tokens;
}
179
+
180
/**
 * Trim every value, drop the ones that end up empty, and remove
 * case-insensitive duplicates. The first spelling of each value wins and the
 * original order is preserved.
 */
function dedupeStrings(values: string[]): string[] {
  // Map keeps insertion order, so iterating its values yields first-seen order.
  const firstByKey = new Map<string, string>();
  for (const raw of values) {
    const cleaned = raw.trim();
    if (cleaned.length === 0) {
      continue;
    }
    const foldedKey = cleaned.toLowerCase();
    if (!firstByKey.has(foldedKey)) {
      firstByKey.set(foldedKey, cleaned);
    }
  }
  return Array.from(firstByKey.values());
}
197
+
198
/**
 * Extract the guardrail signals (quoted phrases, negations, critical entities
 * and overlap tokens) from the user's query.
 *
 * Negated terms are reported only in `negations`: a phrase or entity the user
 * excluded is never also returned as a positive quoted phrase or critical
 * entity, which would otherwise produce self-contradicting anchor queries
 * such as `"foo bar" -"foo bar"`.
 *
 * @param query - Raw user query.
 * @returns Signals used to anchor and filter expansion variants.
 */
function extractQuerySignals(query: string): QuerySignals {
  // A "-" marks an exclusion only at the start of the query or after
  // whitespace; hyphens inside tokens ("state-of-the-art", "2024-01-15")
  // are part of the word and must not become negations.
  const negationMatches = [...query.matchAll(NEGATION_PATTERN)].filter((m) => {
    const start = m.index ?? 0;
    return start === 0 || /\s/.test(query.charAt(start - 1));
  });

  const negatedSpans = negationMatches.map((m) => {
    const start = m.index ?? 0;
    return { start, end: start + (m[0] ?? "").length };
  });
  const isNegated = (index: number): boolean =>
    negatedSpans.some((span) => index >= span.start && index < span.end);

  const quotedPhrases = dedupeStrings(
    [...query.matchAll(QUOTED_PHRASE_PATTERN)]
      // Skip phrases that belong to an exclusion such as -"foo bar".
      .filter((m) => !isNegated(m.index ?? 0))
      .map((m) => m[1]?.trim() ?? "")
  );

  const negations = dedupeStrings(
    negationMatches.map((m) => {
      const phrase = m[1]?.trim();
      if (phrase) {
        return `-"${phrase}"`;
      }
      const token = m[2]?.trim();
      return token ? `-${token}` : "";
    })
  );

  // TOKEN_PATTERN lets `.`, `-` and `_` trail a token; without stripping them a
  // sentence-final word ("sort.") would count as a symbol-heavy entity.
  const trailingPunctuation = /[._-]+$/;
  const criticalEntities = dedupeStrings(
    [...query.matchAll(TOKEN_PATTERN)]
      .filter((m) => !isNegated(m.index ?? 0))
      .map((m) => (m[0] ?? "").replace(trailingPunctuation, ""))
      .filter(
        (token) =>
          // Entity signals: uppercase/mixed case, acronyms, symbol-heavy
          // technical terms, and letter/digit mixes such as "v2" or "3d".
          /[A-Z]/.test(token) ||
          /[+#.]/.test(token) ||
          /[A-Za-z]\d|\d[A-Za-z]/.test(token)
      )
  );

  return {
    quotedPhrases,
    negations,
    criticalEntities,
    // Overlap tokens deliberately still cover the whole query; they are only a
    // loose drift check, not something injected into the anchor query.
    overlapTokens: extractOverlapTokens(query),
  };
}
236
+
237
/**
 * Report whether `part` occurs anywhere inside `text`, ignoring letter case.
 * An empty `part` is contained in every string.
 */
function hasCaseInsensitiveSubstring(text: string, part: string): boolean {
  const haystack = text.toLowerCase();
  const needle = part.toLowerCase();
  return haystack.indexOf(needle) !== -1;
}
240
+
241
/**
 * Decide whether an expansion candidate is still anchored to the original
 * query.
 *
 * A non-blank candidate passes when it mentions (case-insensitively) any
 * quoted phrase, critical entity or negation from the query, or when it
 * shares at least one overlap token with the query.
 *
 * @param querySignals - Signals extracted from the original query.
 * @param candidate - Expansion variant or HyDE passage to check.
 * @returns `true` when the candidate overlaps with the query.
 */
function hasSufficientOverlap(
  querySignals: QuerySignals,
  candidate: string
): boolean {
  if (candidate.trim().length === 0) {
    return false;
  }

  const mentions = (needle: string): boolean =>
    hasCaseInsensitiveSubstring(candidate, needle);

  if (
    querySignals.quotedPhrases.some(mentions) ||
    querySignals.criticalEntities.some(mentions) ||
    querySignals.negations.some(mentions)
  ) {
    return true;
  }

  // Fall back to plain token overlap between candidate and query.
  const queryTokens = querySignals.overlapTokens;
  return Array.from(extractOverlapTokens(candidate)).some((token) =>
    queryTokens.has(token)
  );
}
274
+
275
/**
 * Build a lexical query made only of the query's hard signals: critical
 * entities first, then quoted phrases (re-quoted), then negations.
 * Duplicates are removed case-insensitively. When the query has no such
 * signals, the trimmed query itself is returned.
 */
function buildAnchorLexicalQuery(
  query: string,
  querySignals: QuerySignals
): string {
  const pieces = [
    ...querySignals.criticalEntities,
    ...querySignals.quotedPhrases.map((phrase) => `"${phrase}"`),
    ...querySignals.negations,
  ];

  const anchor = dedupeStrings(pieces).join(" ").trim();
  return anchor.length > 0 ? anchor : query.trim();
}
294
+
295
/**
 * De-duplicate the variants and keep only those that still overlap with the
 * original query, preserving their order.
 */
function normalizeVariants(
  variants: string[],
  querySignals: QuerySignals
): string[] {
  const kept: string[] = [];
  for (const variant of dedupeStrings(variants)) {
    if (hasSufficientOverlap(querySignals, variant)) {
      kept.push(variant);
    }
  }
  return kept;
}
304
+
305
/**
 * Apply deterministic guardrails to a model-produced expansion:
 * - an anchor query built from the query's entities/phrases/negations is put
 *   in front of the lexical variants
 * - variants with no overlap with the query are dropped as drift
 * - when filtering leaves a list empty, the trimmed query is used instead
 * - each list is capped at MAX_VARIANTS
 * - the HyDE passage is kept (trimmed) only if it overlaps with the query
 *
 * @param query - Original user query.
 * @param expansion - Parsed expansion result to sanitize.
 * @returns A new expansion result; `notes` is passed through unchanged.
 */
export function applyExpansionGuardrails(
  query: string,
  expansion: ExpansionResult
): ExpansionResult {
  const signals = extractQuerySignals(query);
  const fallbackQuery = query.trim();

  // Filter a candidate list, fall back to the query itself, then cap the size.
  const guard = (candidates: string[]): string[] => {
    const kept = normalizeVariants(candidates, signals);
    const usable = kept.length > 0 ? kept : [fallbackQuery];
    return usable.slice(0, MAX_VARIANTS);
  };

  const lexicalQueries = guard([
    buildAnchorLexicalQuery(query, signals),
    ...expansion.lexicalQueries,
  ]);
  const vectorQueries = guard(expansion.vectorQueries);

  let hyde: string | undefined;
  if (
    typeof expansion.hyde === "string" &&
    hasSufficientOverlap(signals, expansion.hyde)
  ) {
    hyde = expansion.hyde.trim();
  }

  return {
    lexicalQueries,
    vectorQueries,
    hyde,
    notes: expansion.notes,
  };
}
345
+
113
346
  // ─────────────────────────────────────────────────────────────────────────────
114
347
  // Schema Validation
115
348
  // ─────────────────────────────────────────────────────────────────────────────
@@ -145,13 +378,13 @@ function parseExpansionResult(output: string): ExpansionResult | null {
145
378
 
146
379
  // Limit array sizes
147
380
  const result: ExpansionResult = {
148
- lexicalQueries: lexicalQueries.slice(0, 5),
149
- vectorQueries: vectorQueries.slice(0, 5),
381
+ lexicalQueries: lexicalQueries.slice(0, MAX_VARIANTS),
382
+ vectorQueries: vectorQueries.slice(0, MAX_VARIANTS),
150
383
  };
151
384
 
152
385
  // Optional fields
153
- if (typeof parsed.hyde === "string" && parsed.hyde.length > 0) {
154
- result.hyde = parsed.hyde;
386
+ if (typeof parsed.hyde === "string" && parsed.hyde.trim().length > 0) {
387
+ result.hyde = parsed.hyde.trim();
155
388
  }
156
389
  if (typeof parsed.notes === "string") {
157
390
  result.notes = parsed.notes;
@@ -222,7 +455,10 @@ export async function expandQuery(
222
455
 
223
456
  // Parse result
224
457
  const parsed = parseExpansionResult(result.value);
225
- return ok(parsed);
458
+ if (!parsed) {
459
+ return ok(null);
460
+ }
461
+ return ok(applyExpansionGuardrails(query, parsed));
226
462
  } catch {
227
463
  if (timeoutId) {
228
464
  clearTimeout(timeoutId);
@@ -257,7 +493,7 @@ export async function expandQueryCached(
257
493
  if (cached) {
258
494
  const parsed = parseExpansionResult(cached);
259
495
  if (parsed) {
260
- return ok(parsed);
496
+ return ok(applyExpansionGuardrails(query, parsed));
261
497
  }
262
498
  }
263
499
 
@@ -9,6 +9,7 @@ import type {
9
9
  ExpansionResult,
10
10
  ExplainLine,
11
11
  ExplainResult,
12
+ QueryModeSummary,
12
13
  RerankedCandidate,
13
14
  } from "./types";
14
15
 
@@ -30,13 +31,33 @@ export function formatResultExplain(results: ExplainResult[]): string {
30
31
  const lines: string[] = [];
31
32
  for (const r of results.slice(0, 10)) {
32
33
  let msg = `score=${r.score.toFixed(2)}`;
33
- if (r.bm25Score !== undefined) {
34
- msg += ` (bm25=${r.bm25Score.toFixed(2)}`;
34
+ if (
35
+ r.fusionScore !== undefined ||
36
+ r.bm25Score !== undefined ||
37
+ r.vecScore !== undefined ||
38
+ r.rerankScore !== undefined
39
+ ) {
40
+ msg += " (";
41
+ if (r.fusionScore !== undefined) {
42
+ msg += `fusion=${r.fusionScore.toFixed(3)}`;
43
+ }
44
+ if (r.bm25Score !== undefined) {
45
+ if (msg.at(-1) !== "(") {
46
+ msg += ", ";
47
+ }
48
+ msg += `bm25=${r.bm25Score.toFixed(2)}`;
49
+ }
35
50
  if (r.vecScore !== undefined) {
36
- msg += `, vec=${r.vecScore.toFixed(2)}`;
51
+ if (msg.at(-1) !== "(") {
52
+ msg += ", ";
53
+ }
54
+ msg += `vec=${r.vecScore.toFixed(2)}`;
37
55
  }
38
56
  if (r.rerankScore !== undefined) {
39
- msg += `, rerank=${r.rerankScore.toFixed(2)}`;
57
+ if (msg.at(-1) !== "(") {
58
+ msg += ", ";
59
+ }
60
+ msg += `rerank=${r.rerankScore.toFixed(2)}`;
40
61
  }
41
62
  msg += ")";
42
63
  }
@@ -52,7 +73,8 @@ export function formatResultExplain(results: ExplainResult[]): string {
52
73
  export type ExpansionStatus =
53
74
  | "disabled" // User chose --no-expand
54
75
  | "skipped_strong" // Strong BM25 signal detected
55
- | "attempted"; // Expansion was attempted (may have succeeded or timed out)
76
+ | "attempted" // Expansion was attempted (may have succeeded or timed out)
77
+ | "provided"; // Structured query modes were provided
56
78
 
57
79
  export function explainExpansion(
58
80
  status: ExpansionStatus,
@@ -64,6 +86,18 @@ export function explainExpansion(
64
86
  if (status === "skipped_strong") {
65
87
  return { stage: "expansion", message: "skipped (strong BM25)" };
66
88
  }
89
+ if (status === "provided") {
90
+ if (!result) {
91
+ return { stage: "expansion", message: "provided (empty)" };
92
+ }
93
+ const lex = result.lexicalQueries.length;
94
+ const sem = result.vectorQueries.length;
95
+ const hyde = result.hyde ? ", 1 hyde" : "";
96
+ return {
97
+ stage: "expansion",
98
+ message: `provided (${lex} term, ${sem} intent${hyde})`,
99
+ };
100
+ }
67
101
  if (!result) {
68
102
  return { stage: "expansion", message: "skipped (timeout)" };
69
103
  }
@@ -76,6 +110,14 @@ export function explainExpansion(
76
110
  };
77
111
  }
78
112
 
113
/**
 * Build the `query_modes` explain line from a query-mode summary, reporting
 * the `term` and `intent` values and whether `hyde` is set.
 */
export function explainQueryModes(summary: QueryModeSummary): ExplainLine {
  let hydeFlag = "no";
  if (summary.hyde) {
    hydeFlag = "yes";
  }
  const fields = [
    `term=${summary.term}`,
    `intent=${summary.intent}`,
    `hyde=${hydeFlag}`,
  ];
  return { stage: "query_modes", message: fields.join(", ") };
}
120
+
79
121
  export function explainBm25(count: number): ExplainLine {
80
122
  return { stage: "bm25", message: `${count} candidates` };
81
123
  }
@@ -101,6 +143,51 @@ export function explainRerank(enabled: boolean, count: number): ExplainLine {
101
143
  return { stage: "rerank", message: `top ${count} reranked` };
102
144
  }
103
145
 
146
/** Counter values rendered by `explainCounters`. */
interface ExplainCountersInput {
  /** Rendered as the numerator of `expansionCache=<hits>/<lookups>`. */
  expansionCacheHits: number;
  /** Rendered as the denominator of `expansionCache=<hits>/<lookups>`. */
  expansionCacheLookups: number;
  /** Rendered as the numerator of `rerankCache=<hits>/<lookups>`. */
  rerankCacheHits: number;
  /** Rendered as the denominator of `rerankCache=<hits>/<lookups>`. */
  rerankCacheLookups: number;
  /** Fallback event labels; duplicates are collapsed when rendered. */
  fallbackEvents: string[];
}

/**
 * Build the `counters` explain line: cache hit/lookup pairs plus the distinct
 * fallback events joined with `|` (or `none` when there were no fallbacks).
 */
export function explainCounters(counters: ExplainCountersInput): ExplainLine {
  const distinctEvents = Array.from(new Set(counters.fallbackEvents));
  const fallbacks =
    distinctEvents.length === 0 ? "none" : distinctEvents.join("|");
  const parts = [
    `expansionCache=${counters.expansionCacheHits}/${counters.expansionCacheLookups}`,
    `rerankCache=${counters.rerankCacheHits}/${counters.rerankCacheLookups}`,
    `fallbacks=${fallbacks}`,
  ];
  return { stage: "counters", message: parts.join(", ") };
}
162
+
163
/**
 * Per-stage durations rendered by `explainTimings`. Each value is printed
 * with an `ms` suffix.
 */
interface StageTimingsInput {
  langMs: number;
  expansionMs: number;
  bm25Ms: number;
  vectorMs: number;
  fusionMs: number;
  rerankMs: number;
  assemblyMs: number;
  totalMs: number;
}

/**
 * Build the `timing` explain line: one `<stage>=<value>ms` entry per stage,
 * each value formatted with two decimals, joined with `, ` in a fixed order.
 */
export function explainTimings(timings: StageTimingsInput): ExplainLine {
  // Label/value pairs in the exact order they appear in the message.
  const stages: Array<[string, number]> = [
    ["lang", timings.langMs],
    ["expansion", timings.expansionMs],
    ["bm25", timings.bm25Ms],
    ["vector", timings.vectorMs],
    ["fusion", timings.fusionMs],
    ["rerank", timings.rerankMs],
    ["assembly", timings.assemblyMs],
    ["total", timings.totalMs],
  ];
  const message = stages
    .map(([label, value]) => `${label}=${value.toFixed(2)}ms`)
    .join(", ");
  return { stage: "timing", message };
}
190
+
104
191
  // ─────────────────────────────────────────────────────────────────────────────
105
192
  // Build ExplainResult from RerankedCandidate
106
193
  // ─────────────────────────────────────────────────────────────────────────────
@@ -115,6 +202,7 @@ export function buildExplainResults(
115
202
  rank: i + 1,
116
203
  docid: docidMap.get(key) ?? "#unknown",
117
204
  score: c.blendedScore,
205
+ fusionScore: c.fusionScore,
118
206
  bm25Score: c.bm25Rank !== null ? 1 / (60 + c.bm25Rank) : undefined,
119
207
  vecScore: c.vecRank !== null ? 1 / (60 + c.vecRank) : undefined,
120
208
  rerankScore: c.rerankScore ?? undefined,