@gmickel/gno 0.15.1 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/README.md +36 -1
  2. package/package.json +7 -4
  3. package/src/cli/commands/ask.ts +9 -0
  4. package/src/cli/commands/query.ts +3 -2
  5. package/src/cli/pager.ts +1 -1
  6. package/src/cli/program.ts +89 -0
  7. package/src/core/links.ts +92 -20
  8. package/src/ingestion/sync.ts +267 -23
  9. package/src/ingestion/types.ts +2 -0
  10. package/src/ingestion/walker.ts +2 -1
  11. package/src/llm/nodeLlamaCpp/embedding.ts +53 -10
  12. package/src/mcp/tools/index.ts +30 -1
  13. package/src/mcp/tools/query.ts +22 -2
  14. package/src/mcp/tools/search.ts +8 -0
  15. package/src/mcp/tools/vsearch.ts +8 -0
  16. package/src/pipeline/answer.ts +324 -7
  17. package/src/pipeline/expansion.ts +243 -7
  18. package/src/pipeline/explain.ts +93 -5
  19. package/src/pipeline/hybrid.ts +240 -57
  20. package/src/pipeline/query-modes.ts +125 -0
  21. package/src/pipeline/rerank.ts +34 -13
  22. package/src/pipeline/search.ts +41 -3
  23. package/src/pipeline/temporal.ts +257 -0
  24. package/src/pipeline/types.ts +58 -0
  25. package/src/pipeline/vsearch.ts +107 -9
  26. package/src/serve/public/app.tsx +1 -3
  27. package/src/serve/public/globals.built.css +2 -2
  28. package/src/serve/public/lib/retrieval-filters.ts +167 -0
  29. package/src/serve/public/pages/Ask.tsx +339 -109
  30. package/src/serve/public/pages/Browse.tsx +71 -5
  31. package/src/serve/public/pages/DocView.tsx +2 -21
  32. package/src/serve/public/pages/Search.tsx +507 -120
  33. package/src/serve/routes/api.ts +202 -2
  34. package/src/store/migrations/006-document-metadata.ts +104 -0
  35. package/src/store/migrations/007-document-date-fields.ts +24 -0
  36. package/src/store/migrations/index.ts +3 -1
  37. package/src/store/sqlite/adapter.ts +218 -5
  38. package/src/store/types.ts +46 -0
@@ -28,6 +28,10 @@ interface VsearchInput {
28
28
  limit?: number;
29
29
  minScore?: number;
30
30
  lang?: string;
31
+ since?: string;
32
+ until?: string;
33
+ categories?: string[];
34
+ author?: string;
31
35
  tagsAll?: string[];
32
36
  tagsAny?: string[];
33
37
  }
@@ -188,6 +192,10 @@ export function handleVsearch(
188
192
  limit: args.limit ?? 5,
189
193
  minScore: args.minScore,
190
194
  collection: args.collection,
195
+ since: args.since,
196
+ until: args.until,
197
+ categories: args.categories,
198
+ author: args.author,
191
199
  tagsAll: normalizeTagFilters(args.tagsAll),
192
200
  tagsAny: normalizeTagFilters(args.tagsAny),
193
201
  }
@@ -7,7 +7,12 @@
7
7
 
8
8
  import type { GenerationPort } from "../llm/types";
9
9
  import type { StorePort } from "../store/types";
10
- import type { Citation, SearchResult } from "./types";
10
+ import type {
11
+ AnswerContextEntry,
12
+ AnswerContextExplain,
13
+ Citation,
14
+ SearchResult,
15
+ } from "./types";
11
16
 
12
17
  // ─────────────────────────────────────────────────────────────────────────────
13
18
  // Constants
@@ -39,12 +44,65 @@ export const ABSTENTION_MESSAGE =
39
44
  /** Max characters per document (~8K tokens) */
40
45
  const MAX_DOC_CHARS = 32_000;
41
46
 
42
- /** Max number of sources - fewer docs but full content */
43
- const MAX_CONTEXT_SOURCES = 3;
47
+ /** Max number of sources selected for grounded answer context */
48
+ const MAX_CONTEXT_SOURCES = 5;
49
+ /** Default source target for non-comparative queries */
50
+ const BASE_CONTEXT_SOURCES = 3;
51
+ /** Candidate pool before adaptive selection */
52
+ const CONTEXT_CANDIDATE_POOL = 12;
44
53
 
45
54
  /** Fallback snippet limit when full content unavailable */
46
55
  const MAX_SNIPPET_CHARS = 1500;
47
56
 
57
+ const FACET_SPLIT_RE = /\b(?:and|or|vs|versus)\b|[,;]+/gi;
58
+ const COMPARISON_QUERY_RE =
59
+ /\b(?:compare|comparison|difference|different|vs|versus|trade-?off|pros|cons|conflict|between)\b/i;
60
+ const TOKEN_SPLIT_RE = /[^\p{L}\p{N}]+/u;
61
+ const QUERY_STOPWORDS = new Set([
62
+ "a",
63
+ "an",
64
+ "and",
65
+ "are",
66
+ "as",
67
+ "at",
68
+ "be",
69
+ "by",
70
+ "for",
71
+ "from",
72
+ "how",
73
+ "i",
74
+ "in",
75
+ "is",
76
+ "it",
77
+ "of",
78
+ "on",
79
+ "or",
80
+ "that",
81
+ "the",
82
+ "to",
83
+ "vs",
84
+ "versus",
85
+ "what",
86
+ "when",
87
+ "where",
88
+ "which",
89
+ "who",
90
+ "why",
91
+ "with",
92
+ ]);
93
+
94
+ interface SourceCandidate {
95
+ result: SearchResult;
96
+ normalizedScore: number;
97
+ matchedQueryTokens: Set<string>;
98
+ matchedFacetIndexes: Set<number>;
99
+ }
100
+
101
+ interface SelectedSource {
102
+ candidate: SourceCandidate;
103
+ reason: string;
104
+ }
105
+
48
106
  // ─────────────────────────────────────────────────────────────────────────────
49
107
  // Citation Processing
50
108
  // ─────────────────────────────────────────────────────────────────────────────
@@ -114,6 +172,7 @@ export function renumberAnswerCitations(
114
172
  export interface AnswerGenerationResult {
115
173
  answer: string;
116
174
  citations: Citation[];
175
+ answerContext: AnswerContextExplain;
117
176
  }
118
177
 
119
178
  export interface AnswerGenerationDeps {
@@ -121,6 +180,250 @@ export interface AnswerGenerationDeps {
121
180
  store: StorePort | null;
122
181
  }
123
182
 
183
+ function normalizeScore(score: number): number {
184
+ if (!Number.isFinite(score)) {
185
+ return 0;
186
+ }
187
+ return Math.max(0, Math.min(1, score));
188
+ }
189
+
190
+ function tokenize(text: string): string[] {
191
+ return text
192
+ .trim()
193
+ .toLowerCase()
194
+ .split(TOKEN_SPLIT_RE)
195
+ .map((token) => token.trim())
196
+ .filter((token) => token.length >= 2 && !QUERY_STOPWORDS.has(token));
197
+ }
198
+
199
+ function uniqueFacetTexts(query: string): string[] {
200
+ const segments = query
201
+ .split(FACET_SPLIT_RE)
202
+ .map((segment) => segment.trim())
203
+ .filter((segment) => segment.length > 0);
204
+
205
+ if (segments.length <= 1) {
206
+ return query.trim().length > 0 ? [query.trim()] : [];
207
+ }
208
+
209
+ return [...new Set(segments)];
210
+ }
211
+
212
+ function buildCandidates(
213
+ queryTokenSet: Set<string>,
214
+ facetTokenSets: Set<string>[],
215
+ results: SearchResult[]
216
+ ): SourceCandidate[] {
217
+ return results.map((result) => {
218
+ const signalText = `${result.title ?? ""}\n${result.snippet ?? ""}`;
219
+ const signalTokenSet = new Set(tokenize(signalText));
220
+
221
+ const matchedQueryTokens = new Set<string>();
222
+ for (const token of queryTokenSet) {
223
+ if (signalTokenSet.has(token)) {
224
+ matchedQueryTokens.add(token);
225
+ }
226
+ }
227
+
228
+ const matchedFacetIndexes = new Set<number>();
229
+ for (const [index, facetTokenSet] of facetTokenSets.entries()) {
230
+ for (const token of facetTokenSet) {
231
+ if (signalTokenSet.has(token)) {
232
+ matchedFacetIndexes.add(index);
233
+ break;
234
+ }
235
+ }
236
+ }
237
+
238
+ return {
239
+ result,
240
+ normalizedScore: normalizeScore(result.score),
241
+ matchedQueryTokens,
242
+ matchedFacetIndexes,
243
+ };
244
+ });
245
+ }
246
+
247
+ function dedupeByDocidBestScore(results: SearchResult[]): SearchResult[] {
248
+ const bestByDocid = new Map<string, SearchResult>();
249
+
250
+ for (const result of results) {
251
+ const existing = bestByDocid.get(result.docid);
252
+ if (!existing || result.score > existing.score) {
253
+ bestByDocid.set(result.docid, result);
254
+ }
255
+ }
256
+
257
+ return [...bestByDocid.values()].sort((a, b) => {
258
+ const scoreDiff = b.score - a.score;
259
+ if (Math.abs(scoreDiff) > 1e-9) {
260
+ return scoreDiff;
261
+ }
262
+ return a.docid.localeCompare(b.docid);
263
+ });
264
+ }
265
+
266
+ function selectAdaptiveSources(
267
+ query: string,
268
+ results: SearchResult[]
269
+ ): { selected: SearchResult[]; explain: AnswerContextExplain } {
270
+ const dedupedResults = dedupeByDocidBestScore(results).slice(
271
+ 0,
272
+ CONTEXT_CANDIDATE_POOL
273
+ );
274
+ const queryTokens = tokenize(query);
275
+ const queryTokenSet = new Set(queryTokens);
276
+ const facets = uniqueFacetTexts(query);
277
+ const facetTokenSets = facets.map((facet) => new Set(tokenize(facet)));
278
+ const candidates = buildCandidates(
279
+ queryTokenSet,
280
+ facetTokenSets,
281
+ dedupedResults
282
+ );
283
+
284
+ const comparisonIntent = COMPARISON_QUERY_RE.test(query);
285
+ let targetSources = BASE_CONTEXT_SOURCES;
286
+ if (comparisonIntent || facets.length >= 3) {
287
+ targetSources = 5;
288
+ } else if (facets.length >= 2) {
289
+ targetSources = 4;
290
+ }
291
+ targetSources = Math.min(
292
+ targetSources,
293
+ MAX_CONTEXT_SOURCES,
294
+ candidates.length
295
+ );
296
+
297
+ const coveredTokens = new Set<string>();
298
+ const coveredFacets = new Set<number>();
299
+ const selected: SelectedSource[] = [];
300
+ const selectedDocids = new Set<string>();
301
+
302
+ while (selected.length < targetSources) {
303
+ let bestCandidate: SourceCandidate | null = null;
304
+ let bestGain = Number.NEGATIVE_INFINITY;
305
+ let bestReason = "relevance";
306
+
307
+ for (const candidate of candidates) {
308
+ const docid = candidate.result.docid;
309
+ if (selectedDocids.has(docid)) {
310
+ continue;
311
+ }
312
+
313
+ const newTokenHits = [...candidate.matchedQueryTokens].filter(
314
+ (token) => !coveredTokens.has(token)
315
+ ).length;
316
+ const newFacetHits = [...candidate.matchedFacetIndexes].filter(
317
+ (index) => !coveredFacets.has(index)
318
+ ).length;
319
+
320
+ const tokenGain =
321
+ queryTokenSet.size > 0 ? newTokenHits / queryTokenSet.size : 0;
322
+ const facetGain =
323
+ facetTokenSets.length > 0 ? newFacetHits / facetTokenSets.length : 0;
324
+
325
+ let gain =
326
+ candidate.normalizedScore * 0.6 + tokenGain * 0.25 + facetGain * 0.15;
327
+
328
+ if (comparisonIntent && selected.length > 0 && newFacetHits === 0) {
329
+ gain -= 0.2;
330
+ }
331
+
332
+ let reason = "relevance";
333
+ if (newFacetHits > 0) {
334
+ reason = "new_facet_coverage";
335
+ } else if (newTokenHits > 0) {
336
+ reason = "new_query_coverage";
337
+ }
338
+
339
+ if (
340
+ !bestCandidate ||
341
+ gain > bestGain ||
342
+ (Math.abs(gain - bestGain) <= 1e-9 &&
343
+ candidate.normalizedScore > bestCandidate.normalizedScore)
344
+ ) {
345
+ bestCandidate = candidate;
346
+ bestGain = gain;
347
+ bestReason = reason;
348
+ }
349
+ }
350
+
351
+ if (!bestCandidate) {
352
+ break;
353
+ }
354
+
355
+ // Keep selection compact when marginal gain is exhausted.
356
+ if (
357
+ bestGain <= 0 &&
358
+ selected.length >= 1 &&
359
+ !comparisonIntent &&
360
+ selected.length >= BASE_CONTEXT_SOURCES
361
+ ) {
362
+ break;
363
+ }
364
+
365
+ selected.push({ candidate: bestCandidate, reason: bestReason });
366
+ selectedDocids.add(bestCandidate.result.docid);
367
+ for (const token of bestCandidate.matchedQueryTokens) {
368
+ coveredTokens.add(token);
369
+ }
370
+ for (const index of bestCandidate.matchedFacetIndexes) {
371
+ coveredFacets.add(index);
372
+ }
373
+ }
374
+
375
+ if (comparisonIntent && selected.length < 2) {
376
+ for (const candidate of candidates) {
377
+ if (selectedDocids.has(candidate.result.docid)) {
378
+ continue;
379
+ }
380
+ selected.push({ candidate, reason: "comparison_balance" });
381
+ selectedDocids.add(candidate.result.docid);
382
+ if (selected.length >= 2) {
383
+ break;
384
+ }
385
+ }
386
+ }
387
+
388
+ if (selected.length === 0 && candidates.length > 0) {
389
+ const first = candidates[0];
390
+ if (first) {
391
+ selected.push({ candidate: first, reason: "fallback_top_result" });
392
+ selectedDocids.add(first.result.docid);
393
+ }
394
+ }
395
+
396
+ const toEntry = (
397
+ candidate: SourceCandidate,
398
+ reason: string
399
+ ): AnswerContextEntry => ({
400
+ docid: candidate.result.docid,
401
+ uri: candidate.result.uri,
402
+ score: candidate.normalizedScore,
403
+ queryTokenHits: candidate.matchedQueryTokens.size,
404
+ facetHits: candidate.matchedFacetIndexes.size,
405
+ reason,
406
+ });
407
+
408
+ const selectedEntries = selected.map(({ candidate, reason }) =>
409
+ toEntry(candidate, reason)
410
+ );
411
+ const droppedEntries = candidates
412
+ .filter((candidate) => !selectedDocids.has(candidate.result.docid))
413
+ .map((candidate) => toEntry(candidate, "lower_marginal_gain"));
414
+
415
+ return {
416
+ selected: selected.map((entry) => entry.candidate.result),
417
+ explain: {
418
+ strategy: "adaptive_coverage_v1",
419
+ targetSources,
420
+ facets,
421
+ selected: selectedEntries,
422
+ dropped: droppedEntries,
423
+ },
424
+ };
425
+ }
426
+
124
427
  /**
125
428
  * Generate a grounded answer from search results.
126
429
  * Returns null if no valid context or generation fails.
@@ -136,11 +439,12 @@ export async function generateGroundedAnswer(
136
439
  maxTokens: number
137
440
  ): Promise<AnswerGenerationResult | null> {
138
441
  const { genPort, store } = deps;
442
+ const sourceSelection = selectAdaptiveSources(query, results);
139
443
  const contextParts: string[] = [];
140
444
  const citations: Citation[] = [];
141
445
  let citationIndex = 0;
142
446
 
143
- for (const r of results.slice(0, MAX_CONTEXT_SOURCES)) {
447
+ for (const r of sourceSelection.selected) {
144
448
  let content: string | null = null;
145
449
  let usedFullContent = false;
146
450
 
@@ -197,7 +501,11 @@ export async function generateGroundedAnswer(
197
501
  return null;
198
502
  }
199
503
 
200
- return { answer: result.value, citations };
504
+ return {
505
+ answer: result.value,
506
+ citations,
507
+ answerContext: sourceSelection.explain,
508
+ };
201
509
  }
202
510
 
203
511
  /**
@@ -207,6 +515,7 @@ export async function generateGroundedAnswer(
207
515
  export function processAnswerResult(rawResult: AnswerGenerationResult): {
208
516
  answer: string;
209
517
  citations: Citation[];
518
+ answerContext: AnswerContextExplain;
210
519
  } {
211
520
  const maxCitation = rawResult.citations.length;
212
521
  const validUsedNums = extractValidCitationNumbers(
@@ -219,9 +528,17 @@ export function processAnswerResult(rawResult: AnswerGenerationResult): {
219
528
  );
220
529
 
221
530
  if (validUsedNums.length === 0 || filteredCitations.length === 0) {
222
- return { answer: ABSTENTION_MESSAGE, citations: [] };
531
+ return {
532
+ answer: ABSTENTION_MESSAGE,
533
+ citations: [],
534
+ answerContext: rawResult.answerContext,
535
+ };
223
536
  }
224
537
 
225
538
  const answer = renumberAnswerCitations(rawResult.answer, validUsedNums);
226
- return { answer, citations: filteredCitations };
539
+ return {
540
+ answer,
541
+ citations: filteredCitations,
542
+ answerContext: rawResult.answerContext,
543
+ };
227
544
  }