@gmickel/gno 0.15.1 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/README.md +36 -1
  2. package/package.json +7 -4
  3. package/src/cli/commands/ask.ts +9 -0
  4. package/src/cli/commands/query.ts +3 -2
  5. package/src/cli/pager.ts +1 -1
  6. package/src/cli/program.ts +89 -0
  7. package/src/core/links.ts +92 -20
  8. package/src/ingestion/sync.ts +267 -23
  9. package/src/ingestion/types.ts +2 -0
  10. package/src/ingestion/walker.ts +2 -1
  11. package/src/llm/nodeLlamaCpp/embedding.ts +53 -10
  12. package/src/mcp/tools/index.ts +30 -1
  13. package/src/mcp/tools/query.ts +22 -2
  14. package/src/mcp/tools/search.ts +8 -0
  15. package/src/mcp/tools/vsearch.ts +8 -0
  16. package/src/pipeline/answer.ts +324 -7
  17. package/src/pipeline/expansion.ts +243 -7
  18. package/src/pipeline/explain.ts +93 -5
  19. package/src/pipeline/hybrid.ts +240 -57
  20. package/src/pipeline/query-modes.ts +125 -0
  21. package/src/pipeline/rerank.ts +34 -13
  22. package/src/pipeline/search.ts +41 -3
  23. package/src/pipeline/temporal.ts +257 -0
  24. package/src/pipeline/types.ts +58 -0
  25. package/src/pipeline/vsearch.ts +107 -9
  26. package/src/serve/public/app.tsx +1 -3
  27. package/src/serve/public/globals.built.css +2 -2
  28. package/src/serve/public/lib/retrieval-filters.ts +167 -0
  29. package/src/serve/public/pages/Ask.tsx +339 -109
  30. package/src/serve/public/pages/Browse.tsx +71 -5
  31. package/src/serve/public/pages/DocView.tsx +2 -21
  32. package/src/serve/public/pages/Search.tsx +507 -120
  33. package/src/serve/routes/api.ts +202 -2
  34. package/src/store/migrations/006-document-metadata.ts +104 -0
  35. package/src/store/migrations/007-document-date-fields.ts +24 -0
  36. package/src/store/migrations/index.ts +3 -1
  37. package/src/store/sqlite/adapter.ts +218 -5
  38. package/src/store/types.ts +46 -0
@@ -6,7 +6,7 @@
6
6
  */
7
7
 
8
8
  import type { RerankPort } from "../llm/types";
9
- import type { StorePort } from "../store/types";
9
+ import type { ChunkRow, StorePort } from "../store/types";
10
10
  import type { BlendingTier, FusionCandidate, RerankedCandidate } from "./types";
11
11
 
12
12
  import { DEFAULT_BLENDING_SCHEDULE } from "./types";
@@ -25,6 +25,7 @@ export interface RerankOptions {
25
25
  export interface RerankResult {
26
26
  candidates: RerankedCandidate[];
27
27
  reranked: boolean;
28
+ fallbackReason: "none" | "disabled" | "error";
28
29
  }
29
30
 
30
31
  export interface RerankDeps {
@@ -72,6 +73,7 @@ function blend(
72
73
  // ─────────────────────────────────────────────────────────────────────────────
73
74
 
74
75
  const MAX_CHUNK_CHARS = 4000;
76
+ const PROTECT_BM25_TOP_RANK = 1;
75
77
 
76
78
  interface BestChunkInfo {
77
79
  candidate: FusionCandidate;
@@ -94,6 +96,13 @@ function selectBestChunks(
94
96
  return bestChunkPerDoc;
95
97
  }
96
98
 
99
+ function isProtectedLexicalTopHit(candidate: FusionCandidate): boolean {
100
+ return (
101
+ candidate.bm25Rank === PROTECT_BM25_TOP_RANK &&
102
+ candidate.sources.includes("bm25")
103
+ );
104
+ }
105
+
97
106
  /**
98
107
  * Fetch chunk texts for reranking.
99
108
  */
@@ -102,18 +111,18 @@ async function fetchChunkTexts(
102
111
  bestChunkPerDoc: Map<string, BestChunkInfo>
103
112
  ): Promise<{ texts: string[]; hashToIndex: Map<string, number> }> {
104
113
  const uniqueHashes = [...bestChunkPerDoc.keys()];
105
- const chunkResults = await Promise.all(
106
- uniqueHashes.map((hash) => store.getChunks(hash))
107
- );
108
-
114
+ const chunksBatchResult = await store.getChunksBatch(uniqueHashes);
115
+ const chunksByHash: Map<string, ChunkRow[]> = chunksBatchResult.ok
116
+ ? chunksBatchResult.value
117
+ : new Map();
109
118
  const chunkTexts = new Map<string, string>();
110
- for (let i = 0; i < uniqueHashes.length; i++) {
111
- const hash = uniqueHashes[i] as string;
112
- const result = chunkResults[i];
119
+
120
+ for (const hash of uniqueHashes) {
113
121
  const bestInfo = bestChunkPerDoc.get(hash);
122
+ const chunks = chunksByHash.get(hash);
114
123
 
115
- if (result?.ok && result.value && bestInfo) {
116
- const chunk = result.value.find((c) => c.seq === bestInfo.seq);
124
+ if (chunks && bestInfo) {
125
+ const chunk = chunks.find((c) => c.seq === bestInfo.seq);
117
126
  const text = chunk?.text ?? "";
118
127
  chunkTexts.set(
119
128
  hash,
@@ -151,7 +160,7 @@ export async function rerankCandidates(
151
160
  options: RerankOptions = {}
152
161
  ): Promise<RerankResult> {
153
162
  if (candidates.length === 0) {
154
- return { candidates: [], reranked: false };
163
+ return { candidates: [], reranked: false, fallbackReason: "none" };
155
164
  }
156
165
 
157
166
  const { rerankPort, store } = deps;
@@ -181,6 +190,7 @@ export async function rerankCandidates(
181
190
  blendedScore: normalizeFusionScore(c.fusionScore),
182
191
  })),
183
192
  reranked: false,
193
+ fallbackReason: "disabled",
184
194
  };
185
195
  }
186
196
 
@@ -202,6 +212,7 @@ export async function rerankCandidates(
202
212
  blendedScore: normalizeFusionScore(c.fusionScore),
203
213
  })),
204
214
  reranked: false,
215
+ fallbackReason: "error",
205
216
  };
206
217
  }
207
218
 
@@ -239,7 +250,7 @@ export async function rerankCandidates(
239
250
  });
240
251
 
241
252
  // Add remaining candidates with penalty
242
- const allCandidates: RerankedCandidate[] = [
253
+ let allCandidates: RerankedCandidate[] = [
243
254
  ...rerankedCandidates,
244
255
  ...remaining.map((c) => ({
245
256
  ...c,
@@ -260,5 +271,15 @@ export async function rerankCandidates(
260
271
  return `${a.mirrorHash}:${a.seq}`.localeCompare(`${b.mirrorHash}:${b.seq}`);
261
272
  });
262
273
 
263
- return { candidates: allCandidates, reranked: true };
274
+ // Guardrail: keep strong original lexical #1 at the top.
275
+ // This avoids rerank-only demotions on clear exact-hit queries.
276
+ const protectedTopHit = allCandidates.find(isProtectedLexicalTopHit);
277
+ if (protectedTopHit && allCandidates[0] !== protectedTopHit) {
278
+ allCandidates = [
279
+ protectedTopHit,
280
+ ...allCandidates.filter((candidate) => candidate !== protectedTopHit),
281
+ ];
282
+ }
283
+
284
+ return { candidates: allCandidates, reranked: true, fallbackReason: "none" };
264
285
  }
@@ -18,6 +18,11 @@ import type {
18
18
  import { err, ok } from "../store/types";
19
19
  import { createChunkLookup } from "./chunk-lookup";
20
20
  import { detectQueryLanguage } from "./query-language";
21
+ import {
22
+ resolveRecencyTimestamp,
23
+ resolveTemporalRange,
24
+ shouldSortByRecency,
25
+ } from "./temporal";
21
26
 
22
27
  // ─────────────────────────────────────────────────────────────────────────────
23
28
  // Score Normalization
@@ -74,6 +79,7 @@ function buildSearchResult(ctx: BuildResultContext): SearchResult {
74
79
  mime: fts.sourceMime ?? "text/markdown",
75
80
  ext: fts.sourceExt ?? ".md",
76
81
  modifiedAt: fts.sourceMtime,
82
+ documentDate: fts.frontmatterDate,
77
83
  sizeBytes: fts.sourceSize,
78
84
  sourceHash: fts.sourceHash,
79
85
  };
@@ -131,6 +137,13 @@ export async function searchBm25(
131
137
  ): Promise<ReturnType<typeof ok<SearchResults>>> {
132
138
  const limit = options.limit ?? 20;
133
139
  const minScore = options.minScore ?? 0;
140
+ const recencySort = shouldSortByRecency(query);
141
+ const retrievalLimit = recencySort ? limit * 3 : limit;
142
+ const temporalRange = resolveTemporalRange(
143
+ query,
144
+ options.since,
145
+ options.until
146
+ );
134
147
 
135
148
  // Detect query language for metadata (DOES NOT affect retrieval filtering)
136
149
  const detection = detectQueryLanguage(query);
@@ -139,12 +152,16 @@ export async function searchBm25(
139
152
  // Run FTS search
140
153
  // Disable FTS snippet when --full or --line-numbers (we use raw text instead)
141
154
  const ftsResult = await store.searchFts(query, {
142
- limit,
155
+ limit: retrievalLimit,
143
156
  collection: options.collection,
144
157
  language: options.lang,
145
158
  snippet: !(options.full || options.lineNumbers),
146
159
  tagsAll: options.tagsAll,
147
160
  tagsAny: options.tagsAny,
161
+ since: temporalRange.since,
162
+ until: temporalRange.until,
163
+ categories: options.categories,
164
+ author: options.author,
148
165
  });
149
166
 
150
167
  if (!ftsResult.ok) {
@@ -253,14 +270,35 @@ export async function searchBm25(
253
270
  const filteredResults =
254
271
  minScore > 0 ? results.filter((r) => r.score >= minScore) : results;
255
272
 
273
+ if (recencySort) {
274
+ filteredResults.sort((a, b) => {
275
+ const aTs = resolveRecencyTimestamp(
276
+ a.source.documentDate,
277
+ a.source.modifiedAt
278
+ );
279
+ const bTs = resolveRecencyTimestamp(
280
+ b.source.documentDate,
281
+ b.source.modifiedAt
282
+ );
283
+ if (aTs !== bTs) {
284
+ return bTs - aTs;
285
+ }
286
+ return b.score - a.score;
287
+ });
288
+ }
289
+
256
290
  return ok({
257
- results: filteredResults,
291
+ results: filteredResults.slice(0, limit),
258
292
  meta: {
259
293
  query,
260
294
  mode: "bm25",
261
- totalResults: filteredResults.length,
295
+ totalResults: Math.min(filteredResults.length, limit),
262
296
  collection: options.collection,
263
297
  lang: options.lang,
298
+ since: temporalRange.since,
299
+ until: temporalRange.until,
300
+ categories: options.categories,
301
+ author: options.author,
264
302
  queryLanguage,
265
303
  },
266
304
  });
@@ -0,0 +1,257 @@
/**
 * Temporal range parsing helpers for retrieval filters.
 *
 * All calendar arithmetic in this module is performed in UTC, and weeks are
 * treated as running Monday through Sunday.
 *
 * @module src/pipeline/temporal
 */

/** Matches a bare calendar date such as `2024-01-15` (no time component). */
const DATE_ONLY_RE = /^\d{4}-\d{2}-\d{2}$/;

/** Query phrases that signal the caller wants newest-first ordering. */
const RECENCY_SORT_RE =
  /\b(latest|newest|most recent|recent|today|yesterday|this week|last week|this month|last month)\b/;

/** How many days the `recent` token looks back from `now`. */
const RECENT_WINDOW_DAYS = 30;

/**
 * Relative tokens that can be inferred from free-text queries, in match
 * priority order (the first pattern that matches wins). The patterns are
 * compiled once here rather than on every call.
 */
const QUERY_TOKEN_PATTERNS: ReadonlyArray<{ token: string; pattern: RegExp }> =
  [
    "today",
    "yesterday",
    "this week",
    "last week",
    "this month",
    "last month",
    "recent",
  ].map((token) => ({ token, pattern: new RegExp(`\\b${token}\\b`) }));

/** Inclusive time window expressed as ISO 8601 strings; either side may be open. */
export interface TemporalRange {
  since?: string;
  until?: string;
}

/** Which side of a range a value is being resolved for. */
type BoundKind = "since" | "until";

/** Return a copy of `d` moved by `days` whole UTC days (negative moves back). */
function addUtcDays(d: Date, days: number): Date {
  const out = new Date(d);
  out.setUTCDate(out.getUTCDate() + days);
  return out;
}

/** Return a copy of `d` set to 00:00:00.000 UTC of the same UTC day. */
function startOfDay(d: Date): Date {
  const out = new Date(d);
  out.setUTCHours(0, 0, 0, 0);
  return out;
}

/** Return a copy of `d` set to 23:59:59.999 UTC of the same UTC day. */
function endOfDay(d: Date): Date {
  const out = new Date(d);
  out.setUTCHours(23, 59, 59, 999);
  return out;
}

/** Return midnight UTC of the Monday that starts the week containing `d`. */
function startOfWeekUtc(d: Date): Date {
  const out = startOfDay(d);
  // getUTCDay(): 0 = Sunday ... 6 = Saturday; convert to days elapsed since Monday.
  const daysSinceMonday = (out.getUTCDay() + 6) % 7;
  out.setUTCDate(out.getUTCDate() - daysSinceMonday);
  return out;
}

/** Return 23:59:59.999 UTC of the Sunday that ends the week containing `d`. */
function endOfWeekUtc(d: Date): Date {
  return endOfDay(addUtcDays(startOfWeekUtc(d), 6));
}

/** Return midnight UTC on the first day of the month containing `d`. */
function startOfMonthUtc(d: Date): Date {
  return new Date(Date.UTC(d.getUTCFullYear(), d.getUTCMonth(), 1, 0, 0, 0, 0));
}

/** Return 23:59:59.999 UTC on the last day of the month containing `d`. */
function endOfMonthUtc(d: Date): Date {
  // Day 0 of the following month is the last day of this month.
  return new Date(
    Date.UTC(d.getUTCFullYear(), d.getUTCMonth() + 1, 0, 23, 59, 59, 999)
  );
}

/**
 * Parse an absolute date/time string into an ISO 8601 timestamp.
 *
 * Bare `YYYY-MM-DD` values are widened to the start of the day for a `since`
 * bound and to the end of the day for an `until` bound. Any other parseable
 * value is returned as the exact instant it denotes.
 *
 * @param value - Raw user input; surrounding whitespace is ignored.
 * @param kind - Which bound is being resolved.
 * @returns ISO timestamp, or `null` when `value` is not a parseable date.
 */
function normalizeParsedDate(value: string, kind: BoundKind): string | null {
  // Trim first so padded input is handled the same way parseRelative handles it.
  const text = value.trim();
  const parsed = new Date(text);
  if (Number.isNaN(parsed.getTime())) {
    return null;
  }

  if (!DATE_ONLY_RE.test(text)) {
    return parsed.toISOString();
  }

  const widened = kind === "since" ? startOfDay(parsed) : endOfDay(parsed);
  return widened.toISOString();
}

/**
 * Resolve a relative token (`today`, `yesterday`, `this week`, `last week`,
 * `this month`, `last month`, `recent`) to an ISO 8601 timestamp.
 *
 * @param value - Candidate token; matched case-insensitively after trimming.
 * @param kind - Which bound is being resolved.
 * @param now - Reference instant the token is relative to.
 * @returns ISO timestamp, or `null` when `value` is not a known token.
 */
function parseRelative(
  value: string,
  kind: BoundKind,
  now: Date
): string | null {
  const wantsStart = kind === "since";
  let bound: Date;

  switch (value.trim().toLowerCase()) {
    case "today":
      bound = wantsStart ? startOfDay(now) : endOfDay(now);
      break;
    case "yesterday": {
      const anchor = addUtcDays(now, -1);
      bound = wantsStart ? startOfDay(anchor) : endOfDay(anchor);
      break;
    }
    case "this week":
      // An open period ends at the reference instant, not at the week's end.
      bound = wantsStart ? startOfWeekUtc(now) : now;
      break;
    case "last week": {
      const anchor = addUtcDays(now, -7);
      bound = wantsStart ? startOfWeekUtc(anchor) : endOfWeekUtc(anchor);
      break;
    }
    case "this month":
      bound = wantsStart ? startOfMonthUtc(now) : now;
      break;
    case "last month": {
      // Anchor on day 1 of the previous month. Shifting the month of `now`
      // in place overflows when the previous month is shorter (e.g. Mar 31
      // becomes "Feb 31", which rolls forward into March again).
      const anchor = new Date(
        Date.UTC(now.getUTCFullYear(), now.getUTCMonth() - 1, 1)
      );
      bound = wantsStart ? startOfMonthUtc(anchor) : endOfMonthUtc(anchor);
      break;
    }
    case "recent":
      bound = wantsStart ? addUtcDays(now, -RECENT_WINDOW_DAYS) : now;
      break;
    default:
      return null;
  }

  return bound.toISOString();
}

/**
 * Resolve one explicit bound, trying relative tokens before absolute dates.
 *
 * @returns ISO timestamp, or `undefined` when `input` is empty or unparseable.
 */
function parseBound(
  input: string | undefined,
  kind: BoundKind,
  now: Date
): string | undefined {
  if (!input) {
    return undefined;
  }
  return (
    parseRelative(input, kind, now) ??
    normalizeParsedDate(input, kind) ??
    undefined
  );
}

/**
 * Infer a range from relative phrases appearing in the query text.
 *
 * @returns The range for the highest-priority phrase found, or `{}` when the
 *   query contains none of the recognised phrases.
 */
function inferFromQuery(query: string, now: Date): TemporalRange {
  const haystack = query.toLowerCase();
  const hit = QUERY_TOKEN_PATTERNS.find(({ pattern }) =>
    pattern.test(haystack)
  );
  if (!hit) {
    return {};
  }
  return {
    since: parseRelative(hit.token, "since", now) ?? undefined,
    until: parseRelative(hit.token, "until", now) ?? undefined,
  };
}

/**
 * Resolve temporal bounds from explicit flags or query text.
 *
 * Explicit inputs take precedence: if either resolves to a timestamp, the
 * query text is not inspected. Inputs that cannot be parsed are treated as
 * absent.
 *
 * @param query - Free-text query, scanned for relative phrases as a fallback.
 * @param sinceInput - Optional lower bound (ISO 8601 or relative token).
 * @param untilInput - Optional upper bound (ISO 8601 or relative token).
 * @param now - Reference instant for relative tokens; defaults to the current time.
 */
export function resolveTemporalRange(
  query: string,
  sinceInput?: string,
  untilInput?: string,
  now: Date = new Date()
): TemporalRange {
  const since = parseBound(sinceInput, "since", now);
  const until = parseBound(untilInput, "until", now);

  if (since || until) {
    return { since, until };
  }

  return inferFromQuery(query, now);
}

/**
 * Return true when timestamp falls inside optional range.
 *
 * Missing or unparseable timestamps are treated as in range, and an
 * unparseable bound is ignored rather than excluding the timestamp.
 */
export function isWithinTemporalRange(
  timestamp: string | undefined,
  range: TemporalRange
): boolean {
  if (!timestamp) {
    return true;
  }
  const instant = new Date(timestamp).getTime();
  if (Number.isNaN(instant)) {
    return true;
  }

  if (range.since) {
    const lower = new Date(range.since).getTime();
    if (!Number.isNaN(lower) && instant < lower) {
      return false;
    }
  }
  if (range.until) {
    const upper = new Date(range.until).getTime();
    if (!Number.isNaN(upper) && instant > upper) {
      return false;
    }
  }
  return true;
}

/**
 * Return true when query intent implies newest-first ordering.
 */
export function shouldSortByRecency(query: string): boolean {
  return RECENCY_SORT_RE.test(query.toLowerCase());
}

/**
 * Prefer canonical doc date; fallback to source modified time.
 * Returns 0 when neither value is valid.
 *
 * @returns Milliseconds since the Unix epoch, or 0.
 */
export function resolveRecencyTimestamp(
  docDate?: string | null,
  sourceModifiedAt?: string | null
): number {
  for (const candidate of [docDate, sourceModifiedAt]) {
    if (!candidate) {
      continue;
    }
    const millis = new Date(candidate).getTime();
    if (!Number.isNaN(millis)) {
      return millis;
    }
  }
  return 0;
}
@@ -18,6 +18,7 @@ export interface SearchResultSource {
18
18
  mime: string;
19
19
  ext: string;
20
20
  modifiedAt?: string;
21
+ documentDate?: string;
21
22
  sizeBytes?: number;
22
23
  sourceHash?: string;
23
24
  }
@@ -65,6 +66,16 @@ export interface SearchMeta {
65
66
  lang?: string;
66
67
  /** Detected/overridden query language for prompt selection (typically BCP-47; may be user-provided via --lang) */
67
68
  queryLanguage?: string;
69
+ /** Summary of structured query modes applied (if provided) */
70
+ queryModes?: QueryModeSummary;
71
+ /** Temporal filter lower bound (ISO 8601) */
72
+ since?: string;
73
+ /** Temporal filter upper bound (ISO 8601) */
74
+ until?: string;
75
+ /** Category filters applied */
76
+ categories?: string[];
77
+ /** Author filter applied */
78
+ author?: string;
68
79
  /** Explain data (when --explain is used) */
69
80
  explain?: {
70
81
  lines: ExplainLine[];
@@ -100,6 +111,30 @@ export interface SearchOptions {
100
111
  tagsAll?: string[];
101
112
  /** Filter to docs with ANY of these tags (OR) */
102
113
  tagsAny?: string[];
114
+ /** Filter by modified time lower bound (ISO 8601 or relative token) */
115
+ since?: string;
116
+ /** Filter by modified time upper bound (ISO 8601 or relative token) */
117
+ until?: string;
118
+ /** Filter to docs matching ANY category */
119
+ categories?: string[];
120
+ /** Filter by author value */
121
+ author?: string;
122
+ }
123
+
124
+ /** Structured query mode identifier */
125
+ export type QueryMode = "term" | "intent" | "hyde";
126
+
127
+ /** Structured query mode entry */
128
+ export interface QueryModeInput {
129
+ mode: QueryMode;
130
+ text: string;
131
+ }
132
+
133
+ /** Structured query mode summary for metadata/explain */
134
+ export interface QueryModeSummary {
135
+ term: number;
136
+ intent: number;
137
+ hyde: boolean;
103
138
  }
104
139
 
105
140
  /** Options for hybrid search (gno query) */
@@ -108,6 +143,8 @@ export type HybridSearchOptions = SearchOptions & {
108
143
  noExpand?: boolean;
109
144
  /** Disable reranking */
110
145
  noRerank?: boolean;
146
+ /** Optional structured mode entries; when set, used as expansion inputs */
147
+ queryModes?: QueryModeInput[];
111
148
  /** Enable explain output */
112
149
  explain?: boolean;
113
150
  /** Language hint for prompt selection (does NOT filter retrieval, only affects expansion prompts) */
@@ -247,6 +284,25 @@ export interface Citation {
247
284
  endLine?: number;
248
285
  }
249
286
 
287
+ /** Source selection entry for answer-generation explain */
288
+ export interface AnswerContextEntry {
289
+ docid: string;
290
+ uri: string;
291
+ score: number;
292
+ queryTokenHits: number;
293
+ facetHits: number;
294
+ reason: string;
295
+ }
296
+
297
+ /** Answer-generation context selection explain payload */
298
+ export interface AnswerContextExplain {
299
+ strategy: "adaptive_coverage_v1";
300
+ targetSources: number;
301
+ facets: string[];
302
+ selected: AnswerContextEntry[];
303
+ dropped: AnswerContextEntry[];
304
+ }
305
+
250
306
  /** Ask result metadata */
251
307
  export interface AskMeta {
252
308
  expanded: boolean;
@@ -254,6 +310,7 @@ export interface AskMeta {
254
310
  vectorsUsed: boolean;
255
311
  answerGenerated?: boolean;
256
312
  totalResults?: number;
313
+ answerContext?: AnswerContextExplain;
257
314
  }
258
315
 
259
316
  /** Ask command result */
@@ -323,6 +380,7 @@ export interface ExplainResult {
323
380
  rank: number;
324
381
  docid: string;
325
382
  score: number;
383
+ fusionScore?: number;
326
384
  bm25Score?: number;
327
385
  vecScore?: number;
328
386
  rerankScore?: number;