@gmickel/gno 0.4.0 → 0.5.1

package/README.md CHANGED
@@ -99,11 +99,14 @@ gno skill install --target all # Both Claude + Codex
 
 | Command | Mode | Best For |
 |:--------|:-----|:---------|
- | `gno search` | BM25 | Exact phrases, code identifiers |
- | `gno vsearch` | Vector | Natural language, concepts |
+ | `gno search` | Document-level BM25 | Exact phrases, code identifiers |
+ | `gno vsearch` | Contextual Vector | Natural language, concepts |
 | `gno query` | Hybrid | Best accuracy (BM25 + vector + reranking) |
 | `gno ask --answer` | RAG | Direct answers with citations |
 
+ **BM25** indexes full documents (not chunks) with Snowball stemming—"running" matches "run".
+ **Vector** embeds chunks with document titles for context awareness.
+
 ```bash
 gno search "handleAuth" # Find exact matches
 gno vsearch "error handling patterns" # Semantic similarity
@@ -230,10 +233,11 @@ graph TD
 M --> N[Final Results]
 ```
 
+ 0. **Strong Signal Check** — Skip expansion if BM25 has a confident match (saves 1-3s)
 1. **Query Expansion** — LLM generates lexical variants, semantic rephrases, and a [HyDE](https://arxiv.org/abs/2212.10496) passage
- 2. **Parallel Retrieval** — BM25 + vector search run concurrently on all variants
- 3. **Fusion** — Reciprocal Rank Fusion merges results with position-based scoring
- 4. **Reranking** — Cross-encoder rescores top 20, blended with fusion scores
+ 2. **Parallel Retrieval** — Document-level BM25 + chunk-level vector search on all variants
+ 3. **Fusion** — RRF with extra weight for the original query and a tiered bonus for top ranks
+ 4. **Reranking** — Qwen3-Reranker scores full documents (32K context), blended with fusion scores
 
 > **Deep dive**: [How Search Works](https://gno.sh/docs/HOW-SEARCH-WORKS/)
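The five numbered stages compose as sketched below, assuming hypothetical port signatures (`bm25`, `vector`, `expand`, `fuse`, `rerank`); this illustrates the flow and is not gno's actual orchestration code:

```ts
// Hypothetical ports for illustration; gno's real interfaces are not shown in this diff.
interface Ports {
  bm25(q: string): Promise<string[]>; // ranked doc ids
  vector(q: string): Promise<string[]>; // ranked doc ids
  expand(q: string): Promise<string[] | null>; // variant queries, null on timeout
  fuse(lists: string[][]): string[]; // e.g. Reciprocal Rank Fusion
  rerank(q: string, ids: string[]): Promise<string[]>;
}

async function hybridQuery(q: string, p: Ports): Promise<string[]> {
  // 0. Strong-signal check: a confident BM25 hit skips the LLM round-trip
  const bm25 = await p.bm25(q);
  const strong = bm25.length > 0; // placeholder heuristic
  // 1. Query expansion (lexical variants, rephrases, HyDE) unless skipped
  const variants = strong ? [] : ((await p.expand(q)) ?? []);
  // 2. Retrieve for the original query and every variant concurrently
  const vectorLists = await Promise.all([q, ...variants].map((v) => p.vector(v)));
  // 3. Fuse all ranked lists, then 4. rerank the fused top candidates
  return p.rerank(q, p.fuse([bm25, ...vectorLists]).slice(0, 20));
}
```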
@@ -263,7 +267,7 @@ Models auto-download on first use to `~/.cache/gno/models/`.
 | Model | Purpose | Size |
 |:------|:--------|:-----|
 | bge-m3 | Embeddings (1024-dim, multilingual) | ~500MB |
- | bge-reranker-v2-m3 | Cross-encoder reranking | ~700MB |
+ | Qwen3-Reranker-0.6B | Cross-encoder reranking (32K context) | ~700MB |
 | Qwen/SmolLM | Query expansion + AI answers | ~600MB-1.2GB |
 
 ### Model Presets
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@gmickel/gno",
-   "version": "0.4.0",
+   "version": "0.5.1",
   "description": "Local semantic search for your documents. Index Markdown, PDF, and Office files with hybrid BM25 + vector search.",
   "keywords": [
     "search",
@@ -29,7 +29,8 @@
   },
   "files": [
     "src",
-     "assets"
+     "assets",
+     "vendor"
   ],
   "engines": {
     "bun": ">=1.0.0"
@@ -170,7 +170,7 @@ export async function ask(
   if (shouldGenerateAnswer && genPort) {
     const maxTokens = options.maxAnswerTokens ?? 512;
     const rawResult = await generateGroundedAnswer(
-       genPort,
+       { genPort, store },
       query,
       results,
       maxTokens
@@ -11,6 +11,7 @@ import { getConfigPaths, isInitialized, loadConfig } from '../../config';
 import { LlmAdapter } from '../../llm/nodeLlamaCpp/adapter';
 import { getActivePreset } from '../../llm/registry';
 import type { EmbeddingPort } from '../../llm/types';
+ import { formatDocForEmbedding } from '../../pipeline/contextual';
 import { SqliteAdapter } from '../../store/sqlite/adapter';
 import type { StoreResult } from '../../store/types';
 import { err, ok } from '../../store/types';
@@ -131,9 +132,9 @@ async function processBatches(ctx: BatchContext): Promise<BatchResult> {
     cursor = { mirrorHash: lastItem.mirrorHash, seq: lastItem.seq };
   }
 
-   // Embed batch
+   // Embed batch with contextual formatting (title prefix)
   const batchEmbedResult = await ctx.embedPort.embedBatch(
-     batch.map((b) => b.text)
+     batch.map((b) => formatDocForEmbedding(b.text, b.title ?? undefined))
   );
   if (!batchEmbedResult.ok) {
     errors += batch.length;
@@ -365,9 +366,12 @@ function getActiveChunks(
   after?: { mirrorHash: string; seq: number }
 ): Promise<StoreResult<BacklogItem[]>> {
   try {
+     // Include title for contextual embedding
     const sql = after
       ? `
-         SELECT c.mirror_hash as mirrorHash, c.seq, c.text, 'force' as reason
+         SELECT c.mirror_hash as mirrorHash, c.seq, c.text,
+           (SELECT d.title FROM documents d WHERE d.mirror_hash = c.mirror_hash AND d.active = 1 LIMIT 1) as title,
+           'force' as reason
         FROM content_chunks c
         WHERE EXISTS (
           SELECT 1 FROM documents d
@@ -378,7 +382,9 @@ function getActiveChunks(
         LIMIT ?
       `
       : `
-         SELECT c.mirror_hash as mirrorHash, c.seq, c.text, 'force' as reason
+         SELECT c.mirror_hash as mirrorHash, c.seq, c.text,
+           (SELECT d.title FROM documents d WHERE d.mirror_hash = c.mirror_hash AND d.active = 1 LIMIT 1) as title,
+           'force' as reason
         FROM content_chunks c
         WHERE EXISTS (
           SELECT 1 FROM documents d
@@ -7,6 +7,7 @@
 
 import { LlmAdapter } from '../../llm/nodeLlamaCpp/adapter';
 import { getActivePreset } from '../../llm/registry';
+ import { formatQueryForEmbedding } from '../../pipeline/contextual';
 import type { SearchOptions, SearchResults } from '../../pipeline/types';
 import {
   searchVectorWithEmbedding,
@@ -86,8 +87,10 @@ export async function vsearch(
   const embedPort = embedResult.value;
 
   try {
-     // Embed query (also determines dimensions - avoids double embed)
-     const queryEmbedResult = await embedPort.embed(query);
+     // Embed query with contextual formatting (also determines dimensions)
+     const queryEmbedResult = await embedPort.embed(
+       formatQueryForEmbedding(query)
+     );
     if (!queryEmbedResult.ok) {
       return { success: false, error: queryEmbedResult.error.message };
     }
@@ -32,11 +32,16 @@ export const DEFAULT_EXCLUDES: readonly string[] = [
 ];
 
 /** Valid FTS tokenizer options */
- export const FTS_TOKENIZERS = ['unicode61', 'porter', 'trigram'] as const;
+ export const FTS_TOKENIZERS = [
+   'unicode61',
+   'porter',
+   'trigram',
+   'snowball english',
+ ] as const;
 export type FtsTokenizer = (typeof FTS_TOKENIZERS)[number];
 
- /** Default FTS tokenizer */
- export const DEFAULT_FTS_TOKENIZER: FtsTokenizer = 'unicode61';
+ /** Default FTS tokenizer - snowball english for multilingual stemming */
+ export const DEFAULT_FTS_TOKENIZER: FtsTokenizer = 'snowball english';
 
 /**
  * BCP-47 language tag pattern (simplified, case-insensitive).
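Stock SQLite FTS5 ships only the unicode61, porter, and trigram tokenizers; a `snowball` tokenizer comes from a loadable extension, plausibly what the new `vendor` entry in package.json carries. A minimal sketch of selecting the tokenizer at table creation, with an invented table name and extension path:

```ts
import { Database } from 'bun:sqlite';

const db = new Database('index.db');
// Assumption: the snowball tokenizer is provided by a loadable extension.
// db.loadExtension('vendor/fts5-snowball');
db.run(`
  CREATE VIRTUAL TABLE IF NOT EXISTS docs_fts
  USING fts5(text, tokenize = 'snowball english')
`);
// With stemming, MATCH 'running' also finds rows that contain 'run'.
```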
@@ -173,7 +178,7 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
     name: 'Slim (Fast, ~1GB)',
     embed: 'hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf',
     rerank:
-       'hf:gpustack/bge-reranker-v2-m3-GGUF/bge-reranker-v2-m3-Q4_K_M.gguf',
+       'hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf',
     gen: 'hf:unsloth/Qwen3-1.7B-GGUF/Qwen3-1.7B-Q4_K_M.gguf',
   },
   {
@@ -181,7 +186,7 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
     name: 'Balanced (Default, ~2GB)',
     embed: 'hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf',
     rerank:
-       'hf:gpustack/bge-reranker-v2-m3-GGUF/bge-reranker-v2-m3-Q4_K_M.gguf',
+       'hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf',
     gen: 'hf:ggml-org/SmolLM3-3B-GGUF/SmolLM3-Q4_K_M.gguf',
   },
   {
@@ -189,7 +194,7 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
     name: 'Quality (Best Answers, ~2.5GB)',
     embed: 'hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf',
     rerank:
-       'hf:gpustack/bge-reranker-v2-m3-GGUF/bge-reranker-v2-m3-Q4_K_M.gguf',
+       'hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf',
     gen: 'hf:unsloth/Qwen3-4B-Instruct-2507-GGUF/Qwen3-4B-Instruct-2507-Q4_K_M.gguf',
   },
 ];
@@ -8,6 +8,7 @@ import { join as pathJoin } from 'node:path';
 import { parseUri } from '../../app/constants';
 import { LlmAdapter } from '../../llm/nodeLlamaCpp/adapter';
 import { getActivePreset } from '../../llm/registry';
+ import { formatQueryForEmbedding } from '../../pipeline/contextual';
 import type { SearchResult, SearchResults } from '../../pipeline/types';
 import {
   searchVectorWithEmbedding,
@@ -121,8 +122,10 @@ export function handleVsearch(
   const embedPort = embedResult.value;
 
   try {
-     // Embed query
-     const queryEmbedResult = await embedPort.embed(args.query);
+     // Embed query with contextual formatting
+     const queryEmbedResult = await embedPort.embed(
+       formatQueryForEmbedding(args.query)
+     );
     if (!queryEmbedResult.ok) {
       throw new Error(queryEmbedResult.error.message);
     }
@@ -6,6 +6,7 @@
  */
 
 import type { GenerationPort } from '../llm/types';
+ import type { StorePort } from '../store/types';
 import type { Citation, SearchResult } from './types';
 
 // ─────────────────────────────────────────────────────────────────────────────
@@ -32,11 +33,14 @@ Write a concise answer (1-3 paragraphs).`;
 export const ABSTENTION_MESSAGE =
   "I don't have enough information in the provided sources to answer this question.";
 
- /** Max characters per snippet to avoid blowing up prompt size */
- const MAX_SNIPPET_CHARS = 1500;
+ /** Max characters per document (~8K tokens) */
+ const MAX_DOC_CHARS = 32_000;
+
+ /** Max number of sources - fewer docs but full content */
+ const MAX_CONTEXT_SOURCES = 3;
 
- /** Max number of sources to include in context */
- const MAX_CONTEXT_SOURCES = 5;
+ /** Fallback snippet limit when full content unavailable */
+ const MAX_SNIPPET_CHARS = 1500;
 
 // ─────────────────────────────────────────────────────────────────────────────
 // Citation Processing
@@ -109,37 +113,66 @@ export interface AnswerGenerationResult {
   citations: Citation[];
 }
 
+ export interface AnswerGenerationDeps {
+   genPort: GenerationPort;
+   store: StorePort | null;
+ }
+
 /**
  * Generate a grounded answer from search results.
  * Returns null if no valid context or generation fails.
+  *
+  * When store is provided, fetches full document content for better context.
+  * Falls back to snippets if store unavailable or content fetch fails.
  */
+ // biome-ignore lint/complexity/noExcessiveCognitiveComplexity: sequential content processing with fallbacks
 export async function generateGroundedAnswer(
-   genPort: GenerationPort,
+   deps: AnswerGenerationDeps,
   query: string,
   results: SearchResult[],
   maxTokens: number
 ): Promise<AnswerGenerationResult | null> {
+   const { genPort, store } = deps;
   const contextParts: string[] = [];
   const citations: Citation[] = [];
   let citationIndex = 0;
 
   for (const r of results.slice(0, MAX_CONTEXT_SOURCES)) {
-     if (!r.snippet || r.snippet.trim().length === 0) {
-       continue;
+     let content: string | null = null;
+     let usedFullContent = false;
+
+     // Try to fetch full document content if store available
+     if (store && r.conversion?.mirrorHash) {
+       const contentResult = await store.getContent(r.conversion.mirrorHash);
+       if (contentResult.ok && contentResult.value) {
+         content = contentResult.value;
+         usedFullContent = true;
+         // Truncate to max doc chars
+         if (content.length > MAX_DOC_CHARS) {
+           content = `${content.slice(0, MAX_DOC_CHARS)}\n\n[... truncated ...]`;
+         }
+       }
     }
 
-     const snippet =
-       r.snippet.length > MAX_SNIPPET_CHARS
-         ? `${r.snippet.slice(0, MAX_SNIPPET_CHARS)}...`
-         : r.snippet;
+     // Fallback to snippet if full content unavailable
+     if (!content) {
+       if (!r.snippet || r.snippet.trim().length === 0) {
+         continue;
+       }
+       content =
+         r.snippet.length > MAX_SNIPPET_CHARS
+           ? `${r.snippet.slice(0, MAX_SNIPPET_CHARS)}...`
+           : r.snippet;
+     }
 
     citationIndex += 1;
-     contextParts.push(`[${citationIndex}] ${snippet}`);
+     contextParts.push(`[${citationIndex}] ${content}`);
+     // Clear line range when citing full content (not a specific snippet)
     citations.push({
       docid: r.docid,
       uri: r.uri,
-       startLine: r.snippetRange?.startLine,
-       endLine: r.snippetRange?.endLine,
+       startLine: usedFullContent ? undefined : r.snippetRange?.startLine,
+       endLine: usedFullContent ? undefined : r.snippetRange?.endLine,
     });
   }
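The new call shape, as a hedged sketch mirroring the ask() call-site change earlier in this diff; the `declare` placeholders stand in for real ports and results:

```ts
declare const adapter: GenerationPort;
declare const sqlite: StorePort;
declare const results: SearchResult[];

// Passing store: null falls back to snippet-based context.
const answer = await generateGroundedAnswer(
  { genPort: adapter, store: sqlite },
  'how does reranking work?',
  results,
  512 // maxTokens
);
```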
@@ -0,0 +1,57 @@
+ /**
+  * Contextual embedding formatting.
+  * Prepends document context to chunks for better retrieval.
+  *
+  * Based on Anthropic Contextual Retrieval research:
+  * - Query relevance jumps from 0.1 to 0.92 for context-dependent queries
+  * - 49% reduction in retrieval failure with contextual embeddings + BM25
+  * - 67% reduction with reranking added
+  *
+  * @module src/pipeline/contextual
+  */
+
+ // Top-level regex for performance
+ const HEADING_REGEX = /^##?\s+(.+)$/m;
+ const SUBHEADING_REGEX = /^##\s+(.+)$/m;
+ const EXT_REGEX = /\.\w+$/;
+
+ /**
+  * Format document text for embedding.
+  * Prepends title for contextual retrieval.
+  */
+ export function formatDocForEmbedding(text: string, title?: string): string {
+   const safeTitle = title?.trim() || 'none';
+   return `title: ${safeTitle} | text: ${text}`;
+ }
+
+ /**
+  * Format query for embedding.
+  * Uses task-prefixed format for asymmetric retrieval.
+  */
+ export function formatQueryForEmbedding(query: string): string {
+   return `task: search result | query: ${query}`;
+ }
+
+ /**
+  * Extract title from markdown content or filename.
+  * Prefers first heading, falls back to filename without extension.
+  */
+ export function extractTitle(content: string, filename: string): string {
+   // Try to find first heading (# or ##)
+   const match = content.match(HEADING_REGEX);
+   if (match?.[1]) {
+     const title = match[1].trim();
+     // Skip generic titles like "Notes" and try next heading
+     if (title.toLowerCase() === 'notes') {
+       const nextMatch = content.match(SUBHEADING_REGEX);
+       if (nextMatch?.[1]) {
+         return nextMatch[1].trim();
+       }
+     }
+     return title;
+   }
+
+   // Fall back to filename without extension
+   const basename = filename.split('/').pop() ?? filename;
+   return basename.replace(EXT_REGEX, '');
+ }
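Worked examples for the three helpers above; the inputs are invented, but each output follows directly from the code:

```ts
import { extractTitle, formatDocForEmbedding, formatQueryForEmbedding } from './contextual';

extractTitle('# RRF Fusion\n\nScores are merged...', 'docs/fusion.md');
// => 'RRF Fusion' (first heading wins)
extractTitle('plain text without headings', 'docs/fusion.md');
// => 'fusion' (filename without extension)
formatDocForEmbedding('Scores are merged...', 'RRF Fusion');
// => 'title: RRF Fusion | text: Scores are merged...'
formatQueryForEmbedding('how are scores merged?');
// => 'task: search result | query: how are scores merged?'
```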
@@ -15,9 +15,10 @@ import type { ExpansionResult } from './types';
 // Constants
 // ─────────────────────────────────────────────────────────────────────────────
 
- const EXPANSION_PROMPT_VERSION = 'v1';
+ const EXPANSION_PROMPT_VERSION = 'v2';
 const DEFAULT_TIMEOUT_MS = 5000;
- const JSON_EXTRACT_PATTERN = /\{[\s\S]*\}/;
+ // Non-greedy to avoid matching from first { to last } across multiple objects
+ const JSON_EXTRACT_PATTERN = /\{[\s\S]*?\}/;
 
 // ─────────────────────────────────────────────────────────────────────────────
 // Cache Key Generation
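To see why the lazy quantifier matters: with the flat JSON object the expansion schema produces (no nested `{}`, no `}` inside strings), the first closing brace really does end the object:

```ts
const noisy = 'Sure! {"lexicalQueries":["a"]} Also: {"note":true}';
/\{[\s\S]*\}/.exec(noisy)?.[0];  // greedy: '{"lexicalQueries":["a"]} Also: {"note":true}'
/\{[\s\S]*?\}/.exec(noisy)?.[0]; // lazy:   '{"lexicalQueries":["a"]}'
```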
@@ -40,45 +41,53 @@ export function generateCacheKey(
 // Prompt Templates
 // ─────────────────────────────────────────────────────────────────────────────
 
- const EXPANSION_PROMPT_EN = `You are a query expansion assistant. Given a search query, generate alternative phrasings to improve search results.
+ const EXPANSION_PROMPT_EN = `You expand search queries for a hybrid search system.
 
- Input query: "{query}"
+ Query: "{query}"
 
- Generate a JSON object with:
- - "lexicalQueries": array of 2-3 keyword-based variations (for BM25 search)
- - "vectorQueries": array of 2-3 semantic rephrasings (for embedding search)
- - "hyde": a short hypothetical document passage that would answer the query (optional)
+ Generate JSON with:
+ 1. "lexicalQueries": 2-3 keyword variations using synonyms (for BM25)
+ 2. "vectorQueries": 2-3 semantic rephrasings capturing intent (for embeddings)
+ 3. "hyde": A 50-100 word passage that directly answers the query, as if excerpted from a relevant document
 
- Respond ONLY with valid JSON, no explanation.
+ Rules:
+ - Keep proper nouns exactly as written
+ - Be concise - each variation 3-8 words
+ - HyDE should read like actual documentation, not a question
 
- Example:
- {
-   "lexicalQueries": ["deployment process", "how to deploy", "deploying application"],
-   "vectorQueries": ["steps to release software to production", "guide for application deployment"],
-   "hyde": "To deploy the application, first run the build command, then push to the staging environment..."
- }`;
+ Respond with valid JSON only.`;
 
- const EXPANSION_PROMPT_DE = `Du bist ein Query-Erweiterungs-Assistent. Generiere alternative Formulierungen für die Suchanfrage.
+ const EXPANSION_PROMPT_DE = `Du erweiterst Suchanfragen für ein hybrides Suchsystem.
 
- Suchanfrage: "{query}"
+ Anfrage: "{query}"
 
- Generiere ein JSON-Objekt mit:
- - "lexicalQueries": Array mit 2-3 Keyword-Variationen (für BM25-Suche)
- - "vectorQueries": Array mit 2-3 semantischen Umformulierungen (für Vektor-Suche)
- - "hyde": Ein kurzer hypothetischer Dokumentenausschnitt, der die Anfrage beantworten würde (optional)
+ Generiere JSON mit:
+ 1. "lexicalQueries": 2-3 Keyword-Variationen mit Synonymen (für BM25)
+ 2. "vectorQueries": 2-3 semantische Umformulierungen (für Embeddings)
+ 3. "hyde": Ein 50-100 Wort Abschnitt, der die Anfrage direkt beantwortet, wie aus einem relevanten Dokument
 
- Antworte NUR mit validem JSON, keine Erklärung.`;
+ Regeln:
+ - Eigennamen exakt beibehalten
+ - Kurz halten - jede Variation 3-8 Wörter
+ - HyDE soll wie echte Dokumentation klingen, nicht wie eine Frage
 
- const EXPANSION_PROMPT_MULTILINGUAL = `You are a query expansion assistant. Generate alternative phrasings for the search query in the same language as the query.
+ Antworte nur mit validem JSON.`;
 
- Input query: "{query}"
+ const EXPANSION_PROMPT_MULTILINGUAL = `You expand search queries for a hybrid search system. Respond in the same language as the query.
 
- Generate a JSON object with:
- - "lexicalQueries": array of 2-3 keyword-based variations
- - "vectorQueries": array of 2-3 semantic rephrasings
- - "hyde": a short hypothetical document passage (optional)
+ Query: "{query}"
 
- Respond ONLY with valid JSON.`;
+ Generate JSON with:
+ 1. "lexicalQueries": 2-3 keyword variations using synonyms (for BM25)
+ 2. "vectorQueries": 2-3 semantic rephrasings capturing intent (for embeddings)
+ 3. "hyde": A 50-100 word passage that directly answers the query, as if excerpted from a relevant document
+
+ Rules:
+ - Keep proper nouns exactly as written
+ - Be concise - each variation 3-8 words
+ - HyDE should read like actual documentation, not a question
+
+ Respond with valid JSON only.`;
 
 /**
  * Get prompt template for language.
@@ -178,9 +187,10 @@ export async function expandQuery(
   const template = getPromptTemplate(options.lang);
   const prompt = template.replace('{query}', query);
 
-   // Run with timeout
+   // Run with timeout (clear timer to avoid resource leak)
+   let timeoutId: ReturnType<typeof setTimeout> | undefined;
   const timeoutPromise = new Promise<null>((resolve) => {
-     setTimeout(() => resolve(null), timeout);
+     timeoutId = setTimeout(() => resolve(null), timeout);
   });
 
   try {
@@ -193,6 +203,11 @@ export async function expandQuery(
       timeoutPromise,
     ]);
 
+     // Clear timeout if generation completed first
+     if (timeoutId) {
+       clearTimeout(timeoutId);
+     }
+
     // Timeout
     if (result === null) {
       return ok(null);
@@ -207,6 +222,9 @@ export async function expandQuery(
     const parsed = parseExpansionResult(result.value);
     return ok(parsed);
   } catch {
+     if (timeoutId) {
+       clearTimeout(timeoutId);
+     }
     return ok(null); // Graceful degradation
   }
 }
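The same race-and-cleanup pattern, extracted as a standalone helper for illustration; this is a sketch, not a utility that exists in gno:

```ts
async function withTimeout<T>(work: Promise<T>, ms: number): Promise<T | null> {
  let timeoutId: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<null>((resolve) => {
    timeoutId = setTimeout(() => resolve(null), ms);
  });
  try {
    // Whichever settles first wins; null signals a timeout to the caller.
    return await Promise.race([work, timeout]);
  } finally {
    // Always clear the timer so it cannot keep the process alive.
    if (timeoutId) clearTimeout(timeoutId);
  }
}
```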
@@ -49,15 +49,23 @@ export function formatResultExplain(results: ExplainResult[]): string {
 // Explain Line Builders
 // ─────────────────────────────────────────────────────────────────────────────
 
+ export type ExpansionStatus =
+   | 'disabled' // User chose --no-expand
+   | 'skipped_strong' // Strong BM25 signal detected
+   | 'attempted'; // Expansion was attempted (may have succeeded or timed out)
+
 export function explainExpansion(
-   enabled: boolean,
+   status: ExpansionStatus,
   result: ExpansionResult | null
 ): ExplainLine {
-   if (!enabled) {
+   if (status === 'disabled') {
     return { stage: 'expansion', message: 'disabled' };
   }
+   if (status === 'skipped_strong') {
+     return { stage: 'expansion', message: 'skipped (strong BM25)' };
+   }
   if (!result) {
-     return { stage: 'expansion', message: 'skipped (strong BM25 or timeout)' };
+     return { stage: 'expansion', message: 'skipped (timeout)' };
   }
   const lex = result.lexicalQueries.length;
   const sem = result.vectorQueries.length;
@@ -64,9 +64,12 @@ export function rrfFuse(
   );
 
   // Process BM25 sources
+   // Original query gets 2x weight to prevent dilution by expansion variants
   for (const input of bm25Inputs) {
     const weight =
-       input.source === 'bm25' ? config.bm25Weight : config.bm25Weight * 0.5;
+       input.source === 'bm25'
+         ? config.bm25Weight * 2.0
+         : config.bm25Weight * 0.5;
 
     for (const result of input.results) {
       const key = `${result.mirrorHash}:${result.seq}`;
@@ -98,8 +101,9 @@ export function rrfFuse(
   }
 
   // Process vector sources
+   // Original query gets 2x weight to prevent dilution by expansion variants
   for (const input of vectorInputs) {
-     let weight = config.vecWeight;
+     let weight = config.vecWeight * 2.0; // Default for original vector
     if (input.source === 'vector_variant') {
       weight = config.vecWeight * 0.5;
     } else if (input.source === 'hyde') {
@@ -135,16 +139,23 @@ export function rrfFuse(
     }
   }
 
-   // Apply top-rank bonus
+   // Apply tiered top-rank bonus
+   // Rewards documents ranking highly in ANY list (not requiring both)
   for (const candidate of candidates.values()) {
-     if (
-       candidate.bm25Rank !== null &&
-       candidate.bm25Rank <= config.topRankThreshold &&
-       candidate.vecRank !== null &&
-       candidate.vecRank <= config.topRankThreshold
-     ) {
+     const bm25Rank = candidate.bm25Rank;
+     const vecRank = candidate.vecRank;
+
+     // Tier 1: #1 in any list
+     if (bm25Rank === 1 || vecRank === 1) {
       candidate.fusionScore += config.topRankBonus;
     }
+     // Tier 2: Top-3 in any list (but not #1)
+     else if (
+       (bm25Rank !== null && bm25Rank <= config.topRankThreshold) ||
+       (vecRank !== null && vecRank <= config.topRankThreshold)
+     ) {
+       candidate.fusionScore += config.topRankBonus * 0.4; // 40% of tier 1
+     }
   }
 
   // Sort by fusion score (descending), then by mirrorHash:seq for determinism
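A numeric sketch of what these weights do, assuming the usual RRF form `weight / (k + rank)` with `k = 60`; the constants are illustrative, since gno's actual config values are not shown in this diff:

```ts
const k = 60;
const bm25Weight = 1.0;
const rrf = (weight: number, rank: number) => weight / (k + rank);

// A doc ranked #2 for the original query vs #1 for an expansion variant:
rrf(bm25Weight * 2.0, 2); // ~0.0323  original query contribution
rrf(bm25Weight * 0.5, 1); // ~0.0082  variant contribution
// The 2x / 0.5x split keeps the user's own phrasing dominant over LLM variants.

// Tiered bonus: rank #1 in either list earns the full topRankBonus;
// top-3 in either list (but not #1) earns 40% of it.
const topRankBonus = 0.05; // illustrative
const tier1 = topRankBonus;
const tier2 = topRankBonus * 0.4;
```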