npm - @gmickel/gno - Versions diffs - 0.4.0 → 0.5.0 - Mend

@gmickel/gno 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

package/README.md +10 -6
package/package.json +1 -1
package/src/cli/commands/ask.ts +1 -1
package/src/cli/commands/embed.ts +10 -4
package/src/cli/commands/vsearch.ts +5 -2
package/src/config/types.ts +11 -6
package/src/mcp/tools/vsearch.ts +5 -2
package/src/pipeline/answer.ts +47 -14
package/src/pipeline/contextual.ts +57 -0
package/src/pipeline/expansion.ts +49 -31
package/src/pipeline/explain.ts +11 -3
package/src/pipeline/fusion.ts +20 -9
package/src/pipeline/hybrid.ts +57 -40
package/src/pipeline/index.ts +7 -0
package/src/pipeline/rerank.ts +55 -27
package/src/pipeline/types.ts +0 -3
package/src/pipeline/vsearch.ts +3 -2
package/src/serve/routes/api.ts +1 -1
package/src/store/migrations/002-documents-fts.ts +40 -0
package/src/store/migrations/index.ts +2 -1
package/src/store/sqlite/adapter.ts +169 -33
package/src/store/sqlite/fts5-snowball.ts +144 -0
package/src/store/types.ts +23 -3
package/src/store/vector/stats.ts +3 -0
package/src/store/vector/types.ts +1 -0

package/README.md CHANGED Viewed

@@ -99,11 +99,14 @@ gno skill install --target all       # Both Claude + Codex
 | Command | Mode | Best For |
 |:--------|:-----|:---------|
-| `gno search` | BM25 | Exact phrases, code identifiers |
-| `gno vsearch` | Vector | Natural language, concepts |
+| `gno search` | Document-level BM25 | Exact phrases, code identifiers |
+| `gno vsearch` | Contextual Vector | Natural language, concepts |
 | `gno query` | Hybrid | Best accuracy (BM25 + vector + reranking) |
 | `gno ask --answer` | RAG | Direct answers with citations |
+**BM25** indexes full documents (not chunks) with Snowball stemming—"running" matches "run".
+**Vector** embeds chunks with document titles for context awareness.
 ```bash
 gno search "handleAuth"              # Find exact matches
 gno vsearch "error handling patterns" # Semantic similarity
@@ -230,10 +233,11 @@ graph TD
     M --> N[Final Results]
 ```
+0. **Strong Signal Check** — Skip expansion if BM25 has a confident match (saves 1-3s)
 1. **Query Expansion** — LLM generates lexical variants, semantic rephrases, and a [HyDE](https://arxiv.org/abs/2212.10496) passage
-2. **Parallel Retrieval** — BM25 + vector search run concurrently on all variants
-3. **Fusion** — Reciprocal Rank Fusion merges results with position-based scoring
-4. **Reranking** — Cross-encoder rescores top 20, blended with fusion scores
+2. **Parallel Retrieval** — Document-level BM25 + chunk-level vector search on all variants
+3. **Fusion** — RRF with 2× weight for original query, tiered bonus for top ranks
+4. **Reranking** — Qwen3-Reranker scores full documents (32K context), blended with fusion
 > **Deep dive**: [How Search Works](https://gno.sh/docs/HOW-SEARCH-WORKS/)
@@ -263,7 +267,7 @@ Models auto-download on first use to `~/.cache/gno/models/`.
 | Model | Purpose | Size |
 |:------|:--------|:-----|
 | bge-m3 | Embeddings (1024-dim, multilingual) | ~500MB |
-| bge-reranker-v2-m3 | Cross-encoder reranking | ~700MB |
+| Qwen3-Reranker-0.6B | Cross-encoder reranking (32K context) | ~700MB |
 | Qwen/SmolLM | Query expansion + AI answers | ~600MB-1.2GB |
 ### Model Presets

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@gmickel/gno",
-  "version": "0.4.0",
+  "version": "0.5.0",
   "description": "Local semantic search for your documents. Index Markdown, PDF, and Office files with hybrid BM25 + vector search.",
   "keywords": [
     "search",

package/src/cli/commands/ask.ts CHANGED Viewed

@@ -170,7 +170,7 @@ export async function ask(
     if (shouldGenerateAnswer && genPort) {
       const maxTokens = options.maxAnswerTokens ?? 512;
       const rawResult = await generateGroundedAnswer(
-        genPort,
+        { genPort, store },
         query,
         results,
         maxTokens

package/src/cli/commands/embed.ts CHANGED Viewed

@@ -11,6 +11,7 @@ import { getConfigPaths, isInitialized, loadConfig } from '../../config';
 import { LlmAdapter } from '../../llm/nodeLlamaCpp/adapter';
 import { getActivePreset } from '../../llm/registry';
 import type { EmbeddingPort } from '../../llm/types';
+import { formatDocForEmbedding } from '../../pipeline/contextual';
 import { SqliteAdapter } from '../../store/sqlite/adapter';
 import type { StoreResult } from '../../store/types';
 import { err, ok } from '../../store/types';
@@ -131,9 +132,9 @@ async function processBatches(ctx: BatchContext): Promise<BatchResult> {
       cursor = { mirrorHash: lastItem.mirrorHash, seq: lastItem.seq };
     }
-    // Embed batch
+    // Embed batch with contextual formatting (title prefix)
     const batchEmbedResult = await ctx.embedPort.embedBatch(
-      batch.map((b) => b.text)
+      batch.map((b) => formatDocForEmbedding(b.text, b.title ?? undefined))
     );
     if (!batchEmbedResult.ok) {
       errors += batch.length;
@@ -365,9 +366,12 @@ function getActiveChunks(
   after?: { mirrorHash: string; seq: number }
 ): Promise<StoreResult<BacklogItem[]>> {
   try {
+    // Include title for contextual embedding
     const sql = after
       ? `
-        SELECT c.mirror_hash as mirrorHash, c.seq, c.text, 'force' as reason
+        SELECT c.mirror_hash as mirrorHash, c.seq, c.text,
+          (SELECT d.title FROM documents d WHERE d.mirror_hash = c.mirror_hash AND d.active = 1 LIMIT 1) as title,
+          'force' as reason
         FROM content_chunks c
         WHERE EXISTS (
           SELECT 1 FROM documents d
@@ -378,7 +382,9 @@ function getActiveChunks(
         LIMIT ?
       `
       : `
-        SELECT c.mirror_hash as mirrorHash, c.seq, c.text, 'force' as reason
+        SELECT c.mirror_hash as mirrorHash, c.seq, c.text,
+          (SELECT d.title FROM documents d WHERE d.mirror_hash = c.mirror_hash AND d.active = 1 LIMIT 1) as title,
+          'force' as reason
         FROM content_chunks c
         WHERE EXISTS (
           SELECT 1 FROM documents d

package/src/cli/commands/vsearch.ts CHANGED Viewed

@@ -7,6 +7,7 @@
 import { LlmAdapter } from '../../llm/nodeLlamaCpp/adapter';
 import { getActivePreset } from '../../llm/registry';
+import { formatQueryForEmbedding } from '../../pipeline/contextual';
 import type { SearchOptions, SearchResults } from '../../pipeline/types';
 import {
   searchVectorWithEmbedding,
@@ -86,8 +87,10 @@ export async function vsearch(
     const embedPort = embedResult.value;
     try {
-      // Embed query (also determines dimensions - avoids double embed)
-      const queryEmbedResult = await embedPort.embed(query);
+      // Embed query with contextual formatting (also determines dimensions)
+      const queryEmbedResult = await embedPort.embed(
+        formatQueryForEmbedding(query)
+      );
       if (!queryEmbedResult.ok) {
         return { success: false, error: queryEmbedResult.error.message };
       }

package/src/config/types.ts CHANGED Viewed

@@ -32,11 +32,16 @@ export const DEFAULT_EXCLUDES: readonly string[] = [
 ];
 /** Valid FTS tokenizer options */
-export const FTS_TOKENIZERS = ['unicode61', 'porter', 'trigram'] as const;
+export const FTS_TOKENIZERS = [
+  'unicode61',
+  'porter',
+  'trigram',
+  'snowball english',
+] as const;
 export type FtsTokenizer = (typeof FTS_TOKENIZERS)[number];
-/** Default FTS tokenizer */
-export const DEFAULT_FTS_TOKENIZER: FtsTokenizer = 'unicode61';
+/** Default FTS tokenizer - Snowball stemmer configured for English */
+export const DEFAULT_FTS_TOKENIZER: FtsTokenizer = 'snowball english';
 /**
  * BCP-47 language tag pattern (simplified, case-insensitive).
@@ -173,7 +178,7 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
     name: 'Slim (Fast, ~1GB)',
     embed: 'hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf',
     rerank:
-      'hf:gpustack/bge-reranker-v2-m3-GGUF/bge-reranker-v2-m3-Q4_K_M.gguf',
+      'hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf',
     gen: 'hf:unsloth/Qwen3-1.7B-GGUF/Qwen3-1.7B-Q4_K_M.gguf',
   },
   {
@@ -181,7 +186,7 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
     name: 'Balanced (Default, ~2GB)',
     embed: 'hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf',
     rerank:
-      'hf:gpustack/bge-reranker-v2-m3-GGUF/bge-reranker-v2-m3-Q4_K_M.gguf',
+      'hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf',
     gen: 'hf:ggml-org/SmolLM3-3B-GGUF/SmolLM3-Q4_K_M.gguf',
   },
   {
@@ -189,7 +194,7 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
     name: 'Quality (Best Answers, ~2.5GB)',
     embed: 'hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf',
     rerank:
-      'hf:gpustack/bge-reranker-v2-m3-GGUF/bge-reranker-v2-m3-Q4_K_M.gguf',
+      'hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf',
     gen: 'hf:unsloth/Qwen3-4B-Instruct-2507-GGUF/Qwen3-4B-Instruct-2507-Q4_K_M.gguf',
   },
 ];

package/src/mcp/tools/vsearch.ts CHANGED Viewed

@@ -8,6 +8,7 @@ import { join as pathJoin } from 'node:path';
 import { parseUri } from '../../app/constants';
 import { LlmAdapter } from '../../llm/nodeLlamaCpp/adapter';
 import { getActivePreset } from '../../llm/registry';
+import { formatQueryForEmbedding } from '../../pipeline/contextual';
 import type { SearchResult, SearchResults } from '../../pipeline/types';
 import {
   searchVectorWithEmbedding,
@@ -121,8 +122,10 @@ export function handleVsearch(
       const embedPort = embedResult.value;
       try {
-        // Embed query
-        const queryEmbedResult = await embedPort.embed(args.query);
+        // Embed query with contextual formatting
+        const queryEmbedResult = await embedPort.embed(
+          formatQueryForEmbedding(args.query)
+        );
         if (!queryEmbedResult.ok) {
           throw new Error(queryEmbedResult.error.message);
         }

package/src/pipeline/answer.ts CHANGED Viewed

@@ -6,6 +6,7 @@
  */
 import type { GenerationPort } from '../llm/types';
+import type { StorePort } from '../store/types';
 import type { Citation, SearchResult } from './types';
 // ─────────────────────────────────────────────────────────────────────────────
@@ -32,11 +33,14 @@ Write a concise answer (1-3 paragraphs).`;
 export const ABSTENTION_MESSAGE =
   "I don't have enough information in the provided sources to answer this question.";
-/** Max characters per snippet to avoid blowing up prompt size */
-const MAX_SNIPPET_CHARS = 1500;
+/** Max characters per document (~8K tokens) */
+const MAX_DOC_CHARS = 32_000;
+/** Max number of sources - fewer docs but full content */
+const MAX_CONTEXT_SOURCES = 3;
-/** Max number of sources to include in context */
-const MAX_CONTEXT_SOURCES = 5;
+/** Fallback snippet limit when full content unavailable */
+const MAX_SNIPPET_CHARS = 1500;
 // ─────────────────────────────────────────────────────────────────────────────
 // Citation Processing
@@ -109,37 +113,66 @@ export interface AnswerGenerationResult {
   citations: Citation[];
 }
+export interface AnswerGenerationDeps {
+  genPort: GenerationPort;
+  store: StorePort | null;
+}
 /**
  * Generate a grounded answer from search results.
  * Returns null if no valid context or generation fails.
+ *
+ * When store is provided, fetches full document content for better context.
+ * Falls back to snippets if store unavailable or content fetch fails.
  */
+// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: sequential content processing with fallbacks
 export async function generateGroundedAnswer(
-  genPort: GenerationPort,
+  deps: AnswerGenerationDeps,
   query: string,
   results: SearchResult[],
   maxTokens: number
 ): Promise<AnswerGenerationResult | null> {
+  const { genPort, store } = deps;
   const contextParts: string[] = [];
   const citations: Citation[] = [];
   let citationIndex = 0;
   for (const r of results.slice(0, MAX_CONTEXT_SOURCES)) {
-    if (!r.snippet || r.snippet.trim().length === 0) {
-      continue;
+    let content: string | null = null;
+    let usedFullContent = false;
+    // Try to fetch full document content if store available
+    if (store && r.conversion?.mirrorHash) {
+      const contentResult = await store.getContent(r.conversion.mirrorHash);
+      if (contentResult.ok && contentResult.value) {
+        content = contentResult.value;
+        usedFullContent = true;
+        // Truncate to max doc chars
+        if (content.length > MAX_DOC_CHARS) {
+          content = `${content.slice(0, MAX_DOC_CHARS)}\n\n[... truncated ...]`;
+        }
+      }
     }
-    const snippet =
-      r.snippet.length > MAX_SNIPPET_CHARS
-        ? `${r.snippet.slice(0, MAX_SNIPPET_CHARS)}...`
-        : r.snippet;
+    // Fallback to snippet if full content unavailable
+    if (!content) {
+      if (!r.snippet || r.snippet.trim().length === 0) {
+        continue;
+      }
+      content =
+        r.snippet.length > MAX_SNIPPET_CHARS
+          ? `${r.snippet.slice(0, MAX_SNIPPET_CHARS)}...`
+          : r.snippet;
+    }
     citationIndex += 1;
-    contextParts.push(`[${citationIndex}] ${snippet}`);
+    contextParts.push(`[${citationIndex}] ${content}`);
+    // Clear line range when citing full content (not a specific snippet)
     citations.push({
       docid: r.docid,
       uri: r.uri,
-      startLine: r.snippetRange?.startLine,
-      endLine: r.snippetRange?.endLine,
+      startLine: usedFullContent ? undefined : r.snippetRange?.startLine,
+      endLine: usedFullContent ? undefined : r.snippetRange?.endLine,
     });
   }

package/src/pipeline/contextual.ts ADDED Viewed

@@ -0,0 +1,57 @@
+/**
+ * Contextual embedding formatting.
+ * Prepends document context to chunks for better retrieval.
+ *
+ * Based on Anthropic Contextual Retrieval research:
+ * - Query relevance jumps from 0.1 to 0.92 for context-dependent queries
+ * - 49% reduction in retrieval failure with contextual embeddings + BM25
+ * - 67% reduction with reranking added
+ *
+ * @module src/pipeline/contextual
+ */
+// Top-level regex for performance
+const HEADING_REGEX = /^##?\s+(.+)$/m;
+const SUBHEADING_REGEX = /^##\s+(.+)$/m;
+const EXT_REGEX = /\.\w+$/;
+/**
+ * Format document text for embedding.
+ * Prepends title for contextual retrieval.
+ */
+export function formatDocForEmbedding(text: string, title?: string): string {
+  const safeTitle = title?.trim() || 'none';
+  return `title: ${safeTitle} | text: ${text}`;
+}
+/**
+ * Format query for embedding.
+ * Uses task-prefixed format for asymmetric retrieval.
+ */
+export function formatQueryForEmbedding(query: string): string {
+  return `task: search result | query: ${query}`;
+}
+/**
+ * Extract title from markdown content or filename.
+ * Prefers first heading, falls back to filename without extension.
+ */
+export function extractTitle(content: string, filename: string): string {
+  // Try to find first heading (# or ##)
+  const match = content.match(HEADING_REGEX);
+  if (match?.[1]) {
+    const title = match[1].trim();
+    // Generic title "Notes": use the first level-2 (##) heading instead, if one exists
+    if (title.toLowerCase() === 'notes') {
+      const nextMatch = content.match(SUBHEADING_REGEX);
+      if (nextMatch?.[1]) {
+        return nextMatch[1].trim();
+      }
+    }
+    return title;
+  }
+  // Fall back to filename without extension
+  const basename = filename.split('/').pop() ?? filename;
+  return basename.replace(EXT_REGEX, '');
+}

package/src/pipeline/expansion.ts CHANGED Viewed

@@ -15,9 +15,10 @@ import type { ExpansionResult } from './types';
 // Constants
 // ─────────────────────────────────────────────────────────────────────────────
-const EXPANSION_PROMPT_VERSION = 'v1';
+const EXPANSION_PROMPT_VERSION = 'v2';
 const DEFAULT_TIMEOUT_MS = 5000;
-const JSON_EXTRACT_PATTERN = /\{[\s\S]*\}/;
+// Non-greedy: the match ends at the first }, so it never spans from the first { to the last } across multiple objects
+const JSON_EXTRACT_PATTERN = /\{[\s\S]*?\}/;
 // ─────────────────────────────────────────────────────────────────────────────
 // Cache Key Generation
@@ -40,45 +41,53 @@ export function generateCacheKey(
 // Prompt Templates
 // ─────────────────────────────────────────────────────────────────────────────
-const EXPANSION_PROMPT_EN = `You are a query expansion assistant. Given a search query, generate alternative phrasings to improve search results.
+const EXPANSION_PROMPT_EN = `You expand search queries for a hybrid search system.
-Input query: "{query}"
+Query: "{query}"
-Generate a JSON object with:
-- "lexicalQueries": array of 2-3 keyword-based variations (for BM25 search)
-- "vectorQueries": array of 2-3 semantic rephrasing (for embedding search)
-- "hyde": a short hypothetical document passage that would answer the query (optional)
+Generate JSON with:
+1. "lexicalQueries": 2-3 keyword variations using synonyms (for BM25)
+2. "vectorQueries": 2-3 semantic rephrasings capturing intent (for embeddings)
+3. "hyde": A 50-100 word passage that directly answers the query, as if excerpted from a relevant document
-Respond ONLY with valid JSON, no explanation.
+Rules:
+- Keep proper nouns exactly as written
+- Be concise - each variation 3-8 words
+- HyDE should read like actual documentation, not a question
-Example:
-{
-  "lexicalQueries": ["deployment process", "how to deploy", "deploying application"],
-  "vectorQueries": ["steps to release software to production", "guide for application deployment"],
-  "hyde": "To deploy the application, first run the build command, then push to the staging environment..."
-}`;
+Respond with valid JSON only.`;
-const EXPANSION_PROMPT_DE = `Du bist ein Query-Erweiterungs-Assistent. Generiere alternative Formulierungen für die Suchanfrage.
+const EXPANSION_PROMPT_DE = `Du erweiterst Suchanfragen für ein hybrides Suchsystem.
-Suchanfrage: "{query}"
+Anfrage: "{query}"
-Generiere ein JSON-Objekt mit:
-- "lexicalQueries": Array mit 2-3 Keyword-Variationen (für BM25-Suche)
-- "vectorQueries": Array mit 2-3 semantischen Umformulierungen (für Vektor-Suche)
-- "hyde": Ein kurzer hypothetischer Dokumentenausschnitt, der die Anfrage beantworten würde (optional)
+Generiere JSON mit:
+1. "lexicalQueries": 2-3 Keyword-Variationen mit Synonymen (für BM25)
+2. "vectorQueries": 2-3 semantische Umformulierungen (für Embeddings)
+3. "hyde": Ein 50-100 Wort Abschnitt, der die Anfrage direkt beantwortet, wie aus einem relevanten Dokument
-Antworte NUR mit validem JSON, keine Erklärung.`;
+Regeln:
+- Eigennamen exakt beibehalten
+- Kurz halten - jede Variation 3-8 Wörter
+- HyDE soll wie echte Dokumentation klingen, nicht wie eine Frage
-const EXPANSION_PROMPT_MULTILINGUAL = `You are a query expansion assistant. Generate alternative phrasings for the search query in the same language as the query.
+Antworte nur mit validem JSON.`;
-Input query: "{query}"
+const EXPANSION_PROMPT_MULTILINGUAL = `You expand search queries for a hybrid search system. Respond in the same language as the query.
-Generate a JSON object with:
-- "lexicalQueries": array of 2-3 keyword-based variations
-- "vectorQueries": array of 2-3 semantic rephrasing
-- "hyde": a short hypothetical document passage (optional)
+Query: "{query}"
-Respond ONLY with valid JSON.`;
+Generate JSON with:
+1. "lexicalQueries": 2-3 keyword variations using synonyms (for BM25)
+2. "vectorQueries": 2-3 semantic rephrasings capturing intent (for embeddings)
+3. "hyde": A 50-100 word passage that directly answers the query, as if excerpted from a relevant document
+Rules:
+- Keep proper nouns exactly as written
+- Be concise - each variation 3-8 words
+- HyDE should read like actual documentation, not a question
+Respond with valid JSON only.`;
 /**
  * Get prompt template for language.
@@ -178,9 +187,10 @@ export async function expandQuery(
   const template = getPromptTemplate(options.lang);
   const prompt = template.replace('{query}', query);
-  // Run with timeout
+  // Run with timeout (clear timer to avoid resource leak)
+  let timeoutId: ReturnType<typeof setTimeout> | undefined;
   const timeoutPromise = new Promise<null>((resolve) => {
-    setTimeout(() => resolve(null), timeout);
+    timeoutId = setTimeout(() => resolve(null), timeout);
   });
   try {
@@ -193,6 +203,11 @@ export async function expandQuery(
       timeoutPromise,
     ]);
+    // Clear timeout if generation completed first
+    if (timeoutId) {
+      clearTimeout(timeoutId);
+    }
     // Timeout
     if (result === null) {
       return ok(null);
@@ -207,6 +222,9 @@ export async function expandQuery(
     const parsed = parseExpansionResult(result.value);
     return ok(parsed);
   } catch {
+    if (timeoutId) {
+      clearTimeout(timeoutId);
+    }
     return ok(null); // Graceful degradation
   }
 }

package/src/pipeline/explain.ts CHANGED Viewed

@@ -49,15 +49,23 @@ export function formatResultExplain(results: ExplainResult[]): string {
 // Explain Line Builders
 // ─────────────────────────────────────────────────────────────────────────────
+export type ExpansionStatus =
+  | 'disabled' // User chose --no-expand
+  | 'skipped_strong' // Strong BM25 signal detected
+  | 'attempted'; // Expansion was attempted (may have succeeded or timed out)
 export function explainExpansion(
-  enabled: boolean,
+  status: ExpansionStatus,
   result: ExpansionResult | null
 ): ExplainLine {
-  if (!enabled) {
+  if (status === 'disabled') {
     return { stage: 'expansion', message: 'disabled' };
   }
+  if (status === 'skipped_strong') {
+    return { stage: 'expansion', message: 'skipped (strong BM25)' };
+  }
   if (!result) {
-    return { stage: 'expansion', message: 'skipped (strong BM25 or timeout)' };
+    return { stage: 'expansion', message: 'skipped (timeout)' };
   }
   const lex = result.lexicalQueries.length;
   const sem = result.vectorQueries.length;

package/src/pipeline/fusion.ts CHANGED Viewed

@@ -64,9 +64,12 @@ export function rrfFuse(
   );
   // Process BM25 sources
+  // Original query gets 2x weight to prevent dilution by expansion variants
   for (const input of bm25Inputs) {
     const weight =
-      input.source === 'bm25' ? config.bm25Weight : config.bm25Weight * 0.5;
+      input.source === 'bm25'
+        ? config.bm25Weight * 2.0
+        : config.bm25Weight * 0.5;
     for (const result of input.results) {
       const key = `${result.mirrorHash}:${result.seq}`;
@@ -98,8 +101,9 @@ export function rrfFuse(
   }
   // Process vector sources
+  // Original query gets 2x weight to prevent dilution by expansion variants
   for (const input of vectorInputs) {
-    let weight = config.vecWeight;
+    let weight = config.vecWeight * 2.0; // Default for original vector
     if (input.source === 'vector_variant') {
       weight = config.vecWeight * 0.5;
     } else if (input.source === 'hyde') {
@@ -135,16 +139,23 @@ export function rrfFuse(
     }
   }
-  // Apply top-rank bonus
+  // Apply tiered top-rank bonus
+  // Rewards documents ranking highly in ANY list (not requiring both)
   for (const candidate of candidates.values()) {
-    if (
-      candidate.bm25Rank !== null &&
-      candidate.bm25Rank <= config.topRankThreshold &&
-      candidate.vecRank !== null &&
-      candidate.vecRank <= config.topRankThreshold
-    ) {
+    const bm25Rank = candidate.bm25Rank;
+    const vecRank = candidate.vecRank;
+    // Tier 1: #1 in any list
+    if (bm25Rank === 1 || vecRank === 1) {
       candidate.fusionScore += config.topRankBonus;
     }
+    // Tier 2: rank <= config.topRankThreshold in any list (but not #1)
+    else if (
+      (bm25Rank !== null && bm25Rank <= config.topRankThreshold) ||
+      (vecRank !== null && vecRank <= config.topRankThreshold)
+    ) {
+      candidate.fusionScore += config.topRankBonus * 0.4; // 40% of tier 1
+    }
   }
   // Sort by fusion score (descending), then by mirrorHash:seq for determinism