npm - @tobilu/qmd - Versions diffs - 1.1.2 → 1.1.5 - Mend

@tobilu/qmd 1.1.2 → 1.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,37 @@
 ## [Unreleased]
+## [1.1.5] - 2026-03-07
+Ambiguous queries like "performance" now produce dramatically better results
+when the caller knows what they mean. The new `intent` parameter steers all
+five pipeline stages — expansion, strong-signal bypass, chunk selection,
+reranking, and snippet extraction — without searching on its own. Design and
+original implementation by Ilya Grigorik (@vyalamar) in #180.
+### Changes
+- **Intent parameter**: optional `intent` string disambiguates queries across
+  the entire search pipeline. Available via CLI (`--intent` flag or `intent:`
+  line in query documents), MCP (`intent` field on the query tool), and
+  programmatic API. Adapted from PR #180 (thanks @vyalamar).
+- **Query expansion**: when intent is provided, the expansion LLM prompt
+  includes `Query intent: {intent}`, matching the finetune training data
+  format for better-aligned expansions.
+- **Reranking**: intent is prepended to the rerank query so Qwen3-Reranker
+  scores with domain context.
+- **Chunk selection**: intent terms scored at 0.5× weight alongside query
+  terms (1.0×) when selecting the best chunk per document for reranking.
+- **Snippet extraction**: intent terms scored at 0.3× weight to nudge
+  snippets toward intent-relevant lines without overriding query anchoring.
+- **Strong-signal bypass disabled with intent**: when intent is provided, the
+  BM25 strong-signal shortcut is skipped — the obvious keyword match may not
+  be what the caller wants.
+- **MCP instructions**: callers are now guided to provide `intent` on every
+  search call for disambiguation.
+- **Query document syntax**: `intent:` recognized as a line type. At most one
+  per document, cannot appear alone. Grammar updated in `docs/SYNTAX.md`.
 ## [1.1.2] - 2026-03-07
 13 community PRs merged. GPU initialization replaced with node-llama-cpp's

package/dist/formatter.d.ts CHANGED Viewed

@@ -28,6 +28,7 @@ export type FormatOptions = {
     query?: string;
     useColor?: boolean;
     lineNumbers?: boolean;
+    intent?: string;
 };
 /**
  * Add line numbers to text content.

package/dist/formatter.js CHANGED Viewed

@@ -55,7 +55,7 @@ export function searchResultsToJson(results, opts = {}) {
     const output = results.map(row => {
         const bodyStr = row.body || "";
         let body = opts.full ? bodyStr : undefined;
-        let snippet = !opts.full ? extractSnippet(bodyStr, query, 300, row.chunkPos).snippet : undefined;
+        let snippet = !opts.full ? extractSnippet(bodyStr, query, 300, row.chunkPos, undefined, opts.intent).snippet : undefined;
         if (opts.lineNumbers) {
             if (body)
                 body = addLineNumbers(body);
@@ -82,7 +82,7 @@ export function searchResultsToCsv(results, opts = {}) {
     const header = "docid,score,file,title,context,line,snippet";
     const rows = results.map(row => {
         const bodyStr = row.body || "";
-        const { line, snippet } = extractSnippet(bodyStr, query, 500, row.chunkPos);
+        const { line, snippet } = extractSnippet(bodyStr, query, 500, row.chunkPos, undefined, opts.intent);
         let content = opts.full ? bodyStr : snippet;
         if (opts.lineNumbers && content) {
             content = addLineNumbers(content);
@@ -121,7 +121,7 @@ export function searchResultsToMarkdown(results, opts = {}) {
             content = bodyStr;
         }
         else {
-            content = extractSnippet(bodyStr, query, 500, row.chunkPos).snippet;
+            content = extractSnippet(bodyStr, query, 500, row.chunkPos, undefined, opts.intent).snippet;
         }
         if (opts.lineNumbers) {
             content = addLineNumbers(content);
@@ -138,7 +138,7 @@ export function searchResultsToXml(results, opts = {}) {
     const items = results.map(row => {
         const titleAttr = row.title ? ` title="${escapeXml(row.title)}"` : "";
         const bodyStr = row.body || "";
-        let content = opts.full ? bodyStr : extractSnippet(bodyStr, query, 500, row.chunkPos).snippet;
+        let content = opts.full ? bodyStr : extractSnippet(bodyStr, query, 500, row.chunkPos, undefined, opts.intent).snippet;
         if (opts.lineNumbers) {
             content = addLineNumbers(content);
         }

package/dist/llm.d.ts CHANGED Viewed

@@ -330,6 +330,7 @@ export declare class LlamaCpp implements LLM {
     expandQuery(query: string, options?: {
         context?: string;
         includeLexical?: boolean;
+        intent?: string;
     }): Promise<Queryable[]>;
     private static readonly RERANK_TEMPLATE_OVERHEAD;
     private static readonly RERANK_TARGET_DOCS_PER_CONTEXT;

package/dist/llm.js CHANGED Viewed

@@ -691,7 +691,10 @@ export class LlamaCpp {
         content ::= [^\\n]+
       `
         });
-        const prompt = `/no_think Expand this search query: ${query}`;
+        const intent = options.intent;
+        const prompt = intent
+            ? `/no_think Expand this search query: ${query}\nQuery intent: ${intent}`
+            : `/no_think Expand this search query: ${query}`;
         // Create a bounded context for expansion to prevent large default VRAM allocations.
         const genContext = await this.generateModel.createContext({
             contextSize: this.expandContextSize,

package/dist/mcp.js CHANGED Viewed

@@ -84,10 +84,13 @@ function buildInstructions(store) {
     lines.push("  - type:'vec' — semantic vector search (meaning-based)");
     lines.push("  - type:'hyde' — hypothetical document (write what the answer looks like)");
     lines.push("");
+    lines.push("  Always provide `intent` on every search call to disambiguate and improve snippets.");
+    lines.push("");
     lines.push("Examples:");
     lines.push("  Quick keyword lookup: [{type:'lex', query:'error handling'}]");
     lines.push("  Semantic search: [{type:'vec', query:'how to handle errors gracefully'}]");
     lines.push("  Best results: [{type:'lex', query:'error'}, {type:'vec', query:'error handling best practices'}]");
+    lines.push("  With intent: searches=[{type:'lex', query:'performance'}], intent='web page load times'");
     // --- Retrieval workflow ---
     lines.push("");
     lines.push("Retrieval:");
@@ -236,8 +239,9 @@ Intent-aware lex (C++ performance, not sports):
             minScore: z.number().optional().default(0).describe("Min relevance 0-1 (default: 0)"),
             candidateLimit: z.number().optional().describe("Maximum candidates to rerank (default: 40, lower = faster but may miss results)"),
             collections: z.array(z.string()).optional().describe("Filter to collections (OR match)"),
+            intent: z.string().optional().describe("Background context to disambiguate the query. Example: query='performance', intent='web page load times and Core Web Vitals'. Does not search on its own."),
         },
-    }, async ({ searches, limit, minScore, candidateLimit, collections }) => {
+    }, async ({ searches, limit, minScore, candidateLimit, collections, intent }) => {
         // Map to internal format
         const subSearches = searches.map(s => ({
             type: s.type,
@@ -250,13 +254,14 @@ Intent-aware lex (C++ performance, not sports):
             limit,
             minScore,
             candidateLimit,
+            intent,
         });
         // Use first lex or vec query for snippet extraction
         const primaryQuery = searches.find(s => s.type === 'lex')?.query
             || searches.find(s => s.type === 'vec')?.query
             || searches[0]?.query || "";
         const filtered = results.map(r => {
-            const { line, snippet } = extractSnippet(r.bestChunk, primaryQuery, 300);
+            const { line, snippet } = extractSnippet(r.bestChunk, primaryQuery, 300, undefined, undefined, intent);
             return {
                 docid: `#${r.docid}`,
                 file: r.displayPath,

package/dist/qmd.js CHANGED Viewed

@@ -1567,7 +1567,7 @@ function outputResults(results, query, opts) {
         const output = filtered.map(row => {
             const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
             let body = opts.full ? row.body : undefined;
-            let snippet = !opts.full ? extractSnippet(row.body, query, 300, row.chunkPos).snippet : undefined;
+            let snippet = !opts.full ? extractSnippet(row.body, query, 300, row.chunkPos, undefined, opts.intent).snippet : undefined;
             if (opts.lineNumbers) {
                 if (body)
                     body = addLineNumbers(body);
@@ -1600,7 +1600,7 @@ function outputResults(results, query, opts) {
             const row = filtered[i];
             if (!row)
                 continue;
-            const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos);
+            const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent);
             const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
             // Line 1: filepath with docid
             const path = toQmdPath(row.displayPath);
@@ -1659,7 +1659,7 @@ function outputResults(results, query, opts) {
                 continue;
             const heading = row.title || row.displayPath;
             const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
-            let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos).snippet;
+            let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent).snippet;
             if (opts.lineNumbers) {
                 content = addLineNumbers(content);
             }
@@ -1673,7 +1673,7 @@ function outputResults(results, query, opts) {
             const titleAttr = row.title ? ` title="${row.title.replace(/"/g, '&quot;')}"` : "";
             const contextAttr = row.context ? ` context="${row.context.replace(/"/g, '&quot;')}"` : "";
             const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : "");
-            let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos).snippet;
+            let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent).snippet;
             if (opts.lineNumbers) {
                 content = addLineNumbers(content);
             }
@@ -1684,7 +1684,7 @@ function outputResults(results, query, opts) {
         // CSV format
         console.log("docid,score,file,title,context,line,snippet");
         for (const row of filtered) {
-            const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos);
+            const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent);
             let content = opts.full ? row.body : snippet;
             if (opts.lineNumbers) {
                 content = addLineNumbers(content, line);
@@ -1727,21 +1727,6 @@ function filterByCollections(results, collectionNames) {
         return prefixes.some(p => path.startsWith(p));
     });
 }
-/**
- * Parse structured search query syntax.
- * Lines starting with lex:, vec:, or hyde: are routed directly.
- * Plain lines without prefix go through query expansion.
- *
- * Returns null if this is a plain query (single line, no prefix).
- * Returns StructuredSubSearch[] if structured syntax detected.
- * Throws if multiple plain lines (ambiguous).
- *
- * Examples:
- *   "CAP theorem"                    -> null (plain query, use expansion)
- *   "lex: CAP theorem"               -> [{ type: 'lex', query: 'CAP theorem' }]
- *   "lex: CAP\nvec: consistency"     -> [{ type: 'lex', ... }, { type: 'vec', ... }]
- *   "CAP\nconsistency"               -> throws (multiple plain lines)
- */
 function parseStructuredQuery(query) {
     const rawLines = query.split('\n').map((line, idx) => ({
         raw: line,
@@ -1752,7 +1737,9 @@ function parseStructuredQuery(query) {
         return null;
     const prefixRe = /^(lex|vec|hyde):\s*/i;
     const expandRe = /^expand:\s*/i;
+    const intentRe = /^intent:\s*/i;
     const typed = [];
+    let intent;
     for (const line of rawLines) {
         if (expandRe.test(line.trimmed)) {
             if (rawLines.length > 1) {
@@ -1764,6 +1751,18 @@ function parseStructuredQuery(query) {
             }
             return null; // treat as standalone expand query
         }
+        // Parse intent: lines
+        if (intentRe.test(line.trimmed)) {
+            if (intent !== undefined) {
+                throw new Error(`Line ${line.number}: only one intent: line is allowed per query document.`);
+            }
+            const text = line.trimmed.replace(intentRe, '').trim();
+            if (!text) {
+                throw new Error(`Line ${line.number}: intent: must include text.`);
+            }
+            intent = text;
+            continue;
+        }
         const match = line.trimmed.match(prefixRe);
         if (match) {
             const type = match[1].toLowerCase();
@@ -1781,9 +1780,13 @@ function parseStructuredQuery(query) {
             // Single plain line -> implicit expand
             return null;
         }
-        throw new Error(`Line ${line.number} is missing a lex:/vec:/hyde: prefix. Each line in a query document must start with one.`);
+        throw new Error(`Line ${line.number} is missing a lex:/vec:/hyde:/intent: prefix. Each line in a query document must start with one.`);
     }
-    return typed.length > 0 ? typed : null;
+    // intent: alone is not a valid query — must have at least one search
+    if (intent && typed.length === 0) {
+        throw new Error('intent: cannot appear alone. Add at least one lex:, vec:, or hyde: line.');
+    }
+    return typed.length > 0 ? { searches: typed, intent } : null;
 }
 function search(query, opts) {
     const db = getDb();
@@ -1840,6 +1843,7 @@ async function vectorSearch(query, opts, _model = DEFAULT_EMBED_MODEL) {
             collection: singleCollection,
             limit: opts.all ? 500 : (opts.limit || 10),
             minScore: opts.minScore || 0.3,
+            intent: opts.intent,
             hooks: {
                 onExpand: (original, expanded) => {
                     logExpansionTree(original, expanded);
@@ -1877,14 +1881,20 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
     const collectionNames = resolveCollectionFilter(opts.collection, true);
     const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
     checkIndexHealth(store.db);
-    // Check for structured query syntax (lex:/vec:/hyde: prefixes)
-    const structuredQueries = parseStructuredQuery(query);
+    // Check for structured query syntax (lex:/vec:/hyde:/intent: prefixes)
+    const parsed = parseStructuredQuery(query);
+    // Intent can come from --intent flag or from intent: line in query document
+    const intent = opts.intent || parsed?.intent;
     await withLLMSession(async () => {
         let results;
-        if (structuredQueries) {
+        if (parsed) {
+            const structuredQueries = parsed.searches;
             // Structured search — user provided their own query expansions
             const typeLabels = structuredQueries.map(s => s.type).join('+');
             process.stderr.write(`${c.dim}Structured search: ${structuredQueries.length} queries (${typeLabels})${c.reset}\n`);
+            if (intent) {
+                process.stderr.write(`${c.dim}├─ intent: ${intent}${c.reset}\n`);
+            }
             // Log each sub-query
             for (const s of structuredQueries) {
                 let preview = s.query.replace(/\n/g, ' ');
@@ -1899,6 +1909,7 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
                 minScore: opts.minScore || 0,
                 candidateLimit: opts.candidateLimit,
                 explain: !!opts.explain,
+                intent,
                 hooks: {
                     onEmbedStart: (count) => {
                         process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 'query' : 'queries'}...${c.reset}`);
@@ -1925,6 +1936,7 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
                 minScore: opts.minScore || 0,
                 candidateLimit: opts.candidateLimit,
                 explain: !!opts.explain,
+                intent,
                 hooks: {
                     onStrongSignal: (score) => {
                         process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`);
@@ -1967,6 +1979,7 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
             return;
         }
         // Use first lex/vec query for output context, or original query
+        const structuredQueries = parsed?.searches;
         const displayQuery = structuredQueries
             ? (structuredQueries.find(s => s.type === 'lex')?.query || structuredQueries.find(s => s.type === 'vec')?.query || query)
             : query;
@@ -2026,6 +2039,7 @@ function parseCLI() {
             "line-numbers": { type: "boolean" }, // add line numbers to output
             // Query options
             "candidate-limit": { type: "string", short: "C" },
+            intent: { type: "string" },
             // MCP HTTP transport options
             http: { type: "boolean" },
             daemon: { type: "boolean" },
@@ -2066,6 +2080,7 @@ function parseCLI() {
         lineNumbers: !!values["line-numbers"],
         candidateLimit: values["candidate-limit"] ? parseInt(String(values["candidate-limit"]), 10) : undefined,
         explain: !!values.explain,
+        intent: values.intent,
     };
     return {
         command: positionals[0] || "",
@@ -2124,7 +2139,8 @@ function showHelp() {
         `query          = expand_query | query_document ;`,
         `expand_query   = text | explicit_expand ;`,
         `explicit_expand= "expand:" text ;`,
-        `query_document = { typed_line } ;`,
+        `query_document = [ intent_line ] { typed_line } ;`,
+        `intent_line    = "intent:" text newline ;`,
         `typed_line     = type ":" text newline ;`,
         `type           = "lex" | "vec" | "hyde" ;`,
         `text           = quoted_phrase | plain_text ;`,

package/dist/store.d.ts CHANGED Viewed

@@ -202,11 +202,11 @@ export type Store = {
     toVirtualPath: (absolutePath: string) => string | null;
     searchFTS: (query: string, limit?: number, collectionName?: string) => SearchResult[];
     searchVec: (query: string, model: string, limit?: number, collectionName?: string, session?: ILLMSession, precomputedEmbedding?: number[]) => Promise<SearchResult[]>;
-    expandQuery: (query: string, model?: string) => Promise<ExpandedQuery[]>;
+    expandQuery: (query: string, model?: string, intent?: string) => Promise<ExpandedQuery[]>;
     rerank: (query: string, documents: {
         file: string;
         text: string;
-    }[], model?: string) => Promise<{
+    }[], model?: string, intent?: string) => Promise<{
         file: string;
         score: number;
     }[]>;
@@ -598,11 +598,11 @@ export declare function clearAllEmbeddings(db: Database): void;
  * The hash_seq key is formatted as "hash_seq" for the vectors_vec table.
  */
 export declare function insertEmbedding(db: Database, hash: string, seq: number, pos: number, embedding: Float32Array, model: string, embeddedAt: string): void;
-export declare function expandQuery(query: string, model: string | undefined, db: Database): Promise<ExpandedQuery[]>;
+export declare function expandQuery(query: string, model: string | undefined, db: Database, intent?: string): Promise<ExpandedQuery[]>;
 export declare function rerank(query: string, documents: {
     file: string;
     text: string;
-}[], model: string | undefined, db: Database): Promise<{
+}[], model: string | undefined, db: Database, intent?: string): Promise<{
     file: string;
     score: number;
 }[]>;
@@ -650,7 +650,17 @@ export type SnippetResult = {
     linesAfter: number;
     snippetLines: number;
 };
-export declare function extractSnippet(body: string, query: string, maxLen?: number, chunkPos?: number, chunkLen?: number): SnippetResult;
+/** Weight for intent terms relative to query terms (1.0) in snippet scoring */
+export declare const INTENT_WEIGHT_SNIPPET = 0.3;
+/** Weight for intent terms relative to query terms (1.0) in chunk selection */
+export declare const INTENT_WEIGHT_CHUNK = 0.5;
+/**
+ * Extract meaningful terms from an intent string, filtering stop words and punctuation.
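+ * Trims leading/trailing punctuation with Unicode-aware character classes and
+ * keeps non-stop-word terms longer than one character, so short domain terms
+ * like "API" survive.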
+ * Returns lowercase terms suitable for text matching.
+ */
+export declare function extractIntentTerms(intent: string): string[];
+export declare function extractSnippet(body: string, query: string, maxLen?: number, chunkPos?: number, chunkLen?: number, intent?: string): SnippetResult;
 /**
  * Add line numbers to text content.
  * Each line becomes: "{lineNum}: {content}"
@@ -682,6 +692,7 @@ export interface HybridQueryOptions {
     minScore?: number;
     candidateLimit?: number;
     explain?: boolean;
+    intent?: string;
     hooks?: SearchHooks;
 }
 export interface HybridQueryResult {
@@ -719,6 +730,7 @@ export interface VectorSearchOptions {
     collection?: string;
     limit?: number;
     minScore?: number;
+    intent?: string;
     hooks?: Pick<SearchHooks, 'onExpand'>;
 }
 export interface VectorSearchResult {
@@ -758,7 +770,7 @@ export interface StructuredSearchOptions {
     minScore?: number;
     candidateLimit?: number;
     explain?: boolean;
-    /** Future: domain intent hint for routing/boosting */
+    /** Domain intent hint for disambiguation — steers reranking and chunk selection */
     intent?: string;
     hooks?: SearchHooks;
 }

package/dist/store.js CHANGED Viewed

@@ -667,8 +667,8 @@ export function createStore(dbPath) {
         searchFTS: (query, limit, collectionName) => searchFTS(db, query, limit, collectionName),
         searchVec: (query, model, limit, collectionName, session, precomputedEmbedding) => searchVec(db, query, model, limit, collectionName, session, precomputedEmbedding),
         // Query expansion & reranking
-        expandQuery: (query, model) => expandQuery(query, model, db),
-        rerank: (query, documents, model) => rerank(query, documents, model, db),
+        expandQuery: (query, model, intent) => expandQuery(query, model, db, intent),
+        rerank: (query, documents, model, intent) => rerank(query, documents, model, db, intent),
         // Document retrieval
         findDocument: (filename, options) => findDocument(db, filename, options),
         getDocumentBody: (doc, fromLine, maxLines) => getDocumentBody(db, doc, fromLine, maxLines),
@@ -1798,9 +1798,9 @@ export function insertEmbedding(db, hash, seq, pos, embedding, model, embeddedAt
 // =============================================================================
 // Query expansion
 // =============================================================================
-export async function expandQuery(query, model = DEFAULT_QUERY_MODEL, db) {
+export async function expandQuery(query, model = DEFAULT_QUERY_MODEL, db, intent) {
     // Check cache first — stored as JSON preserving types
-    const cacheKey = getCacheKey("expandQuery", { query, model });
+    const cacheKey = getCacheKey("expandQuery", { query, model, ...(intent && { intent }) });
     const cached = getCachedResult(db, cacheKey);
     if (cached) {
         try {
@@ -1812,7 +1812,7 @@ export async function expandQuery(query, model = DEFAULT_QUERY_MODEL, db) {
     }
     const llm = getDefaultLlamaCpp();
     // Note: LlamaCpp uses hardcoded model, model parameter is ignored
-    const results = await llm.expandQuery(query);
+    const results = await llm.expandQuery(query, { intent });
     // Map Queryable[] → ExpandedQuery[] (same shape, decoupled from llm.ts internals).
     // Filter out entries that duplicate the original query text.
     const expanded = results
@@ -1826,7 +1826,9 @@ export async function expandQuery(query, model = DEFAULT_QUERY_MODEL, db) {
 // =============================================================================
 // Reranking
 // =============================================================================
-export async function rerank(query, documents, model = DEFAULT_RERANK_MODEL, db) {
+export async function rerank(query, documents, model = DEFAULT_RERANK_MODEL, db, intent) {
+    // Prepend intent to rerank query so the reranker scores with domain context
+    const rerankQuery = intent ? `${intent}\n\n${query}` : query;
     const cachedResults = new Map();
     const uncachedDocsByChunk = new Map();
     // Check cache for each document
@@ -1835,7 +1837,7 @@ export async function rerank(query, documents, model = DEFAULT_RERANK_MODEL, db)
     // File path is excluded from the new cache key because the reranker score
     // depends on the chunk content, not where it came from.
     for (const doc of documents) {
-        const cacheKey = getCacheKey("rerank", { query, model, chunk: doc.text });
+        const cacheKey = getCacheKey("rerank", { query: rerankQuery, model, chunk: doc.text });
         const legacyCacheKey = getCacheKey("rerank", { query, file: doc.file, model, chunk: doc.text });
         const cached = getCachedResult(db, cacheKey) ?? getCachedResult(db, legacyCacheKey);
         if (cached !== null) {
@@ -1849,12 +1851,12 @@ export async function rerank(query, documents, model = DEFAULT_RERANK_MODEL, db)
     if (uncachedDocsByChunk.size > 0) {
         const llm = getDefaultLlamaCpp();
         const uncachedDocs = [...uncachedDocsByChunk.values()];
-        const rerankResult = await llm.rerank(query, uncachedDocs, { model });
+        const rerankResult = await llm.rerank(rerankQuery, uncachedDocs, { model });
         // Cache results by chunk text so identical chunks across files are scored once.
         const textByFile = new Map(uncachedDocs.map(d => [d.file, d.text]));
         for (const result of rerankResult.results) {
             const chunk = textByFile.get(result.file) || "";
-            const cacheKey = getCacheKey("rerank", { query, model, chunk });
+            const cacheKey = getCacheKey("rerank", { query: rerankQuery, model, chunk });
             setCachedResult(db, cacheKey, result.score.toString());
             cachedResults.set(chunk, result.score);
         }
@@ -2254,7 +2256,41 @@ export function getStatus(db) {
         collections,
     };
 }
-export function extractSnippet(body, query, maxLen = 500, chunkPos, chunkLen) {
+/** Weight for intent terms relative to query terms (1.0) in snippet scoring */
+export const INTENT_WEIGHT_SNIPPET = 0.3;
+/** Weight for intent terms relative to query terms (1.0) in chunk selection */
+export const INTENT_WEIGHT_CHUNK = 0.5;
+// Common stop words dropped from intent strings after whitespace tokenization.
+// Seeded from finetune/reward.py KEY_TERM_STOPWORDS, extended with common
+// 2-3 char function words so the length threshold can drop to >1 and let
+// short domain terms (API, SQL, LLM, CPU, CDN, …) survive.
+const INTENT_STOP_WORDS = new Set([
+    // 2-char function words
+    "am", "an", "as", "at", "be", "by", "do", "he", "if",
+    "in", "is", "it", "me", "my", "no", "of", "on", "or", "so",
+    "to", "up", "us", "we",
+    // 3-char function words
+    "all", "and", "any", "are", "but", "can", "did", "for", "get",
+    "has", "her", "him", "his", "how", "its", "let", "may", "not",
+    "our", "out", "the", "too", "was", "who", "why", "you",
+    // 4+ char common words
+    "also", "does", "find", "from", "have", "into", "more", "need",
+    "show", "some", "tell", "that", "them", "this", "want", "what",
+    "when", "will", "with", "your",
+    // Search-context noise
+    "about", "looking", "notes", "search", "where", "which",
+]);
+/**
+ * Extract meaningful terms from an intent string, filtering stop words and punctuation.
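+ * Trims leading/trailing punctuation with Unicode-aware character classes and
+ * keeps non-stop-word terms longer than one character, so short domain terms
+ * like "API" survive.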
+ * Returns lowercase terms suitable for text matching.
+ */
+export function extractIntentTerms(intent) {
+    return intent.toLowerCase().split(/\s+/)
+        .map(t => t.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, ""))
+        .filter(t => t.length > 1 && !INTENT_STOP_WORDS.has(t));
+}
+export function extractSnippet(body, query, maxLen = 500, chunkPos, chunkLen, intent) {
     const totalLines = body.split('\n').length;
     let searchBody = body;
     let lineOffset = 0;
@@ -2271,13 +2307,18 @@ export function extractSnippet(body, query, maxLen = 500, chunkPos, chunkLen) {
     }
     const lines = searchBody.split('\n');
     const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length > 0);
+    const intentTerms = intent ? extractIntentTerms(intent) : [];
     let bestLine = 0, bestScore = -1;
     for (let i = 0; i < lines.length; i++) {
         const lineLower = (lines[i] ?? "").toLowerCase();
         let score = 0;
         for (const term of queryTerms) {
             if (lineLower.includes(term))
-                score++;
+                score += 1.0;
+        }
+        for (const term of intentTerms) {
+            if (lineLower.includes(term))
+                score += INTENT_WEIGHT_SNIPPET;
         }
         if (score > bestScore) {
             bestScore = score;
@@ -2291,7 +2332,7 @@ export function extractSnippet(body, query, maxLen = 500, chunkPos, chunkLen) {
     // If we focused on a chunk window and it produced an empty/whitespace-only snippet,
     // fall back to a full-document snippet so we always show something useful.
     if (chunkPos && chunkPos > 0 && snippetText.trim().length === 0) {
-        return extractSnippet(body, query, maxLen, undefined);
+        return extractSnippet(body, query, maxLen, undefined, undefined, intent);
     }
     if (snippetText.length > maxLen)
         snippetText = snippetText.substring(0, maxLen - 3) + "...";
@@ -2340,17 +2381,21 @@ export async function hybridQuery(store, query, options) {
     const candidateLimit = options?.candidateLimit ?? RERANK_CANDIDATE_LIMIT;
     const collection = options?.collection;
     const explain = options?.explain ?? false;
+    const intent = options?.intent;
     const hooks = options?.hooks;
     const rankedLists = [];
     const rankedListMeta = [];
     const docidMap = new Map(); // filepath -> docid
     const hasVectors = !!store.db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
     // Step 1: BM25 probe — strong signal skips expensive LLM expansion
+    // When intent is provided, disable strong-signal bypass — the obvious BM25
+    // match may not be what the caller wants (e.g. "performance" with intent
+    // "web page load times" should NOT shortcut to a sports-performance doc).
     // Pass collection directly into FTS query (filter at SQL level, not post-hoc)
     const initialFts = store.searchFTS(query, 20, collection);
     const topScore = initialFts[0]?.score ?? 0;
     const secondScore = initialFts[1]?.score ?? 0;
-    const hasStrongSignal = initialFts.length > 0
+    const hasStrongSignal = !intent && initialFts.length > 0
         && topScore >= STRONG_SIGNAL_MIN_SCORE
         && (topScore - secondScore) >= STRONG_SIGNAL_MIN_GAP;
     if (hasStrongSignal)
@@ -2360,7 +2405,7 @@ export async function hybridQuery(store, query, options) {
     const expandStart = Date.now();
     const expanded = hasStrongSignal
         ? []
-        : await store.expandQuery(query);
+        : await store.expandQuery(query, undefined, intent);
     hooks?.onExpand?.(query, expanded, Date.now() - expandStart);
     // Seed with initial FTS results (avoid re-running original query FTS)
     if (initialFts.length > 0) {
@@ -2440,6 +2485,7 @@ export async function hybridQuery(store, query, options) {
     // Step 5: Chunk documents, pick best chunk per doc for reranking.
     // Reranking full bodies is O(tokens) — the critical perf lesson that motivated this refactor.
     const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length > 2);
+    const intentTerms = intent ? extractIntentTerms(intent) : [];
     const chunksToRerank = [];
     const docChunkMap = new Map();
     for (const cand of candidates) {
@@ -2447,11 +2493,16 @@ export async function hybridQuery(store, query, options) {
         if (chunks.length === 0)
             continue;
         // Pick chunk with most keyword overlap (fallback: first chunk)
+        // Intent terms contribute at INTENT_WEIGHT_CHUNK (0.5) relative to query terms (1.0)
         let bestIdx = 0;
         let bestScore = -1;
         for (let i = 0; i < chunks.length; i++) {
             const chunkLower = chunks[i].text.toLowerCase();
-            const score = queryTerms.reduce((acc, term) => acc + (chunkLower.includes(term) ? 1 : 0), 0);
+            let score = queryTerms.reduce((acc, term) => acc + (chunkLower.includes(term) ? 1 : 0), 0);
+            for (const term of intentTerms) {
+                if (chunkLower.includes(term))
+                    score += INTENT_WEIGHT_CHUNK;
+            }
             if (score > bestScore) {
                 bestScore = score;
                 bestIdx = i;
@@ -2463,7 +2514,7 @@ export async function hybridQuery(store, query, options) {
     // Step 6: Rerank chunks (NOT full bodies)
     hooks?.onRerankStart?.(chunksToRerank.length);
     const rerankStart = Date.now();
-    const reranked = await store.rerank(query, chunksToRerank);
+    const reranked = await store.rerank(query, chunksToRerank, undefined, intent);
     hooks?.onRerankDone?.(Date.now() - rerankStart);
     // Step 7: Blend RRF position score with reranker score
     // Position-aware weights: top retrieval results get more protection from reranker disagreement
@@ -2541,12 +2592,13 @@ export async function vectorSearchQuery(store, query, options) {
     const limit = options?.limit ?? 10;
     const minScore = options?.minScore ?? 0.3;
     const collection = options?.collection;
+    const intent = options?.intent;
     const hasVectors = !!store.db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
     if (!hasVectors)
         return [];
     // Expand query — filter to vec/hyde only (lex queries target FTS, not vector)
     const expandStart = Date.now();
-    const allExpanded = await store.expandQuery(query);
+    const allExpanded = await store.expandQuery(query, undefined, intent);
     const vecExpanded = allExpanded.filter(q => q.type !== 'lex');
     options?.hooks?.onExpand?.(query, vecExpanded, Date.now() - expandStart);
     // Run original + vec/hyde expanded through vector, sequentially — concurrent embed() hangs
@@ -2597,6 +2649,7 @@ export async function structuredSearch(store, searches, options) {
     const minScore = options?.minScore ?? 0;
     const candidateLimit = options?.candidateLimit ?? RERANK_CANDIDATE_LIMIT;
     const explain = options?.explain ?? false;
+    const intent = options?.intent;
     const hooks = options?.hooks;
     const collections = options?.collections;
     if (searches.length === 0)
@@ -2696,6 +2749,7 @@ export async function structuredSearch(store, searches, options) {
         || searches.find(s => s.type === 'vec')?.query
         || searches[0]?.query || "";
     const queryTerms = primaryQuery.toLowerCase().split(/\s+/).filter(t => t.length > 2);
+    const intentTerms = intent ? extractIntentTerms(intent) : [];
     const chunksToRerank = [];
     const docChunkMap = new Map();
     for (const cand of candidates) {
@@ -2703,11 +2757,16 @@ export async function structuredSearch(store, searches, options) {
         if (chunks.length === 0)
             continue;
         // Pick chunk with most keyword overlap
+        // Intent terms contribute at INTENT_WEIGHT_CHUNK (0.5) relative to query terms (1.0)
         let bestIdx = 0;
         let bestScore = -1;
         for (let i = 0; i < chunks.length; i++) {
             const chunkLower = chunks[i].text.toLowerCase();
-            const score = queryTerms.reduce((acc, term) => acc + (chunkLower.includes(term) ? 1 : 0), 0);
+            let score = queryTerms.reduce((acc, term) => acc + (chunkLower.includes(term) ? 1 : 0), 0);
+            for (const term of intentTerms) {
+                if (chunkLower.includes(term))
+                    score += INTENT_WEIGHT_CHUNK;
+            }
             if (score > bestScore) {
                 bestScore = score;
                 bestIdx = i;
@@ -2719,7 +2778,7 @@ export async function structuredSearch(store, searches, options) {
     // Step 5: Rerank chunks
     hooks?.onRerankStart?.(chunksToRerank.length);
     const rerankStart2 = Date.now();
-    const reranked = await store.rerank(primaryQuery, chunksToRerank);
+    const reranked = await store.rerank(primaryQuery, chunksToRerank, undefined, intent);
     hooks?.onRerankDone?.(Date.now() - rerankStart2);
     // Step 6: Blend RRF position score with reranker score
     const candidateMap = new Map(candidates.map(c => [c.file, {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@tobilu/qmd",
-  "version": "1.1.2",
+  "version": "1.1.5",
   "description": "Query Markup Documents - On-device hybrid search for markdown files with BM25, vector search, and LLM reranking",
   "type": "module",
   "bin": {