@tobilu/qmd 1.1.2 → 1.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,37 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [1.1.5] - 2026-03-07
6
+
7
+ Ambiguous queries like "performance" now produce dramatically better results
8
+ when the caller knows what they mean. The new `intent` parameter steers all
9
+ five pipeline stages — expansion, strong-signal bypass, chunk selection,
10
+ reranking, and snippet extraction — without searching on its own. Design and
11
+ original implementation by Ilya Grigorik (@vyalamar) in #180.
12
+
13
+ ### Changes
14
+
15
+ - **Intent parameter**: optional `intent` string disambiguates queries across
16
+ the entire search pipeline. Available via CLI (`--intent` flag or `intent:`
17
+ line in query documents), MCP (`intent` field on the query tool), and
18
+ programmatic API. Adapted from PR #180 (thanks @vyalamar).
19
+ - **Query expansion**: when intent is provided, the expansion LLM prompt
20
+ includes `Query intent: {intent}`, matching the finetune training data
21
+ format for better-aligned expansions.
22
+ - **Reranking**: intent is prepended to the rerank query so Qwen3-Reranker
23
+ scores with domain context.
24
+ - **Chunk selection**: intent terms scored at 0.5× weight alongside query
25
+ terms (1.0×) when selecting the best chunk per document for reranking.
26
+ - **Snippet extraction**: intent terms scored at 0.3× weight to nudge
27
+ snippets toward intent-relevant lines without overriding query anchoring.
28
+ - **Strong-signal bypass disabled with intent**: when intent is provided, the
29
+ BM25 strong-signal shortcut is skipped — the obvious keyword match may not
30
+ be what the caller wants.
31
+ - **MCP instructions**: callers are now guided to provide `intent` on every
32
+ search call for disambiguation.
33
+ - **Query document syntax**: `intent:` recognized as a line type. At most one
34
+ per document, cannot appear alone. Grammar updated in `docs/SYNTAX.md`.
35
+
5
36
  ## [1.1.2] - 2026-03-07
6
37
 
7
38
  13 community PRs merged. GPU initialization replaced with node-llama-cpp's
package/dist/formatter.d.ts CHANGED
@@ -28,6 +28,7 @@ export type FormatOptions = {
28
28
  query?: string;
29
29
  useColor?: boolean;
30
30
  lineNumbers?: boolean;
31
+ intent?: string;
31
32
  };
32
33
  /**
33
34
  * Add line numbers to text content.
package/dist/formatter.js CHANGED
@@ -55,7 +55,7 @@ export function searchResultsToJson(results, opts = {}) {
55
55
  const output = results.map(row => {
56
56
  const bodyStr = row.body || "";
57
57
  let body = opts.full ? bodyStr : undefined;
58
- let snippet = !opts.full ? extractSnippet(bodyStr, query, 300, row.chunkPos).snippet : undefined;
58
+ let snippet = !opts.full ? extractSnippet(bodyStr, query, 300, row.chunkPos, undefined, opts.intent).snippet : undefined;
59
59
  if (opts.lineNumbers) {
60
60
  if (body)
61
61
  body = addLineNumbers(body);
@@ -82,7 +82,7 @@ export function searchResultsToCsv(results, opts = {}) {
82
82
  const header = "docid,score,file,title,context,line,snippet";
83
83
  const rows = results.map(row => {
84
84
  const bodyStr = row.body || "";
85
- const { line, snippet } = extractSnippet(bodyStr, query, 500, row.chunkPos);
85
+ const { line, snippet } = extractSnippet(bodyStr, query, 500, row.chunkPos, undefined, opts.intent);
86
86
  let content = opts.full ? bodyStr : snippet;
87
87
  if (opts.lineNumbers && content) {
88
88
  content = addLineNumbers(content);
@@ -121,7 +121,7 @@ export function searchResultsToMarkdown(results, opts = {}) {
121
121
  content = bodyStr;
122
122
  }
123
123
  else {
124
- content = extractSnippet(bodyStr, query, 500, row.chunkPos).snippet;
124
+ content = extractSnippet(bodyStr, query, 500, row.chunkPos, undefined, opts.intent).snippet;
125
125
  }
126
126
  if (opts.lineNumbers) {
127
127
  content = addLineNumbers(content);
@@ -138,7 +138,7 @@ export function searchResultsToXml(results, opts = {}) {
138
138
  const items = results.map(row => {
139
139
  const titleAttr = row.title ? ` title="${escapeXml(row.title)}"` : "";
140
140
  const bodyStr = row.body || "";
141
- let content = opts.full ? bodyStr : extractSnippet(bodyStr, query, 500, row.chunkPos).snippet;
141
+ let content = opts.full ? bodyStr : extractSnippet(bodyStr, query, 500, row.chunkPos, undefined, opts.intent).snippet;
142
142
  if (opts.lineNumbers) {
143
143
  content = addLineNumbers(content);
144
144
  }
package/dist/llm.d.ts CHANGED
@@ -330,6 +330,7 @@ export declare class LlamaCpp implements LLM {
330
330
  expandQuery(query: string, options?: {
331
331
  context?: string;
332
332
  includeLexical?: boolean;
333
+ intent?: string;
333
334
  }): Promise<Queryable[]>;
334
335
  private static readonly RERANK_TEMPLATE_OVERHEAD;
335
336
  private static readonly RERANK_TARGET_DOCS_PER_CONTEXT;
package/dist/llm.js CHANGED
@@ -691,7 +691,10 @@ export class LlamaCpp {
691
691
  content ::= [^\\n]+
692
692
  `
693
693
  });
694
- const prompt = `/no_think Expand this search query: ${query}`;
694
+ const intent = options.intent;
695
+ const prompt = intent
696
+ ? `/no_think Expand this search query: ${query}\nQuery intent: ${intent}`
697
+ : `/no_think Expand this search query: ${query}`;
695
698
  // Create a bounded context for expansion to prevent large default VRAM allocations.
696
699
  const genContext = await this.generateModel.createContext({
697
700
  contextSize: this.expandContextSize,
package/dist/mcp.js CHANGED
@@ -84,10 +84,13 @@ function buildInstructions(store) {
84
84
  lines.push(" - type:'vec' — semantic vector search (meaning-based)");
85
85
  lines.push(" - type:'hyde' — hypothetical document (write what the answer looks like)");
86
86
  lines.push("");
87
+ lines.push(" Always provide `intent` on every search call to disambiguate and improve snippets.");
88
+ lines.push("");
87
89
  lines.push("Examples:");
88
90
  lines.push(" Quick keyword lookup: [{type:'lex', query:'error handling'}]");
89
91
  lines.push(" Semantic search: [{type:'vec', query:'how to handle errors gracefully'}]");
90
92
  lines.push(" Best results: [{type:'lex', query:'error'}, {type:'vec', query:'error handling best practices'}]");
93
+ lines.push(" With intent: searches=[{type:'lex', query:'performance'}], intent='web page load times'");
91
94
  // --- Retrieval workflow ---
92
95
  lines.push("");
93
96
  lines.push("Retrieval:");
@@ -236,8 +239,9 @@ Intent-aware lex (C++ performance, not sports):
236
239
  minScore: z.number().optional().default(0).describe("Min relevance 0-1 (default: 0)"),
237
240
  candidateLimit: z.number().optional().describe("Maximum candidates to rerank (default: 40, lower = faster but may miss results)"),
238
241
  collections: z.array(z.string()).optional().describe("Filter to collections (OR match)"),
242
+ intent: z.string().optional().describe("Background context to disambiguate the query. Example: query='performance', intent='web page load times and Core Web Vitals'. Does not search on its own."),
239
243
  },
240
- }, async ({ searches, limit, minScore, candidateLimit, collections }) => {
244
+ }, async ({ searches, limit, minScore, candidateLimit, collections, intent }) => {
241
245
  // Map to internal format
242
246
  const subSearches = searches.map(s => ({
243
247
  type: s.type,
@@ -250,13 +254,14 @@ Intent-aware lex (C++ performance, not sports):
250
254
  limit,
251
255
  minScore,
252
256
  candidateLimit,
257
+ intent,
253
258
  });
254
259
  // Use first lex or vec query for snippet extraction
255
260
  const primaryQuery = searches.find(s => s.type === 'lex')?.query
256
261
  || searches.find(s => s.type === 'vec')?.query
257
262
  || searches[0]?.query || "";
258
263
  const filtered = results.map(r => {
259
- const { line, snippet } = extractSnippet(r.bestChunk, primaryQuery, 300);
264
+ const { line, snippet } = extractSnippet(r.bestChunk, primaryQuery, 300, undefined, undefined, intent);
260
265
  return {
261
266
  docid: `#${r.docid}`,
262
267
  file: r.displayPath,
package/dist/qmd.js CHANGED
@@ -1567,7 +1567,7 @@ function outputResults(results, query, opts) {
1567
1567
  const output = filtered.map(row => {
1568
1568
  const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
1569
1569
  let body = opts.full ? row.body : undefined;
1570
- let snippet = !opts.full ? extractSnippet(row.body, query, 300, row.chunkPos).snippet : undefined;
1570
+ let snippet = !opts.full ? extractSnippet(row.body, query, 300, row.chunkPos, undefined, opts.intent).snippet : undefined;
1571
1571
  if (opts.lineNumbers) {
1572
1572
  if (body)
1573
1573
  body = addLineNumbers(body);
@@ -1600,7 +1600,7 @@ function outputResults(results, query, opts) {
1600
1600
  const row = filtered[i];
1601
1601
  if (!row)
1602
1602
  continue;
1603
- const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos);
1603
+ const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent);
1604
1604
  const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
1605
1605
  // Line 1: filepath with docid
1606
1606
  const path = toQmdPath(row.displayPath);
@@ -1659,7 +1659,7 @@ function outputResults(results, query, opts) {
1659
1659
  continue;
1660
1660
  const heading = row.title || row.displayPath;
1661
1661
  const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
1662
- let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos).snippet;
1662
+ let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent).snippet;
1663
1663
  if (opts.lineNumbers) {
1664
1664
  content = addLineNumbers(content);
1665
1665
  }
@@ -1673,7 +1673,7 @@ function outputResults(results, query, opts) {
1673
1673
  const titleAttr = row.title ? ` title="${row.title.replace(/"/g, '&quot;')}"` : "";
1674
1674
  const contextAttr = row.context ? ` context="${row.context.replace(/"/g, '&quot;')}"` : "";
1675
1675
  const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : "");
1676
- let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos).snippet;
1676
+ let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent).snippet;
1677
1677
  if (opts.lineNumbers) {
1678
1678
  content = addLineNumbers(content);
1679
1679
  }
@@ -1684,7 +1684,7 @@ function outputResults(results, query, opts) {
1684
1684
  // CSV format
1685
1685
  console.log("docid,score,file,title,context,line,snippet");
1686
1686
  for (const row of filtered) {
1687
- const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos);
1687
+ const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent);
1688
1688
  let content = opts.full ? row.body : snippet;
1689
1689
  if (opts.lineNumbers) {
1690
1690
  content = addLineNumbers(content, line);
@@ -1727,21 +1727,6 @@ function filterByCollections(results, collectionNames) {
1727
1727
  return prefixes.some(p => path.startsWith(p));
1728
1728
  });
1729
1729
  }
1730
- /**
1731
- * Parse structured search query syntax.
1732
- * Lines starting with lex:, vec:, or hyde: are routed directly.
1733
- * Plain lines without prefix go through query expansion.
1734
- *
1735
- * Returns null if this is a plain query (single line, no prefix).
1736
- * Returns StructuredSubSearch[] if structured syntax detected.
1737
- * Throws if multiple plain lines (ambiguous).
1738
- *
1739
- * Examples:
1740
- * "CAP theorem" -> null (plain query, use expansion)
1741
- * "lex: CAP theorem" -> [{ type: 'lex', query: 'CAP theorem' }]
1742
- * "lex: CAP\nvec: consistency" -> [{ type: 'lex', ... }, { type: 'vec', ... }]
1743
- * "CAP\nconsistency" -> throws (multiple plain lines)
1744
- */
1745
1730
  function parseStructuredQuery(query) {
1746
1731
  const rawLines = query.split('\n').map((line, idx) => ({
1747
1732
  raw: line,
@@ -1752,7 +1737,9 @@ function parseStructuredQuery(query) {
1752
1737
  return null;
1753
1738
  const prefixRe = /^(lex|vec|hyde):\s*/i;
1754
1739
  const expandRe = /^expand:\s*/i;
1740
+ const intentRe = /^intent:\s*/i;
1755
1741
  const typed = [];
1742
+ let intent;
1756
1743
  for (const line of rawLines) {
1757
1744
  if (expandRe.test(line.trimmed)) {
1758
1745
  if (rawLines.length > 1) {
@@ -1764,6 +1751,18 @@ function parseStructuredQuery(query) {
1764
1751
  }
1765
1752
  return null; // treat as standalone expand query
1766
1753
  }
1754
+ // Parse intent: lines
1755
+ if (intentRe.test(line.trimmed)) {
1756
+ if (intent !== undefined) {
1757
+ throw new Error(`Line ${line.number}: only one intent: line is allowed per query document.`);
1758
+ }
1759
+ const text = line.trimmed.replace(intentRe, '').trim();
1760
+ if (!text) {
1761
+ throw new Error(`Line ${line.number}: intent: must include text.`);
1762
+ }
1763
+ intent = text;
1764
+ continue;
1765
+ }
1767
1766
  const match = line.trimmed.match(prefixRe);
1768
1767
  if (match) {
1769
1768
  const type = match[1].toLowerCase();
@@ -1781,9 +1780,13 @@ function parseStructuredQuery(query) {
1781
1780
  // Single plain line -> implicit expand
1782
1781
  return null;
1783
1782
  }
1784
- throw new Error(`Line ${line.number} is missing a lex:/vec:/hyde: prefix. Each line in a query document must start with one.`);
1783
+ throw new Error(`Line ${line.number} is missing a lex:/vec:/hyde:/intent: prefix. Each line in a query document must start with one.`);
1785
1784
  }
1786
- return typed.length > 0 ? typed : null;
1785
+ // intent: alone is not a valid query — must have at least one search
1786
+ if (intent && typed.length === 0) {
1787
+ throw new Error('intent: cannot appear alone. Add at least one lex:, vec:, or hyde: line.');
1788
+ }
1789
+ return typed.length > 0 ? { searches: typed, intent } : null;
1787
1790
  }
1788
1791
  function search(query, opts) {
1789
1792
  const db = getDb();
@@ -1840,6 +1843,7 @@ async function vectorSearch(query, opts, _model = DEFAULT_EMBED_MODEL) {
1840
1843
  collection: singleCollection,
1841
1844
  limit: opts.all ? 500 : (opts.limit || 10),
1842
1845
  minScore: opts.minScore || 0.3,
1846
+ intent: opts.intent,
1843
1847
  hooks: {
1844
1848
  onExpand: (original, expanded) => {
1845
1849
  logExpansionTree(original, expanded);
@@ -1877,14 +1881,20 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
1877
1881
  const collectionNames = resolveCollectionFilter(opts.collection, true);
1878
1882
  const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
1879
1883
  checkIndexHealth(store.db);
1880
- // Check for structured query syntax (lex:/vec:/hyde: prefixes)
1881
- const structuredQueries = parseStructuredQuery(query);
1884
+ // Check for structured query syntax (lex:/vec:/hyde:/intent: prefixes)
1885
+ const parsed = parseStructuredQuery(query);
1886
+ // Intent can come from --intent flag or from intent: line in query document
1887
+ const intent = opts.intent || parsed?.intent;
1882
1888
  await withLLMSession(async () => {
1883
1889
  let results;
1884
- if (structuredQueries) {
1890
+ if (parsed) {
1891
+ const structuredQueries = parsed.searches;
1885
1892
  // Structured search — user provided their own query expansions
1886
1893
  const typeLabels = structuredQueries.map(s => s.type).join('+');
1887
1894
  process.stderr.write(`${c.dim}Structured search: ${structuredQueries.length} queries (${typeLabels})${c.reset}\n`);
1895
+ if (intent) {
1896
+ process.stderr.write(`${c.dim}├─ intent: ${intent}${c.reset}\n`);
1897
+ }
1888
1898
  // Log each sub-query
1889
1899
  for (const s of structuredQueries) {
1890
1900
  let preview = s.query.replace(/\n/g, ' ');
@@ -1899,6 +1909,7 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
1899
1909
  minScore: opts.minScore || 0,
1900
1910
  candidateLimit: opts.candidateLimit,
1901
1911
  explain: !!opts.explain,
1912
+ intent,
1902
1913
  hooks: {
1903
1914
  onEmbedStart: (count) => {
1904
1915
  process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 'query' : 'queries'}...${c.reset}`);
@@ -1925,6 +1936,7 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
1925
1936
  minScore: opts.minScore || 0,
1926
1937
  candidateLimit: opts.candidateLimit,
1927
1938
  explain: !!opts.explain,
1939
+ intent,
1928
1940
  hooks: {
1929
1941
  onStrongSignal: (score) => {
1930
1942
  process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`);
@@ -1967,6 +1979,7 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
1967
1979
  return;
1968
1980
  }
1969
1981
  // Use first lex/vec query for output context, or original query
1982
+ const structuredQueries = parsed?.searches;
1970
1983
  const displayQuery = structuredQueries
1971
1984
  ? (structuredQueries.find(s => s.type === 'lex')?.query || structuredQueries.find(s => s.type === 'vec')?.query || query)
1972
1985
  : query;
@@ -2026,6 +2039,7 @@ function parseCLI() {
2026
2039
  "line-numbers": { type: "boolean" }, // add line numbers to output
2027
2040
  // Query options
2028
2041
  "candidate-limit": { type: "string", short: "C" },
2042
+ intent: { type: "string" },
2029
2043
  // MCP HTTP transport options
2030
2044
  http: { type: "boolean" },
2031
2045
  daemon: { type: "boolean" },
@@ -2066,6 +2080,7 @@ function parseCLI() {
2066
2080
  lineNumbers: !!values["line-numbers"],
2067
2081
  candidateLimit: values["candidate-limit"] ? parseInt(String(values["candidate-limit"]), 10) : undefined,
2068
2082
  explain: !!values.explain,
2083
+ intent: values.intent,
2069
2084
  };
2070
2085
  return {
2071
2086
  command: positionals[0] || "",
@@ -2124,7 +2139,8 @@ function showHelp() {
2124
2139
  `query = expand_query | query_document ;`,
2125
2140
  `expand_query = text | explicit_expand ;`,
2126
2141
  `explicit_expand= "expand:" text ;`,
2127
- `query_document = { typed_line } ;`,
2142
+ `query_document = [ intent_line ] { typed_line } ;`,
2143
+ `intent_line = "intent:" text newline ;`,
2128
2144
  `typed_line = type ":" text newline ;`,
2129
2145
  `type = "lex" | "vec" | "hyde" ;`,
2130
2146
  `text = quoted_phrase | plain_text ;`,
package/dist/store.d.ts CHANGED
@@ -202,11 +202,11 @@ export type Store = {
202
202
  toVirtualPath: (absolutePath: string) => string | null;
203
203
  searchFTS: (query: string, limit?: number, collectionName?: string) => SearchResult[];
204
204
  searchVec: (query: string, model: string, limit?: number, collectionName?: string, session?: ILLMSession, precomputedEmbedding?: number[]) => Promise<SearchResult[]>;
205
- expandQuery: (query: string, model?: string) => Promise<ExpandedQuery[]>;
205
+ expandQuery: (query: string, model?: string, intent?: string) => Promise<ExpandedQuery[]>;
206
206
  rerank: (query: string, documents: {
207
207
  file: string;
208
208
  text: string;
209
- }[], model?: string) => Promise<{
209
+ }[], model?: string, intent?: string) => Promise<{
210
210
  file: string;
211
211
  score: number;
212
212
  }[]>;
@@ -598,11 +598,11 @@ export declare function clearAllEmbeddings(db: Database): void;
598
598
  * The hash_seq key is formatted as "hash_seq" for the vectors_vec table.
599
599
  */
600
600
  export declare function insertEmbedding(db: Database, hash: string, seq: number, pos: number, embedding: Float32Array, model: string, embeddedAt: string): void;
601
- export declare function expandQuery(query: string, model: string | undefined, db: Database): Promise<ExpandedQuery[]>;
601
+ export declare function expandQuery(query: string, model: string | undefined, db: Database, intent?: string): Promise<ExpandedQuery[]>;
602
602
  export declare function rerank(query: string, documents: {
603
603
  file: string;
604
604
  text: string;
605
- }[], model: string | undefined, db: Database): Promise<{
605
+ }[], model: string | undefined, db: Database, intent?: string): Promise<{
606
606
  file: string;
607
607
  score: number;
608
608
  }[]>;
@@ -650,7 +650,17 @@ export type SnippetResult = {
650
650
  linesAfter: number;
651
651
  snippetLines: number;
652
652
  };
653
- export declare function extractSnippet(body: string, query: string, maxLen?: number, chunkPos?: number, chunkLen?: number): SnippetResult;
653
+ /** Weight for intent terms relative to query terms (1.0) in snippet scoring */
654
+ export declare const INTENT_WEIGHT_SNIPPET = 0.3;
655
+ /** Weight for intent terms relative to query terms (1.0) in chunk selection */
656
+ export declare const INTENT_WEIGHT_CHUNK = 0.5;
657
+ /**
658
+ * Extract meaningful terms from an intent string, filtering stop words and punctuation.
659
+ * Strips leading and trailing punctuation (Unicode-aware) and keeps terms longer than one character, so short domain terms like "API" survive.
660
+ * Returns lowercase terms suitable for text matching.
661
+ */
662
+ export declare function extractIntentTerms(intent: string): string[];
663
+ export declare function extractSnippet(body: string, query: string, maxLen?: number, chunkPos?: number, chunkLen?: number, intent?: string): SnippetResult;
654
664
  /**
655
665
  * Add line numbers to text content.
656
666
  * Each line becomes: "{lineNum}: {content}"
@@ -682,6 +692,7 @@ export interface HybridQueryOptions {
682
692
  minScore?: number;
683
693
  candidateLimit?: number;
684
694
  explain?: boolean;
695
+ intent?: string;
685
696
  hooks?: SearchHooks;
686
697
  }
687
698
  export interface HybridQueryResult {
@@ -719,6 +730,7 @@ export interface VectorSearchOptions {
719
730
  collection?: string;
720
731
  limit?: number;
721
732
  minScore?: number;
733
+ intent?: string;
722
734
  hooks?: Pick<SearchHooks, 'onExpand'>;
723
735
  }
724
736
  export interface VectorSearchResult {
@@ -758,7 +770,7 @@ export interface StructuredSearchOptions {
758
770
  minScore?: number;
759
771
  candidateLimit?: number;
760
772
  explain?: boolean;
761
- /** Future: domain intent hint for routing/boosting */
773
+ /** Domain intent hint for disambiguation — steers reranking and chunk selection */
762
774
  intent?: string;
763
775
  hooks?: SearchHooks;
764
776
  }
package/dist/store.js CHANGED
@@ -667,8 +667,8 @@ export function createStore(dbPath) {
667
667
  searchFTS: (query, limit, collectionName) => searchFTS(db, query, limit, collectionName),
668
668
  searchVec: (query, model, limit, collectionName, session, precomputedEmbedding) => searchVec(db, query, model, limit, collectionName, session, precomputedEmbedding),
669
669
  // Query expansion & reranking
670
- expandQuery: (query, model) => expandQuery(query, model, db),
671
- rerank: (query, documents, model) => rerank(query, documents, model, db),
670
+ expandQuery: (query, model, intent) => expandQuery(query, model, db, intent),
671
+ rerank: (query, documents, model, intent) => rerank(query, documents, model, db, intent),
672
672
  // Document retrieval
673
673
  findDocument: (filename, options) => findDocument(db, filename, options),
674
674
  getDocumentBody: (doc, fromLine, maxLines) => getDocumentBody(db, doc, fromLine, maxLines),
@@ -1798,9 +1798,9 @@ export function insertEmbedding(db, hash, seq, pos, embedding, model, embeddedAt
1798
1798
  // =============================================================================
1799
1799
  // Query expansion
1800
1800
  // =============================================================================
1801
- export async function expandQuery(query, model = DEFAULT_QUERY_MODEL, db) {
1801
+ export async function expandQuery(query, model = DEFAULT_QUERY_MODEL, db, intent) {
1802
1802
  // Check cache first — stored as JSON preserving types
1803
- const cacheKey = getCacheKey("expandQuery", { query, model });
1803
+ const cacheKey = getCacheKey("expandQuery", { query, model, ...(intent && { intent }) });
1804
1804
  const cached = getCachedResult(db, cacheKey);
1805
1805
  if (cached) {
1806
1806
  try {
@@ -1812,7 +1812,7 @@ export async function expandQuery(query, model = DEFAULT_QUERY_MODEL, db) {
1812
1812
  }
1813
1813
  const llm = getDefaultLlamaCpp();
1814
1814
  // Note: LlamaCpp uses hardcoded model, model parameter is ignored
1815
- const results = await llm.expandQuery(query);
1815
+ const results = await llm.expandQuery(query, { intent });
1816
1816
  // Map Queryable[] → ExpandedQuery[] (same shape, decoupled from llm.ts internals).
1817
1817
  // Filter out entries that duplicate the original query text.
1818
1818
  const expanded = results
@@ -1826,7 +1826,9 @@ export async function expandQuery(query, model = DEFAULT_QUERY_MODEL, db) {
1826
1826
  // =============================================================================
1827
1827
  // Reranking
1828
1828
  // =============================================================================
1829
- export async function rerank(query, documents, model = DEFAULT_RERANK_MODEL, db) {
1829
+ export async function rerank(query, documents, model = DEFAULT_RERANK_MODEL, db, intent) {
1830
+ // Prepend intent to rerank query so the reranker scores with domain context
1831
+ const rerankQuery = intent ? `${intent}\n\n${query}` : query;
1830
1832
  const cachedResults = new Map();
1831
1833
  const uncachedDocsByChunk = new Map();
1832
1834
  // Check cache for each document
@@ -1835,7 +1837,7 @@ export async function rerank(query, documents, model = DEFAULT_RERANK_MODEL, db)
1835
1837
  // File path is excluded from the new cache key because the reranker score
1836
1838
  // depends on the chunk content, not where it came from.
1837
1839
  for (const doc of documents) {
1838
- const cacheKey = getCacheKey("rerank", { query, model, chunk: doc.text });
1840
+ const cacheKey = getCacheKey("rerank", { query: rerankQuery, model, chunk: doc.text });
1839
1841
  const legacyCacheKey = getCacheKey("rerank", { query, file: doc.file, model, chunk: doc.text });
1840
1842
  const cached = getCachedResult(db, cacheKey) ?? getCachedResult(db, legacyCacheKey);
1841
1843
  if (cached !== null) {
@@ -1849,12 +1851,12 @@ export async function rerank(query, documents, model = DEFAULT_RERANK_MODEL, db)
1849
1851
  if (uncachedDocsByChunk.size > 0) {
1850
1852
  const llm = getDefaultLlamaCpp();
1851
1853
  const uncachedDocs = [...uncachedDocsByChunk.values()];
1852
- const rerankResult = await llm.rerank(query, uncachedDocs, { model });
1854
+ const rerankResult = await llm.rerank(rerankQuery, uncachedDocs, { model });
1853
1855
  // Cache results by chunk text so identical chunks across files are scored once.
1854
1856
  const textByFile = new Map(uncachedDocs.map(d => [d.file, d.text]));
1855
1857
  for (const result of rerankResult.results) {
1856
1858
  const chunk = textByFile.get(result.file) || "";
1857
- const cacheKey = getCacheKey("rerank", { query, model, chunk });
1859
+ const cacheKey = getCacheKey("rerank", { query: rerankQuery, model, chunk });
1858
1860
  setCachedResult(db, cacheKey, result.score.toString());
1859
1861
  cachedResults.set(chunk, result.score);
1860
1862
  }
@@ -2254,7 +2256,41 @@ export function getStatus(db) {
2254
2256
  collections,
2255
2257
  };
2256
2258
  }
2257
- export function extractSnippet(body, query, maxLen = 500, chunkPos, chunkLen) {
2259
+ /** Weight for intent terms relative to query terms (1.0) in snippet scoring */
2260
+ export const INTENT_WEIGHT_SNIPPET = 0.3;
2261
+ /** Weight for intent terms relative to query terms (1.0) in chunk selection */
2262
+ export const INTENT_WEIGHT_CHUNK = 0.5;
2263
+ // Common stop words dropped from intent terms after the string is split and punctuation-stripped.
2264
+ // Seeded from finetune/reward.py KEY_TERM_STOPWORDS, extended with common
2265
+ // 2-3 char function words so the length threshold can drop to >1 and let
2266
+ // short domain terms (API, SQL, LLM, CPU, CDN, …) survive.
2267
+ const INTENT_STOP_WORDS = new Set([
2268
+ // 2-char function words
2269
+ "am", "an", "as", "at", "be", "by", "do", "he", "if",
2270
+ "in", "is", "it", "me", "my", "no", "of", "on", "or", "so",
2271
+ "to", "up", "us", "we",
2272
+ // 3-char function words
2273
+ "all", "and", "any", "are", "but", "can", "did", "for", "get",
2274
+ "has", "her", "him", "his", "how", "its", "let", "may", "not",
2275
+ "our", "out", "the", "too", "was", "who", "why", "you",
2276
+ // 4+ char common words
2277
+ "also", "does", "find", "from", "have", "into", "more", "need",
2278
+ "show", "some", "tell", "that", "them", "this", "want", "what",
2279
+ "when", "will", "with", "your",
2280
+ // Search-context noise
2281
+ "about", "looking", "notes", "search", "where", "which",
2282
+ ]);
2283
+ /**
2284
+ * Extract meaningful terms from an intent string, filtering stop words and punctuation.
2285
+ * Strips leading and trailing punctuation (Unicode-aware) and keeps terms longer than one character, so short domain terms like "API" survive.
2286
+ * Returns lowercase terms suitable for text matching.
2287
+ */
2288
+ export function extractIntentTerms(intent) {
2289
+ return intent.toLowerCase().split(/\s+/)
2290
+ .map(t => t.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, ""))
2291
+ .filter(t => t.length > 1 && !INTENT_STOP_WORDS.has(t));
2292
+ }
2293
+ export function extractSnippet(body, query, maxLen = 500, chunkPos, chunkLen, intent) {
2258
2294
  const totalLines = body.split('\n').length;
2259
2295
  let searchBody = body;
2260
2296
  let lineOffset = 0;
@@ -2271,13 +2307,18 @@ export function extractSnippet(body, query, maxLen = 500, chunkPos, chunkLen) {
2271
2307
  }
2272
2308
  const lines = searchBody.split('\n');
2273
2309
  const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length > 0);
2310
+ const intentTerms = intent ? extractIntentTerms(intent) : [];
2274
2311
  let bestLine = 0, bestScore = -1;
2275
2312
  for (let i = 0; i < lines.length; i++) {
2276
2313
  const lineLower = (lines[i] ?? "").toLowerCase();
2277
2314
  let score = 0;
2278
2315
  for (const term of queryTerms) {
2279
2316
  if (lineLower.includes(term))
2280
- score++;
2317
+ score += 1.0;
2318
+ }
2319
+ for (const term of intentTerms) {
2320
+ if (lineLower.includes(term))
2321
+ score += INTENT_WEIGHT_SNIPPET;
2281
2322
  }
2282
2323
  if (score > bestScore) {
2283
2324
  bestScore = score;
@@ -2291,7 +2332,7 @@ export function extractSnippet(body, query, maxLen = 500, chunkPos, chunkLen) {
2291
2332
  // If we focused on a chunk window and it produced an empty/whitespace-only snippet,
2292
2333
  // fall back to a full-document snippet so we always show something useful.
2293
2334
  if (chunkPos && chunkPos > 0 && snippetText.trim().length === 0) {
2294
- return extractSnippet(body, query, maxLen, undefined);
2335
+ return extractSnippet(body, query, maxLen, undefined, undefined, intent);
2295
2336
  }
2296
2337
  if (snippetText.length > maxLen)
2297
2338
  snippetText = snippetText.substring(0, maxLen - 3) + "...";
@@ -2340,17 +2381,21 @@ export async function hybridQuery(store, query, options) {
2340
2381
  const candidateLimit = options?.candidateLimit ?? RERANK_CANDIDATE_LIMIT;
2341
2382
  const collection = options?.collection;
2342
2383
  const explain = options?.explain ?? false;
2384
+ const intent = options?.intent;
2343
2385
  const hooks = options?.hooks;
2344
2386
  const rankedLists = [];
2345
2387
  const rankedListMeta = [];
2346
2388
  const docidMap = new Map(); // filepath -> docid
2347
2389
  const hasVectors = !!store.db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
2348
2390
  // Step 1: BM25 probe — strong signal skips expensive LLM expansion
2391
+ // When intent is provided, disable strong-signal bypass — the obvious BM25
2392
+ // match may not be what the caller wants (e.g. "performance" with intent
2393
+ // "web page load times" should NOT shortcut to a sports-performance doc).
2349
2394
  // Pass collection directly into FTS query (filter at SQL level, not post-hoc)
2350
2395
  const initialFts = store.searchFTS(query, 20, collection);
2351
2396
  const topScore = initialFts[0]?.score ?? 0;
2352
2397
  const secondScore = initialFts[1]?.score ?? 0;
2353
- const hasStrongSignal = initialFts.length > 0
2398
+ const hasStrongSignal = !intent && initialFts.length > 0
2354
2399
  && topScore >= STRONG_SIGNAL_MIN_SCORE
2355
2400
  && (topScore - secondScore) >= STRONG_SIGNAL_MIN_GAP;
2356
2401
  if (hasStrongSignal)
@@ -2360,7 +2405,7 @@ export async function hybridQuery(store, query, options) {
2360
2405
  const expandStart = Date.now();
2361
2406
  const expanded = hasStrongSignal
2362
2407
  ? []
2363
- : await store.expandQuery(query);
2408
+ : await store.expandQuery(query, undefined, intent);
2364
2409
  hooks?.onExpand?.(query, expanded, Date.now() - expandStart);
2365
2410
  // Seed with initial FTS results (avoid re-running original query FTS)
2366
2411
  if (initialFts.length > 0) {
@@ -2440,6 +2485,7 @@ export async function hybridQuery(store, query, options) {
2440
2485
  // Step 5: Chunk documents, pick best chunk per doc for reranking.
2441
2486
  // Reranking full bodies is O(tokens) — the critical perf lesson that motivated this refactor.
2442
2487
  const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length > 2);
2488
+ const intentTerms = intent ? extractIntentTerms(intent) : [];
2443
2489
  const chunksToRerank = [];
2444
2490
  const docChunkMap = new Map();
2445
2491
  for (const cand of candidates) {
@@ -2447,11 +2493,16 @@ export async function hybridQuery(store, query, options) {
2447
2493
  if (chunks.length === 0)
2448
2494
  continue;
2449
2495
  // Pick chunk with most keyword overlap (fallback: first chunk)
2496
+ // Intent terms contribute at INTENT_WEIGHT_CHUNK (0.5) relative to query terms (1.0)
2450
2497
  let bestIdx = 0;
2451
2498
  let bestScore = -1;
2452
2499
  for (let i = 0; i < chunks.length; i++) {
2453
2500
  const chunkLower = chunks[i].text.toLowerCase();
2454
- const score = queryTerms.reduce((acc, term) => acc + (chunkLower.includes(term) ? 1 : 0), 0);
2501
+ let score = queryTerms.reduce((acc, term) => acc + (chunkLower.includes(term) ? 1 : 0), 0);
2502
+ for (const term of intentTerms) {
2503
+ if (chunkLower.includes(term))
2504
+ score += INTENT_WEIGHT_CHUNK;
2505
+ }
2455
2506
  if (score > bestScore) {
2456
2507
  bestScore = score;
2457
2508
  bestIdx = i;
@@ -2463,7 +2514,7 @@ export async function hybridQuery(store, query, options) {
2463
2514
  // Step 6: Rerank chunks (NOT full bodies)
2464
2515
  hooks?.onRerankStart?.(chunksToRerank.length);
2465
2516
  const rerankStart = Date.now();
2466
- const reranked = await store.rerank(query, chunksToRerank);
2517
+ const reranked = await store.rerank(query, chunksToRerank, undefined, intent);
2467
2518
  hooks?.onRerankDone?.(Date.now() - rerankStart);
2468
2519
  // Step 7: Blend RRF position score with reranker score
2469
2520
  // Position-aware weights: top retrieval results get more protection from reranker disagreement
@@ -2541,12 +2592,13 @@ export async function vectorSearchQuery(store, query, options) {
2541
2592
  const limit = options?.limit ?? 10;
2542
2593
  const minScore = options?.minScore ?? 0.3;
2543
2594
  const collection = options?.collection;
2595
+ const intent = options?.intent;
2544
2596
  const hasVectors = !!store.db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
2545
2597
  if (!hasVectors)
2546
2598
  return [];
2547
2599
  // Expand query — filter to vec/hyde only (lex queries target FTS, not vector)
2548
2600
  const expandStart = Date.now();
2549
- const allExpanded = await store.expandQuery(query);
2601
+ const allExpanded = await store.expandQuery(query, undefined, intent);
2550
2602
  const vecExpanded = allExpanded.filter(q => q.type !== 'lex');
2551
2603
  options?.hooks?.onExpand?.(query, vecExpanded, Date.now() - expandStart);
2552
2604
  // Run original + vec/hyde expanded through vector, sequentially — concurrent embed() hangs
@@ -2597,6 +2649,7 @@ export async function structuredSearch(store, searches, options) {
2597
2649
  const minScore = options?.minScore ?? 0;
2598
2650
  const candidateLimit = options?.candidateLimit ?? RERANK_CANDIDATE_LIMIT;
2599
2651
  const explain = options?.explain ?? false;
2652
+ const intent = options?.intent;
2600
2653
  const hooks = options?.hooks;
2601
2654
  const collections = options?.collections;
2602
2655
  if (searches.length === 0)
@@ -2696,6 +2749,7 @@ export async function structuredSearch(store, searches, options) {
2696
2749
  || searches.find(s => s.type === 'vec')?.query
2697
2750
  || searches[0]?.query || "";
2698
2751
  const queryTerms = primaryQuery.toLowerCase().split(/\s+/).filter(t => t.length > 2);
2752
+ const intentTerms = intent ? extractIntentTerms(intent) : [];
2699
2753
  const chunksToRerank = [];
2700
2754
  const docChunkMap = new Map();
2701
2755
  for (const cand of candidates) {
@@ -2703,11 +2757,16 @@ export async function structuredSearch(store, searches, options) {
2703
2757
  if (chunks.length === 0)
2704
2758
  continue;
2705
2759
  // Pick chunk with most keyword overlap
2760
+ // Intent terms contribute at INTENT_WEIGHT_CHUNK (0.5) relative to query terms (1.0)
2706
2761
  let bestIdx = 0;
2707
2762
  let bestScore = -1;
2708
2763
  for (let i = 0; i < chunks.length; i++) {
2709
2764
  const chunkLower = chunks[i].text.toLowerCase();
2710
- const score = queryTerms.reduce((acc, term) => acc + (chunkLower.includes(term) ? 1 : 0), 0);
2765
+ let score = queryTerms.reduce((acc, term) => acc + (chunkLower.includes(term) ? 1 : 0), 0);
2766
+ for (const term of intentTerms) {
2767
+ if (chunkLower.includes(term))
2768
+ score += INTENT_WEIGHT_CHUNK;
2769
+ }
2711
2770
  if (score > bestScore) {
2712
2771
  bestScore = score;
2713
2772
  bestIdx = i;
@@ -2719,7 +2778,7 @@ export async function structuredSearch(store, searches, options) {
2719
2778
  // Step 5: Rerank chunks
2720
2779
  hooks?.onRerankStart?.(chunksToRerank.length);
2721
2780
  const rerankStart2 = Date.now();
2722
- const reranked = await store.rerank(primaryQuery, chunksToRerank);
2781
+ const reranked = await store.rerank(primaryQuery, chunksToRerank, undefined, intent);
2723
2782
  hooks?.onRerankDone?.(Date.now() - rerankStart2);
2724
2783
  // Step 6: Blend RRF position score with reranker score
2725
2784
  const candidateMap = new Map(candidates.map(c => [c.file, {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tobilu/qmd",
3
- "version": "1.1.2",
3
+ "version": "1.1.5",
4
4
  "description": "Query Markup Documents - On-device hybrid search for markdown files with BM25, vector search, and LLM reranking",
5
5
  "type": "module",
6
6
  "bin": {