@tobilu/qmd 1.1.1 → 1.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/store.js CHANGED
@@ -667,8 +667,8 @@ export function createStore(dbPath) {
667
667
  searchFTS: (query, limit, collectionName) => searchFTS(db, query, limit, collectionName),
668
668
  searchVec: (query, model, limit, collectionName, session, precomputedEmbedding) => searchVec(db, query, model, limit, collectionName, session, precomputedEmbedding),
669
669
  // Query expansion & reranking
670
- expandQuery: (query, model) => expandQuery(query, model, db),
671
- rerank: (query, documents, model) => rerank(query, documents, model, db),
670
+ expandQuery: (query, model, intent) => expandQuery(query, model, db, intent),
671
+ rerank: (query, documents, model, intent) => rerank(query, documents, model, db, intent),
672
672
  // Document retrieval
673
673
  findDocument: (filename, options) => findDocument(db, filename, options),
674
674
  getDocumentBody: (doc, fromLine, maxLines) => getDocumentBody(db, doc, fromLine, maxLines),
@@ -706,15 +706,24 @@ export function getDocid(hash) {
706
706
  * - Preserve folder structure (a/b/c/d.md stays structured)
707
707
  * - Preserve file extension
708
708
  */
709
+ /** Replace emoji/symbol codepoints with their hex representation (e.g. 🐘 → 1f418) */
710
+ function emojiToHex(str) {
711
+ return str.replace(/(?:\p{So}\p{Mn}?|\p{Sk})+/gu, (run) => {
712
+ // Split the run into individual emoji and convert each to hex, dash-separated
713
+ return [...run].filter(c => /\p{So}|\p{Sk}/u.test(c))
714
+ .map(c => c.codePointAt(0).toString(16)).join('-');
715
+ });
716
+ }
709
717
  export function handelize(path) {
710
718
  if (!path || path.trim() === '') {
711
719
  throw new Error('handelize: path cannot be empty');
712
720
  }
713
721
  // Allow route-style "$" filenames while still rejecting paths with no usable content.
722
+ // Emoji (\p{So}) counts as valid content — they get converted to hex codepoints below.
714
723
  const segments = path.split('/').filter(Boolean);
715
724
  const lastSegment = segments[segments.length - 1] || '';
716
725
  const filenameWithoutExt = lastSegment.replace(/\.[^.]+$/, '');
717
- const hasValidContent = /[\p{L}\p{N}$]/u.test(filenameWithoutExt);
726
+ const hasValidContent = /[\p{L}\p{N}\p{So}\p{Sk}$]/u.test(filenameWithoutExt);
718
727
  if (!hasValidContent) {
719
728
  throw new Error(`handelize: path "${path}" has no valid filename content`);
720
729
  }
@@ -724,6 +733,8 @@ export function handelize(path) {
724
733
  .split('/')
725
734
  .map((segment, idx, arr) => {
726
735
  const isLastSegment = idx === arr.length - 1;
736
+ // Convert emoji to hex codepoints before cleaning
737
+ segment = emojiToHex(segment);
727
738
  if (isLastSegment) {
728
739
  // For the filename (last segment), preserve the extension
729
740
  const extMatch = segment.match(/(\.[a-z0-9]+)$/i);
@@ -1745,7 +1756,7 @@ export async function searchVec(db, query, model, limit = 20, collectionName, se
1745
1756
  // =============================================================================
1746
1757
  async function getEmbedding(text, model, isQuery, session) {
1747
1758
  // Format text using the appropriate prompt template
1748
- const formattedText = isQuery ? formatQueryForEmbedding(text) : formatDocForEmbedding(text);
1759
+ const formattedText = isQuery ? formatQueryForEmbedding(text, model) : formatDocForEmbedding(text, undefined, model);
1749
1760
  const result = session
1750
1761
  ? await session.embed(formattedText, { model, isQuery })
1751
1762
  : await getDefaultLlamaCpp().embed(formattedText, { model, isQuery });
@@ -1787,9 +1798,9 @@ export function insertEmbedding(db, hash, seq, pos, embedding, model, embeddedAt
1787
1798
  // =============================================================================
1788
1799
  // Query expansion
1789
1800
  // =============================================================================
1790
- export async function expandQuery(query, model = DEFAULT_QUERY_MODEL, db) {
1801
+ export async function expandQuery(query, model = DEFAULT_QUERY_MODEL, db, intent) {
1791
1802
  // Check cache first — stored as JSON preserving types
1792
- const cacheKey = getCacheKey("expandQuery", { query, model });
1803
+ const cacheKey = getCacheKey("expandQuery", { query, model, ...(intent && { intent }) });
1793
1804
  const cached = getCachedResult(db, cacheKey);
1794
1805
  if (cached) {
1795
1806
  try {
@@ -1801,7 +1812,7 @@ export async function expandQuery(query, model = DEFAULT_QUERY_MODEL, db) {
1801
1812
  }
1802
1813
  const llm = getDefaultLlamaCpp();
1803
1814
  // Note: LlamaCpp uses hardcoded model, model parameter is ignored
1804
- const results = await llm.expandQuery(query);
1815
+ const results = await llm.expandQuery(query, { intent });
1805
1816
  // Map Queryable[] → ExpandedQuery[] (same shape, decoupled from llm.ts internals).
1806
1817
  // Filter out entries that duplicate the original query text.
1807
1818
  const expanded = results
@@ -1815,37 +1826,44 @@ export async function expandQuery(query, model = DEFAULT_QUERY_MODEL, db) {
1815
1826
  // =============================================================================
1816
1827
  // Reranking
1817
1828
  // =============================================================================
1818
- export async function rerank(query, documents, model = DEFAULT_RERANK_MODEL, db) {
1829
+ export async function rerank(query, documents, model = DEFAULT_RERANK_MODEL, db, intent) {
1830
+ // Prepend intent to rerank query so the reranker scores with domain context
1831
+ const rerankQuery = intent ? `${intent}\n\n${query}` : query;
1819
1832
  const cachedResults = new Map();
1820
- const uncachedDocs = [];
1833
+ const uncachedDocsByChunk = new Map();
1821
1834
  // Check cache for each document
1822
1835
  // Cache key includes chunk text — different queries can select different chunks
1823
1836
  // from the same file, and the reranker score depends on which chunk was sent.
1837
+ // File path is excluded from the new cache key because the reranker score
1838
+ // depends on the chunk content, not where it came from.
1824
1839
  for (const doc of documents) {
1825
- const cacheKey = getCacheKey("rerank", { query, file: doc.file, model, chunk: doc.text });
1826
- const cached = getCachedResult(db, cacheKey);
1840
+ const cacheKey = getCacheKey("rerank", { query: rerankQuery, model, chunk: doc.text });
1841
+ const legacyCacheKey = getCacheKey("rerank", { query, file: doc.file, model, chunk: doc.text });
1842
+ const cached = getCachedResult(db, cacheKey) ?? getCachedResult(db, legacyCacheKey);
1827
1843
  if (cached !== null) {
1828
- cachedResults.set(doc.file, parseFloat(cached));
1844
+ cachedResults.set(doc.text, parseFloat(cached));
1829
1845
  }
1830
1846
  else {
1831
- uncachedDocs.push({ file: doc.file, text: doc.text });
1847
+ uncachedDocsByChunk.set(doc.text, { file: doc.file, text: doc.text });
1832
1848
  }
1833
1849
  }
1834
1850
  // Rerank uncached documents using LlamaCpp
1835
- if (uncachedDocs.length > 0) {
1851
+ if (uncachedDocsByChunk.size > 0) {
1836
1852
  const llm = getDefaultLlamaCpp();
1837
- const rerankResult = await llm.rerank(query, uncachedDocs, { model });
1838
- // Cache results use original doc.text for cache key (result.file lacks chunk text)
1839
- const textByFile = new Map(documents.map(d => [d.file, d.text]));
1853
+ const uncachedDocs = [...uncachedDocsByChunk.values()];
1854
+ const rerankResult = await llm.rerank(rerankQuery, uncachedDocs, { model });
1855
+ // Cache results by chunk text so identical chunks across files are scored once.
1856
+ const textByFile = new Map(uncachedDocs.map(d => [d.file, d.text]));
1840
1857
  for (const result of rerankResult.results) {
1841
- const cacheKey = getCacheKey("rerank", { query, file: result.file, model, chunk: textByFile.get(result.file) || "" });
1858
+ const chunk = textByFile.get(result.file) || "";
1859
+ const cacheKey = getCacheKey("rerank", { query: rerankQuery, model, chunk });
1842
1860
  setCachedResult(db, cacheKey, result.score.toString());
1843
- cachedResults.set(result.file, result.score);
1861
+ cachedResults.set(chunk, result.score);
1844
1862
  }
1845
1863
  }
1846
1864
  // Return all results sorted by score
1847
1865
  return documents
1848
- .map(doc => ({ file: doc.file, score: cachedResults.get(doc.file) || 0 }))
1866
+ .map(doc => ({ file: doc.file, score: cachedResults.get(doc.text) || 0 }))
1849
1867
  .sort((a, b) => b.score - a.score);
1850
1868
  }
1851
1869
  // =============================================================================
@@ -1890,6 +1908,65 @@ export function reciprocalRankFusion(resultLists, weights = [], k = 60) {
1890
1908
  .sort((a, b) => b.rrfScore - a.rrfScore)
1891
1909
  .map(e => ({ ...e.result, score: e.rrfScore }));
1892
1910
  }
1911
+ /**
1912
+ * Build per-document RRF contribution traces for explain/debug output.
1913
+ */
1914
+ export function buildRrfTrace(resultLists, weights = [], listMeta = [], k = 60) {
1915
+ const traces = new Map();
1916
+ for (let listIdx = 0; listIdx < resultLists.length; listIdx++) {
1917
+ const list = resultLists[listIdx];
1918
+ if (!list)
1919
+ continue;
1920
+ const weight = weights[listIdx] ?? 1.0;
1921
+ const meta = listMeta[listIdx] ?? {
1922
+ source: "fts",
1923
+ queryType: "original",
1924
+ query: "",
1925
+ };
1926
+ for (let rank0 = 0; rank0 < list.length; rank0++) {
1927
+ const result = list[rank0];
1928
+ if (!result)
1929
+ continue;
1930
+ const rank = rank0 + 1; // 1-indexed rank for explain output
1931
+ const contribution = weight / (k + rank);
1932
+ const existing = traces.get(result.file);
1933
+ const detail = {
1934
+ listIndex: listIdx,
1935
+ source: meta.source,
1936
+ queryType: meta.queryType,
1937
+ query: meta.query,
1938
+ rank,
1939
+ weight,
1940
+ backendScore: result.score,
1941
+ rrfContribution: contribution,
1942
+ };
1943
+ if (existing) {
1944
+ existing.baseScore += contribution;
1945
+ existing.topRank = Math.min(existing.topRank, rank);
1946
+ existing.contributions.push(detail);
1947
+ }
1948
+ else {
1949
+ traces.set(result.file, {
1950
+ contributions: [detail],
1951
+ baseScore: contribution,
1952
+ topRank: rank,
1953
+ topRankBonus: 0,
1954
+ totalScore: 0,
1955
+ });
1956
+ }
1957
+ }
1958
+ }
1959
+ for (const trace of traces.values()) {
1960
+ let bonus = 0;
1961
+ if (trace.topRank === 1)
1962
+ bonus = 0.05;
1963
+ else if (trace.topRank <= 3)
1964
+ bonus = 0.02;
1965
+ trace.topRankBonus = bonus;
1966
+ trace.totalScore = trace.baseScore + bonus;
1967
+ }
1968
+ return traces;
1969
+ }
1893
1970
  /**
1894
1971
  * Find a document by filename/path, docid (#hash), or with fuzzy matching.
1895
1972
  * Returns document metadata without body by default.
@@ -2179,7 +2256,41 @@ export function getStatus(db) {
2179
2256
  collections,
2180
2257
  };
2181
2258
  }
2182
- export function extractSnippet(body, query, maxLen = 500, chunkPos, chunkLen) {
2259
+ /** Weight for intent terms relative to query terms (1.0) in snippet scoring */
2260
+ export const INTENT_WEIGHT_SNIPPET = 0.3;
2261
+ /** Weight for intent terms relative to query terms (1.0) in chunk selection */
2262
+ export const INTENT_WEIGHT_CHUNK = 0.5;
2263
+ // Common stop words filtered from intent strings before tokenization.
2264
+ // Seeded from finetune/reward.py KEY_TERM_STOPWORDS, extended with common
2265
+ // 2-3 char function words so the length threshold can drop to >1 and let
2266
+ // short domain terms (API, SQL, LLM, CPU, CDN, …) survive.
2267
+ const INTENT_STOP_WORDS = new Set([
2268
+ // 2-char function words
2269
+ "am", "an", "as", "at", "be", "by", "do", "he", "if",
2270
+ "in", "is", "it", "me", "my", "no", "of", "on", "or", "so",
2271
+ "to", "up", "us", "we",
2272
+ // 3-char function words
2273
+ "all", "and", "any", "are", "but", "can", "did", "for", "get",
2274
+ "has", "her", "him", "his", "how", "its", "let", "may", "not",
2275
+ "our", "out", "the", "too", "was", "who", "why", "you",
2276
+ // 4+ char common words
2277
+ "also", "does", "find", "from", "have", "into", "more", "need",
2278
+ "show", "some", "tell", "that", "them", "this", "want", "what",
2279
+ "when", "will", "with", "your",
2280
+ // Search-context noise
2281
+ "about", "looking", "notes", "search", "where", "which",
2282
+ ]);
2283
+ /**
2284
+ * Extract meaningful terms from an intent string, filtering stop words and punctuation.
2285
+ * Uses Unicode-aware punctuation stripping so domain terms like "API" survive.
2286
+ * Returns lowercase terms suitable for text matching.
2287
+ */
2288
+ export function extractIntentTerms(intent) {
2289
+ return intent.toLowerCase().split(/\s+/)
2290
+ .map(t => t.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, ""))
2291
+ .filter(t => t.length > 1 && !INTENT_STOP_WORDS.has(t));
2292
+ }
2293
+ export function extractSnippet(body, query, maxLen = 500, chunkPos, chunkLen, intent) {
2183
2294
  const totalLines = body.split('\n').length;
2184
2295
  let searchBody = body;
2185
2296
  let lineOffset = 0;
@@ -2196,13 +2307,18 @@ export function extractSnippet(body, query, maxLen = 500, chunkPos, chunkLen) {
2196
2307
  }
2197
2308
  const lines = searchBody.split('\n');
2198
2309
  const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length > 0);
2310
+ const intentTerms = intent ? extractIntentTerms(intent) : [];
2199
2311
  let bestLine = 0, bestScore = -1;
2200
2312
  for (let i = 0; i < lines.length; i++) {
2201
2313
  const lineLower = (lines[i] ?? "").toLowerCase();
2202
2314
  let score = 0;
2203
2315
  for (const term of queryTerms) {
2204
2316
  if (lineLower.includes(term))
2205
- score++;
2317
+ score += 1.0;
2318
+ }
2319
+ for (const term of intentTerms) {
2320
+ if (lineLower.includes(term))
2321
+ score += INTENT_WEIGHT_SNIPPET;
2206
2322
  }
2207
2323
  if (score > bestScore) {
2208
2324
  bestScore = score;
@@ -2216,7 +2332,7 @@ export function extractSnippet(body, query, maxLen = 500, chunkPos, chunkLen) {
2216
2332
  // If we focused on a chunk window and it produced an empty/whitespace-only snippet,
2217
2333
  // fall back to a full-document snippet so we always show something useful.
2218
2334
  if (chunkPos && chunkPos > 0 && snippetText.trim().length === 0) {
2219
- return extractSnippet(body, query, maxLen, undefined);
2335
+ return extractSnippet(body, query, maxLen, undefined, undefined, intent);
2220
2336
  }
2221
2337
  if (snippetText.length > maxLen)
2222
2338
  snippetText = snippetText.substring(0, maxLen - 3) + "...";
@@ -2264,16 +2380,22 @@ export async function hybridQuery(store, query, options) {
2264
2380
  const minScore = options?.minScore ?? 0;
2265
2381
  const candidateLimit = options?.candidateLimit ?? RERANK_CANDIDATE_LIMIT;
2266
2382
  const collection = options?.collection;
2383
+ const explain = options?.explain ?? false;
2384
+ const intent = options?.intent;
2267
2385
  const hooks = options?.hooks;
2268
2386
  const rankedLists = [];
2387
+ const rankedListMeta = [];
2269
2388
  const docidMap = new Map(); // filepath -> docid
2270
2389
  const hasVectors = !!store.db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
2271
2390
  // Step 1: BM25 probe — strong signal skips expensive LLM expansion
2391
+ // When intent is provided, disable strong-signal bypass — the obvious BM25
2392
+ // match may not be what the caller wants (e.g. "performance" with intent
2393
+ // "web page load times" should NOT shortcut to a sports-performance doc).
2272
2394
  // Pass collection directly into FTS query (filter at SQL level, not post-hoc)
2273
2395
  const initialFts = store.searchFTS(query, 20, collection);
2274
2396
  const topScore = initialFts[0]?.score ?? 0;
2275
2397
  const secondScore = initialFts[1]?.score ?? 0;
2276
- const hasStrongSignal = initialFts.length > 0
2398
+ const hasStrongSignal = !intent && initialFts.length > 0
2277
2399
  && topScore >= STRONG_SIGNAL_MIN_SCORE
2278
2400
  && (topScore - secondScore) >= STRONG_SIGNAL_MIN_GAP;
2279
2401
  if (hasStrongSignal)
@@ -2283,7 +2405,7 @@ export async function hybridQuery(store, query, options) {
2283
2405
  const expandStart = Date.now();
2284
2406
  const expanded = hasStrongSignal
2285
2407
  ? []
2286
- : await store.expandQuery(query);
2408
+ : await store.expandQuery(query, undefined, intent);
2287
2409
  hooks?.onExpand?.(query, expanded, Date.now() - expandStart);
2288
2410
  // Seed with initial FTS results (avoid re-running original query FTS)
2289
2411
  if (initialFts.length > 0) {
@@ -2293,6 +2415,7 @@ export async function hybridQuery(store, query, options) {
2293
2415
  file: r.filepath, displayPath: r.displayPath,
2294
2416
  title: r.title, body: r.body || "", score: r.score,
2295
2417
  })));
2418
+ rankedListMeta.push({ source: "fts", queryType: "original", query });
2296
2419
  }
2297
2420
  // Step 3: Route searches by query type
2298
2421
  //
@@ -2310,17 +2433,18 @@ export async function hybridQuery(store, query, options) {
2310
2433
  file: r.filepath, displayPath: r.displayPath,
2311
2434
  title: r.title, body: r.body || "", score: r.score,
2312
2435
  })));
2436
+ rankedListMeta.push({ source: "fts", queryType: "lex", query: q.text });
2313
2437
  }
2314
2438
  }
2315
2439
  }
2316
2440
  // 3b: Collect all texts that need vector search (original query + vec/hyde expansions)
2317
2441
  if (hasVectors) {
2318
2442
  const vecQueries = [
2319
- { text: query, isOriginal: true },
2443
+ { text: query, queryType: "original" },
2320
2444
  ];
2321
2445
  for (const q of expanded) {
2322
2446
  if (q.type === 'vec' || q.type === 'hyde') {
2323
- vecQueries.push({ text: q.text, isOriginal: false });
2447
+ vecQueries.push({ text: q.text, queryType: q.type });
2324
2448
  }
2325
2449
  }
2326
2450
  // Batch embed all vector queries in a single call
@@ -2343,18 +2467,25 @@ export async function hybridQuery(store, query, options) {
2343
2467
  file: r.filepath, displayPath: r.displayPath,
2344
2468
  title: r.title, body: r.body || "", score: r.score,
2345
2469
  })));
2470
+ rankedListMeta.push({
2471
+ source: "vec",
2472
+ queryType: vecQueries[i].queryType,
2473
+ query: vecQueries[i].text,
2474
+ });
2346
2475
  }
2347
2476
  }
2348
2477
  }
2349
2478
  // Step 4: RRF fusion — first 2 lists (original FTS + first vec) get 2x weight
2350
2479
  const weights = rankedLists.map((_, i) => i < 2 ? 2.0 : 1.0);
2351
2480
  const fused = reciprocalRankFusion(rankedLists, weights);
2481
+ const rrfTraceByFile = explain ? buildRrfTrace(rankedLists, weights, rankedListMeta) : null;
2352
2482
  const candidates = fused.slice(0, candidateLimit);
2353
2483
  if (candidates.length === 0)
2354
2484
  return [];
2355
2485
  // Step 5: Chunk documents, pick best chunk per doc for reranking.
2356
2486
  // Reranking full bodies is O(tokens) — the critical perf lesson that motivated this refactor.
2357
2487
  const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length > 2);
2488
+ const intentTerms = intent ? extractIntentTerms(intent) : [];
2358
2489
  const chunksToRerank = [];
2359
2490
  const docChunkMap = new Map();
2360
2491
  for (const cand of candidates) {
@@ -2362,11 +2493,16 @@ export async function hybridQuery(store, query, options) {
2362
2493
  if (chunks.length === 0)
2363
2494
  continue;
2364
2495
  // Pick chunk with most keyword overlap (fallback: first chunk)
2496
+ // Intent terms contribute at INTENT_WEIGHT_CHUNK (0.5) relative to query terms (1.0)
2365
2497
  let bestIdx = 0;
2366
2498
  let bestScore = -1;
2367
2499
  for (let i = 0; i < chunks.length; i++) {
2368
2500
  const chunkLower = chunks[i].text.toLowerCase();
2369
- const score = queryTerms.reduce((acc, term) => acc + (chunkLower.includes(term) ? 1 : 0), 0);
2501
+ let score = queryTerms.reduce((acc, term) => acc + (chunkLower.includes(term) ? 1 : 0), 0);
2502
+ for (const term of intentTerms) {
2503
+ if (chunkLower.includes(term))
2504
+ score += INTENT_WEIGHT_CHUNK;
2505
+ }
2370
2506
  if (score > bestScore) {
2371
2507
  bestScore = score;
2372
2508
  bestIdx = i;
@@ -2378,7 +2514,7 @@ export async function hybridQuery(store, query, options) {
2378
2514
  // Step 6: Rerank chunks (NOT full bodies)
2379
2515
  hooks?.onRerankStart?.(chunksToRerank.length);
2380
2516
  const rerankStart = Date.now();
2381
- const reranked = await store.rerank(query, chunksToRerank);
2517
+ const reranked = await store.rerank(query, chunksToRerank, undefined, intent);
2382
2518
  hooks?.onRerankDone?.(Date.now() - rerankStart);
2383
2519
  // Step 7: Blend RRF position score with reranker score
2384
2520
  // Position-aware weights: top retrieval results get more protection from reranker disagreement
@@ -2402,6 +2538,22 @@ export async function hybridQuery(store, query, options) {
2402
2538
  const bestIdx = chunkInfo?.bestIdx ?? 0;
2403
2539
  const bestChunk = chunkInfo?.chunks[bestIdx]?.text || candidate?.body || "";
2404
2540
  const bestChunkPos = chunkInfo?.chunks[bestIdx]?.pos || 0;
2541
+ const trace = rrfTraceByFile?.get(r.file);
2542
+ const explainData = explain ? {
2543
+ ftsScores: trace?.contributions.filter(c => c.source === "fts").map(c => c.backendScore) ?? [],
2544
+ vectorScores: trace?.contributions.filter(c => c.source === "vec").map(c => c.backendScore) ?? [],
2545
+ rrf: {
2546
+ rank: rrfRank,
2547
+ positionScore: rrfScore,
2548
+ weight: rrfWeight,
2549
+ baseScore: trace?.baseScore ?? 0,
2550
+ topRankBonus: trace?.topRankBonus ?? 0,
2551
+ totalScore: trace?.totalScore ?? 0,
2552
+ contributions: trace?.contributions ?? [],
2553
+ },
2554
+ rerankScore: r.score,
2555
+ blendedScore,
2556
+ } : undefined;
2405
2557
  return {
2406
2558
  file: r.file,
2407
2559
  displayPath: candidate?.displayPath || "",
@@ -2412,6 +2564,7 @@ export async function hybridQuery(store, query, options) {
2412
2564
  score: blendedScore,
2413
2565
  context: store.getContextForFile(r.file),
2414
2566
  docid: docidMap.get(r.file) || "",
2567
+ ...(explainData ? { explain: explainData } : {}),
2415
2568
  };
2416
2569
  }).sort((a, b) => b.score - a.score);
2417
2570
  // Step 8: Dedup by file (safety net — prevents duplicate output)
@@ -2439,12 +2592,13 @@ export async function vectorSearchQuery(store, query, options) {
2439
2592
  const limit = options?.limit ?? 10;
2440
2593
  const minScore = options?.minScore ?? 0.3;
2441
2594
  const collection = options?.collection;
2595
+ const intent = options?.intent;
2442
2596
  const hasVectors = !!store.db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
2443
2597
  if (!hasVectors)
2444
2598
  return [];
2445
2599
  // Expand query — filter to vec/hyde only (lex queries target FTS, not vector)
2446
2600
  const expandStart = Date.now();
2447
- const allExpanded = await store.expandQuery(query);
2601
+ const allExpanded = await store.expandQuery(query, undefined, intent);
2448
2602
  const vecExpanded = allExpanded.filter(q => q.type !== 'lex');
2449
2603
  options?.hooks?.onExpand?.(query, vecExpanded, Date.now() - expandStart);
2450
2604
  // Run original + vec/hyde expanded through vector, sequentially — concurrent embed() hangs
@@ -2494,6 +2648,8 @@ export async function structuredSearch(store, searches, options) {
2494
2648
  const limit = options?.limit ?? 10;
2495
2649
  const minScore = options?.minScore ?? 0;
2496
2650
  const candidateLimit = options?.candidateLimit ?? RERANK_CANDIDATE_LIMIT;
2651
+ const explain = options?.explain ?? false;
2652
+ const intent = options?.intent;
2497
2653
  const hooks = options?.hooks;
2498
2654
  const collections = options?.collections;
2499
2655
  if (searches.length === 0)
@@ -2518,6 +2674,7 @@ export async function structuredSearch(store, searches, options) {
2518
2674
  }
2519
2675
  }
2520
2676
  const rankedLists = [];
2677
+ const rankedListMeta = [];
2521
2678
  const docidMap = new Map(); // filepath -> docid
2522
2679
  const hasVectors = !!store.db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
2523
2680
  // Helper to run search across collections (or all if undefined)
@@ -2534,13 +2691,18 @@ export async function structuredSearch(store, searches, options) {
2534
2691
  file: r.filepath, displayPath: r.displayPath,
2535
2692
  title: r.title, body: r.body || "", score: r.score,
2536
2693
  })));
2694
+ rankedListMeta.push({
2695
+ source: "fts",
2696
+ queryType: "lex",
2697
+ query: search.query,
2698
+ });
2537
2699
  }
2538
2700
  }
2539
2701
  }
2540
2702
  }
2541
2703
  // Step 2: Batch embed and run vector searches for vec/hyde
2542
2704
  if (hasVectors) {
2543
- const vecSearches = searches.filter(s => s.type === 'vec' || s.type === 'hyde');
2705
+ const vecSearches = searches.filter((s) => s.type === 'vec' || s.type === 'hyde');
2544
2706
  if (vecSearches.length > 0) {
2545
2707
  const llm = getDefaultLlamaCpp();
2546
2708
  const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query));
@@ -2561,6 +2723,11 @@ export async function structuredSearch(store, searches, options) {
2561
2723
  file: r.filepath, displayPath: r.displayPath,
2562
2724
  title: r.title, body: r.body || "", score: r.score,
2563
2725
  })));
2726
+ rankedListMeta.push({
2727
+ source: "vec",
2728
+ queryType: vecSearches[i].type,
2729
+ query: vecSearches[i].query,
2730
+ });
2564
2731
  }
2565
2732
  }
2566
2733
  }
@@ -2571,6 +2738,7 @@ export async function structuredSearch(store, searches, options) {
2571
2738
  // Step 3: RRF fusion — first list gets 2x weight (assume caller ordered by importance)
2572
2739
  const weights = rankedLists.map((_, i) => i === 0 ? 2.0 : 1.0);
2573
2740
  const fused = reciprocalRankFusion(rankedLists, weights);
2741
+ const rrfTraceByFile = explain ? buildRrfTrace(rankedLists, weights, rankedListMeta) : null;
2574
2742
  const candidates = fused.slice(0, candidateLimit);
2575
2743
  if (candidates.length === 0)
2576
2744
  return [];
@@ -2581,6 +2749,7 @@ export async function structuredSearch(store, searches, options) {
2581
2749
  || searches.find(s => s.type === 'vec')?.query
2582
2750
  || searches[0]?.query || "";
2583
2751
  const queryTerms = primaryQuery.toLowerCase().split(/\s+/).filter(t => t.length > 2);
2752
+ const intentTerms = intent ? extractIntentTerms(intent) : [];
2584
2753
  const chunksToRerank = [];
2585
2754
  const docChunkMap = new Map();
2586
2755
  for (const cand of candidates) {
@@ -2588,11 +2757,16 @@ export async function structuredSearch(store, searches, options) {
2588
2757
  if (chunks.length === 0)
2589
2758
  continue;
2590
2759
  // Pick chunk with most keyword overlap
2760
+ // Intent terms contribute at INTENT_WEIGHT_CHUNK (0.5) relative to query terms (1.0)
2591
2761
  let bestIdx = 0;
2592
2762
  let bestScore = -1;
2593
2763
  for (let i = 0; i < chunks.length; i++) {
2594
2764
  const chunkLower = chunks[i].text.toLowerCase();
2595
- const score = queryTerms.reduce((acc, term) => acc + (chunkLower.includes(term) ? 1 : 0), 0);
2765
+ let score = queryTerms.reduce((acc, term) => acc + (chunkLower.includes(term) ? 1 : 0), 0);
2766
+ for (const term of intentTerms) {
2767
+ if (chunkLower.includes(term))
2768
+ score += INTENT_WEIGHT_CHUNK;
2769
+ }
2596
2770
  if (score > bestScore) {
2597
2771
  bestScore = score;
2598
2772
  bestIdx = i;
@@ -2604,7 +2778,7 @@ export async function structuredSearch(store, searches, options) {
2604
2778
  // Step 5: Rerank chunks
2605
2779
  hooks?.onRerankStart?.(chunksToRerank.length);
2606
2780
  const rerankStart2 = Date.now();
2607
- const reranked = await store.rerank(primaryQuery, chunksToRerank);
2781
+ const reranked = await store.rerank(primaryQuery, chunksToRerank, undefined, intent);
2608
2782
  hooks?.onRerankDone?.(Date.now() - rerankStart2);
2609
2783
  // Step 6: Blend RRF position score with reranker score
2610
2784
  const candidateMap = new Map(candidates.map(c => [c.file, {
@@ -2627,6 +2801,22 @@ export async function structuredSearch(store, searches, options) {
2627
2801
  const bestIdx = chunkInfo?.bestIdx ?? 0;
2628
2802
  const bestChunk = chunkInfo?.chunks[bestIdx]?.text || candidate?.body || "";
2629
2803
  const bestChunkPos = chunkInfo?.chunks[bestIdx]?.pos || 0;
2804
+ const trace = rrfTraceByFile?.get(r.file);
2805
+ const explainData = explain ? {
2806
+ ftsScores: trace?.contributions.filter(c => c.source === "fts").map(c => c.backendScore) ?? [],
2807
+ vectorScores: trace?.contributions.filter(c => c.source === "vec").map(c => c.backendScore) ?? [],
2808
+ rrf: {
2809
+ rank: rrfRank,
2810
+ positionScore: rrfScore,
2811
+ weight: rrfWeight,
2812
+ baseScore: trace?.baseScore ?? 0,
2813
+ topRankBonus: trace?.topRankBonus ?? 0,
2814
+ totalScore: trace?.totalScore ?? 0,
2815
+ contributions: trace?.contributions ?? [],
2816
+ },
2817
+ rerankScore: r.score,
2818
+ blendedScore,
2819
+ } : undefined;
2630
2820
  return {
2631
2821
  file: r.file,
2632
2822
  displayPath: candidate?.displayPath || "",
@@ -2637,6 +2827,7 @@ export async function structuredSearch(store, searches, options) {
2637
2827
  score: blendedScore,
2638
2828
  context: store.getContextForFile(r.file),
2639
2829
  docid: docidMap.get(r.file) || "",
2830
+ ...(explainData ? { explain: explainData } : {}),
2640
2831
  };
2641
2832
  }).sort((a, b) => b.score - a.score);
2642
2833
  // Step 7: Dedup by file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tobilu/qmd",
3
- "version": "1.1.1",
3
+ "version": "1.1.5",
4
4
  "description": "Query Markup Documents - On-device hybrid search for markdown files with BM25, vector search, and LLM reranking",
5
5
  "type": "module",
6
6
  "bin": {
@@ -39,7 +39,7 @@
39
39
  "@modelcontextprotocol/sdk": "^1.25.1",
40
40
  "better-sqlite3": "^11.0.0",
41
41
  "fast-glob": "^3.3.0",
42
- "node-llama-cpp": "^3.14.5",
42
+ "node-llama-cpp": "^3.17.1",
43
43
  "picomatch": "^4.0.0",
44
44
  "sqlite-vec": "^0.1.7-alpha.2",
45
45
  "yaml": "^2.8.2",
@@ -48,8 +48,9 @@
48
48
  "optionalDependencies": {
49
49
  "sqlite-vec-darwin-arm64": "^0.1.7-alpha.2",
50
50
  "sqlite-vec-darwin-x64": "^0.1.7-alpha.2",
51
+ "sqlite-vec-linux-arm64": "^0.1.7-alpha.2",
51
52
  "sqlite-vec-linux-x64": "^0.1.7-alpha.2",
52
- "sqlite-vec-win32-x64": "^0.1.7-alpha.2"
53
+ "sqlite-vec-windows-x64": "^0.1.7-alpha.2"
53
54
  },
54
55
  "devDependencies": {
55
56
  "@types/better-sqlite3": "^7.6.0",