@tobilu/qmd 1.1.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/qmd.js CHANGED
@@ -74,19 +74,24 @@ const cursor = {
74
74
  // Ensure cursor is restored on exit
75
75
  process.on('SIGINT', () => { cursor.show(); process.exit(130); });
76
76
  process.on('SIGTERM', () => { cursor.show(); process.exit(143); });
77
- // Terminal progress bar using OSC 9;4 escape sequence
77
+ // Terminal progress bar using OSC 9;4 escape sequence (TTY only)
78
+ const isTTY = process.stderr.isTTY;
78
79
  const progress = {
79
80
  set(percent) {
80
- process.stderr.write(`\x1b]9;4;1;${Math.round(percent)}\x07`);
81
+ if (isTTY)
82
+ process.stderr.write(`\x1b]9;4;1;${Math.round(percent)}\x07`);
81
83
  },
82
84
  clear() {
83
- process.stderr.write(`\x1b]9;4;0\x07`);
85
+ if (isTTY)
86
+ process.stderr.write(`\x1b]9;4;0\x07`);
84
87
  },
85
88
  indeterminate() {
86
- process.stderr.write(`\x1b]9;4;3\x07`);
89
+ if (isTTY)
90
+ process.stderr.write(`\x1b]9;4;3\x07`);
87
91
  },
88
92
  error() {
89
- process.stderr.write(`\x1b]9;4;2\x07`);
93
+ if (isTTY)
94
+ process.stderr.write(`\x1b]9;4;2\x07`);
90
95
  },
91
96
  };
92
97
  // Format seconds into human-readable ETA
@@ -398,7 +403,7 @@ async function updateCollections() {
398
403
  process.exit(1);
399
404
  }
400
405
  }
401
- await indexFiles(col.pwd, col.glob_pattern, col.name, true);
406
+ await indexFiles(col.pwd, col.glob_pattern, col.name, true, yamlCol?.ignore);
402
407
  console.log("");
403
408
  }
404
409
  // Check if any documents need embedding (show once at end)
@@ -1103,6 +1108,9 @@ function collectionList() {
1103
1108
  const excludeTag = excluded ? ` ${c.yellow}[excluded]${c.reset}` : '';
1104
1109
  console.log(`${c.cyan}${coll.name}${c.reset} ${c.dim}(qmd://${coll.name}/)${c.reset}${excludeTag}`);
1105
1110
  console.log(` ${c.dim}Pattern:${c.reset} ${coll.glob_pattern}`);
1111
+ if (yamlColl?.ignore?.length) {
1112
+ console.log(` ${c.dim}Ignore:${c.reset} ${yamlColl.ignore.join(', ')}`);
1113
+ }
1106
1114
  console.log(` ${c.dim}Files:${c.reset} ${coll.active_count}`);
1107
1115
  console.log(` ${c.dim}Updated:${c.reset} ${timeAgo}`);
1108
1116
  console.log();
@@ -1138,7 +1146,8 @@ async function collectionAdd(pwd, globPattern, name) {
1138
1146
  addCollection(collName, pwd, globPattern);
1139
1147
  // Create the collection and index files
1140
1148
  console.log(`Creating collection '${collName}'...`);
1141
- await indexFiles(pwd, globPattern, collName);
1149
+ const newColl = getCollectionFromYaml(collName);
1150
+ await indexFiles(pwd, globPattern, collName, false, newColl?.ignore);
1142
1151
  console.log(`${c.green}✓${c.reset} Collection '${collName}' created successfully`);
1143
1152
  }
1144
1153
  function collectionRemove(name) {
@@ -1179,7 +1188,7 @@ function collectionRename(oldName, newName) {
1179
1188
  console.log(`${c.green}✓${c.reset} Renamed collection '${oldName}' to '${newName}'`);
1180
1189
  console.log(` Virtual paths updated: ${c.cyan}qmd://${oldName}/${c.reset} → ${c.cyan}qmd://${newName}/${c.reset}`);
1181
1190
  }
1182
- async function indexFiles(pwd, globPattern = DEFAULT_GLOB, collectionName, suppressEmbedNotice = false) {
1191
+ async function indexFiles(pwd, globPattern = DEFAULT_GLOB, collectionName, suppressEmbedNotice = false, ignorePatterns) {
1183
1192
  const db = getDb();
1184
1193
  const resolvedPwd = pwd || getPwd();
1185
1194
  const now = new Date().toISOString();
@@ -1192,12 +1201,16 @@ async function indexFiles(pwd, globPattern = DEFAULT_GLOB, collectionName, suppr
1192
1201
  }
1193
1202
  console.log(`Collection: ${resolvedPwd} (${globPattern})`);
1194
1203
  progress.indeterminate();
1204
+ const allIgnore = [
1205
+ ...excludeDirs.map(d => `**/${d}/**`),
1206
+ ...(ignorePatterns || []),
1207
+ ];
1195
1208
  const allFiles = await fastGlob(globPattern, {
1196
1209
  cwd: resolvedPwd,
1197
1210
  onlyFiles: true,
1198
1211
  followSymbolicLinks: false,
1199
1212
  dot: false,
1200
- ignore: excludeDirs.map(d => `**/${d}/**`),
1213
+ ignore: allIgnore,
1201
1214
  });
1202
1215
  // Filter hidden files/folders (dot: false handles top-level but not nested)
1203
1216
  const files = allFiles.filter(file => {
@@ -1205,11 +1218,11 @@ async function indexFiles(pwd, globPattern = DEFAULT_GLOB, collectionName, suppr
1205
1218
  return !parts.some(part => part.startsWith("."));
1206
1219
  });
1207
1220
  const total = files.length;
1208
- if (total === 0) {
1221
+ const hasNoFiles = total === 0;
1222
+ if (hasNoFiles) {
1209
1223
  progress.clear();
1210
1224
  console.log("No files found matching pattern.");
1211
- closeDb();
1212
- return;
1225
+ // Continue so the deactivation pass can mark previously indexed docs as inactive.
1213
1226
  }
1214
1227
  let indexed = 0, updated = 0, unchanged = 0, processed = 0;
1215
1228
  const seenPaths = new Set();
@@ -1218,7 +1231,16 @@ async function indexFiles(pwd, globPattern = DEFAULT_GLOB, collectionName, suppr
1218
1231
  const filepath = getRealPath(resolve(resolvedPwd, relativeFile));
1219
1232
  const path = handelize(relativeFile); // Normalize path for token-friendliness
1220
1233
  seenPaths.add(path);
1221
- const content = readFileSync(filepath, "utf-8");
1234
+ let content;
1235
+ try {
1236
+ content = readFileSync(filepath, "utf-8");
1237
+ }
1238
+ catch (err) {
1239
+ // Skip files that can't be read (e.g. iCloud evicted files returning EAGAIN)
1240
+ processed++;
1241
+ progress.set((processed / total) * 100);
1242
+ continue;
1243
+ }
1222
1244
  // Skip empty files - nothing useful to index
1223
1245
  if (!content.trim()) {
1224
1246
  processed++;
@@ -1260,7 +1282,8 @@ async function indexFiles(pwd, globPattern = DEFAULT_GLOB, collectionName, suppr
1260
1282
  const rate = processed / elapsed;
1261
1283
  const remaining = (total - processed) / rate;
1262
1284
  const eta = processed > 2 ? ` ETA: ${formatETA(remaining)}` : "";
1263
- process.stderr.write(`\rIndexing: ${processed}/${total}${eta} `);
1285
+ if (isTTY)
1286
+ process.stderr.write(`\rIndexing: ${processed}/${total}${eta} `);
1264
1287
  }
1265
1288
  // Deactivate documents in this collection that no longer exist
1266
1289
  const allActive = getActiveDocumentPaths(db, collectionName);
@@ -1423,7 +1446,8 @@ async function vectorIndex(model = DEFAULT_EMBED_MODEL, force = false) {
1423
1446
  const throughput = `${formatBytes(bytesPerSec)}/s`;
1424
1447
  const eta = elapsed > 2 ? formatETA(etaSec) : "...";
1425
1448
  const errStr = errors > 0 ? ` ${c.yellow}${errors} err${c.reset}` : "";
1426
- process.stderr.write(`\r${c.cyan}${bar}${c.reset} ${c.bold}${percentStr}%${c.reset} ${c.dim}${chunksEmbedded}/${totalChunks}${c.reset}${errStr} ${c.dim}${throughput} ETA ${eta}${c.reset} `);
1449
+ if (isTTY)
1450
+ process.stderr.write(`\r${c.cyan}${bar}${c.reset} ${c.bold}${percentStr}%${c.reset} ${c.dim}${chunksEmbedded}/${totalChunks}${c.reset}${errStr} ${c.dim}${throughput} ETA ${eta}${c.reset} `);
1427
1451
  }
1428
1452
  progress.clear();
1429
1453
  cursor.show();
@@ -1496,6 +1520,9 @@ function formatScore(score) {
1496
1520
  return `${c.yellow}${pct}%${c.reset}`;
1497
1521
  return `${c.dim}${pct}%${c.reset}`;
1498
1522
  }
1523
+ function formatExplainNumber(value) {
1524
+ return value.toFixed(4);
1525
+ }
1499
1526
  // Shorten directory path for display - relative to $HOME (used for context paths, not documents)
1500
1527
  function shortPath(dirpath) {
1501
1528
  const home = homedir();
@@ -1504,10 +1531,33 @@ function shortPath(dirpath) {
1504
1531
  }
1505
1532
  return dirpath;
1506
1533
  }
1534
+ // Emit format-safe empty output for search commands.
1535
+ function printEmptySearchResults(format, reason = "no_results") {
1536
+ if (format === "json") {
1537
+ console.log("[]");
1538
+ return;
1539
+ }
1540
+ if (format === "csv") {
1541
+ console.log("docid,score,file,title,context,line,snippet");
1542
+ return;
1543
+ }
1544
+ if (format === "xml") {
1545
+ console.log("<results></results>");
1546
+ return;
1547
+ }
1548
+ if (format === "md" || format === "files") {
1549
+ return;
1550
+ }
1551
+ if (reason === "min_score") {
1552
+ console.log("No results found above minimum score threshold.");
1553
+ return;
1554
+ }
1555
+ console.log("No results found.");
1556
+ }
1507
1557
  function outputResults(results, query, opts) {
1508
1558
  const filtered = results.filter(r => r.score >= opts.minScore).slice(0, opts.limit);
1509
1559
  if (filtered.length === 0) {
1510
- console.log("No results found above minimum score threshold.");
1560
+ printEmptySearchResults(opts.format, "min_score");
1511
1561
  return;
1512
1562
  }
1513
1563
  // Helper to create qmd:// URI from displayPath
@@ -1532,6 +1582,7 @@ function outputResults(results, query, opts) {
1532
1582
  ...(row.context && { context: row.context }),
1533
1583
  ...(body && { body }),
1534
1584
  ...(snippet && { snippet }),
1585
+ ...(opts.explain && row.explain && { explain: row.explain }),
1535
1586
  };
1536
1587
  });
1537
1588
  console.log(JSON.stringify(output, null, 2));
@@ -1570,6 +1621,27 @@ function outputResults(results, query, opts) {
1570
1621
  // Line 4: Score
1571
1622
  const score = formatScore(row.score);
1572
1623
  console.log(`Score: ${c.bold}${score}${c.reset}`);
1624
+ if (opts.explain && row.explain) {
1625
+ const explain = row.explain;
1626
+ const ftsScores = explain.ftsScores.length > 0
1627
+ ? explain.ftsScores.map(formatExplainNumber).join(", ")
1628
+ : "none";
1629
+ const vecScores = explain.vectorScores.length > 0
1630
+ ? explain.vectorScores.map(formatExplainNumber).join(", ")
1631
+ : "none";
1632
+ const contribSummary = explain.rrf.contributions
1633
+ .slice()
1634
+ .sort((a, b) => b.rrfContribution - a.rrfContribution)
1635
+ .slice(0, 3)
1636
+ .map(c => `${c.source}/${c.queryType}#${c.rank}:${formatExplainNumber(c.rrfContribution)}`)
1637
+ .join(" | ");
1638
+ console.log(`${c.dim}Explain: fts=[${ftsScores}] vec=[${vecScores}]${c.reset}`);
1639
+ console.log(`${c.dim} RRF: total=${formatExplainNumber(explain.rrf.totalScore)} base=${formatExplainNumber(explain.rrf.baseScore)} bonus=${formatExplainNumber(explain.rrf.topRankBonus)} rank=${explain.rrf.rank}${c.reset}`);
1640
+ console.log(`${c.dim} Blend: ${Math.round(explain.rrf.weight * 100)}%*${formatExplainNumber(explain.rrf.positionScore)} + ${Math.round((1 - explain.rrf.weight) * 100)}%*${formatExplainNumber(explain.rerankScore)} = ${formatExplainNumber(explain.blendedScore)}${c.reset}`);
1641
+ if (contribSummary.length > 0) {
1642
+ console.log(`${c.dim} Top RRF contributions: ${contribSummary}${c.reset}`);
1643
+ }
1644
+ }
1573
1645
  console.log();
1574
1646
  // Snippet with highlighting (diff-style header included)
1575
1647
  let displaySnippet = opts.lineNumbers ? addLineNumbers(snippet, line) : snippet;
@@ -1735,12 +1807,7 @@ function search(query, opts) {
1735
1807
  }));
1736
1808
  closeDb();
1737
1809
  if (resultsWithContext.length === 0) {
1738
- if (opts.format === "json") {
1739
- console.log("[]");
1740
- }
1741
- else {
1742
- console.log("No results found.");
1743
- }
1810
+ printEmptySearchResults(opts.format);
1744
1811
  return;
1745
1812
  }
1746
1813
  outputResults(resultsWithContext, query, opts);
@@ -1789,12 +1856,7 @@ async function vectorSearch(query, opts, _model = DEFAULT_EMBED_MODEL) {
1789
1856
  }
1790
1857
  closeDb();
1791
1858
  if (results.length === 0) {
1792
- if (opts.format === "json") {
1793
- console.log("[]");
1794
- }
1795
- else {
1796
- console.log("No results found.");
1797
- }
1859
+ printEmptySearchResults(opts.format);
1798
1860
  return;
1799
1861
  }
1800
1862
  outputResults(results.map(r => ({
@@ -1835,6 +1897,8 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
1835
1897
  collections: singleCollection ? [singleCollection] : undefined,
1836
1898
  limit: opts.all ? 500 : (opts.limit || 10),
1837
1899
  minScore: opts.minScore || 0,
1900
+ candidateLimit: opts.candidateLimit,
1901
+ explain: !!opts.explain,
1838
1902
  hooks: {
1839
1903
  onEmbedStart: (count) => {
1840
1904
  process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 'query' : 'queries'}...${c.reset}`);
@@ -1859,6 +1923,8 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
1859
1923
  collection: singleCollection,
1860
1924
  limit: opts.all ? 500 : (opts.limit || 10),
1861
1925
  minScore: opts.minScore || 0,
1926
+ candidateLimit: opts.candidateLimit,
1927
+ explain: !!opts.explain,
1862
1928
  hooks: {
1863
1929
  onStrongSignal: (score) => {
1864
1930
  process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`);
@@ -1897,12 +1963,7 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
1897
1963
  }
1898
1964
  closeDb();
1899
1965
  if (results.length === 0) {
1900
- if (opts.format === "json") {
1901
- console.log("[]");
1902
- }
1903
- else {
1904
- console.log("No results found.");
1905
- }
1966
+ printEmptySearchResults(opts.format);
1906
1967
  return;
1907
1968
  }
1908
1969
  // Use first lex/vec query for output context, or original query
@@ -1919,6 +1980,7 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
1919
1980
  score: r.score,
1920
1981
  context: r.context,
1921
1982
  docid: r.docid,
1983
+ explain: r.explain,
1922
1984
  })), displayQuery, { ...opts, limit: results.length });
1923
1985
  }, { maxDuration: 10 * 60 * 1000, name: 'querySearch' });
1924
1986
  }
@@ -1947,6 +2009,7 @@ function parseCLI() {
1947
2009
  xml: { type: "boolean" },
1948
2010
  files: { type: "boolean" },
1949
2011
  json: { type: "boolean" },
2012
+ explain: { type: "boolean" },
1950
2013
  collection: { type: "string", short: "c", multiple: true }, // Filter by collection(s)
1951
2014
  // Collection options
1952
2015
  name: { type: "string" }, // collection name
@@ -1961,6 +2024,8 @@ function parseCLI() {
1961
2024
  from: { type: "string" }, // start line
1962
2025
  "max-bytes": { type: "string" }, // max bytes for multi-get
1963
2026
  "line-numbers": { type: "boolean" }, // add line numbers to output
2027
+ // Query options
2028
+ "candidate-limit": { type: "string", short: "C" },
1964
2029
  // MCP HTTP transport options
1965
2030
  http: { type: "boolean" },
1966
2031
  daemon: { type: "boolean" },
@@ -1999,6 +2064,8 @@ function parseCLI() {
1999
2064
  all: isAll,
2000
2065
  collection: values.collection,
2001
2066
  lineNumbers: !!values["line-numbers"],
2067
+ candidateLimit: values["candidate-limit"] ? parseInt(String(values["candidate-limit"]), 10) : undefined,
2068
+ explain: !!values.explain,
2002
2069
  };
2003
2070
  return {
2004
2071
  command: positionals[0] || "",
@@ -2094,7 +2161,9 @@ function showHelp() {
2094
2161
  console.log(" --all - Return all matches (pair with --min-score)");
2095
2162
  console.log(" --min-score <num> - Minimum similarity score");
2096
2163
  console.log(" --full - Output full document instead of snippet");
2164
+ console.log(" -C, --candidate-limit <n> - Max candidates to rerank (default 40, lower = faster)");
2097
2165
  console.log(" --line-numbers - Include line numbers in output");
2166
+ console.log(" --explain - Include retrieval score traces (query --json/CLI)");
2098
2167
  console.log(" --files | --json | --csv | --md | --xml - Output format");
2099
2168
  console.log(" -c, --collection <name> - Filter by one or more collections");
2100
2169
  console.log("");
package/dist/store.d.ts CHANGED
@@ -280,15 +280,6 @@ export type DocumentResult = {
280
280
  * Extract short docid from a full hash (first 6 characters).
281
281
  */
282
282
  export declare function getDocid(hash: string): string;
283
- /**
284
- * Handelize a filename to be more token-friendly.
285
- * - Convert triple underscore `___` to `/` (folder separator)
286
- * - Convert to lowercase
287
- * - Replace sequences of non-word chars (except /) with single dash
288
- * - Remove leading/trailing dashes from path segments
289
- * - Preserve folder structure (a/b/c/d.md stays structured)
290
- * - Preserve file extension
291
- */
292
283
  export declare function handelize(path: string): string;
293
284
  /**
294
285
  * Search result extends DocumentResult with score and source info
@@ -308,6 +299,38 @@ export type RankedResult = {
308
299
  body: string;
309
300
  score: number;
310
301
  };
302
+ export type RRFContributionTrace = {
303
+ listIndex: number;
304
+ source: "fts" | "vec";
305
+ queryType: "original" | "lex" | "vec" | "hyde";
306
+ query: string;
307
+ rank: number;
308
+ weight: number;
309
+ backendScore: number;
310
+ rrfContribution: number;
311
+ };
312
+ export type RRFScoreTrace = {
313
+ contributions: RRFContributionTrace[];
314
+ baseScore: number;
315
+ topRank: number;
316
+ topRankBonus: number;
317
+ totalScore: number;
318
+ };
319
+ export type HybridQueryExplain = {
320
+ ftsScores: number[];
321
+ vectorScores: number[];
322
+ rrf: {
323
+ rank: number;
324
+ positionScore: number;
325
+ weight: number;
326
+ baseScore: number;
327
+ topRankBonus: number;
328
+ totalScore: number;
329
+ contributions: RRFContributionTrace[];
330
+ };
331
+ rerankScore: number;
332
+ blendedScore: number;
333
+ };
311
334
  /**
312
335
  * Error result when document is not found
313
336
  */
@@ -584,6 +607,10 @@ export declare function rerank(query: string, documents: {
584
607
  score: number;
585
608
  }[]>;
586
609
  export declare function reciprocalRankFusion(resultLists: RankedResult[][], weights?: number[], k?: number): RankedResult[];
610
+ /**
611
+ * Build per-document RRF contribution traces for explain/debug output.
612
+ */
613
+ export declare function buildRrfTrace(resultLists: RankedResult[][], weights?: number[], listMeta?: RankedListMeta[], k?: number): Map<string, RRFScoreTrace>;
587
614
  /**
588
615
  * Find a document by filename/path, docid (#hash), or with fuzzy matching.
589
616
  * Returns document metadata without body by default.
@@ -654,6 +681,7 @@ export interface HybridQueryOptions {
654
681
  limit?: number;
655
682
  minScore?: number;
656
683
  candidateLimit?: number;
684
+ explain?: boolean;
657
685
  hooks?: SearchHooks;
658
686
  }
659
687
  export interface HybridQueryResult {
@@ -666,7 +694,13 @@ export interface HybridQueryResult {
666
694
  score: number;
667
695
  context: string | null;
668
696
  docid: string;
697
+ explain?: HybridQueryExplain;
669
698
  }
699
+ export type RankedListMeta = {
700
+ source: "fts" | "vec";
701
+ queryType: "original" | "lex" | "vec" | "hyde";
702
+ query: string;
703
+ };
670
704
  /**
671
705
  * Hybrid search: BM25 + vector + query expansion + RRF + chunked reranking.
672
706
  *
@@ -723,6 +757,7 @@ export interface StructuredSearchOptions {
723
757
  limit?: number;
724
758
  minScore?: number;
725
759
  candidateLimit?: number;
760
+ explain?: boolean;
726
761
  /** Future: domain intent hint for routing/boosting */
727
762
  intent?: string;
728
763
  hooks?: SearchHooks;