@tobilu/qmd 1.1.5 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/store.js CHANGED
@@ -13,9 +13,10 @@
13
13
  import { openDatabase, loadSqliteVec } from "./db.js";
14
14
  import picomatch from "picomatch";
15
15
  import { createHash } from "crypto";
16
- import { realpathSync, statSync, mkdirSync } from "node:fs";
17
- import { LlamaCpp, getDefaultLlamaCpp, formatQueryForEmbedding, formatDocForEmbedding, } from "./llm.js";
18
- import { findContextForPath as collectionsFindContextForPath, addContext as collectionsAddContext, removeContext as collectionsRemoveContext, listAllContexts as collectionsListAllContexts, getCollection, listCollections as collectionsListCollections, addCollection as collectionsAddCollection, removeCollection as collectionsRemoveCollection, renameCollection as collectionsRenameCollection, setGlobalContext, loadConfig as collectionsLoadConfig, } from "./collections.js";
16
+ import { readFileSync, realpathSync, statSync, mkdirSync } from "node:fs";
17
+ // Note: node:path's resolve is not imported; we export our own cross-platform resolve().
18
+ import fastGlob from "fast-glob";
19
+ import { LlamaCpp, getDefaultLlamaCpp, formatQueryForEmbedding, formatDocForEmbedding, withLLMSessionForLlm, } from "./llm.js";
19
20
  // =============================================================================
20
21
  // Configuration
21
22
  // =============================================================================
@@ -35,6 +36,13 @@ export const CHUNK_OVERLAP_CHARS = CHUNK_OVERLAP_TOKENS * 4; // 540 chars
35
36
  // Search window for finding optimal break points (in tokens, ~200 tokens)
36
37
  export const CHUNK_WINDOW_TOKENS = 200;
37
38
  export const CHUNK_WINDOW_CHARS = CHUNK_WINDOW_TOKENS * 4; // 800 chars
39
+ /**
40
+ * Get the LlamaCpp instance for a store — prefers the store's own instance,
41
+ * falls back to the global singleton.
42
+ */
43
+ function getLlm(store) {
44
+ return store.llm ?? getDefaultLlamaCpp();
45
+ }
38
46
  /**
39
47
  * Patterns for detecting break points in markdown documents.
40
48
  * Higher scores indicate better places to split.
@@ -442,8 +450,8 @@ export function resolveVirtualPath(db, virtualPath) {
442
450
  * Returns null if the file is not in any indexed collection.
443
451
  */
444
452
  export function toVirtualPath(db, absolutePath) {
445
- // Get all collections from YAML config
446
- const collections = collectionsListCollections();
453
+ // Get all collections from DB
454
+ const collections = getStoreCollections(db);
447
455
  // Find which collection this absolute path belongs to
448
456
  for (const coll of collections) {
449
457
  if (absolutePath.startsWith(coll.path + '/') || absolutePath === coll.path) {
@@ -556,6 +564,25 @@ function initializeDatabase(db) {
556
564
  embedded_at TEXT NOT NULL,
557
565
  PRIMARY KEY (hash, seq)
558
566
  )
567
+ `);
568
+ // Store collections — makes the DB self-contained (no external config needed)
569
+ db.exec(`
570
+ CREATE TABLE IF NOT EXISTS store_collections (
571
+ name TEXT PRIMARY KEY,
572
+ path TEXT NOT NULL,
573
+ pattern TEXT NOT NULL DEFAULT '**/*.md',
574
+ ignore_patterns TEXT,
575
+ include_by_default INTEGER DEFAULT 1,
576
+ update_command TEXT,
577
+ context TEXT
578
+ )
579
+ `);
580
+ // Store config — key-value metadata (e.g. config_hash for sync optimization)
581
+ db.exec(`
582
+ CREATE TABLE IF NOT EXISTS store_config (
583
+ key TEXT PRIMARY KEY,
584
+ value TEXT
585
+ )
559
586
  `);
560
587
  // FTS - index filepath (collection/path), title, and content
561
588
  db.exec(`
@@ -600,6 +627,141 @@ function initializeDatabase(db) {
600
627
  END
601
628
  `);
602
629
  }
630
+ function rowToNamedCollection(row) {
631
+ return {
632
+ name: row.name,
633
+ path: row.path,
634
+ pattern: row.pattern,
635
+ ...(row.ignore_patterns ? { ignore: JSON.parse(row.ignore_patterns) } : {}),
636
+ ...(row.include_by_default === 0 ? { includeByDefault: false } : {}),
637
+ ...(row.update_command ? { update: row.update_command } : {}),
638
+ ...(row.context ? { context: JSON.parse(row.context) } : {}),
639
+ };
640
+ }
641
+ export function getStoreCollections(db) {
642
+ const rows = db.prepare(`SELECT * FROM store_collections`).all();
643
+ return rows.map(rowToNamedCollection);
644
+ }
645
+ export function getStoreCollection(db, name) {
646
+ const row = db.prepare(`SELECT * FROM store_collections WHERE name = ?`).get(name);
647
+ if (row == null)
648
+ return null;
649
+ return rowToNamedCollection(row);
650
+ }
651
+ export function getStoreGlobalContext(db) {
652
+ const row = db.prepare(`SELECT value FROM store_config WHERE key = 'global_context'`).get();
653
+ if (row == null)
654
+ return undefined;
655
+ return row.value || undefined;
656
+ }
657
+ export function getStoreContexts(db) {
658
+ const results = [];
659
+ // Global context
660
+ const globalCtx = getStoreGlobalContext(db);
661
+ if (globalCtx) {
662
+ results.push({ collection: "*", path: "/", context: globalCtx });
663
+ }
664
+ // Collection contexts
665
+ const rows = db.prepare(`SELECT name, context FROM store_collections WHERE context IS NOT NULL`).all();
666
+ for (const row of rows) {
667
+ const ctxMap = JSON.parse(row.context);
668
+ for (const [path, context] of Object.entries(ctxMap)) {
669
+ results.push({ collection: row.name, path, context });
670
+ }
671
+ }
672
+ return results;
673
+ }
674
+ export function upsertStoreCollection(db, name, collection) {
675
+ db.prepare(`
676
+ INSERT INTO store_collections (name, path, pattern, ignore_patterns, include_by_default, update_command, context)
677
+ VALUES (?, ?, ?, ?, ?, ?, ?)
678
+ ON CONFLICT(name) DO UPDATE SET
679
+ path = excluded.path,
680
+ pattern = excluded.pattern,
681
+ ignore_patterns = excluded.ignore_patterns,
682
+ include_by_default = excluded.include_by_default,
683
+ update_command = excluded.update_command,
684
+ context = excluded.context
685
+ `).run(name, collection.path, collection.pattern || '**/*.md', collection.ignore ? JSON.stringify(collection.ignore) : null, collection.includeByDefault === false ? 0 : 1, collection.update || null, collection.context ? JSON.stringify(collection.context) : null);
686
+ }
687
+ export function deleteStoreCollection(db, name) {
688
+ const result = db.prepare(`DELETE FROM store_collections WHERE name = ?`).run(name);
689
+ return result.changes > 0;
690
+ }
691
+ export function renameStoreCollection(db, oldName, newName) {
692
+ // Check target doesn't exist
693
+ const existing = db.prepare(`SELECT name FROM store_collections WHERE name = ?`).get(newName);
694
+ if (existing != null) {
695
+ throw new Error(`Collection '${newName}' already exists`);
696
+ }
697
+ const result = db.prepare(`UPDATE store_collections SET name = ? WHERE name = ?`).run(newName, oldName);
698
+ return result.changes > 0;
699
+ }
700
+ export function updateStoreContext(db, collectionName, path, text) {
701
+ const row = db.prepare(`SELECT context FROM store_collections WHERE name = ?`).get(collectionName);
702
+ if (row == null)
703
+ return false;
704
+ const ctxMap = row.context ? JSON.parse(row.context) : {};
705
+ ctxMap[path] = text;
706
+ db.prepare(`UPDATE store_collections SET context = ? WHERE name = ?`).run(JSON.stringify(ctxMap), collectionName);
707
+ return true;
708
+ }
709
+ export function removeStoreContext(db, collectionName, path) {
710
+ const row = db.prepare(`SELECT context FROM store_collections WHERE name = ?`).get(collectionName);
711
+ if (row == null)
712
+ return false;
713
+ if (!row.context)
714
+ return false;
715
+ const ctxMap = JSON.parse(row.context);
716
+ if (!(path in ctxMap))
717
+ return false;
718
+ delete ctxMap[path];
719
+ const newCtx = Object.keys(ctxMap).length > 0 ? JSON.stringify(ctxMap) : null;
720
+ db.prepare(`UPDATE store_collections SET context = ? WHERE name = ?`).run(newCtx, collectionName);
721
+ return true;
722
+ }
723
+ export function setStoreGlobalContext(db, value) {
724
+ if (value === undefined) {
725
+ db.prepare(`DELETE FROM store_config WHERE key = 'global_context'`).run();
726
+ }
727
+ else {
728
+ db.prepare(`INSERT INTO store_config (key, value) VALUES ('global_context', ?) ON CONFLICT(key) DO UPDATE SET value = excluded.value`).run(value);
729
+ }
730
+ }
731
+ /**
732
+ * Sync external config (YAML/inline) into SQLite store_collections.
733
+ * External config always wins. Skips sync if config hash hasn't changed.
734
+ */
735
+ export function syncConfigToDb(db, config) {
736
+ // Check config hash — skip sync if unchanged
737
+ const configJson = JSON.stringify(config);
738
+ const hash = createHash('sha256').update(configJson).digest('hex');
739
+ const existingHash = db.prepare(`SELECT value FROM store_config WHERE key = 'config_hash'`).get();
740
+ if (existingHash != null && existingHash.value === hash) {
741
+ return; // Config unchanged, skip sync
742
+ }
743
+ // Sync collections
744
+ const configNames = new Set(Object.keys(config.collections));
745
+ for (const [name, coll] of Object.entries(config.collections)) {
746
+ upsertStoreCollection(db, name, coll);
747
+ }
748
+ // Delete collections not in config
749
+ const dbCollections = db.prepare(`SELECT name FROM store_collections`).all();
750
+ for (const row of dbCollections) {
751
+ if (!configNames.has(row.name)) {
752
+ db.prepare(`DELETE FROM store_collections WHERE name = ?`).run(row.name);
753
+ }
754
+ }
755
+ // Sync global context
756
+ if (config.global_context !== undefined) {
757
+ setStoreGlobalContext(db, config.global_context);
758
+ }
759
+ else {
760
+ setStoreGlobalContext(db, undefined);
761
+ }
762
+ // Save config hash
763
+ db.prepare(`INSERT INTO store_config (key, value) VALUES ('config_hash', ?) ON CONFLICT(key) DO UPDATE SET value = excluded.value`).run(hash);
764
+ }
603
765
  export function isSqliteVecAvailable() {
604
766
  return _sqliteVecAvailable === true;
605
767
  }
@@ -620,6 +782,199 @@ function ensureVecTableInternal(db, dimensions) {
620
782
  }
621
783
  db.exec(`CREATE VIRTUAL TABLE vectors_vec USING vec0(hash_seq TEXT PRIMARY KEY, embedding float[${dimensions}] distance_metric=cosine)`);
622
784
  }
785
+ /**
786
+ * Re-index a single collection by scanning the filesystem and updating the database.
787
+ * Library-safe — no console output, no db lifecycle management (it does read files and write to the database).
788
+ */
789
+ export async function reindexCollection(store, collectionPath, globPattern, collectionName, options) {
790
+ const db = store.db;
791
+ const now = new Date().toISOString();
792
+ const excludeDirs = ["node_modules", ".git", ".cache", "vendor", "dist", "build"];
793
+ const allIgnore = [
794
+ ...excludeDirs.map(d => `**/${d}/**`),
795
+ ...(options?.ignorePatterns || []),
796
+ ];
797
+ const allFiles = await fastGlob(globPattern, {
798
+ cwd: collectionPath,
799
+ onlyFiles: true,
800
+ followSymbolicLinks: false,
801
+ dot: false,
802
+ ignore: allIgnore,
803
+ });
804
+ // Filter hidden files/folders
805
+ const files = allFiles.filter(file => {
806
+ const parts = file.split("/");
807
+ return !parts.some(part => part.startsWith("."));
808
+ });
809
+ const total = files.length;
810
+ let indexed = 0, updated = 0, unchanged = 0, processed = 0;
811
+ const seenPaths = new Set();
812
+ for (const relativeFile of files) {
813
+ const filepath = getRealPath(resolve(collectionPath, relativeFile));
814
+ const path = handelize(relativeFile);
815
+ seenPaths.add(path);
816
+ let content;
817
+ try {
818
+ content = readFileSync(filepath, "utf-8");
819
+ }
820
+ catch {
821
+ processed++;
822
+ options?.onProgress?.({ file: relativeFile, current: processed, total });
823
+ continue;
824
+ }
825
+ if (!content.trim()) {
826
+ processed++;
827
+ continue;
828
+ }
829
+ const hash = await hashContent(content);
830
+ const title = extractTitle(content, relativeFile);
831
+ const existing = findActiveDocument(db, collectionName, path);
832
+ if (existing) {
833
+ if (existing.hash === hash) {
834
+ if (existing.title !== title) {
835
+ updateDocumentTitle(db, existing.id, title, now);
836
+ updated++;
837
+ }
838
+ else {
839
+ unchanged++;
840
+ }
841
+ }
842
+ else {
843
+ insertContent(db, hash, content, now);
844
+ const stat = statSync(filepath);
845
+ updateDocument(db, existing.id, title, hash, stat ? new Date(stat.mtime).toISOString() : now);
846
+ updated++;
847
+ }
848
+ }
849
+ else {
850
+ indexed++;
851
+ insertContent(db, hash, content, now);
852
+ const stat = statSync(filepath);
853
+ insertDocument(db, collectionName, path, title, hash, stat ? new Date(stat.birthtime).toISOString() : now, stat ? new Date(stat.mtime).toISOString() : now);
854
+ }
855
+ processed++;
856
+ options?.onProgress?.({ file: relativeFile, current: processed, total });
857
+ }
858
+ // Deactivate documents that no longer exist
859
+ const allActive = getActiveDocumentPaths(db, collectionName);
860
+ let removed = 0;
861
+ for (const path of allActive) {
862
+ if (!seenPaths.has(path)) {
863
+ deactivateDocument(db, collectionName, path);
864
+ removed++;
865
+ }
866
+ }
867
+ const orphanedCleaned = cleanupOrphanedContent(db);
868
+ return { indexed, updated, unchanged, removed, orphanedCleaned };
869
+ }
870
+ /**
871
+ * Generate vector embeddings for documents that need them.
872
+ * Library-safe — no console output, no db lifecycle management (it does call the LLM and write embeddings to the database).
873
+ * Uses the store's LlamaCpp instance if set, otherwise the global singleton.
874
+ */
875
+ export async function generateEmbeddings(store, options) {
876
+ const db = store.db;
877
+ const model = options?.model ?? DEFAULT_EMBED_MODEL;
878
+ const now = new Date().toISOString();
879
+ if (options?.force) {
880
+ clearAllEmbeddings(db);
881
+ }
882
+ const hashesToEmbed = getHashesForEmbedding(db);
883
+ if (hashesToEmbed.length === 0) {
884
+ return { docsProcessed: 0, chunksEmbedded: 0, errors: 0, durationMs: 0 };
885
+ }
886
+ const allChunks = [];
887
+ for (const item of hashesToEmbed) {
888
+ const encoder = new TextEncoder();
889
+ const bodyBytes = encoder.encode(item.body).length;
890
+ if (bodyBytes === 0)
891
+ continue;
892
+ const title = extractTitle(item.body, item.path);
893
+ const chunks = await chunkDocumentByTokens(item.body);
894
+ for (let seq = 0; seq < chunks.length; seq++) {
895
+ allChunks.push({
896
+ hash: item.hash,
897
+ title,
898
+ text: chunks[seq].text,
899
+ seq,
900
+ pos: chunks[seq].pos,
901
+ tokens: chunks[seq].tokens,
902
+ bytes: encoder.encode(chunks[seq].text).length,
903
+ });
904
+ }
905
+ }
906
+ if (allChunks.length === 0) {
907
+ return { docsProcessed: 0, chunksEmbedded: 0, errors: 0, durationMs: 0 };
908
+ }
909
+ const totalBytes = allChunks.reduce((sum, chk) => sum + chk.bytes, 0);
910
+ const totalChunks = allChunks.length;
911
+ const totalDocs = hashesToEmbed.length;
912
+ const startTime = Date.now();
913
+ // Use store's LlamaCpp or global singleton, wrapped in a session
914
+ const llm = getLlm(store);
915
+ const sessionOptions = { maxDuration: 30 * 60 * 1000, name: 'generateEmbeddings' };
916
+ // Create a session manager for this llm instance
917
+ const result = await withLLMSessionForLlm(llm, async (session) => {
918
+ // Get embedding dimensions from first chunk
919
+ const firstChunk = allChunks[0];
920
+ const firstText = formatDocForEmbedding(firstChunk.text, firstChunk.title);
921
+ const firstResult = await session.embed(firstText);
922
+ if (!firstResult) {
923
+ throw new Error("Failed to get embedding dimensions from first chunk");
924
+ }
925
+ store.ensureVecTable(firstResult.embedding.length);
926
+ let chunksEmbedded = 0, errors = 0, bytesProcessed = 0;
927
+ const BATCH_SIZE = 32;
928
+ for (let batchStart = 0; batchStart < allChunks.length; batchStart += BATCH_SIZE) {
929
+ const batchEnd = Math.min(batchStart + BATCH_SIZE, allChunks.length);
930
+ const batch = allChunks.slice(batchStart, batchEnd);
931
+ const texts = batch.map(chunk => formatDocForEmbedding(chunk.text, chunk.title));
932
+ try {
933
+ const embeddings = await session.embedBatch(texts);
934
+ for (let i = 0; i < batch.length; i++) {
935
+ const chunk = batch[i];
936
+ const embedding = embeddings[i];
937
+ if (embedding) {
938
+ insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(embedding.embedding), model, now);
939
+ chunksEmbedded++;
940
+ }
941
+ else {
942
+ errors++;
943
+ }
944
+ bytesProcessed += chunk.bytes;
945
+ }
946
+ }
947
+ catch {
948
+ // Batch failed — try individual embeddings as fallback
949
+ for (const chunk of batch) {
950
+ try {
951
+ const text = formatDocForEmbedding(chunk.text, chunk.title);
952
+ const result = await session.embed(text);
953
+ if (result) {
954
+ insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now);
955
+ chunksEmbedded++;
956
+ }
957
+ else {
958
+ errors++;
959
+ }
960
+ }
961
+ catch {
962
+ errors++;
963
+ }
964
+ bytesProcessed += chunk.bytes;
965
+ }
966
+ }
967
+ options?.onProgress?.({ chunksEmbedded, totalChunks, bytesProcessed, totalBytes, errors });
968
+ }
969
+ return { chunksEmbedded, errors };
970
+ }, sessionOptions);
971
+ return {
972
+ docsProcessed: totalDocs,
973
+ chunksEmbedded: result.chunksEmbedded,
974
+ errors: result.errors,
975
+ durationMs: Date.now() - startTime,
976
+ };
977
+ }
623
978
  /**
624
979
  * Create a new store instance with the given database path.
625
980
  * If no path is provided, uses the default path (~/.cache/qmd/index.sqlite).
@@ -631,7 +986,7 @@ export function createStore(dbPath) {
631
986
  const resolvedPath = dbPath || getDefaultDbPath();
632
987
  const db = openDatabase(resolvedPath);
633
988
  initializeDatabase(db);
634
- return {
989
+ const store = {
635
990
  db,
636
991
  dbPath: resolvedPath,
637
992
  close: () => db.close(),
@@ -667,8 +1022,8 @@ export function createStore(dbPath) {
667
1022
  searchFTS: (query, limit, collectionName) => searchFTS(db, query, limit, collectionName),
668
1023
  searchVec: (query, model, limit, collectionName, session, precomputedEmbedding) => searchVec(db, query, model, limit, collectionName, session, precomputedEmbedding),
669
1024
  // Query expansion & reranking
670
- expandQuery: (query, model, intent) => expandQuery(query, model, db, intent),
671
- rerank: (query, documents, model, intent) => rerank(query, documents, model, db, intent),
1025
+ expandQuery: (query, model, intent) => expandQuery(query, model, db, intent, store.llm),
1026
+ rerank: (query, documents, model, intent) => rerank(query, documents, model, db, intent, store.llm),
672
1027
  // Document retrieval
673
1028
  findDocument: (filename, options) => findDocument(db, filename, options),
674
1029
  getDocumentBody: (doc, fromLine, maxLines) => getDocumentBody(db, doc, fromLine, maxLines),
@@ -690,6 +1045,7 @@ export function createStore(dbPath) {
690
1045
  clearAllEmbeddings: () => clearAllEmbeddings(db),
691
1046
  insertEmbedding: (hash, seq, pos, embedding, model, embeddedAt) => insertEmbedding(db, hash, seq, pos, embedding, model, embeddedAt),
692
1047
  };
1048
+ return store;
693
1049
  }
694
1050
  /**
695
1051
  * Extract short docid from a full hash (first 6 characters).
@@ -1188,15 +1544,15 @@ export function matchFilesByGlob(db, pattern) {
1188
1544
  * @returns Context string or null if no context is defined
1189
1545
  */
1190
1546
  export function getContextForPath(db, collectionName, path) {
1191
- const config = collectionsLoadConfig();
1192
- const coll = getCollection(collectionName);
1547
+ const coll = getStoreCollection(db, collectionName);
1193
1548
  if (!coll)
1194
1549
  return null;
1195
1550
  // Collect ALL matching contexts (global + all path prefixes)
1196
1551
  const contexts = [];
1197
1552
  // Add global context if present
1198
- if (config.global_context) {
1199
- contexts.push(config.global_context);
1553
+ const globalCtx = getStoreGlobalContext(db);
1554
+ if (globalCtx) {
1555
+ contexts.push(globalCtx);
1200
1556
  }
1201
1557
  // Add all matching path contexts (from most general to most specific)
1202
1558
  if (coll.context) {
@@ -1221,15 +1577,14 @@ export function getContextForPath(db, collectionName, path) {
1221
1577
  }
1222
1578
  /**
1223
1579
  * Get context for a file path (virtual or filesystem).
1224
- * Resolves the collection and relative path using the YAML collections config.
1580
+ * Resolves the collection and relative path from the DB store_collections table.
1225
1581
  */
1226
1582
  export function getContextForFile(db, filepath) {
1227
1583
  // Handle undefined or null filepath
1228
1584
  if (!filepath)
1229
1585
  return null;
1230
- // Get all collections from YAML config
1231
- const collections = collectionsListCollections();
1232
- const config = collectionsLoadConfig();
1586
+ // Get all collections from DB
1587
+ const collections = getStoreCollections(db);
1233
1588
  // Parse virtual path format: qmd://collection/path
1234
1589
  let collectionName = null;
1235
1590
  let relativePath = null;
@@ -1256,8 +1611,8 @@ export function getContextForFile(db, filepath) {
1256
1611
  if (!collectionName || relativePath === null)
1257
1612
  return null;
1258
1613
  }
1259
- // Get the collection from config
1260
- const coll = getCollection(collectionName);
1614
+ // Get the collection from DB
1615
+ const coll = getStoreCollection(db, collectionName);
1261
1616
  if (!coll)
1262
1617
  return null;
1263
1618
  // Verify this document exists in the database
@@ -1272,8 +1627,9 @@ export function getContextForFile(db, filepath) {
1272
1627
  // Collect ALL matching contexts (global + all path prefixes)
1273
1628
  const contexts = [];
1274
1629
  // Add global context if present
1275
- if (config.global_context) {
1276
- contexts.push(config.global_context);
1630
+ const globalCtx = getStoreGlobalContext(db);
1631
+ if (globalCtx) {
1632
+ contexts.push(globalCtx);
1277
1633
  }
1278
1634
  // Add all matching path contexts (from most general to most specific)
1279
1635
  if (coll.context) {
@@ -1297,11 +1653,10 @@ export function getContextForFile(db, filepath) {
1297
1653
  return contexts.length > 0 ? contexts.join('\n\n') : null;
1298
1654
  }
1299
1655
  /**
1300
- * Get collection by name from YAML config.
1301
- * Returns collection metadata from ~/.config/qmd/index.yml
1656
+ * Get collection by name from DB store_collections table.
1302
1657
  */
1303
1658
  export function getCollectionByName(db, name) {
1304
- const collection = getCollection(name);
1659
+ const collection = getStoreCollection(db, name);
1305
1660
  if (!collection)
1306
1661
  return null;
1307
1662
  return {
@@ -1312,10 +1667,10 @@ export function getCollectionByName(db, name) {
1312
1667
  }
1313
1668
  /**
1314
1669
  * List all collections with document counts from database.
1315
- * Merges YAML config with database statistics.
1670
+ * Merges store_collections config with database statistics.
1316
1671
  */
1317
1672
  export function listCollections(db) {
1318
- const collections = collectionsListCollections();
1673
+ const collections = getStoreCollections(db);
1319
1674
  // Get document counts from database for each collection
1320
1675
  const result = collections.map(coll => {
1321
1676
  const stats = db.prepare(`
@@ -1333,6 +1688,7 @@ export function listCollections(db) {
1333
1688
  doc_count: stats?.doc_count || 0,
1334
1689
  active_count: stats?.active_count || 0,
1335
1690
  last_modified: stats?.last_modified || null,
1691
+ includeByDefault: coll.includeByDefault !== false,
1336
1692
  };
1337
1693
  });
1338
1694
  return result;
@@ -1349,8 +1705,8 @@ export function removeCollection(db, collectionName) {
1349
1705
  DELETE FROM content
1350
1706
  WHERE hash NOT IN (SELECT DISTINCT hash FROM documents WHERE active = 1)
1351
1707
  `).run();
1352
- // Remove from YAML config (returns true if found and removed)
1353
- collectionsRemoveCollection(collectionName);
1708
+ // Remove from store_collections
1709
+ deleteStoreCollection(db, collectionName);
1354
1710
  return {
1355
1711
  deletedDocs: docResult.changes,
1356
1712
  cleanedHashes: cleanupResult.changes
@@ -1364,8 +1720,8 @@ export function renameCollection(db, oldName, newName) {
1364
1720
  // Update all documents with the new collection name in database
1365
1721
  db.prepare(`UPDATE documents SET collection = ? WHERE collection = ?`)
1366
1722
  .run(newName, oldName);
1367
- // Rename in YAML config
1368
- collectionsRenameCollection(oldName, newName);
1723
+ // Rename in store_collections
1724
+ renameStoreCollection(db, oldName, newName);
1369
1725
  }
1370
1726
  // =============================================================================
1371
1727
  // Context Management Operations
@@ -1379,16 +1735,16 @@ export function insertContext(db, collectionId, pathPrefix, context) {
1379
1735
  if (!coll) {
1380
1736
  throw new Error(`Collection with id ${collectionId} not found`);
1381
1737
  }
1382
- // Use collections.ts to add context
1383
- collectionsAddContext(coll.name, pathPrefix, context);
1738
+ // Add context to store_collections
1739
+ updateStoreContext(db, coll.name, pathPrefix, context);
1384
1740
  }
1385
1741
  /**
1386
1742
  * Delete a context for a specific collection and path prefix.
1387
1743
  * Returns the number of contexts deleted.
1388
1744
  */
1389
1745
  export function deleteContext(db, collectionName, pathPrefix) {
1390
- // Use collections.ts to remove context
1391
- const success = collectionsRemoveContext(collectionName, pathPrefix);
1746
+ // Remove context from store_collections
1747
+ const success = removeStoreContext(db, collectionName, pathPrefix);
1392
1748
  return success ? 1 : 0;
1393
1749
  }
1394
1750
  /**
@@ -1398,12 +1754,12 @@ export function deleteContext(db, collectionName, pathPrefix) {
1398
1754
  export function deleteGlobalContexts(db) {
1399
1755
  let deletedCount = 0;
1400
1756
  // Remove global context
1401
- setGlobalContext(undefined);
1757
+ setStoreGlobalContext(db, undefined);
1402
1758
  deletedCount++;
1403
1759
  // Remove root context (empty string) from all collections
1404
- const collections = collectionsListCollections();
1760
+ const collections = getStoreCollections(db);
1405
1761
  for (const coll of collections) {
1406
- const success = collectionsRemoveContext(coll.name, '');
1762
+ const success = removeStoreContext(db, coll.name, '');
1407
1763
  if (success) {
1408
1764
  deletedCount++;
1409
1765
  }
@@ -1415,7 +1771,7 @@ export function deleteGlobalContexts(db) {
1415
1771
  * Returns contexts ordered by collection name, then by path prefix length (longest first).
1416
1772
  */
1417
1773
  export function listPathContexts(db) {
1418
- const allContexts = collectionsListAllContexts();
1774
+ const allContexts = getStoreContexts(db);
1419
1775
  // Convert to expected format and sort
1420
1776
  return allContexts.map(ctx => ({
1421
1777
  collection_name: ctx.collection,
@@ -1438,7 +1794,7 @@ export function listPathContexts(db) {
1438
1794
  * Get all collections (name only - from the DB store_collections table).
1439
1795
  */
1440
1796
  export function getAllCollections(db) {
1441
- const collections = collectionsListCollections();
1797
+ const collections = getStoreCollections(db);
1442
1798
  return collections.map(c => ({ name: c.name }));
1443
1799
  }
1444
1800
  /**
@@ -1446,11 +1802,11 @@ export function getAllCollections(db) {
1446
1802
  * Returns collections that have no context entries at all (not even root context).
1447
1803
  */
1448
1804
  export function getCollectionsWithoutContext(db) {
1449
- // Get all collections from YAML config
1450
- const yamlCollections = collectionsListCollections();
1805
+ // Get all collections from DB
1806
+ const allCollections = getStoreCollections(db);
1451
1807
  // Filter to those without context
1452
1808
  const collectionsWithoutContext = [];
1453
- for (const coll of yamlCollections) {
1809
+ for (const coll of allCollections) {
1454
1810
  // Check if collection has any context
1455
1811
  if (!coll.context || Object.keys(coll.context).length === 0) {
1456
1812
  // Get doc count from database
@@ -1478,13 +1834,13 @@ export function getTopLevelPathsWithoutContext(db, collectionName) {
1478
1834
  SELECT DISTINCT path FROM documents
1479
1835
  WHERE collection = ? AND active = 1
1480
1836
  `).all(collectionName);
1481
- // Get existing contexts for this collection from YAML
1482
- const yamlColl = getCollection(collectionName);
1483
- if (!yamlColl)
1837
+ // Get existing contexts for this collection from DB
1838
+ const dbColl = getStoreCollection(db, collectionName);
1839
+ if (!dbColl)
1484
1840
  return [];
1485
1841
  const contextPrefixes = new Set();
1486
- if (yamlColl.context) {
1487
- for (const prefix of Object.keys(yamlColl.context)) {
1842
+ if (dbColl.context) {
1843
+ for (const prefix of Object.keys(dbColl.context)) {
1488
1844
  contextPrefixes.add(prefix);
1489
1845
  }
1490
1846
  }
@@ -1754,12 +2110,12 @@ export async function searchVec(db, query, model, limit = 20, collectionName, se
1754
2110
  // =============================================================================
1755
2111
  // Embeddings
1756
2112
  // =============================================================================
1757
- async function getEmbedding(text, model, isQuery, session) {
2113
+ async function getEmbedding(text, model, isQuery, session, llmOverride) {
1758
2114
  // Format text using the appropriate prompt template
1759
2115
  const formattedText = isQuery ? formatQueryForEmbedding(text, model) : formatDocForEmbedding(text, undefined, model);
1760
2116
  const result = session
1761
2117
  ? await session.embed(formattedText, { model, isQuery })
1762
- : await getDefaultLlamaCpp().embed(formattedText, { model, isQuery });
2118
+ : await (llmOverride ?? getDefaultLlamaCpp()).embed(formattedText, { model, isQuery });
1763
2119
  return result?.embedding || null;
1764
2120
  }
1765
2121
  /**
@@ -1798,26 +2154,33 @@ export function insertEmbedding(db, hash, seq, pos, embedding, model, embeddedAt
1798
2154
  // =============================================================================
1799
2155
  // Query expansion
1800
2156
  // =============================================================================
1801
- export async function expandQuery(query, model = DEFAULT_QUERY_MODEL, db, intent) {
2157
+ export async function expandQuery(query, model = DEFAULT_QUERY_MODEL, db, intent, llmOverride) {
1802
2158
  // Check cache first — stored as JSON preserving types
1803
2159
  const cacheKey = getCacheKey("expandQuery", { query, model, ...(intent && { intent }) });
1804
2160
  const cached = getCachedResult(db, cacheKey);
1805
2161
  if (cached) {
1806
2162
  try {
1807
- return JSON.parse(cached);
2163
+ const parsed = JSON.parse(cached);
2164
+ // Migrate old cache format: { type, text } → { type, query }
2165
+ if (parsed.length > 0 && parsed[0].query) {
2166
+ return parsed;
2167
+ }
2168
+ else if (parsed.length > 0 && parsed[0].text) {
2169
+ return parsed.map((r) => ({ type: r.type, query: r.text }));
2170
+ }
1808
2171
  }
1809
2172
  catch {
1810
2173
  // Old cache format (pre-typed, newline-separated text) — re-expand
1811
2174
  }
1812
2175
  }
1813
- const llm = getDefaultLlamaCpp();
2176
+ const llm = llmOverride ?? getDefaultLlamaCpp();
1814
2177
  // Note: LlamaCpp uses hardcoded model, model parameter is ignored
1815
2178
  const results = await llm.expandQuery(query, { intent });
1816
2179
  // Map Queryable[] → ExpandedQuery[] (same shape, decoupled from llm.ts internals).
1817
2180
  // Filter out entries that duplicate the original query text.
1818
2181
  const expanded = results
1819
2182
  .filter(r => r.text !== query)
1820
- .map(r => ({ type: r.type, text: r.text }));
2183
+ .map(r => ({ type: r.type, query: r.text }));
1821
2184
  if (expanded.length > 0) {
1822
2185
  setCachedResult(db, cacheKey, JSON.stringify(expanded));
1823
2186
  }
@@ -1826,7 +2189,7 @@ export async function expandQuery(query, model = DEFAULT_QUERY_MODEL, db, intent
1826
2189
  // =============================================================================
1827
2190
  // Reranking
1828
2191
  // =============================================================================
1829
- export async function rerank(query, documents, model = DEFAULT_RERANK_MODEL, db, intent) {
2192
+ export async function rerank(query, documents, model = DEFAULT_RERANK_MODEL, db, intent, llmOverride) {
1830
2193
  // Prepend intent to rerank query so the reranker scores with domain context
1831
2194
  const rerankQuery = intent ? `${intent}\n\n${query}` : query;
1832
2195
  const cachedResults = new Map();
@@ -1849,7 +2212,7 @@ export async function rerank(query, documents, model = DEFAULT_RERANK_MODEL, db,
1849
2212
  }
1850
2213
  // Rerank uncached documents using LlamaCpp
1851
2214
  if (uncachedDocsByChunk.size > 0) {
1852
- const llm = getDefaultLlamaCpp();
2215
+ const llm = llmOverride ?? getDefaultLlamaCpp();
1853
2216
  const uncachedDocs = [...uncachedDocsByChunk.values()];
1854
2217
  const rerankResult = await llm.rerank(rerankQuery, uncachedDocs, { model });
1855
2218
  // Cache results by chunk text so identical chunks across files are scored once.
@@ -2026,9 +2389,9 @@ export function findDocument(db, filename, options = {}) {
2026
2389
  LIMIT 1
2027
2390
  `).get(`%${filepath}`);
2028
2391
  }
2029
- // Try to match by absolute path (requires looking up collection paths from YAML)
2392
+ // Try to match by absolute path (requires looking up collection paths from DB)
2030
2393
  if (!doc && !filepath.startsWith('qmd://')) {
2031
- const collections = collectionsListCollections();
2394
+ const collections = getStoreCollections(db);
2032
2395
  for (const coll of collections) {
2033
2396
  let relativePath = null;
2034
2397
  // If filepath is absolute and starts with collection path, extract relative part
@@ -2088,9 +2451,9 @@ export function getDocumentBody(db, doc, fromLine, maxLines) {
2088
2451
  WHERE 'qmd://' || d.collection || '/' || d.path = ? AND d.active = 1
2089
2452
  `).get(filepath);
2090
2453
  }
2091
- // Try absolute path by looking up in YAML collections
2454
+ // Try absolute path by looking up in DB store_collections
2092
2455
  if (!row) {
2093
- const collections = collectionsListCollections();
2456
+ const collections = getStoreCollections(db);
2094
2457
  for (const coll of collections) {
2095
2458
  if (filepath.startsWith(coll.path + '/')) {
2096
2459
  const relativePath = filepath.slice(coll.path.length + 1);
@@ -2219,23 +2582,27 @@ export function findDocuments(db, pattern, options = {}) {
2219
2582
  // Status
2220
2583
  // =============================================================================
2221
2584
  export function getStatus(db) {
2222
- // Load collections from YAML
2223
- const yamlCollections = collectionsListCollections();
2224
- // Get document counts and last update times for each collection
2225
- const collections = yamlCollections.map(col => {
2226
- const stats = db.prepare(`
2227
- SELECT
2228
- COUNT(*) as active_count,
2229
- MAX(modified_at) as last_doc_update
2230
- FROM documents
2231
- WHERE collection = ? AND active = 1
2232
- `).get(col.name);
2585
+ // DB is source of truth for collections — config provides supplementary metadata
2586
+ const dbCollections = db.prepare(`
2587
+ SELECT
2588
+ collection as name,
2589
+ COUNT(*) as active_count,
2590
+ MAX(modified_at) as last_doc_update
2591
+ FROM documents
2592
+ WHERE active = 1
2593
+ GROUP BY collection
2594
+ `).all();
2595
+ // Build a lookup from store_collections for path/pattern metadata
2596
+ const storeCollections = getStoreCollections(db);
2597
+ const configLookup = new Map(storeCollections.map(c => [c.name, { path: c.path, pattern: c.pattern }]));
2598
+ const collections = dbCollections.map(row => {
2599
+ const config = configLookup.get(row.name);
2233
2600
  return {
2234
- name: col.name,
2235
- path: col.path,
2236
- pattern: col.pattern,
2237
- documents: stats.active_count,
2238
- lastUpdated: stats.last_doc_update || new Date().toISOString(),
2601
+ name: row.name,
2602
+ path: config?.path ?? null,
2603
+ pattern: config?.pattern ?? null,
2604
+ documents: row.active_count,
2605
+ lastUpdated: row.last_doc_update || new Date().toISOString(),
2239
2606
  };
2240
2607
  });
2241
2608
  // Sort by last update time (most recent first)
@@ -2382,6 +2749,7 @@ export async function hybridQuery(store, query, options) {
2382
2749
  const collection = options?.collection;
2383
2750
  const explain = options?.explain ?? false;
2384
2751
  const intent = options?.intent;
2752
+ const skipRerank = options?.skipRerank ?? false;
2385
2753
  const hooks = options?.hooks;
2386
2754
  const rankedLists = [];
2387
2755
  const rankedListMeta = [];
@@ -2425,7 +2793,7 @@ export async function hybridQuery(store, query, options) {
2425
2793
  // 3a: Run FTS for all lex expansions right away (no LLM needed)
2426
2794
  for (const q of expanded) {
2427
2795
  if (q.type === 'lex') {
2428
- const ftsResults = store.searchFTS(q.text, 20, collection);
2796
+ const ftsResults = store.searchFTS(q.query, 20, collection);
2429
2797
  if (ftsResults.length > 0) {
2430
2798
  for (const r of ftsResults)
2431
2799
  docidMap.set(r.filepath, r.docid);
@@ -2433,7 +2801,7 @@ export async function hybridQuery(store, query, options) {
2433
2801
  file: r.filepath, displayPath: r.displayPath,
2434
2802
  title: r.title, body: r.body || "", score: r.score,
2435
2803
  })));
2436
- rankedListMeta.push({ source: "fts", queryType: "lex", query: q.text });
2804
+ rankedListMeta.push({ source: "fts", queryType: "lex", query: q.query });
2437
2805
  }
2438
2806
  }
2439
2807
  }
@@ -2444,11 +2812,11 @@ export async function hybridQuery(store, query, options) {
2444
2812
  ];
2445
2813
  for (const q of expanded) {
2446
2814
  if (q.type === 'vec' || q.type === 'hyde') {
2447
- vecQueries.push({ text: q.text, queryType: q.type });
2815
+ vecQueries.push({ text: q.query, queryType: q.type });
2448
2816
  }
2449
2817
  }
2450
2818
  // Batch embed all vector queries in a single call
2451
- const llm = getDefaultLlamaCpp();
2819
+ const llm = getLlm(store);
2452
2820
  const textsToEmbed = vecQueries.map(q => formatQueryForEmbedding(q.text));
2453
2821
  hooks?.onEmbedStart?.(textsToEmbed.length);
2454
2822
  const embedStart = Date.now();
@@ -2486,7 +2854,6 @@ export async function hybridQuery(store, query, options) {
2486
2854
  // Reranking full bodies is O(tokens) — the critical perf lesson that motivated this refactor.
2487
2855
  const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length > 2);
2488
2856
  const intentTerms = intent ? extractIntentTerms(intent) : [];
2489
- const chunksToRerank = [];
2490
2857
  const docChunkMap = new Map();
2491
2858
  for (const cand of candidates) {
2492
2859
  const chunks = chunkDocument(cand.body);
@@ -2508,10 +2875,65 @@ export async function hybridQuery(store, query, options) {
2508
2875
  bestIdx = i;
2509
2876
  }
2510
2877
  }
2511
- chunksToRerank.push({ file: cand.file, text: chunks[bestIdx].text });
2512
2878
  docChunkMap.set(cand.file, { chunks, bestIdx });
2513
2879
  }
2880
+ if (skipRerank) {
2881
+ // Skip LLM reranking — return candidates scored by RRF only
2882
+ const seenFiles = new Set();
2883
+ return candidates
2884
+ .map((cand, i) => {
2885
+ const chunkInfo = docChunkMap.get(cand.file);
2886
+ const bestIdx = chunkInfo?.bestIdx ?? 0;
2887
+ const bestChunk = chunkInfo?.chunks[bestIdx]?.text || cand.body || "";
2888
+ const bestChunkPos = chunkInfo?.chunks[bestIdx]?.pos || 0;
2889
+ const rrfRank = i + 1;
2890
+ const rrfScore = 1 / rrfRank;
2891
+ const trace = rrfTraceByFile?.get(cand.file);
2892
+ const explainData = explain ? {
2893
+ ftsScores: trace?.contributions.filter(c => c.source === "fts").map(c => c.backendScore) ?? [],
2894
+ vectorScores: trace?.contributions.filter(c => c.source === "vec").map(c => c.backendScore) ?? [],
2895
+ rrf: {
2896
+ rank: rrfRank,
2897
+ positionScore: rrfScore,
2898
+ weight: 1.0,
2899
+ baseScore: trace?.baseScore ?? 0,
2900
+ topRankBonus: trace?.topRankBonus ?? 0,
2901
+ totalScore: trace?.totalScore ?? 0,
2902
+ contributions: trace?.contributions ?? [],
2903
+ },
2904
+ rerankScore: 0,
2905
+ blendedScore: rrfScore,
2906
+ } : undefined;
2907
+ return {
2908
+ file: cand.file,
2909
+ displayPath: cand.displayPath,
2910
+ title: cand.title,
2911
+ body: cand.body,
2912
+ bestChunk,
2913
+ bestChunkPos,
2914
+ score: rrfScore,
2915
+ context: store.getContextForFile(cand.file),
2916
+ docid: docidMap.get(cand.file) || "",
2917
+ ...(explainData ? { explain: explainData } : {}),
2918
+ };
2919
+ })
2920
+ .filter(r => {
2921
+ if (seenFiles.has(r.file))
2922
+ return false;
2923
+ seenFiles.add(r.file);
2924
+ return true;
2925
+ })
2926
+ .filter(r => r.score >= minScore)
2927
+ .slice(0, limit);
2928
+ }
2514
2929
  // Step 6: Rerank chunks (NOT full bodies)
2930
+ const chunksToRerank = [];
2931
+ for (const cand of candidates) {
2932
+ const chunkInfo = docChunkMap.get(cand.file);
2933
+ if (chunkInfo) {
2934
+ chunksToRerank.push({ file: cand.file, text: chunkInfo.chunks[chunkInfo.bestIdx].text });
2935
+ }
2936
+ }
2515
2937
  hooks?.onRerankStart?.(chunksToRerank.length);
2516
2938
  const rerankStart = Date.now();
2517
2939
  const reranked = await store.rerank(query, chunksToRerank, undefined, intent);
@@ -2602,7 +3024,7 @@ export async function vectorSearchQuery(store, query, options) {
2602
3024
  const vecExpanded = allExpanded.filter(q => q.type !== 'lex');
2603
3025
  options?.hooks?.onExpand?.(query, vecExpanded, Date.now() - expandStart);
2604
3026
  // Run original + vec/hyde expanded through vector, sequentially — concurrent embed() hangs
2605
- const queryTexts = [query, ...vecExpanded.map(q => q.text)];
3027
+ const queryTexts = [query, ...vecExpanded.map(q => q.query)];
2606
3028
  const allResults = new Map();
2607
3029
  for (const q of queryTexts) {
2608
3030
  const vecResults = await store.searchVec(q, DEFAULT_EMBED_MODEL, limit, collection);
@@ -2650,6 +3072,7 @@ export async function structuredSearch(store, searches, options) {
2650
3072
  const candidateLimit = options?.candidateLimit ?? RERANK_CANDIDATE_LIMIT;
2651
3073
  const explain = options?.explain ?? false;
2652
3074
  const intent = options?.intent;
3075
+ const skipRerank = options?.skipRerank ?? false;
2653
3076
  const hooks = options?.hooks;
2654
3077
  const collections = options?.collections;
2655
3078
  if (searches.length === 0)
@@ -2704,7 +3127,7 @@ export async function structuredSearch(store, searches, options) {
2704
3127
  if (hasVectors) {
2705
3128
  const vecSearches = searches.filter((s) => s.type === 'vec' || s.type === 'hyde');
2706
3129
  if (vecSearches.length > 0) {
2707
- const llm = getDefaultLlamaCpp();
3130
+ const llm = getLlm(store);
2708
3131
  const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query));
2709
3132
  hooks?.onEmbedStart?.(textsToEmbed.length);
2710
3133
  const embedStart = Date.now();
@@ -2750,7 +3173,6 @@ export async function structuredSearch(store, searches, options) {
2750
3173
  || searches[0]?.query || "";
2751
3174
  const queryTerms = primaryQuery.toLowerCase().split(/\s+/).filter(t => t.length > 2);
2752
3175
  const intentTerms = intent ? extractIntentTerms(intent) : [];
2753
- const chunksToRerank = [];
2754
3176
  const docChunkMap = new Map();
2755
3177
  for (const cand of candidates) {
2756
3178
  const chunks = chunkDocument(cand.body);
@@ -2772,10 +3194,65 @@ export async function structuredSearch(store, searches, options) {
2772
3194
  bestIdx = i;
2773
3195
  }
2774
3196
  }
2775
- chunksToRerank.push({ file: cand.file, text: chunks[bestIdx].text });
2776
3197
  docChunkMap.set(cand.file, { chunks, bestIdx });
2777
3198
  }
3199
+ if (skipRerank) {
3200
+ // Skip LLM reranking — return candidates scored by RRF only
3201
+ const seenFiles = new Set();
3202
+ return candidates
3203
+ .map((cand, i) => {
3204
+ const chunkInfo = docChunkMap.get(cand.file);
3205
+ const bestIdx = chunkInfo?.bestIdx ?? 0;
3206
+ const bestChunk = chunkInfo?.chunks[bestIdx]?.text || cand.body || "";
3207
+ const bestChunkPos = chunkInfo?.chunks[bestIdx]?.pos || 0;
3208
+ const rrfRank = i + 1;
3209
+ const rrfScore = 1 / rrfRank;
3210
+ const trace = rrfTraceByFile?.get(cand.file);
3211
+ const explainData = explain ? {
3212
+ ftsScores: trace?.contributions.filter(c => c.source === "fts").map(c => c.backendScore) ?? [],
3213
+ vectorScores: trace?.contributions.filter(c => c.source === "vec").map(c => c.backendScore) ?? [],
3214
+ rrf: {
3215
+ rank: rrfRank,
3216
+ positionScore: rrfScore,
3217
+ weight: 1.0,
3218
+ baseScore: trace?.baseScore ?? 0,
3219
+ topRankBonus: trace?.topRankBonus ?? 0,
3220
+ totalScore: trace?.totalScore ?? 0,
3221
+ contributions: trace?.contributions ?? [],
3222
+ },
3223
+ rerankScore: 0,
3224
+ blendedScore: rrfScore,
3225
+ } : undefined;
3226
+ return {
3227
+ file: cand.file,
3228
+ displayPath: cand.displayPath,
3229
+ title: cand.title,
3230
+ body: cand.body,
3231
+ bestChunk,
3232
+ bestChunkPos,
3233
+ score: rrfScore,
3234
+ context: store.getContextForFile(cand.file),
3235
+ docid: docidMap.get(cand.file) || "",
3236
+ ...(explainData ? { explain: explainData } : {}),
3237
+ };
3238
+ })
3239
+ .filter(r => {
3240
+ if (seenFiles.has(r.file))
3241
+ return false;
3242
+ seenFiles.add(r.file);
3243
+ return true;
3244
+ })
3245
+ .filter(r => r.score >= minScore)
3246
+ .slice(0, limit);
3247
+ }
2778
3248
  // Step 5: Rerank chunks
3249
+ const chunksToRerank = [];
3250
+ for (const cand of candidates) {
3251
+ const chunkInfo = docChunkMap.get(cand.file);
3252
+ if (chunkInfo) {
3253
+ chunksToRerank.push({ file: cand.file, text: chunkInfo.chunks[chunkInfo.bestIdx].text });
3254
+ }
3255
+ }
2779
3256
  hooks?.onRerankStart?.(chunksToRerank.length);
2780
3257
  const rerankStart2 = Date.now();
2781
3258
  const reranked = await store.rerank(primaryQuery, chunksToRerank, undefined, intent);