@tobilu/qmd 2.1.0 → 2.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/store.js CHANGED
@@ -16,18 +16,21 @@ import { createHash } from "crypto";
16
16
  import { readFileSync, realpathSync, statSync, mkdirSync } from "node:fs";
17
17
  // Note: node:path resolve is not imported — we export our own cross-platform resolve()
18
18
  import fastGlob from "fast-glob";
19
- import { LlamaCpp, getDefaultLlamaCpp, formatQueryForEmbedding, formatDocForEmbedding, withLLMSessionForLlm, } from "./llm.js";
19
+ import { qmdHomedir } from "./paths.js";
20
+ import { LlamaCpp, getDefaultLlamaCpp, formatQueryForEmbedding, formatDocForEmbedding, withLLMSessionForLlm, DEFAULT_EMBED_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, } from "./llm.js";
20
21
  // =============================================================================
21
22
  // Configuration
22
23
  // =============================================================================
23
- const HOME = process.env.HOME || "/tmp";
24
- export const DEFAULT_EMBED_MODEL = "embeddinggemma";
25
- export const DEFAULT_RERANK_MODEL = "ExpedientFalcon/qwen3-reranker:0.6b-q8_0";
26
- export const DEFAULT_QUERY_MODEL = "Qwen/Qwen3-1.7B";
24
+ export const DEFAULT_EMBED_MODEL = DEFAULT_EMBED_MODEL_URI;
25
+ export const DEFAULT_RERANK_MODEL = DEFAULT_RERANK_MODEL_URI;
26
+ export const DEFAULT_QUERY_MODEL = DEFAULT_GENERATE_MODEL_URI;
27
27
  export const DEFAULT_GLOB = "**/*.md";
28
28
  export const DEFAULT_MULTI_GET_MAX_BYTES = 10 * 1024; // 10KB
29
29
  export const DEFAULT_EMBED_MAX_DOCS_PER_BATCH = 64;
30
30
  export const DEFAULT_EMBED_MAX_BATCH_BYTES = 64 * 1024 * 1024; // 64MB
31
+ const EMBED_FINGERPRINT_PROBE_QUERY = "__qmd_embedding_query_probe__";
32
+ const EMBED_FINGERPRINT_PROBE_TITLE = "__qmd_embedding_title_probe__";
33
+ const EMBED_FINGERPRINT_PROBE_DOC = "__qmd_embedding_document_probe__";
31
34
  // Chunking: 900 tokens per chunk with 15% overlap
32
35
  // Increased from 800 to accommodate smart chunking finding natural break points
33
36
  export const CHUNK_SIZE_TOKENS = 900;
@@ -38,6 +41,16 @@ export const CHUNK_OVERLAP_CHARS = CHUNK_OVERLAP_TOKENS * 4; // 540 chars
38
41
  // Search window for finding optimal break points (in tokens, ~200 tokens)
39
42
  export const CHUNK_WINDOW_TOKENS = 200;
40
43
  export const CHUNK_WINDOW_CHARS = CHUNK_WINDOW_TOKENS * 4; // 800 chars
44
+ export function getEmbeddingFingerprint(model = DEFAULT_EMBED_MODEL) {
45
+ const significant = [
46
+ `model:${model}`,
47
+ `query:${formatQueryForEmbedding(EMBED_FINGERPRINT_PROBE_QUERY, model)}`,
48
+ `doc:${formatDocForEmbedding(EMBED_FINGERPRINT_PROBE_DOC, EMBED_FINGERPRINT_PROBE_TITLE, model)}`,
49
+ `chunk_tokens:${CHUNK_SIZE_TOKENS}`,
50
+ `chunk_overlap_tokens:${CHUNK_OVERLAP_TOKENS}`,
51
+ ].join("\n");
52
+ return createHash("sha256").update(significant).digest("hex").slice(0, 6);
53
+ }
41
54
  /**
42
55
  * Get the LlamaCpp instance for a store — prefers the store's own instance,
43
56
  * falls back to the global singleton.
@@ -228,7 +241,7 @@ export const RERANK_CANDIDATE_LIMIT = 40;
228
241
  // Path utilities
229
242
  // =============================================================================
230
243
  export function homedir() {
231
- return HOME;
244
+ return qmdHomedir();
232
245
  }
233
246
  /**
234
247
  * Check if a path is absolute.
@@ -468,21 +481,25 @@ export function normalizeVirtualPath(input) {
468
481
  export function parseVirtualPath(virtualPath) {
469
482
  // Normalize the path first
470
483
  const normalized = normalizeVirtualPath(virtualPath);
484
+ const [pathPart = normalized, queryString = ""] = normalized.split("?");
471
485
  // Match: qmd://collection-name[/optional-path]
472
486
  // Allows: qmd://name, qmd://name/, qmd://name/path
473
- const match = normalized.match(/^qmd:\/\/([^\/]+)\/?(.*)$/);
487
+ const match = pathPart.match(/^qmd:\/\/([^\/]+)\/?(.*)$/);
474
488
  if (!match?.[1])
475
489
  return null;
490
+ const indexName = new URLSearchParams(queryString).get("index")?.trim() || undefined;
476
491
  return {
477
492
  collectionName: match[1],
478
493
  path: match[2] ?? '', // Empty string for collection root
494
+ ...(indexName ? { indexName } : {}),
479
495
  };
480
496
  }
481
497
  /**
482
498
  * Build a virtual path from collection name and relative path.
483
499
  */
484
- export function buildVirtualPath(collectionName, path) {
485
- return `qmd://${collectionName}/${path}`;
500
+ export function buildVirtualPath(collectionName, path, indexName) {
501
+ const base = `qmd://${collectionName}/${path}`;
502
+ return indexName ? `${base}?index=${encodeURIComponent(indexName)}` : base;
486
503
  }
487
504
  /**
488
505
  * Check if a path is explicitly a virtual path.
@@ -552,6 +569,7 @@ function createSqliteVecUnavailableError(reason) {
552
569
  "Install Homebrew SQLite so the sqlite-vec extension can be loaded, " +
553
570
  "and set BREW_PREFIX if Homebrew is installed in a non-standard location.");
554
571
  }
572
+ let _sqliteVecUnavailableReason = null;
555
573
  function getErrorMessage(err) {
556
574
  return err instanceof Error ? err.message : String(err);
557
575
  }
@@ -568,16 +586,76 @@ export function verifySqliteVecLoaded(db) {
568
586
  }
569
587
  }
570
588
  let _sqliteVecAvailable = null;
589
+ const CJK_CHAR_PATTERN = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]/u;
590
+ const CJK_RUN_PATTERN = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]+/gu;
591
+ const FTS_CJK_NORMALIZED_VERSION = "1";
592
+ /**
593
+ * FTS5's unicode61 tokenizer does not segment CJK text into searchable words.
594
+ * Normalize CJK runs by spacing every character so exact CJK queries can be
595
+ * translated into phrase queries while Latin text keeps the default tokenizer.
596
+ */
597
+ export function normalizeCjkForFTS(text) {
598
+ return text.replace(CJK_RUN_PATTERN, run => ` ${Array.from(run).join(' ')} `);
599
+ }
600
+ function containsCjk(text) {
601
+ return CJK_CHAR_PATTERN.test(text);
602
+ }
603
+ function sanitizeFTS5Phrase(phrase) {
604
+ return normalizeCjkForFTS(phrase)
605
+ .split(/\s+/)
606
+ .map(t => sanitizeFTS5Term(t))
607
+ .filter(t => t)
608
+ .join(' ');
609
+ }
610
+ function rebuildFTSForCjkNormalization(db) {
611
+ const version = db.prepare(`SELECT value FROM store_config WHERE key = 'fts_cjk_normalized_version'`).get();
612
+ if (version?.value === FTS_CJK_NORMALIZED_VERSION)
613
+ return;
614
+ try {
615
+ db.exec(`DELETE FROM documents_fts WHERE rowid >= 0`);
616
+ }
617
+ catch {
618
+ // Some older/corrupt FTS5 shadow-table states can reject bulk deletes even
619
+ // though reads still work. Recreate the virtual table; documents_fts is a
620
+ // derived index, so rebuilding it from documents/content is safe.
621
+ db.exec(`DROP TABLE IF EXISTS documents_fts`);
622
+ db.exec(`
623
+ CREATE VIRTUAL TABLE documents_fts USING fts5(
624
+ filepath, title, body,
625
+ tokenize='porter unicode61'
626
+ )
627
+ `);
628
+ }
629
+ const rows = db.prepare(`
630
+ SELECT d.id, d.collection, d.path, d.title, content.doc as body
631
+ FROM documents d
632
+ JOIN content ON content.hash = d.hash
633
+ WHERE d.active = 1
634
+ `).all();
635
+ const insert = db.prepare(`INSERT INTO documents_fts(rowid, filepath, title, body) VALUES (?, ?, ?, ?)`);
636
+ const rebuild = db.transaction(() => {
637
+ for (const row of rows) {
638
+ insert.run(row.id, normalizeCjkForFTS(`${row.collection}/${row.path}`), normalizeCjkForFTS(row.title), normalizeCjkForFTS(row.body));
639
+ }
640
+ });
641
+ rebuild();
642
+ db.prepare(`
643
+ INSERT OR REPLACE INTO store_config(key, value)
644
+ VALUES ('fts_cjk_normalized_version', ?)
645
+ `).run(FTS_CJK_NORMALIZED_VERSION);
646
+ }
571
647
  function initializeDatabase(db) {
572
648
  try {
573
649
  loadSqliteVec(db);
574
650
  verifySqliteVecLoaded(db);
575
651
  _sqliteVecAvailable = true;
652
+ _sqliteVecUnavailableReason = null;
576
653
  }
577
654
  catch (err) {
578
655
  // sqlite-vec is optional — vector search won't work but FTS is fine
579
656
  _sqliteVecAvailable = false;
580
- console.warn(getErrorMessage(err));
657
+ _sqliteVecUnavailableReason = getErrorMessage(err);
658
+ console.warn(_sqliteVecUnavailableReason);
581
659
  }
582
660
  db.exec("PRAGMA journal_mode = WAL");
583
661
  db.exec("PRAGMA foreign_keys = ON");
@@ -619,19 +697,16 @@ function initializeDatabase(db) {
619
697
  created_at TEXT NOT NULL
620
698
  )
621
699
  `);
622
- // Content vectors
623
- const cvInfo = db.prepare(`PRAGMA table_info(content_vectors)`).all();
624
- const hasSeqColumn = cvInfo.some(col => col.name === 'seq');
625
- if (cvInfo.length > 0 && !hasSeqColumn) {
626
- db.exec(`DROP TABLE IF EXISTS content_vectors`);
627
- db.exec(`DROP TABLE IF EXISTS vectors_vec`);
628
- }
700
+ // Content vectors. Avoid PRAGMA schema probes during startup; legacy vector
701
+ // columns are repaired lazily when a vector/embedding query first needs them.
629
702
  db.exec(`
630
703
  CREATE TABLE IF NOT EXISTS content_vectors (
631
704
  hash TEXT NOT NULL,
632
705
  seq INTEGER NOT NULL DEFAULT 0,
633
706
  pos INTEGER NOT NULL DEFAULT 0,
634
707
  model TEXT NOT NULL,
708
+ embed_fingerprint TEXT NOT NULL DEFAULT '',
709
+ total_chunks INTEGER NOT NULL DEFAULT 1,
635
710
  embedded_at TEXT NOT NULL,
636
711
  PRIMARY KEY (hash, seq)
637
712
  )
@@ -662,9 +737,12 @@ function initializeDatabase(db) {
662
737
  tokenize='porter unicode61'
663
738
  )
664
739
  `);
665
- // Triggers to keep FTS in sync
740
+ // Triggers keep FTS in sync for callers that write directly to documents.
741
+ // Production indexing paths rebuild entries in TypeScript so CJK text can be
742
+ // normalized before it reaches the unicode61 tokenizer.
743
+ db.exec(`DROP TRIGGER IF EXISTS documents_ai`);
666
744
  db.exec(`
667
- CREATE TRIGGER IF NOT EXISTS documents_ai AFTER INSERT ON documents
745
+ CREATE TRIGGER documents_ai AFTER INSERT ON documents
668
746
  WHEN new.active = 1
669
747
  BEGIN
670
748
  INSERT INTO documents_fts(rowid, filepath, title, body)
@@ -676,13 +754,15 @@ function initializeDatabase(db) {
676
754
  WHERE new.active = 1;
677
755
  END
678
756
  `);
757
+ db.exec(`DROP TRIGGER IF EXISTS documents_ad`);
679
758
  db.exec(`
680
- CREATE TRIGGER IF NOT EXISTS documents_ad AFTER DELETE ON documents BEGIN
759
+ CREATE TRIGGER documents_ad AFTER DELETE ON documents BEGIN
681
760
  DELETE FROM documents_fts WHERE rowid = old.id;
682
761
  END
683
762
  `);
763
+ db.exec(`DROP TRIGGER IF EXISTS documents_au`);
684
764
  db.exec(`
685
- CREATE TRIGGER IF NOT EXISTS documents_au AFTER UPDATE ON documents
765
+ CREATE TRIGGER documents_au AFTER UPDATE ON documents
686
766
  BEGIN
687
767
  -- Delete from FTS if no longer active
688
768
  DELETE FROM documents_fts WHERE rowid = old.id AND new.active = 0;
@@ -697,6 +777,7 @@ function initializeDatabase(db) {
697
777
  WHERE new.active = 1;
698
778
  END
699
779
  `);
780
+ rebuildFTSForCjkNormalization(db);
700
781
  }
701
782
  function rowToNamedCollection(row) {
702
783
  return {
@@ -838,7 +919,7 @@ export function isSqliteVecAvailable() {
838
919
  }
839
920
  function ensureVecTableInternal(db, dimensions) {
840
921
  if (!_sqliteVecAvailable) {
841
- throw new Error("sqlite-vec is not available. Vector operations require a SQLite build with extension loading support.");
922
+ throw createSqliteVecUnavailableError(_sqliteVecUnavailableReason ?? "vector operations require a SQLite build with extension loading support");
842
923
  }
843
924
  const tableInfo = db.prepare(`SELECT sql FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
844
925
  if (tableInfo) {
@@ -902,7 +983,7 @@ export async function reindexCollection(store, collectionPath, globPattern, coll
902
983
  }
903
984
  const hash = await hashContent(content);
904
985
  const title = extractTitle(content, relativeFile);
905
- const existing = findActiveDocument(db, collectionName, path);
986
+ const existing = findOrMigrateLegacyDocument(db, collectionName, path);
906
987
  if (existing) {
907
988
  if (existing.hash === hash) {
908
989
  if (existing.title !== title) {
@@ -955,16 +1036,74 @@ function resolveEmbedOptions(options) {
955
1036
  maxBatchBytes: validatePositiveIntegerOption("maxBatchBytes", options?.maxBatchBytes, DEFAULT_EMBED_MAX_BATCH_BYTES),
956
1037
  };
957
1038
  }
958
- function getPendingEmbeddingDocs(db) {
959
- return db.prepare(`
960
- SELECT d.hash, MIN(d.path) as path, length(CAST(c.doc AS BLOB)) as bytes
961
- FROM documents d
962
- JOIN content c ON d.hash = c.hash
963
- LEFT JOIN content_vectors v ON d.hash = v.hash AND v.seq = 0
964
- WHERE d.active = 1 AND v.hash IS NULL
965
- GROUP BY d.hash
966
- ORDER BY MIN(d.path)
967
- `).all();
1039
+ const CONTENT_VECTOR_DESIRED_COLUMNS = [
1040
+ { name: "seq", definition: "INTEGER NOT NULL DEFAULT 0" },
1041
+ { name: "pos", definition: "INTEGER NOT NULL DEFAULT 0" },
1042
+ { name: "model", definition: "TEXT NOT NULL DEFAULT ''" },
1043
+ { name: "embed_fingerprint", definition: "TEXT NOT NULL DEFAULT ''" },
1044
+ { name: "total_chunks", definition: "INTEGER NOT NULL DEFAULT 1" },
1045
+ { name: "embedded_at", definition: "TEXT NOT NULL DEFAULT ''" },
1046
+ ];
1047
+ function isContentVectorColumnError(error) {
1048
+ const message = error instanceof Error ? error.message : String(error);
1049
+ if (!/(no such column|has no column named)/i.test(message)) {
1050
+ return false;
1051
+ }
1052
+ return CONTENT_VECTOR_DESIRED_COLUMNS.some(col => message.includes(col.name));
1053
+ }
1054
+ function runContentVectorColumnRepairs(db) {
1055
+ for (const column of CONTENT_VECTOR_DESIRED_COLUMNS) {
1056
+ try {
1057
+ db.exec(`ALTER TABLE content_vectors ADD COLUMN ${column.name} ${column.definition}`);
1058
+ }
1059
+ catch (error) {
1060
+ const message = error instanceof Error ? error.message : String(error);
1061
+ // The repair series is intentionally idempotent: most columns should
1062
+ // already exist, and another caller may have repaired a missing column
1063
+ // between the failed query and this ALTER series.
1064
+ if (!message.includes("duplicate column name")) {
1065
+ throw error;
1066
+ }
1067
+ }
1068
+ }
1069
+ }
1070
+ function withLazyContentVectorMigration(db, operation) {
1071
+ let repaired = false;
1072
+ while (true) {
1073
+ try {
1074
+ return operation();
1075
+ }
1076
+ catch (error) {
1077
+ if (repaired || !isContentVectorColumnError(error)) {
1078
+ throw error;
1079
+ }
1080
+ runContentVectorColumnRepairs(db);
1081
+ repaired = true;
1082
+ }
1083
+ }
1084
+ }
1085
+ function getPendingEmbeddingDocs(db, collection, model = DEFAULT_EMBED_MODEL) {
1086
+ const collectionFilter = collection ? `AND d.collection = ?` : ``;
1087
+ const fingerprint = getEmbeddingFingerprint(model);
1088
+ return withLazyContentVectorMigration(db, () => {
1089
+ const stmt = db.prepare(`
1090
+ SELECT d.hash, MIN(d.path) as path, length(CAST(c.doc AS BLOB)) as bytes
1091
+ FROM documents d
1092
+ JOIN content c ON d.hash = c.hash
1093
+ LEFT JOIN (
1094
+ SELECT hash, model, COUNT(*) AS chunk_count, MAX(total_chunks) AS expected_chunks
1095
+ FROM content_vectors
1096
+ WHERE model = ? AND embed_fingerprint = ?
1097
+ GROUP BY hash, model, embed_fingerprint
1098
+ ) v ON d.hash = v.hash
1099
+ WHERE d.active = 1
1100
+ AND (v.hash IS NULL OR v.chunk_count < v.expected_chunks)
1101
+ ${collectionFilter}
1102
+ GROUP BY d.hash
1103
+ ORDER BY MIN(d.path)
1104
+ `);
1105
+ return (collection ? stmt.all(model, fingerprint, collection) : stmt.all(model, fingerprint));
1106
+ });
968
1107
  }
969
1108
  function buildEmbeddingBatches(docs, maxDocsPerBatch, maxBatchBytes) {
970
1109
  const batches = [];
@@ -1009,14 +1148,16 @@ function getEmbeddingDocsForBatch(db, batch) {
1009
1148
  */
1010
1149
  export async function generateEmbeddings(store, options) {
1011
1150
  const db = store.db;
1012
- const model = options?.model ?? DEFAULT_EMBED_MODEL;
1151
+ const llm = getLlm(store);
1152
+ const model = options?.model ?? llm.embedModelName ?? DEFAULT_EMBED_MODEL;
1153
+ const fingerprint = getEmbeddingFingerprint(model);
1013
1154
  const now = new Date().toISOString();
1014
1155
  const { maxDocsPerBatch, maxBatchBytes } = resolveEmbedOptions(options);
1015
1156
  const encoder = new TextEncoder();
1016
1157
  if (options?.force) {
1017
- clearAllEmbeddings(db);
1158
+ clearAllEmbeddings(db, options?.collection);
1018
1159
  }
1019
- const docsToEmbed = getPendingEmbeddingDocs(db);
1160
+ const docsToEmbed = getPendingEmbeddingDocs(db, options?.collection, model);
1020
1161
  if (docsToEmbed.length === 0) {
1021
1162
  return { docsProcessed: 0, chunksEmbedded: 0, errors: 0, durationMs: 0 };
1022
1163
  }
@@ -1024,16 +1165,88 @@ export async function generateEmbeddings(store, options) {
1024
1165
  const totalDocs = docsToEmbed.length;
1025
1166
  const startTime = Date.now();
1026
1167
  // Use store's LlamaCpp or global singleton, wrapped in a session
1027
- const llm = getLlm(store);
1028
- const embedModelUri = llm.embedModelName;
1168
+ const embedModelUri = model;
1029
1169
  // Create a session manager for this llm instance
1030
1170
  const result = await withLLMSessionForLlm(llm, async (session) => {
1031
1171
  let chunksEmbedded = 0;
1032
- let errors = 0;
1033
1172
  let bytesProcessed = 0;
1034
1173
  let totalChunks = 0;
1035
1174
  let vectorTableInitialized = false;
1036
1175
  const BATCH_SIZE = 32;
1176
+ const RETRY_AFTER_SUCCESSFUL_CHUNKS = 64;
1177
+ const MAX_RETRY_ATTEMPTS = 3;
1178
+ const failures = new Map();
1179
+ const retryQueue = new Map();
1180
+ let successesSinceRetry = 0;
1181
+ const failureList = () => [...failures.values()];
1182
+ const activeErrorCount = () => failures.size;
1183
+ const chunkKey = (chunk) => `${chunk.hash}:${chunk.seq}`;
1184
+ const reasonFromError = (error) => {
1185
+ const raw = error instanceof Error ? error.message : String(error);
1186
+ return raw.length > 180 ? `${raw.slice(0, 177)}...` : raw;
1187
+ };
1188
+ const recordFailure = (chunk, reason) => {
1189
+ const key = chunkKey(chunk);
1190
+ const previous = failures.get(key);
1191
+ failures.set(key, {
1192
+ path: chunk.path,
1193
+ hash: chunk.hash,
1194
+ seq: chunk.seq,
1195
+ attempts: (previous?.attempts ?? 0) + 1,
1196
+ reason,
1197
+ });
1198
+ retryQueue.set(key, chunk);
1199
+ };
1200
+ const clearFailure = (chunk) => {
1201
+ const key = chunkKey(chunk);
1202
+ failures.delete(key);
1203
+ retryQueue.delete(key);
1204
+ };
1205
+ const tryEmbedChunk = async (chunk) => {
1206
+ try {
1207
+ const text = formatDocForEmbedding(chunk.text, chunk.title, embedModelUri);
1208
+ const result = await session.embed(text, { model });
1209
+ if (!result) {
1210
+ recordFailure(chunk, "embedding returned no vector");
1211
+ return false;
1212
+ }
1213
+ insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now, chunk.expectedTotalChunks, fingerprint);
1214
+ chunksEmbedded++;
1215
+ successesSinceRetry++;
1216
+ clearFailure(chunk);
1217
+ return true;
1218
+ }
1219
+ catch (error) {
1220
+ recordFailure(chunk, reasonFromError(error));
1221
+ return false;
1222
+ }
1223
+ };
1224
+ const retryFailedChunks = async (force = false) => {
1225
+ if (!session.isValid || retryQueue.size === 0)
1226
+ return;
1227
+ if (!force && successesSinceRetry < RETRY_AFTER_SUCCESSFUL_CHUNKS)
1228
+ return;
1229
+ successesSinceRetry = 0;
1230
+ // Normal mode: one retry pass after enough unrelated chunks succeeded.
1231
+ // Force mode: we have run out of other chunks for this batch, so keep
1232
+ // retrying outstanding failures until they recover or hit the cap. The
1233
+ // cap prevents endless loops on permanently bad chunks.
1234
+ do {
1235
+ let retried = 0;
1236
+ for (const [key, chunk] of [...retryQueue]) {
1237
+ const failure = failures.get(key);
1238
+ if (!failure || failure.attempts >= MAX_RETRY_ATTEMPTS)
1239
+ continue;
1240
+ retried++;
1241
+ await tryEmbedChunk(chunk);
1242
+ }
1243
+ if (!force || retried === 0)
1244
+ break;
1245
+ } while (session.isValid && [...retryQueue].some(([key]) => {
1246
+ const failure = failures.get(key);
1247
+ return !!failure && failure.attempts < MAX_RETRY_ATTEMPTS;
1248
+ }));
1249
+ };
1037
1250
  const batches = buildEmbeddingBatches(docsToEmbed, maxDocsPerBatch, maxBatchBytes);
1038
1251
  for (const batchMeta of batches) {
1039
1252
  // Abort early if session has been invalidated
@@ -1043,6 +1256,7 @@ export async function generateEmbeddings(store, options) {
1043
1256
  }
1044
1257
  const batchDocs = getEmbeddingDocsForBatch(db, batchMeta);
1045
1258
  const batchChunks = [];
1259
+ const expectedChunksByHash = new Map();
1046
1260
  const batchBytes = batchMeta.reduce((sum, doc) => sum + Math.max(0, doc.bytes), 0);
1047
1261
  for (const doc of batchDocs) {
1048
1262
  if (!doc.body.trim())
@@ -1052,19 +1266,22 @@ export async function generateEmbeddings(store, options) {
1052
1266
  for (let seq = 0; seq < chunks.length; seq++) {
1053
1267
  batchChunks.push({
1054
1268
  hash: doc.hash,
1269
+ path: doc.path,
1055
1270
  title,
1056
1271
  text: chunks[seq].text,
1057
1272
  seq,
1058
1273
  pos: chunks[seq].pos,
1059
1274
  tokens: chunks[seq].tokens,
1060
1275
  bytes: encoder.encode(chunks[seq].text).length,
1276
+ expectedTotalChunks: chunks.length,
1061
1277
  });
1062
1278
  }
1279
+ expectedChunksByHash.set(doc.hash, chunks.length);
1063
1280
  }
1064
1281
  totalChunks += batchChunks.length;
1065
1282
  if (batchChunks.length === 0) {
1066
1283
  bytesProcessed += batchBytes;
1067
- options?.onProgress?.({ chunksEmbedded, totalChunks, bytesProcessed, totalBytes, errors });
1284
+ options?.onProgress?.({ chunksEmbedded, totalChunks, bytesProcessed, totalBytes, errors: activeErrorCount(), failures: failureList() });
1068
1285
  continue;
1069
1286
  }
1070
1287
  if (!vectorTableInitialized) {
@@ -1082,17 +1299,19 @@ export async function generateEmbeddings(store, options) {
1082
1299
  for (let batchStart = 0; batchStart < batchChunks.length; batchStart += BATCH_SIZE) {
1083
1300
  // Abort early if session has been invalidated (e.g. max duration exceeded)
1084
1301
  if (!session.isValid) {
1085
- const remaining = batchChunks.length - batchStart;
1086
- errors += remaining;
1087
- console.warn(`⚠ Session expired skipping ${remaining} remaining chunks`);
1302
+ const remainingChunks = batchChunks.slice(batchStart);
1303
+ for (const chunk of remainingChunks)
1304
+ recordFailure(chunk, "LLM session expired before embedding chunk");
1305
+ console.warn(`⚠ Session expired — skipping ${remainingChunks.length} remaining chunks`);
1088
1306
  break;
1089
1307
  }
1090
- // Abort early if error rate is too high (>80% of processed chunks failed)
1091
- const processed = chunksEmbedded + errors;
1092
- if (processed >= BATCH_SIZE && errors > processed * 0.8) {
1093
- const remaining = batchChunks.length - batchStart;
1094
- errors += remaining;
1095
- console.warn(`⚠ Error rate too high (${errors}/${processed}) aborting embedding`);
1308
+ // Abort early if active error rate is too high (>80% of attempted chunks failed)
1309
+ const processed = chunksEmbedded + activeErrorCount();
1310
+ if (processed >= BATCH_SIZE && activeErrorCount() > processed * 0.8) {
1311
+ const remainingChunks = batchChunks.slice(batchStart);
1312
+ for (const chunk of remainingChunks)
1313
+ recordFailure(chunk, "embedding aborted because error rate was too high");
1314
+ console.warn(`⚠ Error rate too high (${activeErrorCount()}/${processed}) — aborting embedding`);
1096
1315
  break;
1097
1316
  }
1098
1317
  const batchEnd = Math.min(batchStart + BATCH_SIZE, batchChunks.length);
@@ -1104,39 +1323,33 @@ export async function generateEmbeddings(store, options) {
1104
1323
  const chunk = chunkBatch[i];
1105
1324
  const embedding = embeddings[i];
1106
1325
  if (embedding) {
1107
- insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(embedding.embedding), model, now);
1326
+ insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(embedding.embedding), model, now, chunk.expectedTotalChunks, fingerprint);
1108
1327
  chunksEmbedded++;
1328
+ successesSinceRetry++;
1329
+ clearFailure(chunk);
1109
1330
  }
1110
1331
  else {
1111
- errors++;
1332
+ recordFailure(chunk, "batch embedding returned no vector");
1112
1333
  }
1113
1334
  batchChunkBytesProcessed += chunk.bytes;
1114
1335
  }
1336
+ await retryFailedChunks();
1115
1337
  }
1116
- catch {
1117
- // Batch failed — try individual embeddings as fallback
1118
- // But skip if session is already invalid (avoids N doomed retries)
1338
+ catch (error) {
1339
+ // Batch failed — try individual embeddings as fallback. If an
1340
+ // individual retry succeeds, any prior failure for that chunk is
1341
+ // cleared, so the visible error count reflects outstanding failures.
1342
+ const batchReason = reasonFromError(error);
1119
1343
  if (!session.isValid) {
1120
- errors += chunkBatch.length;
1344
+ for (const chunk of chunkBatch)
1345
+ recordFailure(chunk, `batch failed and session expired: ${batchReason}`);
1121
1346
  batchChunkBytesProcessed += chunkBatch.reduce((sum, c) => sum + c.bytes, 0);
1122
1347
  }
1123
1348
  else {
1124
1349
  for (const chunk of chunkBatch) {
1125
- try {
1126
- const text = formatDocForEmbedding(chunk.text, chunk.title, embedModelUri);
1127
- const result = await session.embed(text, { model });
1128
- if (result) {
1129
- insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now);
1130
- chunksEmbedded++;
1131
- }
1132
- else {
1133
- errors++;
1134
- }
1135
- }
1136
- catch {
1137
- errors++;
1138
- }
1350
+ await tryEmbedChunk(chunk);
1139
1351
  batchChunkBytesProcessed += chunk.bytes;
1352
+ await retryFailedChunks();
1140
1353
  }
1141
1354
  }
1142
1355
  }
@@ -1148,18 +1361,25 @@ export async function generateEmbeddings(store, options) {
1148
1361
  totalChunks,
1149
1362
  bytesProcessed: bytesProcessed + proportionalBytes,
1150
1363
  totalBytes,
1151
- errors,
1364
+ errors: activeErrorCount(),
1365
+ failures: failureList(),
1152
1366
  });
1153
1367
  }
1368
+ await retryFailedChunks(true);
1369
+ const removedPartialChunks = removeIncompleteEmbeddings(db, expectedChunksByHash, model);
1370
+ if (removedPartialChunks > 0) {
1371
+ chunksEmbedded = Math.max(0, chunksEmbedded - removedPartialChunks);
1372
+ }
1154
1373
  bytesProcessed += batchBytes;
1155
- options?.onProgress?.({ chunksEmbedded, totalChunks, bytesProcessed, totalBytes, errors });
1374
+ options?.onProgress?.({ chunksEmbedded, totalChunks, bytesProcessed, totalBytes, errors: activeErrorCount(), failures: failureList() });
1156
1375
  }
1157
- return { chunksEmbedded, errors };
1376
+ return { chunksEmbedded, errors: activeErrorCount(), failures: failureList() };
1158
1377
  }, { maxDuration: 30 * 60 * 1000, name: 'generateEmbeddings' });
1159
1378
  return {
1160
1379
  docsProcessed: totalDocs,
1161
1380
  chunksEmbedded: result.chunksEmbedded,
1162
1381
  errors: result.errors,
1382
+ failures: result.failures,
1163
1383
  durationMs: Date.now() - startTime,
1164
1384
  };
1165
1385
  }
@@ -1180,9 +1400,9 @@ export function createStore(dbPath) {
1180
1400
  close: () => db.close(),
1181
1401
  ensureVecTable: (dimensions) => ensureVecTableInternal(db, dimensions),
1182
1402
  // Index health
1183
- getHashesNeedingEmbedding: () => getHashesNeedingEmbedding(db),
1184
- getIndexHealth: () => getIndexHealth(db),
1185
- getStatus: () => getStatus(db),
1403
+ getHashesNeedingEmbedding: (model) => getHashesNeedingEmbedding(db, undefined, model ?? store.llm?.embedModelName ?? DEFAULT_EMBED_MODEL),
1404
+ getIndexHealth: (model) => getIndexHealth(db, model ?? store.llm?.embedModelName ?? DEFAULT_EMBED_MODEL),
1405
+ getStatus: (model) => getStatus(db, model ?? store.llm?.embedModelName ?? DEFAULT_EMBED_MODEL),
1186
1406
  // Caching
1187
1407
  getCacheKey,
1188
1408
  getCachedResult: (cacheKey) => getCachedResult(db, cacheKey),
@@ -1210,8 +1430,8 @@ export function createStore(dbPath) {
1210
1430
  searchFTS: (query, limit, collectionName) => searchFTS(db, query, limit, collectionName),
1211
1431
  searchVec: (query, model, limit, collectionName, session, precomputedEmbedding) => searchVec(db, query, model, limit, collectionName, session, precomputedEmbedding),
1212
1432
  // Query expansion & reranking
1213
- expandQuery: (query, model, intent) => expandQuery(query, model, db, intent, store.llm),
1214
- rerank: (query, documents, model, intent) => rerank(query, documents, model, db, intent, store.llm),
1433
+ expandQuery: (query, model, intent) => expandQuery(query, model ?? store.llm?.generateModelName ?? DEFAULT_QUERY_MODEL, db, intent, store.llm),
1434
+ rerank: (query, documents, model, intent) => rerank(query, documents, model ?? store.llm?.rerankModelName ?? DEFAULT_RERANK_MODEL, db, intent, store.llm),
1215
1435
  // Document retrieval
1216
1436
  findDocument: (filename, options) => findDocument(db, filename, options),
1217
1437
  getDocumentBody: (doc, fromLine, maxLines) => getDocumentBody(db, doc, fromLine, maxLines),
@@ -1224,6 +1444,7 @@ export function createStore(dbPath) {
1224
1444
  insertContent: (hash, content, createdAt) => insertContent(db, hash, content, createdAt),
1225
1445
  insertDocument: (collectionName, path, title, hash, createdAt, modifiedAt) => insertDocument(db, collectionName, path, title, hash, createdAt, modifiedAt),
1226
1446
  findActiveDocument: (collectionName, path) => findActiveDocument(db, collectionName, path),
1447
+ findOrMigrateLegacyDocument: (collectionName, path) => findOrMigrateLegacyDocument(db, collectionName, path),
1227
1448
  updateDocumentTitle: (documentId, title, modifiedAt) => updateDocumentTitle(db, documentId, title, modifiedAt),
1228
1449
  updateDocument: (documentId, title, hash, modifiedAt) => updateDocument(db, documentId, title, hash, modifiedAt),
1229
1450
  deactivateDocument: (collectionName, path) => deactivateDocument(db, collectionName, path),
@@ -1231,7 +1452,7 @@ export function createStore(dbPath) {
1231
1452
  // Vector/embedding operations
1232
1453
  getHashesForEmbedding: () => getHashesForEmbedding(db),
1233
1454
  clearAllEmbeddings: () => clearAllEmbeddings(db),
1234
- insertEmbedding: (hash, seq, pos, embedding, model, embeddedAt) => insertEmbedding(db, hash, seq, pos, embedding, model, embeddedAt),
1455
+ insertEmbedding: (hash, seq, pos, embedding, model, embeddedAt, totalChunks, fingerprint) => insertEmbedding(db, hash, seq, pos, embedding, model, embeddedAt, totalChunks, fingerprint),
1235
1456
  };
1236
1457
  return store;
1237
1458
  }
@@ -1244,11 +1465,11 @@ export function getDocid(hash) {
1244
1465
  /**
1245
1466
  * Handelize a filename to be more token-friendly.
1246
1467
  * - Convert triple underscore `___` to `/` (folder separator)
1247
- * - Convert to lowercase
1248
1468
  * - Replace sequences of non-word chars (except /) with single dash
1249
1469
  * - Remove leading/trailing dashes from path segments
1250
1470
  * - Preserve folder structure (a/b/c/d.md stays structured)
1251
1471
  * - Preserve file extension
1472
+ * - Preserve original case (important for case-sensitive filesystems)
1252
1473
  */
1253
1474
  /** Replace emoji/symbol codepoints with their hex representation (e.g. 🐘 → 1f418) */
1254
1475
  function emojiToHex(str) {
@@ -1273,7 +1494,6 @@ export function handelize(path) {
1273
1494
  }
1274
1495
  const result = path
1275
1496
  .replace(/___/g, '/') // Triple underscore becomes folder separator
1276
- .toLowerCase()
1277
1497
  .split('/')
1278
1498
  .map((segment, idx, arr) => {
1279
1499
  const isLastSegment = idx === arr.length - 1;
@@ -1306,17 +1526,85 @@ export function handelize(path) {
1306
1526
  // =============================================================================
1307
1527
  // Index health
1308
1528
  // =============================================================================
1309
- export function getHashesNeedingEmbedding(db) {
1310
- const result = db.prepare(`
1311
- SELECT COUNT(DISTINCT d.hash) as count
1312
- FROM documents d
1313
- LEFT JOIN content_vectors v ON d.hash = v.hash AND v.seq = 0
1314
- WHERE d.active = 1 AND v.hash IS NULL
1315
- `).get();
1316
- return result.count;
1529
+ export function getHashesNeedingEmbedding(db, collection, model = DEFAULT_EMBED_MODEL) {
1530
+ const collectionFilter = collection ? `AND d.collection = ?` : ``;
1531
+ const fingerprint = getEmbeddingFingerprint(model);
1532
+ return withLazyContentVectorMigration(db, () => {
1533
+ const stmt = db.prepare(`
1534
+ SELECT COUNT(DISTINCT d.hash) as count
1535
+ FROM documents d
1536
+ LEFT JOIN (
1537
+ SELECT hash, model, COUNT(*) AS chunk_count, MAX(total_chunks) AS expected_chunks
1538
+ FROM content_vectors
1539
+ WHERE model = ? AND embed_fingerprint = ?
1540
+ GROUP BY hash, model, embed_fingerprint
1541
+ ) v ON d.hash = v.hash
1542
+ WHERE d.active = 1
1543
+ AND (v.hash IS NULL OR v.chunk_count < v.expected_chunks)
1544
+ ${collectionFilter}
1545
+ `);
1546
+ const result = (collection ? stmt.get(model, fingerprint, collection) : stmt.get(model, fingerprint));
1547
+ return result.count;
1548
+ });
1317
1549
  }
1318
- export function getIndexHealth(db) {
1319
- const needsEmbedding = getHashesNeedingEmbedding(db);
1550
+ export async function maybeAdoptLegacyEmbeddingFingerprint(store, model = DEFAULT_EMBED_MODEL) {
1551
+ const db = store.db;
1552
+ const fingerprint = getEmbeddingFingerprint(model);
1553
+ const legacyCount = withLazyContentVectorMigration(db, () => {
1554
+ const row = db.prepare(`SELECT COUNT(DISTINCT hash) AS count FROM content_vectors WHERE model = ? AND embed_fingerprint = ''`).get(model);
1555
+ return row.count;
1556
+ });
1557
+ if (legacyCount === 0) {
1558
+ return { checked: false, adopted: 0, reason: "no legacy empty-fingerprint embeddings" };
1559
+ }
1560
+ const sample = withLazyContentVectorMigration(db, () => db.prepare(`
1561
+ SELECT cv.hash, cv.seq, cv.pos, cv.total_chunks, c.doc AS body, MIN(d.path) AS path
1562
+ FROM content_vectors cv
1563
+ JOIN documents d ON d.hash = cv.hash AND d.active = 1
1564
+ JOIN content c ON c.hash = cv.hash
1565
+ WHERE cv.model = ? AND cv.embed_fingerprint = ''
1566
+ GROUP BY cv.hash, cv.seq, cv.pos, cv.total_chunks, c.doc
1567
+ ORDER BY cv.hash, cv.seq
1568
+ LIMIT 1
1569
+ `).get(model));
1570
+ if (!sample) {
1571
+ return { checked: false, adopted: 0, reason: `${legacyCount} legacy docs have no active sample` };
1572
+ }
1573
+ const tableExists = db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
1574
+ if (!tableExists) {
1575
+ return { checked: false, adopted: 0, reason: "vectors_vec table is missing" };
1576
+ }
1577
+ const expectedHashSeq = `${sample.hash}_${sample.seq}`;
1578
+ const title = extractTitle(sample.body, sample.path);
1579
+ const llm = getLlm(store);
1580
+ return await withLLMSessionForLlm(llm, async (session) => {
1581
+ const chunks = await chunkDocumentByTokens(sample.body, undefined, undefined, undefined, sample.path, undefined, session.signal);
1582
+ const chunk = chunks[sample.seq];
1583
+ if (!chunk) {
1584
+ return { checked: true, adopted: 0, reason: `sample chunk ${expectedHashSeq} no longer exists` };
1585
+ }
1586
+ const result = await session.embed(formatDocForEmbedding(chunk.text, title, model), { model });
1587
+ if (!result) {
1588
+ return { checked: true, adopted: 0, reason: "failed to embed legacy sample" };
1589
+ }
1590
+ const nearest = db.prepare(`
1591
+ SELECT hash_seq, distance
1592
+ FROM vectors_vec
1593
+ WHERE embedding MATCH ? AND k = 1
1594
+ `).get(new Float32Array(result.embedding));
1595
+ if (!nearest) {
1596
+ return { checked: true, adopted: 0, reason: "legacy sample vector not found" };
1597
+ }
1598
+ const threshold = 0.0001;
1599
+ if (nearest.hash_seq !== expectedHashSeq || nearest.distance > threshold) {
1600
+ return { checked: true, adopted: 0, reason: `legacy sample differs from current fingerprint (nearest ${nearest.hash_seq}, distance ${nearest.distance.toFixed(6)})` };
1601
+ }
1602
+ const update = withLazyContentVectorMigration(db, () => db.prepare(`UPDATE content_vectors SET embed_fingerprint = ? WHERE model = ? AND embed_fingerprint = ''`).run(fingerprint, model));
1603
+ return { checked: true, adopted: update.changes, reason: `sample ${expectedHashSeq} matched current fingerprint at distance ${nearest.distance.toFixed(6)}` };
1604
+ });
1605
+ }
1606
+ export function getIndexHealth(db, model = DEFAULT_EMBED_MODEL) {
1607
+ const needsEmbedding = getHashesNeedingEmbedding(db, undefined, model);
1320
1608
  const totalDocs = db.prepare(`SELECT COUNT(*) as count FROM documents WHERE active = 1`).get().count;
1321
1609
  const mostRecent = db.prepare(`SELECT MAX(modified_at) as latest FROM documents WHERE active = 1`).get();
1322
1610
  let daysStale = null;
@@ -1369,13 +1657,15 @@ export function deleteInactiveDocuments(db) {
1369
1657
  return result.changes;
1370
1658
  }
1371
1659
  /**
1372
- * Remove orphaned content hashes that are not referenced by any active document.
1660
+ * Remove orphaned content hashes that are not referenced by any document.
1661
+ * Inactive documents are soft-deleted tombstones, so their content rows must
1662
+ * remain referenced until deleteInactiveDocuments() hard-deletes them.
1373
1663
  * Returns the number of orphaned content hashes deleted.
1374
1664
  */
1375
1665
  export function cleanupOrphanedContent(db) {
1376
1666
  const result = db.prepare(`
1377
1667
  DELETE FROM content
1378
- WHERE hash NOT IN (SELECT DISTINCT hash FROM documents WHERE active = 1)
1668
+ WHERE hash NOT IN (SELECT DISTINCT hash FROM documents)
1379
1669
  `).run();
1380
1670
  return result.changes;
1381
1671
  }
@@ -1400,32 +1690,34 @@ export function cleanupOrphanedVectors(db) {
1400
1690
  catch {
1401
1691
  return 0;
1402
1692
  }
1403
- // Count orphaned vectors first
1404
- const countResult = db.prepare(`
1405
- SELECT COUNT(*) as c FROM content_vectors cv
1406
- WHERE NOT EXISTS (
1407
- SELECT 1 FROM documents d WHERE d.hash = cv.hash AND d.active = 1
1408
- )
1409
- `).get();
1410
- if (countResult.c === 0) {
1411
- return 0;
1412
- }
1413
- // Delete from vectors_vec first
1414
- db.exec(`
1415
- DELETE FROM vectors_vec WHERE hash_seq IN (
1416
- SELECT cv.hash || '_' || cv.seq FROM content_vectors cv
1693
+ return withLazyContentVectorMigration(db, () => {
1694
+ // Count orphaned vectors first
1695
+ const countResult = db.prepare(`
1696
+ SELECT COUNT(*) as c FROM content_vectors cv
1417
1697
  WHERE NOT EXISTS (
1418
1698
  SELECT 1 FROM documents d WHERE d.hash = cv.hash AND d.active = 1
1419
1699
  )
1420
- )
1421
- `);
1422
- // Delete from content_vectors
1423
- db.exec(`
1424
- DELETE FROM content_vectors WHERE hash NOT IN (
1425
- SELECT hash FROM documents WHERE active = 1
1426
- )
1427
- `);
1428
- return countResult.c;
1700
+ `).get();
1701
+ if (countResult.c === 0) {
1702
+ return 0;
1703
+ }
1704
+ // Delete from vectors_vec first
1705
+ db.exec(`
1706
+ DELETE FROM vectors_vec WHERE hash_seq IN (
1707
+ SELECT cv.hash || '_' || cv.seq FROM content_vectors cv
1708
+ WHERE NOT EXISTS (
1709
+ SELECT 1 FROM documents d WHERE d.hash = cv.hash AND d.active = 1
1710
+ )
1711
+ )
1712
+ `);
1713
+ // Delete from content_vectors
1714
+ db.exec(`
1715
+ DELETE FROM content_vectors WHERE hash NOT IN (
1716
+ SELECT hash FROM documents WHERE active = 1
1717
+ )
1718
+ `);
1719
+ return countResult.c;
1720
+ });
1429
1721
  }
1430
1722
  /**
1431
1723
  * Run VACUUM to reclaim unused space in the database.
@@ -1487,6 +1779,21 @@ export function insertContent(db, hash, content, createdAt) {
1487
1779
  db.prepare(`INSERT OR IGNORE INTO content (hash, doc, created_at) VALUES (?, ?, ?)`)
1488
1780
  .run(hash, content, createdAt);
1489
1781
  }
1782
+ function rebuildDocumentFTS(db, documentId) {
1783
+ const row = db.prepare(`
1784
+ SELECT d.id, d.collection, d.path, d.title, content.doc as body
1785
+ FROM documents d
1786
+ JOIN content ON content.hash = d.hash
1787
+ WHERE d.id = ? AND d.active = 1
1788
+ `).get(documentId);
1789
+ db.prepare(`DELETE FROM documents_fts WHERE rowid = ?`).run(documentId);
1790
+ if (!row)
1791
+ return;
1792
+ db.prepare(`
1793
+ INSERT INTO documents_fts(rowid, filepath, title, body)
1794
+ VALUES (?, ?, ?, ?)
1795
+ `).run(row.id, normalizeCjkForFTS(`${row.collection}/${row.path}`), normalizeCjkForFTS(row.title), normalizeCjkForFTS(row.body));
1796
+ }
1490
1797
  /**
1491
1798
  * Insert a new document into the documents table.
1492
1799
  */
@@ -1500,6 +1807,9 @@ export function insertDocument(db, collectionName, path, title, hash, createdAt,
1500
1807
  modified_at = excluded.modified_at,
1501
1808
  active = 1
1502
1809
  `).run(collectionName, path, title, hash, createdAt, modifiedAt);
1810
+ const row = db.prepare(`SELECT id FROM documents WHERE collection = ? AND path = ?`).get(collectionName, path);
1811
+ if (row)
1812
+ rebuildDocumentFTS(db, row.id);
1503
1813
  }
1504
1814
  /**
1505
1815
  * Find an active document by collection name and path.
@@ -1511,12 +1821,48 @@ export function findActiveDocument(db, collectionName, path) {
1511
1821
  `).get(collectionName, path);
1512
1822
  return row ?? null;
1513
1823
  }
1824
+ /**
1825
+ * Find an active document, falling back to a case-insensitive path match.
1826
+ * If found under a different casing, renames it in-place and rebuilds the
1827
+ * FTS entry. Embeddings are keyed by content hash, so the rename is
1828
+ * safe — no re-embedding required.
1829
+ *
1830
+ * @internal Used by reindexCollection and indexFiles during qmd update.
1831
+ * Returns null if the document does not exist under either path.
1832
+ */
1833
+ export function findOrMigrateLegacyDocument(db, collectionName, path) {
1834
+ const existing = findActiveDocument(db, collectionName, path);
1835
+ if (existing)
1836
+ return existing;
1837
+ const legacy = db.prepare(`
1838
+ SELECT id, hash, title FROM documents
1839
+ WHERE collection = ? AND path COLLATE NOCASE = ? AND active = 1
1840
+ ORDER BY id
1841
+ LIMIT 1
1842
+ `).get(collectionName, path);
1843
+ if (!legacy)
1844
+ return null;
1845
+ // Wrap rename + FTS rebuild in a transaction for atomicity.
1846
+ const migrate = db.transaction(() => {
1847
+ // Use OR IGNORE so a UNIQUE conflict (e.g. both "readme.md" and
1848
+ // "README.md" already exist) is a no-op rather than crashing.
1849
+ const result = db.prepare(`UPDATE OR IGNORE documents SET path = ? WHERE id = ? AND active = 1`).run(path, legacy.id);
1850
+ if (result.changes === 0)
1851
+ return false;
1852
+ rebuildDocumentFTS(db, legacy.id);
1853
+ return true;
1854
+ });
1855
+ if (!migrate())
1856
+ return null;
1857
+ return findActiveDocument(db, collectionName, path);
1858
+ }
1514
1859
  /**
1515
1860
  * Update the title and modified_at timestamp for a document.
1516
1861
  */
1517
1862
  export function updateDocumentTitle(db, documentId, title, modifiedAt) {
1518
1863
  db.prepare(`UPDATE documents SET title = ?, modified_at = ? WHERE id = ?`)
1519
1864
  .run(title, modifiedAt, documentId);
1865
+ rebuildDocumentFTS(db, documentId);
1520
1866
  }
1521
1867
  /**
1522
1868
  * Update an existing document's hash, title, and modified_at timestamp.
@@ -1525,6 +1871,7 @@ export function updateDocumentTitle(db, documentId, title, modifiedAt) {
1525
1871
  export function updateDocument(db, documentId, title, hash, modifiedAt) {
1526
1872
  db.prepare(`UPDATE documents SET title = ?, hash = ?, modified_at = ? WHERE id = ?`)
1527
1873
  .run(title, hash, modifiedAt, documentId);
1874
+ rebuildDocumentFTS(db, documentId);
1528
1875
  }
1529
1876
  /**
1530
1877
  * Deactivate a document (mark as inactive but don't delete).
@@ -1593,31 +1940,54 @@ export async function chunkDocumentByTokens(content, maxTokens = CHUNK_SIZE_TOKE
1593
1940
  let charChunks = await chunkDocumentAsync(content, maxChars, overlapChars, windowChars, filepath, chunkStrategy);
1594
1941
  // Tokenize and split any chunks that still exceed limit
1595
1942
  const results = [];
1596
- for (const chunk of charChunks) {
1597
- // Respect abort signal to avoid runaway tokenization
1943
+ const clampOverlapChars = (value, maxChars) => {
1944
+ if (maxChars <= 1)
1945
+ return 0;
1946
+ return Math.max(0, Math.min(maxChars - 1, Math.floor(value)));
1947
+ };
1948
+ const pushChunkWithinTokenLimit = async (text, pos) => {
1598
1949
  if (signal?.aborted)
1599
- break;
1600
- const tokens = await llm.tokenize(chunk.text);
1601
- if (tokens.length <= maxTokens) {
1602
- results.push({ text: chunk.text, pos: chunk.pos, tokens: tokens.length });
1950
+ return;
1951
+ const tokens = await llm.tokenize(text);
1952
+ if (tokens.length <= maxTokens || text.length <= 1) {
1953
+ results.push({ text, pos, tokens: tokens.length });
1954
+ return;
1603
1955
  }
1604
- else {
1605
- // Chunk is still too large - split it further
1606
- // Use actual token count to estimate better char limit
1607
- const actualCharsPerToken = chunk.text.length / tokens.length;
1608
- const safeMaxChars = Math.floor(maxTokens * actualCharsPerToken * 0.95); // 5% safety margin
1609
- const subChunks = chunkDocument(chunk.text, safeMaxChars, Math.floor(overlapChars * actualCharsPerToken / 2), Math.floor(windowChars * actualCharsPerToken / 2));
1610
- for (const subChunk of subChunks) {
1611
- if (signal?.aborted)
1612
- break;
1613
- const subTokens = await llm.tokenize(subChunk.text);
1614
- results.push({
1615
- text: subChunk.text,
1616
- pos: chunk.pos + subChunk.pos,
1617
- tokens: subTokens.length,
1618
- });
1619
- }
1956
+ const actualCharsPerToken = text.length / tokens.length;
1957
+ let safeMaxChars = Math.floor(maxTokens * actualCharsPerToken * 0.95);
1958
+ if (!Number.isFinite(safeMaxChars) || safeMaxChars < 1) {
1959
+ safeMaxChars = Math.floor(text.length / 2);
1960
+ }
1961
+ safeMaxChars = Math.max(1, Math.min(text.length - 1, safeMaxChars));
1962
+ let nextOverlapChars = clampOverlapChars(overlapChars * actualCharsPerToken / 2, safeMaxChars);
1963
+ let nextWindowChars = Math.max(0, Math.floor(windowChars * actualCharsPerToken / 2));
1964
+ let subChunks = chunkDocument(text, safeMaxChars, nextOverlapChars, nextWindowChars);
1965
+ // Pathological single-line blobs can produce no meaningful breakpoint progress.
1966
+ // Fall back to a simple half split so every recursion step strictly shrinks.
1967
+ if (subChunks.length <= 1
1968
+ || subChunks[0]?.text.length === text.length) {
1969
+ safeMaxChars = Math.max(1, Math.floor(text.length / 2));
1970
+ nextOverlapChars = 0;
1971
+ nextWindowChars = 0;
1972
+ subChunks = chunkDocument(text, safeMaxChars, nextOverlapChars, nextWindowChars);
1973
+ }
1974
+ if (subChunks.length <= 1
1975
+ || subChunks[0]?.text.length === text.length) {
1976
+ const fallbackTokens = tokens.slice(0, Math.max(1, maxTokens));
1977
+ const truncatedText = await llm.detokenize(fallbackTokens);
1978
+ results.push({
1979
+ text: truncatedText,
1980
+ pos,
1981
+ tokens: fallbackTokens.length,
1982
+ });
1983
+ return;
1620
1984
  }
1985
+ for (const subChunk of subChunks) {
1986
+ await pushChunkWithinTokenLimit(text.slice(subChunk.pos, subChunk.pos + subChunk.text.length), pos + subChunk.pos);
1987
+ }
1988
+ };
1989
+ for (const chunk of charChunks) {
1990
+ await pushChunkWithinTokenLimit(chunk.text, chunk.pos);
1621
1991
  }
1622
1992
  return results;
1623
1993
  }
@@ -2135,7 +2505,7 @@ function buildFTS5Query(query) {
2135
2505
  const phrase = s.slice(start, i).trim();
2136
2506
  i++; // skip closing quote
2137
2507
  if (phrase.length > 0) {
2138
- const sanitized = phrase.split(/\s+/).map(t => sanitizeFTS5Term(t)).filter(t => t).join(' ');
2508
+ const sanitized = sanitizeFTS5Phrase(phrase);
2139
2509
  if (sanitized) {
2140
2510
  const ftsPhrase = `"${sanitized}"`; // Exact phrase, no prefix match
2141
2511
  if (negated) {
@@ -2167,6 +2537,18 @@ function buildFTS5Query(query) {
2167
2537
  }
2168
2538
  }
2169
2539
  }
2540
+ else if (containsCjk(term)) {
2541
+ const sanitized = sanitizeFTS5Phrase(term);
2542
+ if (sanitized) {
2543
+ const ftsPhrase = `"${sanitized}"`; // CJK phrase over character tokens
2544
+ if (negated) {
2545
+ negative.push(ftsPhrase);
2546
+ }
2547
+ else {
2548
+ positive.push(ftsPhrase);
2549
+ }
2550
+ }
2551
+ }
2170
2552
  else {
2171
2553
  const sanitized = sanitizeFTS5Term(term);
2172
2554
  if (sanitized) {
@@ -2199,8 +2581,9 @@ function buildFTS5Query(query) {
2199
2581
  * Returns error message if invalid, null if valid.
2200
2582
  */
2201
2583
  export function validateSemanticQuery(query) {
2202
- // Check for negation syntax
2203
- if (/-\w/.test(query) || /-"/.test(query)) {
2584
+ // Check for negation syntax — only at token boundaries (start of string or after whitespace).
2585
+ // Hyphenated words like "real-time" or "write-ahead" must not trigger this.
2586
+ if (/(^|\s)-[\w"]/.test(query)) {
2204
2587
  return 'Negation (-term) is not supported in vec/hyde queries. Use lex for exclusions.';
2205
2588
  }
2206
2589
  return null;
@@ -2326,7 +2709,7 @@ export async function searchVec(db, query, model, limit = 20, collectionName, se
2326
2709
  docSql += ` AND d.collection = ?`;
2327
2710
  params.push(collectionName);
2328
2711
  }
2329
- const docRows = db.prepare(docSql).all(...params);
2712
+ const docRows = withLazyContentVectorMigration(db, () => db.prepare(docSql).all(...params));
2330
2713
  // Combine with distances and dedupe by filepath
2331
2714
  const seen = new Map();
2332
2715
  for (const row of docRows) {
@@ -2373,23 +2756,82 @@ async function getEmbedding(text, model, isQuery, session, llmOverride) {
2373
2756
  * Get all unique content hashes that need embeddings (from active documents).
2374
2757
  * Returns hash, document body, and a sample path for display purposes.
2375
2758
  */
2376
- export function getHashesForEmbedding(db) {
2377
- return db.prepare(`
2759
+ export function getHashesForEmbedding(db, model = DEFAULT_EMBED_MODEL) {
2760
+ const fingerprint = getEmbeddingFingerprint(model);
2761
+ return withLazyContentVectorMigration(db, () => db.prepare(`
2378
2762
  SELECT d.hash, c.doc as body, MIN(d.path) as path
2379
2763
  FROM documents d
2380
2764
  JOIN content c ON d.hash = c.hash
2381
- LEFT JOIN content_vectors v ON d.hash = v.hash AND v.seq = 0
2382
- WHERE d.active = 1 AND v.hash IS NULL
2765
+ LEFT JOIN (
2766
+ SELECT hash, model, COUNT(*) AS chunk_count, MAX(total_chunks) AS expected_chunks
2767
+ FROM content_vectors
2768
+ WHERE model = ? AND embed_fingerprint = ?
2769
+ GROUP BY hash, model, embed_fingerprint
2770
+ ) v ON d.hash = v.hash
2771
+ WHERE d.active = 1
2772
+ AND (v.hash IS NULL OR v.chunk_count < v.expected_chunks)
2383
2773
  GROUP BY d.hash
2384
- `).all();
2774
+ `).all(model, fingerprint));
2385
2775
  }
2386
2776
  /**
2387
- * Clear all embeddings from the database (force re-index).
2388
- * Deletes all rows from content_vectors and drops the vectors_vec table.
2389
- */
2390
- export function clearAllEmbeddings(db) {
2391
- db.exec(`DELETE FROM content_vectors`);
2392
- db.exec(`DROP TABLE IF EXISTS vectors_vec`);
2777
+ * Clear embeddings for the whole index, or just for one collection.
2778
+ *
2779
+ * When `collection` is omitted the entire content_vectors table is emptied and
2780
+ * the vectors_vec virtual table is dropped (it is recreated with the right
2781
+ * dimensions on the next embed run).
2782
+ *
2783
+ * When `collection` is provided, only vectors whose hash is referenced
2784
+ * exclusively by active documents in that collection are removed. Hashes
2785
+ * shared with active documents in other collections are left in place so
2786
+ * vector search keeps working there (content_vectors is keyed globally by
2787
+ * content hash; identical document bodies across collections share a row).
2788
+ * vectors_vec is preserved so other collections keep working unless the scoped
2789
+ * clear empties content_vectors entirely, in which case it is dropped so the
2790
+ * next embed can recreate the table with the current dimensions.
2791
+ */
2792
+ export function clearAllEmbeddings(db, collection) {
2793
+ if (!collection) {
2794
+ db.exec(`DELETE FROM content_vectors`);
2795
+ db.exec(`DROP TABLE IF EXISTS vectors_vec`);
2796
+ return;
2797
+ }
2798
+ const exclusiveHashesQuery = `
2799
+ SELECT DISTINCT d.hash
2800
+ FROM documents d
2801
+ WHERE d.collection = ? AND d.active = 1
2802
+ AND NOT EXISTS (
2803
+ SELECT 1 FROM documents d2
2804
+ WHERE d2.hash = d.hash
2805
+ AND d2.active = 1
2806
+ AND d2.collection != d.collection
2807
+ )
2808
+ `;
2809
+ const vecTableExists = db
2810
+ .prepare(`SELECT 1 FROM sqlite_master WHERE type='table' AND name='vectors_vec'`)
2811
+ .get();
2812
+ withLazyContentVectorMigration(db, () => {
2813
+ if (vecTableExists) {
2814
+ const hashSeqRows = db.prepare(`
2815
+ SELECT cv.hash, cv.seq
2816
+ FROM content_vectors cv
2817
+ WHERE cv.hash IN (${exclusiveHashesQuery})
2818
+ `).all(collection);
2819
+ const delVec = db.prepare(`DELETE FROM vectors_vec WHERE hash_seq = ?`);
2820
+ for (const row of hashSeqRows) {
2821
+ delVec.run(`${row.hash}_${row.seq}`);
2822
+ }
2823
+ }
2824
+ db.prepare(`
2825
+ DELETE FROM content_vectors
2826
+ WHERE hash IN (${exclusiveHashesQuery})
2827
+ `).run(collection);
2828
+ const remaining = db
2829
+ .prepare(`SELECT COUNT(*) AS n FROM content_vectors`)
2830
+ .get();
2831
+ if (remaining.n === 0) {
2832
+ db.exec(`DROP TABLE IF EXISTS vectors_vec`);
2833
+ }
2834
+ });
2393
2835
  }
2394
2836
  /**
2395
2837
  * Insert a single embedding into both content_vectors and vectors_vec tables.
@@ -2401,16 +2843,37 @@ export function clearAllEmbeddings(db) {
2401
2843
  * vectors_vec uses DELETE + INSERT instead of INSERT OR REPLACE because sqlite-vec's
2402
2844
  * vec0 virtual tables silently ignore the OR REPLACE conflict clause.
2403
2845
  */
2404
- export function insertEmbedding(db, hash, seq, pos, embedding, model, embeddedAt) {
2846
+ export function insertEmbedding(db, hash, seq, pos, embedding, model, embeddedAt, totalChunks = 1, fingerprint = getEmbeddingFingerprint(model)) {
2405
2847
  const hashSeq = `${hash}_${seq}`;
2406
- // Insert content_vectors first — crash-safe ordering (see getHashesForEmbedding)
2407
- const insertContentVectorStmt = db.prepare(`INSERT OR REPLACE INTO content_vectors (hash, seq, pos, model, embedded_at) VALUES (?, ?, ?, ?, ?)`);
2408
- insertContentVectorStmt.run(hash, seq, pos, model, embeddedAt);
2409
- // vec0 virtual tables don't support OR REPLACE — use DELETE + INSERT
2410
- const deleteVecStmt = db.prepare(`DELETE FROM vectors_vec WHERE hash_seq = ?`);
2411
- const insertVecStmt = db.prepare(`INSERT INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`);
2412
- deleteVecStmt.run(hashSeq);
2413
- insertVecStmt.run(hashSeq, embedding);
2848
+ withLazyContentVectorMigration(db, () => {
2849
+ // Insert content_vectors first crash-safe ordering (see getHashesForEmbedding)
2850
+ const insertContentVectorStmt = db.prepare(`INSERT OR REPLACE INTO content_vectors (hash, seq, pos, model, embed_fingerprint, total_chunks, embedded_at) VALUES (?, ?, ?, ?, ?, ?, ?)`);
2851
+ insertContentVectorStmt.run(hash, seq, pos, model, fingerprint, totalChunks, embeddedAt);
2852
+ // vec0 virtual tables don't support OR REPLACE use DELETE + INSERT
2853
+ const deleteVecStmt = db.prepare(`DELETE FROM vectors_vec WHERE hash_seq = ?`);
2854
+ const insertVecStmt = db.prepare(`INSERT INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`);
2855
+ deleteVecStmt.run(hashSeq);
2856
+ insertVecStmt.run(hashSeq, embedding);
2857
+ });
2858
+ }
2859
+ function removeIncompleteEmbeddings(db, expectedChunksByHash, model) {
2860
+ return withLazyContentVectorMigration(db, () => {
2861
+ let removed = 0;
2862
+ const rowsStmt = db.prepare(`SELECT seq FROM content_vectors WHERE hash = ? AND model = ?`);
2863
+ const deleteContentStmt = db.prepare(`DELETE FROM content_vectors WHERE hash = ? AND model = ?`);
2864
+ const deleteVecStmt = db.prepare(`DELETE FROM vectors_vec WHERE hash_seq = ?`);
2865
+ for (const [hash, expectedChunks] of expectedChunksByHash) {
2866
+ const rows = rowsStmt.all(hash, model);
2867
+ if (rows.length === 0 || rows.length === expectedChunks)
2868
+ continue;
2869
+ for (const row of rows) {
2870
+ deleteVecStmt.run(`${hash}_${row.seq}`);
2871
+ }
2872
+ deleteContentStmt.run(hash, model);
2873
+ removed += rows.length;
2874
+ }
2875
+ return removed;
2876
+ });
2414
2877
  }
2415
2878
  // =============================================================================
2416
2879
  // Query expansion
@@ -2422,12 +2885,15 @@ export async function expandQuery(query, model = DEFAULT_QUERY_MODEL, db, intent
2422
2885
  if (cached) {
2423
2886
  try {
2424
2887
  const parsed = JSON.parse(cached);
2888
+ if (!Array.isArray(parsed))
2889
+ return [];
2890
+ const rows = parsed;
2425
2891
  // Migrate old cache format: { type, text } → { type, query }
2426
- if (parsed.length > 0 && parsed[0].query) {
2427
- return parsed;
2892
+ if (rows.length > 0 && typeof rows[0]?.query === "string") {
2893
+ return rows.map((r) => ({ type: r.type, query: String(r.query) }));
2428
2894
  }
2429
- else if (parsed.length > 0 && parsed[0].text) {
2430
- return parsed.map((r) => ({ type: r.type, query: r.text }));
2895
+ else if (rows.length > 0 && typeof rows[0]?.text === "string") {
2896
+ return rows.map((r) => ({ type: r.type, query: String(r.text) }));
2431
2897
  }
2432
2898
  }
2433
2899
  catch {
@@ -2734,7 +3200,7 @@ export function getDocumentBody(db, doc, fromLine, maxLines) {
2734
3200
  let body = row.body;
2735
3201
  if (fromLine !== undefined || maxLines !== undefined) {
2736
3202
  const lines = body.split('\n');
2737
- const start = (fromLine || 1) - 1;
3203
+ const start = Math.max(0, (fromLine || 1) - 1);
2738
3204
  const end = maxLines !== undefined ? start + maxLines : lines.length;
2739
3205
  body = lines.slice(start, end).join('\n');
2740
3206
  }
@@ -2842,7 +3308,7 @@ export function findDocuments(db, pattern, options = {}) {
2842
3308
  // =============================================================================
2843
3309
  // Status
2844
3310
  // =============================================================================
2845
- export function getStatus(db) {
3311
+ export function getStatus(db, model = DEFAULT_EMBED_MODEL) {
2846
3312
  // DB is source of truth for collections — config provides supplementary metadata
2847
3313
  const dbCollections = db.prepare(`
2848
3314
  SELECT
@@ -2875,7 +3341,7 @@ export function getStatus(db) {
2875
3341
  return new Date(b.lastUpdated).getTime() - new Date(a.lastUpdated).getTime();
2876
3342
  });
2877
3343
  const totalDocs = db.prepare(`SELECT COUNT(*) as c FROM documents WHERE active = 1`).get().c;
2878
- const needsEmbedding = getHashesNeedingEmbedding(db);
3344
+ const needsEmbedding = getHashesNeedingEmbedding(db, undefined, model);
2879
3345
  const hasVectors = !!db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
2880
3346
  return {
2881
3347
  totalDocuments: totalDocs,
@@ -2922,7 +3388,7 @@ export function extractSnippet(body, query, maxLen = 500, chunkPos, chunkLen, in
2922
3388
  const totalLines = body.split('\n').length;
2923
3389
  let searchBody = body;
2924
3390
  let lineOffset = 0;
2925
- if (chunkPos && chunkPos > 0) {
3391
+ if (chunkPos !== undefined && chunkPos >= 0) {
2926
3392
  // Search within the chunk region, with some padding for context
2927
3393
  // Use provided chunkLen or fall back to max chunk size (covers variable-length chunks)
2928
3394
  const searchLen = chunkLen || CHUNK_SIZE_CHARS;
@@ -2953,6 +3419,22 @@ export function extractSnippet(body, query, maxLen = 500, chunkPos, chunkLen, in
2953
3419
  bestLine = i;
2954
3420
  }
2955
3421
  }
3422
+ if (chunkPos !== undefined && chunkPos >= 0 && bestScore <= 0) {
3423
+ if (chunkPos === 0) {
3424
+ // chunkPos=0 may be the chunk selector's initialization default for queries
3425
+ // where lexical chunk scoring found no winner (e.g. tokens filtered to empty
3426
+ // by the length>2 guard). Retry with full body so the real match isn't missed.
3427
+ return extractSnippet(body, query, maxLen, undefined, undefined, intent);
3428
+ }
3429
+ // For chunkPos > 0 the reranker actively picked this chunk. Tokens failing to
3430
+ // match literally is most likely a tokenizer limitation (quoted phrases, FTS5
3431
+ // syntax, HYDE passages, semantic hits), so anchor on the chunk start rather
3432
+ // than disregarding the reranker's pick.
3433
+ const contextStart = Math.max(0, chunkPos - 100);
3434
+ bestLine = chunkPos > contextStart
3435
+ ? searchBody.slice(0, chunkPos - contextStart).split('\n').length - 1
3436
+ : 0;
3437
+ }
2956
3438
  const start = Math.max(0, bestLine - 1);
2957
3439
  const end = Math.min(lines.length, bestLine + 3);
2958
3440
  const snippetLines = lines.slice(start, end);
@@ -2990,6 +3472,20 @@ export function addLineNumbers(text, startLine = 1) {
2990
3472
  const lines = text.split('\n');
2991
3473
  return lines.map((line, i) => `${startLine + i}: ${line}`).join('\n');
2992
3474
  }
3475
+ /**
3476
+ * RRF list weights for hybridQuery.
3477
+ *
3478
+ * Original-query retrieval paths are the primary evidence and get 2x weight:
3479
+ * - original FTS
3480
+ * - original vector search
3481
+ *
3482
+ * Expansion-derived lists (lex/vec/hyde) stay at 1x regardless of list order,
3483
+ * so a lex expansion inserted before original vector search cannot steal the
3484
+ * original vector boost.
3485
+ */
3486
+ export function getHybridRrfWeights(rankedListMeta) {
3487
+ return rankedListMeta.map(meta => meta.queryType === "original" ? 2.0 : 1.0);
3488
+ }
2993
3489
  /**
2994
3490
  * Hybrid search: BM25 + vector + query expansion + RRF + chunked reranking.
2995
3491
  *
@@ -3078,7 +3574,8 @@ export async function hybridQuery(store, query, options) {
3078
3574
  }
3079
3575
  // Batch embed all vector queries in a single call
3080
3576
  const llm = getLlm(store);
3081
- const textsToEmbed = vecQueries.map(q => formatQueryForEmbedding(q.text, llm.embedModelName));
3577
+ const embedModel = llm.embedModelName;
3578
+ const textsToEmbed = vecQueries.map(q => formatQueryForEmbedding(q.text, embedModel));
3082
3579
  hooks?.onEmbedStart?.(textsToEmbed.length);
3083
3580
  const embedStart = Date.now();
3084
3581
  const embeddings = await llm.embedBatch(textsToEmbed);
@@ -3088,7 +3585,7 @@ export async function hybridQuery(store, query, options) {
3088
3585
  const embedding = embeddings[i]?.embedding;
3089
3586
  if (!embedding)
3090
3587
  continue;
3091
- const vecResults = await store.searchVec(vecQueries[i].text, DEFAULT_EMBED_MODEL, 20, collection, undefined, embedding);
3588
+ const vecResults = await store.searchVec(vecQueries[i].text, embedModel, 20, collection, undefined, embedding);
3092
3589
  if (vecResults.length > 0) {
3093
3590
  for (const r of vecResults)
3094
3591
  docidMap.set(r.filepath, r.docid);
@@ -3104,8 +3601,9 @@ export async function hybridQuery(store, query, options) {
3104
3601
  }
3105
3602
  }
3106
3603
  }
3107
- // Step 4: RRF fusion — first 2 lists (original FTS + first vec) get 2x weight
3108
- const weights = rankedLists.map((_, i) => i < 2 ? 2.0 : 1.0);
3604
+ // Step 4: RRF fusion — original-query FTS and vector lists get 2x weight;
3605
+ // expansion-derived lists stay at 1x independent of insertion order.
3606
+ const weights = getHybridRrfWeights(rankedListMeta);
3109
3607
  const fused = reciprocalRankFusion(rankedLists, weights);
3110
3608
  const rrfTraceByFile = explain ? buildRrfTrace(rankedLists, weights, rankedListMeta) : null;
3111
3609
  const candidates = fused.slice(0, candidateLimit);
@@ -3286,10 +3784,11 @@ export async function vectorSearchQuery(store, query, options) {
3286
3784
  const vecExpanded = allExpanded.filter(q => q.type !== 'lex');
3287
3785
  options?.hooks?.onExpand?.(query, vecExpanded, Date.now() - expandStart);
3288
3786
  // Run original + vec/hyde expanded through vector, sequentially — concurrent embed() hangs
3787
+ const embedModel = getLlm(store).embedModelName;
3289
3788
  const queryTexts = [query, ...vecExpanded.map(q => q.query)];
3290
3789
  const allResults = new Map();
3291
3790
  for (const q of queryTexts) {
3292
- const vecResults = await store.searchVec(q, DEFAULT_EMBED_MODEL, limit, collection);
3791
+ const vecResults = await store.searchVec(q, embedModel, limit, collection);
3293
3792
  for (const r of vecResults) {
3294
3793
  const existing = allResults.get(r.filepath);
3295
3794
  if (!existing || r.score > existing.score) {
@@ -3390,7 +3889,8 @@ export async function structuredSearch(store, searches, options) {
3390
3889
  const vecSearches = searches.filter((s) => s.type === 'vec' || s.type === 'hyde');
3391
3890
  if (vecSearches.length > 0) {
3392
3891
  const llm = getLlm(store);
3393
- const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query, llm.embedModelName));
3892
+ const embedModel = llm.embedModelName;
3893
+ const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query, embedModel));
3394
3894
  hooks?.onEmbedStart?.(textsToEmbed.length);
3395
3895
  const embedStart = Date.now();
3396
3896
  const embeddings = await llm.embedBatch(textsToEmbed);
@@ -3400,7 +3900,7 @@ export async function structuredSearch(store, searches, options) {
3400
3900
  if (!embedding)
3401
3901
  continue;
3402
3902
  for (const coll of collectionList) {
3403
- const vecResults = await store.searchVec(vecSearches[i].query, DEFAULT_EMBED_MODEL, 20, coll, undefined, embedding);
3903
+ const vecResults = await store.searchVec(vecSearches[i].query, embedModel, 20, coll, undefined, embedding);
3404
3904
  if (vecResults.length > 0) {
3405
3905
  for (const r of vecResults)
3406
3906
  docidMap.set(r.filepath, r.docid);