kontext-engine 0.1.4 → 0.1.6

This diff shows the changes between two publicly released versions of this package, as published to a supported public registry. It is provided for informational purposes only.
package/dist/cli/index.js CHANGED
@@ -779,6 +779,25 @@ function prepareChunkText(filePath, parent, text) {
779
779
  parts.push(text);
780
780
  return parts.join("\n");
781
781
  }
782
var MAX_RETRIES = 3;
var BASE_DELAY_MS = 500;
/**
 * Issue an HTTP request, retrying transient failures with exponential
 * backoff (500ms, 1s, 2s between attempts).
 *
 * Retryable statuses are 429 (rate limited) and 5xx (transient server
 * failure); any other non-2xx status fails immediately. Previously only
 * 429 was retried, so a momentary 502/503 from the embedding provider
 * aborted a whole indexing run.
 *
 * @param {string} url - request URL
 * @param {RequestInit} init - fetch options (method, headers, body, ...)
 * @returns {Promise<Response>} the first OK (2xx) response
 * @throws {Error} on a non-retryable status, or when retries are exhausted
 */
async function fetchWithRetry(url, init) {
  let lastError = null;
  for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
    const response = await fetch(url, init);
    if (response.ok) return response;
    const retryable = response.status === 429 || response.status >= 500;
    if (retryable && attempt < MAX_RETRIES) {
      const delay = BASE_DELAY_MS * Math.pow(2, attempt);
      await new Promise((resolve) => setTimeout(resolve, delay));
      lastError = new Error(`HTTP ${response.status}: ${response.statusText}`);
      continue;
    }
    // Non-retryable status, or retry budget exhausted: same message format
    // callers already match on.
    throw new Error(
      `Embedding API error: HTTP ${response.status} ${response.statusText}`
    );
  }
  // Defensive fallback; the final loop iteration always returns or throws.
  throw lastError ?? new Error("Embedding API request failed after retries");
}
782
801
  var LOCAL_MODEL_ID = "Xenova/all-MiniLM-L6-v2";
783
802
  var LOCAL_DIMENSIONS = 384;
784
803
  var LOCAL_BATCH_SIZE = 32;
@@ -836,6 +855,87 @@ async function createLocalEmbedder() {
836
855
  }
837
856
  };
838
857
  }
858
var VOYAGE_API_URL = "https://api.voyageai.com/v1/embeddings";
var VOYAGE_MODEL = "voyage-code-3";
var VOYAGE_DEFAULT_DIMENSIONS = 1024;
var VOYAGE_BATCH_SIZE = 128;
/**
 * Build an embedder backed by the Voyage AI embeddings API.
 * Documents are embedded in batches of VOYAGE_BATCH_SIZE using the
 * "document" input type; single lookups use the "query" input type.
 */
function createVoyageEmbedder(apiKey, dimensions = VOYAGE_DEFAULT_DIMENSIONS) {
  const embedBatchwise = async (texts, onProgress) => {
    const collected = [];
    let offset = 0;
    while (offset < texts.length) {
      const slice = texts.slice(offset, offset + VOYAGE_BATCH_SIZE);
      const embedded = await callVoyageAPI(apiKey, slice, "document", dimensions);
      for (const vector of embedded) collected.push(vector);
      offset += slice.length;
      // Report cumulative progress after each persisted batch.
      onProgress?.(Math.min(offset, texts.length), texts.length);
    }
    return collected;
  };
  return {
    name: VOYAGE_MODEL,
    dimensions,
    embed: embedBatchwise,
    async embedSingle(text) {
      const [vector] = await callVoyageAPI(apiKey, [text], "query", dimensions);
      return vector;
    }
  };
}
882
/**
 * POST a batch of texts to the Voyage embeddings endpoint and return
 * normalized Float32Array vectors, one per input text.
 *
 * Each response `data` entry carries an `index` field identifying which
 * input it belongs to; entries are sorted by that index before mapping so
 * the output order always matches the input order, even if the API were to
 * return entries out of order.
 *
 * @throws {Error} via fetchWithRetry on HTTP failure
 */
async function callVoyageAPI(apiKey, texts, inputType, dimensions) {
  const response = await fetchWithRetry(VOYAGE_API_URL, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`
    },
    body: JSON.stringify({
      model: VOYAGE_MODEL,
      input: texts,
      input_type: inputType,
      output_dimension: dimensions
    })
  });
  const json = await response.json();
  // `?? 0` keeps the (stable) sort a no-op if index were ever absent.
  const ordered = [...json.data].sort((a, b) => (a.index ?? 0) - (b.index ?? 0));
  return ordered.map((d) => normalizeVector(new Float32Array(d.embedding)));
}
899
var OPENAI_API_URL = "https://api.openai.com/v1/embeddings";
var OPENAI_MODEL = "text-embedding-3-large";
var OPENAI_DEFAULT_DIMENSIONS = 1024;
var OPENAI_BATCH_SIZE = 128;
/**
 * Build an embedder backed by the OpenAI embeddings API.
 * Texts are embedded in batches of OPENAI_BATCH_SIZE; progress is reported
 * cumulatively after each batch.
 */
function createOpenAIEmbedder(apiKey, dimensions = OPENAI_DEFAULT_DIMENSIONS) {
  const embedBatchwise = async (texts, onProgress) => {
    const collected = [];
    let offset = 0;
    while (offset < texts.length) {
      const slice = texts.slice(offset, offset + OPENAI_BATCH_SIZE);
      const embedded = await callOpenAIAPI(apiKey, slice, dimensions);
      for (const vector of embedded) collected.push(vector);
      offset += slice.length;
      onProgress?.(Math.min(offset, texts.length), texts.length);
    }
    return collected;
  };
  return {
    name: OPENAI_MODEL,
    dimensions,
    embed: embedBatchwise,
    async embedSingle(text) {
      const [vector] = await callOpenAIAPI(apiKey, [text], dimensions);
      return vector;
    }
  };
}
923
/**
 * POST a batch of texts to the OpenAI embeddings endpoint and return
 * normalized Float32Array vectors, one per input text.
 *
 * Each response `data` entry carries an `index` identifying its input;
 * entries are sorted by index before mapping so output order always matches
 * input order regardless of the order the API returns them in.
 *
 * @throws {Error} via fetchWithRetry on HTTP failure
 */
async function callOpenAIAPI(apiKey, texts, dimensions) {
  const response = await fetchWithRetry(OPENAI_API_URL, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`
    },
    body: JSON.stringify({
      model: OPENAI_MODEL,
      input: texts,
      dimensions
    })
  });
  const json = await response.json();
  // `?? 0` keeps the (stable) sort a no-op if index were ever absent.
  const ordered = [...json.data].sort((a, b) => (a.index ?? 0) - (b.index ?? 0));
  return ordered.map((d) => normalizeVector(new Float32Array(d.embedding)));
}
839
939
 
840
940
  // src/utils/errors.ts
841
941
  var ErrorCode = {
@@ -877,6 +977,12 @@ var ConfigError = class extends KontextError {
877
977
  this.name = "ConfigError";
878
978
  }
879
979
  };
980
// Error raised by the storage (SQLite) layer. Passes the machine-readable
// `code` (an ErrorCode value) and optional underlying `cause` through to
// KontextError, and overrides `name` for cleaner stack traces / matching.
var DatabaseError = class extends KontextError {
  constructor(message, code, cause) {
    super(message, code, cause);
    this.name = "DatabaseError";
  }
};
880
986
 
881
987
  // src/utils/error-boundary.ts
882
988
  function handleCommandError(err, logger, verbose) {
@@ -1051,7 +1157,10 @@ function searchVectors(db, query, limit) {
1051
1157
 
1052
1158
  // src/storage/db.ts
1053
1159
  var DEFAULT_DIMENSIONS = 384;
1054
- function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
1160
+ var VECTOR_DIMENSIONS_META_KEY = "vector_dimensions";
1161
+ var INDEX_EMBEDDER_PROVIDER_META_KEY = "index_embedder_provider";
1162
+ var INDEX_EMBEDDER_MODEL_META_KEY = "index_embedder_model";
1163
+ function createDatabase(dbPath, dimensions) {
1055
1164
  const dir = path3.dirname(dbPath);
1056
1165
  if (!fs4.existsSync(dir)) {
1057
1166
  fs4.mkdirSync(dir, { recursive: true });
@@ -1060,7 +1169,8 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
1060
1169
  db.pragma("journal_mode = WAL");
1061
1170
  db.pragma("foreign_keys = ON");
1062
1171
  sqliteVec.load(db);
1063
- initializeSchema(db, dimensions);
1172
+ initializeSchema(db, dimensions ?? DEFAULT_DIMENSIONS);
1173
+ ensureVectorDimensions(db, dimensions);
1064
1174
  const stmtUpsertFile = db.prepare(`
1065
1175
  INSERT INTO files (path, language, hash, last_indexed, size)
1066
1176
  VALUES (@path, @language, @hash, @lastIndexed, @size)
@@ -1069,6 +1179,7 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
1069
1179
  hash = excluded.hash,
1070
1180
  last_indexed = excluded.last_indexed,
1071
1181
  size = excluded.size
1182
+ RETURNING id
1072
1183
  `);
1073
1184
  const stmtGetFile = db.prepare(
1074
1185
  "SELECT id, path, language, hash, last_indexed as lastIndexed, size FROM files WHERE path = ?"
@@ -1081,6 +1192,16 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
1081
1192
  const stmtGetChunksByFile = db.prepare(
1082
1193
  "SELECT id, file_id as fileId, line_start as lineStart, line_end as lineEnd, type, name, parent, text, imports, exports, hash FROM chunks WHERE file_id = ? ORDER BY line_start"
1083
1194
  );
1195
+ const stmtGetChunksMissingVectors = db.prepare(
1196
+ `SELECT c.id, c.file_id as fileId, f.path as filePath, f.language,
1197
+ c.line_start as lineStart, c.line_end as lineEnd,
1198
+ c.type, c.name, c.parent, c.text, c.exports as exports
1199
+ FROM chunks c
1200
+ JOIN files f ON f.id = c.file_id
1201
+ LEFT JOIN chunk_vectors v ON v.rowid = c.id
1202
+ WHERE v.rowid IS NULL
1203
+ ORDER BY c.id`
1204
+ );
1084
1205
  const stmtGetChunkIdsByFile = db.prepare(
1085
1206
  "SELECT id FROM chunks WHERE file_id = ?"
1086
1207
  );
@@ -1110,20 +1231,26 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
1110
1231
  const stmtLastIndexed = db.prepare(
1111
1232
  "SELECT MAX(last_indexed) as lastIndexed FROM files"
1112
1233
  );
1234
+ const stmtGetMeta = db.prepare("SELECT value FROM meta WHERE key = ?");
1235
+ const stmtSetMeta = db.prepare(
1236
+ "INSERT OR REPLACE INTO meta (key, value) VALUES (?, ?)"
1237
+ );
1113
1238
  return {
1114
1239
  upsertFile(file) {
1115
- const result = stmtUpsertFile.run({
1240
+ const row = stmtUpsertFile.get({
1116
1241
  path: file.path,
1117
1242
  language: file.language,
1118
1243
  hash: file.hash,
1119
1244
  lastIndexed: Date.now(),
1120
1245
  size: file.size
1121
1246
  });
1122
- if (result.changes > 0 && result.lastInsertRowid) {
1123
- return Number(result.lastInsertRowid);
1247
+ if (!row?.id) {
1248
+ throw new DatabaseError(
1249
+ `Failed to upsert file: ${file.path}`,
1250
+ ErrorCode.DB_WRITE_FAILED
1251
+ );
1124
1252
  }
1125
- const existing = stmtGetFile.get(file.path);
1126
- return existing?.id ?? 0;
1253
+ return row.id;
1127
1254
  },
1128
1255
  getFile(filePath) {
1129
1256
  const row = stmtGetFile.get(filePath);
@@ -1166,15 +1293,17 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
1166
1293
  return row.lastIndexed;
1167
1294
  },
1168
1295
  deleteFile(filePath) {
1169
- const file = stmtGetFile.get(filePath);
1170
- if (file) {
1171
- const chunkRows = stmtGetChunkIdsByFile.all(file.id);
1172
- const chunkIds = chunkRows.map((r) => r.id);
1173
- if (chunkIds.length > 0) {
1174
- deleteVectorsByChunkIds(db, chunkIds);
1296
+ db.transaction(() => {
1297
+ const file = stmtGetFile.get(filePath);
1298
+ if (file) {
1299
+ const chunkRows = stmtGetChunkIdsByFile.all(file.id);
1300
+ const chunkIds = chunkRows.map((r) => r.id);
1301
+ if (chunkIds.length > 0) {
1302
+ deleteVectorsByChunkIds(db, chunkIds);
1303
+ }
1175
1304
  }
1176
- }
1177
- stmtDeleteFile.run(filePath);
1305
+ stmtDeleteFile.run(filePath);
1306
+ })();
1178
1307
  },
1179
1308
  insertChunks(fileId, chunks) {
1180
1309
  const ids = [];
@@ -1219,6 +1348,13 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
1219
1348
  exports: r.exports === 1
1220
1349
  }));
1221
1350
  },
1351
  // Return every chunk (joined with its file's path and language) that has
  // no row in chunk_vectors yet — i.e. chunks still awaiting embedding.
  // Used to resume an interrupted embedding run. `exports` is stored as 0/1
  // in SQLite and converted back to a boolean here.
  getChunksMissingVectors() {
    const rows = stmtGetChunksMissingVectors.all();
    return rows.map((r) => ({
      ...r,
      exports: r.exports === 1
    }));
  },
1222
1358
  searchChunks(filters, limit) {
1223
1359
  const conditions = [];
1224
1360
  const params = [];
@@ -1269,12 +1405,14 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
1269
1405
  }));
1270
1406
  },
1271
1407
  deleteChunksByFile(fileId) {
1272
- const chunkRows = stmtGetChunkIdsByFile.all(fileId);
1273
- const chunkIds = chunkRows.map((r) => r.id);
1274
- if (chunkIds.length > 0) {
1275
- deleteVectorsByChunkIds(db, chunkIds);
1276
- }
1277
- stmtDeleteChunksByFile.run(fileId);
1408
+ db.transaction(() => {
1409
+ const chunkRows = stmtGetChunkIdsByFile.all(fileId);
1410
+ const chunkIds = chunkRows.map((r) => r.id);
1411
+ if (chunkIds.length > 0) {
1412
+ deleteVectorsByChunkIds(db, chunkIds);
1413
+ }
1414
+ stmtDeleteChunksByFile.run(fileId);
1415
+ })();
1278
1416
  },
1279
1417
  insertDependency(sourceChunkId, targetChunkId, type) {
1280
1418
  stmtInsertDep.run(sourceChunkId, targetChunkId, type);
@@ -1314,6 +1452,66 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
1314
1452
  return Object.values(result[0])[0];
1315
1453
  }
1316
1454
  return String(result);
1455
+ },
1456
  // Read the persisted embedding-vector width from the meta table.
  // Returns null when the key has never been written; throws
  // DatabaseError(DB_CORRUPTED) when the stored value is present but not a
  // positive integer.
  getVectorDimensions() {
    const row = stmtGetMeta.get(VECTOR_DIMENSIONS_META_KEY);
    if (!row) return null;
    const dimensions2 = Number.parseInt(row.value, 10);
    if (!Number.isInteger(dimensions2) || dimensions2 <= 0) {
      throw new DatabaseError(
        `Invalid stored vector dimensions metadata: ${row.value}`,
        ErrorCode.DB_CORRUPTED
      );
    }
    return dimensions2;
  },
1468
  // Describe the embedder this index was built with, as
  // { provider, model, dimensions }, or null when no embedder metadata has
  // been recorded yet. Provider and model must either both exist or both be
  // absent; dimensions come from getVectorDimensions(). Any partial state
  // throws DatabaseError(DB_CORRUPTED).
  getIndexEmbedder() {
    const providerRow = stmtGetMeta.get(INDEX_EMBEDDER_PROVIDER_META_KEY);
    const modelRow = stmtGetMeta.get(INDEX_EMBEDDER_MODEL_META_KEY);
    if (!providerRow && !modelRow) return null;
    if (!providerRow || !modelRow) {
      throw new DatabaseError(
        "Corrupted index embedder metadata: provider/model keys are incomplete.",
        ErrorCode.DB_CORRUPTED
      );
    }
    // Dimensions live under their own meta key; an embedder record without
    // them is inconsistent.
    const dimensions2 = this.getVectorDimensions();
    if (dimensions2 === null) {
      throw new DatabaseError(
        "Corrupted index embedder metadata: vector dimensions are missing.",
        ErrorCode.DB_CORRUPTED
      );
    }
    return {
      provider: providerRow.value,
      model: modelRow.value,
      dimensions: dimensions2
    };
  },
1491
  // Record which embedder produced this index. Validates the metadata shape,
  // rejects dimensions that disagree with the already-persisted vector
  // dimensions, then writes provider and model atomically in one
  // transaction. Only provider/model are written here — the dimensions meta
  // key is maintained by ensureVectorDimensions at database-open time.
  setIndexEmbedder(metadata) {
    if (!metadata.provider || !metadata.model) {
      throw new DatabaseError(
        "Invalid index embedder metadata: provider and model are required.",
        ErrorCode.DB_WRITE_FAILED
      );
    }
    if (!Number.isInteger(metadata.dimensions) || metadata.dimensions <= 0) {
      throw new DatabaseError(
        `Invalid index embedder metadata dimensions: ${String(metadata.dimensions)}`,
        ErrorCode.DB_WRITE_FAILED
      );
    }
    const vectorDimensions = this.getVectorDimensions();
    if (vectorDimensions !== null && vectorDimensions !== metadata.dimensions) {
      throw new DatabaseError(
        `Index embedder metadata dimensions (${metadata.dimensions}) do not match vector table dimensions (${vectorDimensions}).`,
        ErrorCode.DB_WRITE_FAILED
      );
    }
    // db.transaction returns a callable; invoke immediately.
    db.transaction(() => {
      stmtSetMeta.run(INDEX_EMBEDDER_PROVIDER_META_KEY, metadata.provider);
      stmtSetMeta.run(INDEX_EMBEDDER_MODEL_META_KEY, metadata.model);
    })();
  }
1318
1516
  };
1319
1517
  }
@@ -1336,6 +1534,59 @@ function getMetaVersion(db) {
1336
1534
  return 0;
1337
1535
  }
1338
1536
  }
1537
// Reconcile three sources of truth for the embedding vector width:
//   1. `actual`             - dims parsed from the chunk_vectors table DDL
//                             (null when the table/column is absent)
//   2. `storedDimensions`   - dims previously persisted under the meta key
//   3. `expectedDimensions` - dims requested by the caller (may be undefined)
// Backfills the meta key when it is missing, and throws when any pair
// disagrees: meta-vs-table mismatch is DB_CORRUPTED (the database
// contradicts itself); config-vs-index mismatch is CONFIG_INVALID (the user
// must rebuild the index).
function ensureVectorDimensions(db, expectedDimensions) {
  const actual = getExistingVectorDimensions(db);
  const stored = db.prepare("SELECT value FROM meta WHERE key = ?").get(VECTOR_DIMENSIONS_META_KEY);
  const storedValue = stored?.value;
  const storedDimensions = storedValue ? Number.parseInt(storedValue, 10) : void 0;
  // A meta value that is present but not a positive integer is corruption.
  if (storedDimensions !== void 0 && (!Number.isInteger(storedDimensions) || storedDimensions <= 0)) {
    throw new DatabaseError(
      `Invalid stored vector dimensions metadata: ${storedValue ?? "unknown"}`,
      ErrorCode.DB_CORRUPTED
    );
  }
  // Meta key and table DDL must agree whenever both exist.
  if (actual !== null && storedDimensions !== void 0 && storedDimensions !== actual) {
    throw new DatabaseError(
      `Vector dimensions metadata mismatch: meta=${storedDimensions}, table=${actual}.`,
      ErrorCode.DB_CORRUPTED
    );
  }
  // Caller has no expectation: just make sure the meta key exists, seeding
  // it from the table DDL (or the default) when absent.
  if (expectedDimensions === void 0) {
    if (storedDimensions !== void 0) return;
    const dimensions = actual ?? DEFAULT_DIMENSIONS;
    db.prepare(
      "INSERT OR REPLACE INTO meta (key, value) VALUES (?, ?)"
    ).run(VECTOR_DIMENSIONS_META_KEY, String(dimensions));
    return;
  }
  // Caller expects specific dims but no meta key exists yet: persist the
  // authoritative value first (table DDL wins over the request), THEN fail
  // if the existing table disagrees with the request — so the recorded meta
  // reflects reality even on the error path.
  if (!stored) {
    const dimensions = actual ?? expectedDimensions;
    db.prepare(
      "INSERT OR REPLACE INTO meta (key, value) VALUES (?, ?)"
    ).run(VECTOR_DIMENSIONS_META_KEY, String(dimensions));
    if (actual !== null && actual !== expectedDimensions) {
      throw new DatabaseError(
        `Vector dimension mismatch: index uses ${actual} dims, but config requests ${expectedDimensions} dims. Rebuild the index.`,
        ErrorCode.CONFIG_INVALID
      );
    }
    return;
  }
  // Meta key exists: it must match what the caller expects.
  if (storedDimensions !== expectedDimensions) {
    throw new DatabaseError(
      `Vector dimension mismatch: index uses ${storedDimensions} dims, but config requests ${expectedDimensions} dims. Rebuild the index.`,
      ErrorCode.CONFIG_INVALID
    );
  }
}
1582
/**
 * Read the embedding width out of the stored DDL for the `chunk_vectors`
 * virtual table: its CREATE statement in sqlite_master contains a column
 * declaration like "embedding float[384]".
 * Returns null when the table does not exist or its SQL lacks a
 * recognizable `embedding float[N]` column.
 */
function getExistingVectorDimensions(db) {
  const masterRow = db.prepare("SELECT sql FROM sqlite_master WHERE name = 'chunk_vectors'").get();
  const createSql = masterRow?.sql;
  if (!createSql) return null;
  const dims = createSql.match(/embedding\s+float\[(\d+)\]/i);
  return dims ? Number.parseInt(dims[1], 10) : null;
}
1339
1590
 
1340
1591
  // src/cli/commands/config.ts
1341
1592
  import fs5 from "fs";
@@ -1451,6 +1702,20 @@ function setNestedValue(obj, key, value) {
1451
1702
  }
1452
1703
  current[parts[parts.length - 1]] = value;
1453
1704
  }
1705
/**
 * Report whether the dot-separated `key` path exists as own properties of
 * nested objects in `obj` (e.g. "embedder.dimensions").
 *
 * Uses Object.hasOwn instead of the `in` operator so inherited properties
 * such as "toString" or "constructor" do not falsely validate as config
 * keys (the `in` operator walks the prototype chain).
 *
 * @param {object} obj - object to walk
 * @param {string} key - dot-separated path
 * @returns {boolean} true when every path segment is an own property
 */
function hasNestedKey(obj, key) {
  const parts = key.split(".");
  let current = obj;
  for (const part of parts) {
    if (current === null || current === void 0 || typeof current !== "object") {
      return false;
    }
    if (!Object.hasOwn(current, part)) {
      return false;
    }
    current = current[part];
  }
  return true;
}
1454
1719
  function parseValue(rawValue) {
1455
1720
  if (rawValue === "null") return null;
1456
1721
  if (rawValue === "true") return true;
@@ -1489,9 +1754,26 @@ function runConfigSet(projectPath, key, rawValue) {
1489
1754
  setNestedValue(config, key, value);
1490
1755
  writeConfig(ctxDir, config);
1491
1756
  }
1492
/**
 * Reset configuration. With no `key`, rewrite the whole config file from
 * DEFAULT_CONFIG; with a `key`, restore just that dot-path to its default
 * value while leaving other settings untouched.
 * @throws {ConfigError} CONFIG_INVALID when `key` is not a known config key
 */
function runConfigReset(projectPath, key) {
  const ctxDir = resolveCtxDir(projectPath);
  if (!key) {
    // Full reset: replace everything with a deep copy of the defaults.
    writeConfig(ctxDir, structuredClone(DEFAULT_CONFIG));
    return;
  }
  if (!hasNestedKey(DEFAULT_CONFIG, key)) {
    throw new ConfigError(`Invalid config key: ${key}`, ErrorCode.CONFIG_INVALID);
  }
  const current = readConfig(ctxDir);
  // Clone the default so later mutation of the config cannot leak back
  // into DEFAULT_CONFIG.
  setNestedValue(current, key, structuredClone(getNestedValue(DEFAULT_CONFIG, key)));
  writeConfig(ctxDir, current);
}
1496
1778
  function registerConfigCommand(program2) {
1497
1779
  const cmd = program2.command("config").description("Show or modify configuration");
@@ -1526,21 +1808,78 @@ function registerConfigCommand(program2) {
1526
1808
  configErrorHandler(err);
1527
1809
  }
1528
1810
  });
1529
- cmd.command("reset").description("Reset configuration to defaults").action(() => {
1811
+ cmd.command("reset [key]").description("Reset configuration to defaults or reset a specific key").action((key) => {
1530
1812
  try {
1531
- runConfigReset(process.cwd());
1532
- console.log("Configuration reset to defaults.");
1813
+ runConfigReset(process.cwd(), key);
1814
+ if (key) {
1815
+ console.log(`Reset ${key} to default.`);
1816
+ } else {
1817
+ console.log("Configuration reset to defaults.");
1818
+ }
1533
1819
  } catch (err) {
1534
1820
  configErrorHandler(err);
1535
1821
  }
1536
1822
  });
1537
1823
  }
1538
1824
 
1825
+ // src/cli/embedder.ts
1826
/** Load the project's config and return just its `embedder` section. */
function getProjectEmbedderConfig(projectPath) {
  return runConfigShow(projectPath).config.embedder;
}
1830
/**
 * Instantiate the embedder selected by the project's configuration.
 * "local" runs in-process; "voyage" and "openai" call hosted APIs and
 * require CTX_VOYAGE_KEY / CTX_OPENAI_KEY respectively.
 * @throws {ConfigError} CONFIG_INVALID on bad config or unknown provider
 */
async function createProjectEmbedder(projectPath) {
  const config = getProjectEmbedderConfig(projectPath);
  validateProjectEmbedderConfig(config);
  if (config.provider === "local") {
    return await createLocalEmbedder();
  }
  if (config.provider === "voyage") {
    return createVoyageEmbedder(requireApiKey("CTX_VOYAGE_KEY", "voyage"), config.dimensions);
  }
  if (config.provider === "openai") {
    return createOpenAIEmbedder(requireApiKey("CTX_OPENAI_KEY", "openai"), config.dimensions);
  }
  throw new ConfigError(
    `Unsupported embedder provider "${config.provider}". Use local, voyage, or openai.`,
    ErrorCode.CONFIG_INVALID
  );
}
1851
/**
 * Read a mandatory API key for `provider` from the environment.
 * @returns {string} the non-empty value of the environment variable
 * @throws {ConfigError} CONFIG_INVALID when the variable is unset or empty
 */
function requireApiKey(envVar, provider) {
  const value = process.env[envVar];
  const present = typeof value === "string" && value.length > 0;
  if (present) return value;
  throw new ConfigError(
    `Embedder provider "${provider}" requires ${envVar}. Export ${envVar} before running this command.`,
    ErrorCode.CONFIG_INVALID
  );
}
1859
/**
 * Sanity-check the embedder section of the project config.
 * @throws {ConfigError} CONFIG_INVALID when dimensions is not a positive
 *   integer, or when the local provider is configured with anything other
 *   than its fixed 384 dimensions
 */
function validateProjectEmbedderConfig(config) {
  const dimensionsOk = Number.isInteger(config.dimensions) && config.dimensions > 0;
  if (!dimensionsOk) {
    throw new ConfigError(
      `Invalid embedder.dimensions (${String(config.dimensions)}). Must be a positive integer.`,
      ErrorCode.CONFIG_INVALID
    );
  }
  const localMismatch = config.provider === "local" && config.dimensions !== 384;
  if (localMismatch) {
    throw new ConfigError(
      'Local embedder requires "embedder.dimensions" = 384. Update config or switch provider.',
      ErrorCode.CONFIG_INVALID
    );
  }
}
1873
+
1539
1874
  // src/cli/commands/init.ts
1875
+ function isSameEmbedderConfig(a, b) {
1876
+ return a.provider === b.provider && a.model === b.model && a.dimensions === b.dimensions;
1877
+ }
1540
1878
  var CTX_DIR2 = ".ctx";
1541
1879
  var DB_FILENAME = "index.db";
1542
1880
  var CONFIG_FILENAME2 = "config.json";
1543
1881
  var GITIGNORE_ENTRY = ".ctx/";
1882
+ var EMBEDDING_SAVE_BATCH_SIZE = 128;
1544
1883
  function ensureGitignore(projectRoot) {
1545
1884
  const gitignorePath = path5.join(projectRoot, ".gitignore");
1546
1885
  if (fs6.existsSync(gitignorePath)) {
@@ -1575,6 +1914,25 @@ function formatLanguageSummary(counts) {
1575
1914
  const entries = [...counts.entries()].sort((a, b) => b[1] - a[1]).map(([lang, count]) => `${lang}: ${count}`);
1576
1915
  return entries.join(", ");
1577
1916
  }
1917
/**
 * Embed chunk texts in fixed-size batches, persisting each batch's vectors
 * inside a transaction before moving on — so an interrupted run can later
 * resume from whichever chunks still lack vectors.
 * @returns {Promise<number>} count of vectors written
 */
async function embedAndPersistInBatches(db, embedder, chunks, log) {
  const total = chunks.length;
  let written = 0;
  for (let start = 0; start < total; start += EMBEDDING_SAVE_BATCH_SIZE) {
    const batch = chunks.slice(start, start + EMBEDDING_SAVE_BATCH_SIZE);
    const prepared = batch.map(
      (chunk) => prepareChunkText(chunk.filePath, chunk.parent, chunk.text)
    );
    const vectors = await embedder.embed(prepared);
    db.transaction(() => {
      batch.forEach((chunk, idx) => {
        db.insertVector(chunk.id, vectors[idx]);
      });
    });
    written += vectors.length;
    log(` Embedding... ${written}/${total}`);
  }
  return written;
}
1578
1936
  async function runInit(projectPath, options = {}) {
1579
1937
  const log = options.log ?? console.log;
1580
1938
  const absoluteRoot = path5.resolve(projectPath);
@@ -1584,9 +1942,28 @@ async function runInit(projectPath, options = {}) {
1584
1942
  if (!fs6.existsSync(ctxDir)) fs6.mkdirSync(ctxDir, { recursive: true });
1585
1943
  ensureGitignore(absoluteRoot);
1586
1944
  ensureConfig(ctxDir);
1945
+ const embedderConfig = getProjectEmbedderConfig(absoluteRoot);
1587
1946
  const dbPath = path5.join(ctxDir, DB_FILENAME);
1588
- const db = createDatabase(dbPath);
1947
+ const db = createDatabase(dbPath, embedderConfig.dimensions);
1589
1948
  try {
1949
+ const existingEmbedder = db.getIndexEmbedder();
1950
+ if (existingEmbedder) {
1951
+ if (!isSameEmbedderConfig(existingEmbedder, embedderConfig)) {
1952
+ throw new IndexError(
1953
+ `Index embedder mismatch: index uses ${existingEmbedder.provider} (${existingEmbedder.model}, ${existingEmbedder.dimensions} dims) but config requests ${embedderConfig.provider} (${embedderConfig.model}, ${embedderConfig.dimensions} dims). Rebuild the index.`,
1954
+ ErrorCode.CONFIG_INVALID
1955
+ );
1956
+ }
1957
+ } else {
1958
+ const isEmptyIndex = db.getFileCount() === 0 && db.getChunkCount() === 0 && db.getVectorCount() === 0;
1959
+ if (isEmptyIndex) {
1960
+ db.setIndexEmbedder({
1961
+ provider: embedderConfig.provider,
1962
+ model: embedderConfig.model,
1963
+ dimensions: embedderConfig.dimensions
1964
+ });
1965
+ }
1966
+ }
1590
1967
  const discovered = await discoverFiles({
1591
1968
  root: absoluteRoot,
1592
1969
  extraIgnore: [".ctx/"]
@@ -1672,21 +2049,34 @@ async function runInit(projectPath, options = {}) {
1672
2049
  }
1673
2050
  log(` ${allChunksWithMeta.length} chunks created`);
1674
2051
  let vectorsCreated = 0;
1675
- if (!options.skipEmbedding && allChunksWithMeta.length > 0) {
1676
- const embedder = await createEmbedder();
1677
- const texts = allChunksWithMeta.map(
1678
- (cm) => prepareChunkText(cm.fileRelPath, cm.chunk.parent, cm.chunk.text)
1679
- );
1680
- const vectors = await embedder.embed(texts, (done, total) => {
1681
- log(` Embedding... ${done}/${total}`);
1682
- });
1683
- db.transaction(() => {
1684
- for (let i = 0; i < allChunksWithMeta.length; i++) {
1685
- const chunkDbId = parseInt(allChunksWithMeta[i].chunk.id, 10);
1686
- db.insertVector(chunkDbId, vectors[i]);
2052
+ if (!options.skipEmbedding) {
2053
+ const chunksMissingVectors = db.getChunksMissingVectors().map((chunk) => ({
2054
+ id: chunk.id,
2055
+ filePath: chunk.filePath,
2056
+ parent: chunk.parent,
2057
+ text: chunk.text
2058
+ }));
2059
+ if (chunksMissingVectors.length > 0) {
2060
+ log(` ${chunksMissingVectors.length} chunks need embeddings`);
2061
+ }
2062
+ if (chunksMissingVectors.length > 0) {
2063
+ const embedder = await createEmbedder(absoluteRoot);
2064
+ try {
2065
+ vectorsCreated = await embedAndPersistInBatches(
2066
+ db,
2067
+ embedder,
2068
+ chunksMissingVectors,
2069
+ log
2070
+ );
2071
+ } catch (err) {
2072
+ const total = chunksMissingVectors.length;
2073
+ throw new IndexError(
2074
+ `Embedding failed after saving ${vectorsCreated}/${total} vectors. Run "ctx init" again to resume. ${err instanceof Error ? err.message : String(err)}`,
2075
+ ErrorCode.EMBEDDER_FAILED,
2076
+ err instanceof Error ? err : void 0
2077
+ );
1687
2078
  }
1688
- });
1689
- vectorsCreated = vectors.length;
2079
+ }
1690
2080
  }
1691
2081
  const durationMs = performance.now() - start;
1692
2082
  const dbSize = fs6.existsSync(dbPath) ? fs6.statSync(dbPath).size : 0;
@@ -1711,8 +2101,8 @@ async function runInit(projectPath, options = {}) {
1711
2101
  db.close();
1712
2102
  }
1713
2103
  }
1714
- async function createEmbedder() {
1715
- return createLocalEmbedder();
2104
+ async function createEmbedder(projectPath) {
2105
+ return createProjectEmbedder(projectPath);
1716
2106
  }
1717
2107
  function registerInitCommand(program2) {
1718
2108
  program2.command("init [path]").description("Index current directory or specified path").action(async (inputPath) => {
@@ -2031,6 +2421,7 @@ var PATH_BOOST_PARTIAL = 1.2;
2031
2421
  var IMPORT_PENALTY = 0.5;
2032
2422
  var TEST_FILE_PENALTY = 0.65;
2033
2423
  var SMALL_SNIPPET_PENALTY = 0.75;
2424
+ var DATA_LITERAL_PENALTY = 0.7;
2034
2425
  var PUBLIC_API_BOOST = 1.12;
2035
2426
  var TEST_FILE_DIRECTORY_PATTERN = /(?:^|\/)(?:tests|__tests__)(?:\/|$)/;
2036
2427
  var TEST_FILE_NAME_PATTERN = /(?:^|\/)[^/]*\.(?:test|spec)\.[cm]?[jt]sx?$/;
@@ -2045,7 +2436,8 @@ function fusionMergeWithPathBoost(strategyResults, limit, pathBoostTerms) {
2045
2436
  const importAdjusted = applyImportDeprioritization(boosted);
2046
2437
  const testAdjusted = applyTestFileDeprioritization(importAdjusted);
2047
2438
  const snippetAdjusted = applySmallSnippetDeprioritization(testAdjusted);
2048
- const boostedApi = applyPublicApiBoost(snippetAdjusted);
2439
+ const dataLiteralAdjusted = applyDataLiteralDeprioritization(snippetAdjusted);
2440
+ const boostedApi = applyPublicApiBoost(dataLiteralAdjusted);
2049
2441
  const adjusted = applyFileDiversityDiminishingReturns(boostedApi);
2050
2442
  adjusted.sort((a, b) => b.score - a.score);
2051
2443
  const sliced = adjusted.slice(0, limit);
@@ -2126,6 +2518,21 @@ function applySmallSnippetDeprioritization(results) {
2126
2518
  return r;
2127
2519
  });
2128
2520
  }
2521
/**
 * Multiply the scores of data-literal chunks (large object/array constants)
 * by DATA_LITERAL_PENALTY so code chunks outrank them. Skipped entirely
 * when every result is a data literal, or when the best non-data score is
 * not positive (no meaningful baseline to deprioritize against).
 */
function applyDataLiteralDeprioritization(results) {
  const nonDataScores = [];
  for (const result of results) {
    if (!isDataLiteralChunk(result)) nonDataScores.push(result.score);
  }
  if (nonDataScores.length === 0) return results;
  if (Math.max(...nonDataScores, 0) === 0) return results;
  return results.map((result) =>
    isDataLiteralChunk(result)
      ? { ...result, score: result.score * DATA_LITERAL_PENALTY }
      : result
  );
}
2129
2536
  function applyPublicApiBoost(results) {
2130
2537
  return results.map((r) => {
2131
2538
  if (isPublicApiSymbol(r)) {
@@ -2160,6 +2567,23 @@ function isPublicApiSymbol(result) {
2160
2567
  const textStart = result.text.trimStart().toLowerCase();
2161
2568
  return textStart.startsWith("export ");
2162
2569
  }
2570
/**
 * Heuristic: does this chunk look like a pure data literal — a constant
 * holding a large object/array of key-value pairs — rather than logic?
 * Requires object/array delimiters, at least two `key: value` lines, a
 * key-value line ratio >= 0.35, and enough quote/colon character density.
 */
function isDataLiteralChunk(result) {
  if (result.type !== "constant") return false;
  const text = result.text;
  const trimmedLines = [];
  for (const rawLine of text.split("\n")) {
    const line = rawLine.trim();
    if (line.length > 0) trimmedLines.push(line);
  }
  if (trimmedLines.length === 0) return false;
  let keyValueLineCount = 0;
  for (const line of trimmedLines) {
    if (/['"`]?[A-Za-z0-9_-]+['"`]?\s*:\s*/.test(line)) keyValueLineCount++;
  }
  if (keyValueLineCount < 2) return false;
  if (keyValueLineCount / trimmedLines.length < 0.35) return false;
  const quoteCount = (text.match(/["'`]/g) ?? []).length;
  const colonCount = (text.match(/:/g) ?? []).length;
  const nonWhitespaceLength = text.replace(/\s+/g, "").length;
  if ((quoteCount + colonCount) / Math.max(nonWhitespaceLength, 1) < 0.04) return false;
  const braced = text.includes("{") && text.includes("}");
  const bracketed = text.includes("[") && text.includes("]");
  return braced || bracketed;
}
2163
2587
  function getFileDiversityFactor(fileOccurrence) {
2164
2588
  if (fileOccurrence <= 1) return 1;
2165
2589
  if (fileOccurrence === 2) return 0.9;
@@ -2340,9 +2764,13 @@ async function runQuery(projectPath, query, options) {
2340
2764
  );
2341
2765
  }
2342
2766
  const start = performance.now();
2343
- const db = createDatabase(dbPath);
2767
+ const embedderConfig = getProjectEmbedderConfig(absoluteRoot);
2768
+ const db = createDatabase(dbPath, embedderConfig.dimensions);
2344
2769
  try {
2345
- const strategyResults = await runStrategies(db, query, { ...options, limit });
2770
+ const strategyResults = await runStrategies(db, absoluteRoot, query, {
2771
+ ...options,
2772
+ limit
2773
+ });
2346
2774
  const pathBoostTerms = extractPathBoostTerms(query);
2347
2775
  const fused = fusionMergeWithPathBoost(strategyResults, limit, pathBoostTerms);
2348
2776
  const outputResults = fused.map(toOutputResult);
@@ -2362,7 +2790,7 @@ async function runQuery(projectPath, query, options) {
2362
2790
  db.close();
2363
2791
  }
2364
2792
  }
2365
- async function runStrategies(db, query, options) {
2793
+ async function runStrategies(db, projectPath, query, options) {
2366
2794
  const results = [];
2367
2795
  const filters = options.language ? { language: options.language } : void 0;
2368
2796
  const limit = options.limit * 3;
@@ -2371,6 +2799,7 @@ async function runStrategies(db, query, options) {
2371
2799
  const weight = effectiveWeights[strategy];
2372
2800
  const searchResults = await executeStrategy(
2373
2801
  db,
2802
+ projectPath,
2374
2803
  strategy,
2375
2804
  query,
2376
2805
  limit,
@@ -2382,10 +2811,10 @@ async function runStrategies(db, query, options) {
2382
2811
  }
2383
2812
  return results;
2384
2813
  }
2385
- async function executeStrategy(db, strategy, query, limit, filters) {
2814
+ async function executeStrategy(db, projectPath, strategy, query, limit, filters) {
2386
2815
  switch (strategy) {
2387
2816
  case "vector": {
2388
- const embedder = await loadEmbedder();
2817
+ const embedder = await loadEmbedder(projectPath);
2389
2818
  return vectorSearch(db, embedder, query, limit, filters);
2390
2819
  }
2391
2820
  case "fts":
@@ -2418,9 +2847,16 @@ async function executeStrategy(db, strategy, query, limit, filters) {
2418
2847
  }
2419
2848
  }
2420
2849
  var embedderInstance = null;
2421
- async function loadEmbedder() {
2422
- if (embedderInstance) return embedderInstance;
2423
- embedderInstance = await createLocalEmbedder();
2850
var embedderKey = null;
// Cache key capturing everything that affects which embedder instance is
// valid: the project path plus its configured provider, model, and
// dimensions.
function getCacheKey(projectPath) {
  const { provider, model, dimensions } = getProjectEmbedderConfig(projectPath);
  return [projectPath, provider, model, dimensions].join(":");
}
// Return the module-cached embedder while the project's embedder config is
// unchanged; otherwise build a fresh one via createProjectEmbedder and
// cache it under the new key.
async function loadEmbedder(projectPath) {
  const wantedKey = getCacheKey(projectPath);
  const cacheHit = embedderInstance && embedderKey === wantedKey;
  if (!cacheHit) {
    embedderInstance = await createProjectEmbedder(projectPath);
    embedderKey = wantedKey;
  }
  return embedderInstance;
}
2426
2862
  function registerQueryCommand(program2) {
@@ -2838,7 +3274,9 @@ var COMMON_STEMS = {
2838
3274
  transformer: "transform",
2839
3275
  transformation: "transform",
2840
3276
  connection: "connect",
3277
+ connecting: "connect",
2841
3278
  connector: "connect",
3279
+ migrating: "migrate",
2842
3280
  migration: "migrate",
2843
3281
  scheduling: "schedule",
2844
3282
  scheduler: "schedule",
@@ -2847,7 +3285,8 @@ var COMMON_STEMS = {
2847
3285
  routing: "route",
2848
3286
  router: "route",
2849
3287
  indexing: "index",
2850
- indexer: "index"
3288
+ indexer: "index",
3289
+ subscribing: "subscribe"
2851
3290
  };
2852
3291
  var STEM_SUFFIXES = [
2853
3292
  "tion",
@@ -3127,13 +3566,13 @@ function formatTextOutput2(output) {
3127
3566
  );
3128
3567
  return lines.join("\n");
3129
3568
  }
3130
- function createSearchExecutor(db, query) {
3569
+ function createSearchExecutor(db, projectPath, query) {
3131
3570
  const pathBoostTerms = extractPathBoostTerms(query);
3132
3571
  return async (strategies, limit) => {
3133
3572
  const strategyResults = [];
3134
3573
  const fetchLimit = limit * 3;
3135
3574
  for (const plan of strategies) {
3136
- const results = await executeStrategy2(db, plan, fetchLimit);
3575
+ const results = await executeStrategy2(db, projectPath, plan, fetchLimit);
3137
3576
  if (results.length > 0) {
3138
3577
  strategyResults.push({
3139
3578
  strategy: plan.strategy,
@@ -3152,10 +3591,10 @@ function extractSymbolNames2(query) {
3152
3591
  function isPathLike2(query) {
3153
3592
  return query.includes("/") || query.includes("*") || query.includes(".");
3154
3593
  }
3155
- async function executeStrategy2(db, plan, limit) {
3594
+ async function executeStrategy2(db, projectPath, plan, limit) {
3156
3595
  switch (plan.strategy) {
3157
3596
  case "vector": {
3158
- const embedder = await loadEmbedder2();
3597
+ const embedder = await loadEmbedder2(projectPath);
3159
3598
  return vectorSearch(db, embedder, plan.query, limit);
3160
3599
  }
3161
3600
  case "fts":
@@ -3184,13 +3623,20 @@ async function executeStrategy2(db, plan, limit) {
3184
3623
  }
3185
3624
  }
3186
3625
  var embedderInstance2 = null;
3187
- async function loadEmbedder2() {
3188
- if (embedderInstance2) return embedderInstance2;
3189
- embedderInstance2 = await createLocalEmbedder();
3626
+ var embedderKey2 = null;
3627
+ function getCacheKey2(projectPath) {
3628
+ const config = getProjectEmbedderConfig(projectPath);
3629
+ return `${projectPath}:${config.provider}:${config.model}:${config.dimensions}`;
3630
+ }
3631
+ async function loadEmbedder2(projectPath) {
3632
+ const cacheKey = getCacheKey2(projectPath);
3633
+ if (embedderInstance2 && embedderKey2 === cacheKey) return embedderInstance2;
3634
+ embedderInstance2 = await createProjectEmbedder(projectPath);
3635
+ embedderKey2 = cacheKey;
3190
3636
  return embedderInstance2;
3191
3637
  }
3192
- async function fallbackSearch(db, query, limit) {
3193
- const executor = createSearchExecutor(db, query);
3638
+ async function fallbackSearch(db, projectPath, query, limit) {
3639
+ const executor = createSearchExecutor(db, projectPath, query);
3194
3640
  const fallbackStrategies = buildFallbackStrategies(query);
3195
3641
  const results = await executor(fallbackStrategies, limit);
3196
3642
  return {
@@ -3217,18 +3663,19 @@ async function runAsk(projectPath, query, options) {
3217
3663
  ErrorCode.NOT_INITIALIZED
3218
3664
  );
3219
3665
  }
3220
- const db = createDatabase(dbPath);
3666
+ const embedderConfig = getProjectEmbedderConfig(absoluteRoot);
3667
+ const db = createDatabase(dbPath, embedderConfig.dimensions);
3221
3668
  try {
3222
3669
  const provider = options.provider ?? null;
3223
3670
  if (!provider) {
3224
- const output = await fallbackSearch(db, query, limit);
3671
+ const output = await fallbackSearch(db, absoluteRoot, query, limit);
3225
3672
  output.warning = FALLBACK_NOTICE;
3226
3673
  if (options.format === "text") {
3227
3674
  output.text = formatTextOutput2(output);
3228
3675
  }
3229
3676
  return output;
3230
3677
  }
3231
- const executor = createSearchExecutor(db, query);
3678
+ const executor = createSearchExecutor(db, absoluteRoot, query);
3232
3679
  if (options.noExplain) {
3233
3680
  return await runNoExplain(provider, query, limit, options, executor);
3234
3681
  }
@@ -3403,6 +3850,7 @@ function createWatcher(options, events) {
3403
3850
  // src/cli/commands/watch.ts
3404
3851
  var CTX_DIR5 = ".ctx";
3405
3852
  var DB_FILENAME4 = "index.db";
3853
+ var EMBEDDING_SAVE_BATCH_SIZE2 = 128;
3406
3854
  function timestamp() {
3407
3855
  return (/* @__PURE__ */ new Date()).toLocaleTimeString("en-GB", { hour12: false });
3408
3856
  }
@@ -3419,6 +3867,9 @@ async function hashFile(absolutePath) {
3419
3867
  const content = fs9.readFileSync(absolutePath);
3420
3868
  return createHash3("sha256").update(content).digest("hex");
3421
3869
  }
3870
+ function isSameEmbedderConfig2(a, b) {
3871
+ return a.provider === b.provider && a.model === b.model && a.dimensions === b.dimensions;
3872
+ }
3422
3873
  async function reindexChanges(db, changes, projectPath, options) {
3423
3874
  const start = performance.now();
3424
3875
  const log = options.log;
@@ -3430,8 +3881,8 @@ async function reindexChanges(db, changes, projectPath, options) {
3430
3881
  const language = detectLanguage(change.path);
3431
3882
  if (change.type === "unlink") {
3432
3883
  log(`[${timestamp()}] Deleted: ${change.path}`);
3433
- const existingFile2 = db.getFile(change.path);
3434
- if (existingFile2) {
3884
+ const existingFile = db.getFile(change.path);
3885
+ if (existingFile) {
3435
3886
  db.deleteFile(change.path);
3436
3887
  }
3437
3888
  filesProcessed++;
@@ -3441,10 +3892,6 @@ async function reindexChanges(db, changes, projectPath, options) {
3441
3892
  if (!fs9.existsSync(absolutePath)) continue;
3442
3893
  const label = change.type === "add" ? "Added" : "Changed";
3443
3894
  log(`[${timestamp()}] ${label}: ${change.path}`);
3444
- const existingFile = db.getFile(change.path);
3445
- if (existingFile) {
3446
- db.deleteChunksByFile(existingFile.id);
3447
- }
3448
3895
  let nodes;
3449
3896
  try {
3450
3897
  nodes = await parseFile(absolutePath, language);
@@ -3455,26 +3902,31 @@ async function reindexChanges(db, changes, projectPath, options) {
3455
3902
  const chunks = chunkFile(nodes, change.path);
3456
3903
  const hash = await hashFile(absolutePath);
3457
3904
  const size = fs9.statSync(absolutePath).size;
3458
- const fileId = db.upsertFile({
3459
- path: change.path,
3460
- language,
3461
- hash,
3462
- size
3905
+ const chunkRows = chunks.map((c) => ({
3906
+ lineStart: c.lineStart,
3907
+ lineEnd: c.lineEnd,
3908
+ type: c.type,
3909
+ name: c.name,
3910
+ parent: c.parent,
3911
+ text: c.text,
3912
+ imports: c.imports,
3913
+ exports: c.exports,
3914
+ hash: c.hash
3915
+ }));
3916
+ let chunkIds = [];
3917
+ db.transaction(() => {
3918
+ const existingFile = db.getFile(change.path);
3919
+ if (existingFile) {
3920
+ db.deleteChunksByFile(existingFile.id);
3921
+ }
3922
+ const fileId = db.upsertFile({
3923
+ path: change.path,
3924
+ language,
3925
+ hash,
3926
+ size
3927
+ });
3928
+ chunkIds = db.insertChunks(fileId, chunkRows);
3463
3929
  });
3464
- const chunkIds = db.insertChunks(
3465
- fileId,
3466
- chunks.map((c) => ({
3467
- lineStart: c.lineStart,
3468
- lineEnd: c.lineEnd,
3469
- type: c.type,
3470
- name: c.name,
3471
- parent: c.parent,
3472
- text: c.text,
3473
- imports: c.imports,
3474
- exports: c.exports,
3475
- hash: c.hash
3476
- }))
3477
- );
3478
3930
  for (let i = 0; i < chunks.length; i++) {
3479
3931
  allChunksWithMeta.push({
3480
3932
  fileRelPath: change.path,
@@ -3485,25 +3937,46 @@ async function reindexChanges(db, changes, projectPath, options) {
3485
3937
  filesProcessed++;
3486
3938
  }
3487
3939
  if (!options.skipEmbedding && allChunksWithMeta.length > 0) {
3488
- const embedder = await loadEmbedder3();
3489
- const texts = allChunksWithMeta.map(
3490
- (cm) => prepareChunkText(cm.fileRelPath, cm.chunk.parent, cm.chunk.text)
3491
- );
3492
- const vectors = await embedder.embed(texts);
3493
- db.transaction(() => {
3494
- for (let i = 0; i < allChunksWithMeta.length; i++) {
3495
- const chunkDbId = parseInt(allChunksWithMeta[i].chunk.id, 10);
3496
- db.insertVector(chunkDbId, vectors[i]);
3940
+ const embedder = await loadEmbedder3(projectPath);
3941
+ let vectorsCreated = 0;
3942
+ const total = allChunksWithMeta.length;
3943
+ try {
3944
+ for (let i = 0; i < allChunksWithMeta.length; i += EMBEDDING_SAVE_BATCH_SIZE2) {
3945
+ const batch = allChunksWithMeta.slice(i, i + EMBEDDING_SAVE_BATCH_SIZE2);
3946
+ const texts = batch.map(
3947
+ (cm) => prepareChunkText(cm.fileRelPath, cm.chunk.parent, cm.chunk.text)
3948
+ );
3949
+ const vectors = await embedder.embed(texts);
3950
+ db.transaction(() => {
3951
+ for (let j = 0; j < batch.length; j++) {
3952
+ const chunkDbId = parseInt(batch[j].chunk.id, 10);
3953
+ db.insertVector(chunkDbId, vectors[j]);
3954
+ }
3955
+ });
3956
+ vectorsCreated += vectors.length;
3497
3957
  }
3498
- });
3958
+ } catch (err) {
3959
+ throw new IndexError(
3960
+ `Embedding failed after saving ${vectorsCreated}/${total} vectors. Continue watching, then run "ctx init" to backfill missing vectors. ${err instanceof Error ? err.message : String(err)}`,
3961
+ ErrorCode.EMBEDDER_FAILED,
3962
+ err instanceof Error ? err : void 0
3963
+ );
3964
+ }
3499
3965
  }
3500
3966
  const durationMs = performance.now() - start;
3501
3967
  return { filesProcessed, chunksUpdated, durationMs };
3502
3968
  }
3503
3969
  var embedderInstance3 = null;
3504
- async function loadEmbedder3() {
3505
- if (embedderInstance3) return embedderInstance3;
3506
- embedderInstance3 = await createLocalEmbedder();
3970
+ var embedderKey3 = null;
3971
+ function getCacheKey3(projectPath) {
3972
+ const config = getProjectEmbedderConfig(projectPath);
3973
+ return `${projectPath}:${config.provider}:${config.model}:${config.dimensions}`;
3974
+ }
3975
+ async function loadEmbedder3(projectPath) {
3976
+ const cacheKey = getCacheKey3(projectPath);
3977
+ if (embedderInstance3 && embedderKey3 === cacheKey) return embedderInstance3;
3978
+ embedderInstance3 = await createProjectEmbedder(projectPath);
3979
+ embedderKey3 = cacheKey;
3507
3980
  return embedderInstance3;
3508
3981
  }
3509
3982
  async function runWatch(projectPath, options = {}) {
@@ -3520,8 +3993,29 @@ async function runWatch(projectPath, options = {}) {
3520
3993
  );
3521
3994
  }
3522
3995
  await initParser();
3523
- const db = createDatabase(dbPath);
3996
+ const embedderConfig = getProjectEmbedderConfig(absoluteRoot);
3997
+ const db = createDatabase(dbPath, embedderConfig.dimensions);
3998
+ const existingEmbedder = db.getIndexEmbedder();
3999
+ if (existingEmbedder) {
4000
+ if (!isSameEmbedderConfig2(existingEmbedder, embedderConfig)) {
4001
+ db.close();
4002
+ throw new IndexError(
4003
+ `Index embedder mismatch: index uses ${existingEmbedder.provider} (${existingEmbedder.model}, ${existingEmbedder.dimensions} dims) but config requests ${embedderConfig.provider} (${embedderConfig.model}, ${embedderConfig.dimensions} dims). Rebuild the index.`,
4004
+ ErrorCode.CONFIG_INVALID
4005
+ );
4006
+ }
4007
+ } else {
4008
+ const isEmptyIndex = db.getFileCount() === 0 && db.getChunkCount() === 0 && db.getVectorCount() === 0;
4009
+ if (isEmptyIndex) {
4010
+ db.setIndexEmbedder({
4011
+ provider: embedderConfig.provider,
4012
+ model: embedderConfig.model,
4013
+ dimensions: embedderConfig.dimensions
4014
+ });
4015
+ }
4016
+ }
3524
4017
  let watcherHandle = null;
4018
+ let reindexQueue = Promise.resolve();
3525
4019
  const watcher = createWatcher(
3526
4020
  {
3527
4021
  projectPath: absoluteRoot,
@@ -3530,7 +4024,7 @@ async function runWatch(projectPath, options = {}) {
3530
4024
  },
3531
4025
  {
3532
4026
  onChange: (changes) => {
3533
- void (async () => {
4027
+ reindexQueue = reindexQueue.then(async () => {
3534
4028
  try {
3535
4029
  const result = await reindexChanges(db, changes, absoluteRoot, {
3536
4030
  skipEmbedding: options.skipEmbedding,
@@ -3546,7 +4040,7 @@ async function runWatch(projectPath, options = {}) {
3546
4040
  `[${timestamp()}] Error: ${err instanceof Error ? err.message : String(err)}`
3547
4041
  );
3548
4042
  }
3549
- })();
4043
+ });
3550
4044
  },
3551
4045
  onError: (err) => {
3552
4046
  log(`[${timestamp()}] Watcher error: ${err.message}`);
@@ -3562,6 +4056,7 @@ async function runWatch(projectPath, options = {}) {
3562
4056
  await watcherHandle.stop();
3563
4057
  watcherHandle = null;
3564
4058
  }
4059
+ await reindexQueue;
3565
4060
  db.close();
3566
4061
  log("Stopped watching. Database saved.");
3567
4062
  }
@@ -3623,6 +4118,7 @@ function readConfig2(ctxDir) {
3623
4118
  const parsed = JSON.parse(raw);
3624
4119
  const embedder = parsed.embedder;
3625
4120
  return {
4121
+ provider: embedder?.provider ?? parsed.provider ?? "unknown",
3626
4122
  model: embedder?.model ?? parsed.model ?? "unknown",
3627
4123
  dimensions: embedder?.dimensions ?? parsed.dimensions ?? 0
3628
4124
  };
@@ -3663,15 +4159,54 @@ function formatStatus(projectPath, output) {
3663
4159
  lines.push(` ${label}${count} file${count !== 1 ? "s" : ""}`);
3664
4160
  }
3665
4161
  }
3666
- if (output.config) {
3667
- lines.push("");
3668
- lines.push(
3669
- ` Embedder: local (${output.config.model}, ${output.config.dimensions} dims)`
3670
- );
4162
+ const hasConfig = output.config !== null;
4163
+ const hasIndexEmbedder = output.indexEmbedder !== null;
4164
+ if (hasConfig || hasIndexEmbedder) lines.push("");
4165
+ if (hasConfig && hasIndexEmbedder) {
4166
+ const config = output.config;
4167
+ const indexEmbedder = output.indexEmbedder;
4168
+ if (!config || !indexEmbedder) {
4169
+ lines.push(" Embedder: unknown");
4170
+ } else if (isSameEmbedder(config, indexEmbedder)) {
4171
+ lines.push(
4172
+ ` Embedder: ${indexEmbedder.provider} (${indexEmbedder.model}, ${indexEmbedder.dimensions} dims)`
4173
+ );
4174
+ } else {
4175
+ lines.push(
4176
+ ` Index embedder: ${indexEmbedder.provider} (${indexEmbedder.model}, ${indexEmbedder.dimensions} dims)`
4177
+ );
4178
+ lines.push(
4179
+ ` Config embedder: ${config.provider} (${config.model}, ${config.dimensions} dims)`
4180
+ );
4181
+ }
4182
+ } else if (hasIndexEmbedder) {
4183
+ const indexEmbedder = output.indexEmbedder;
4184
+ if (!indexEmbedder) {
4185
+ lines.push(" Index embedder: unknown");
4186
+ } else {
4187
+ lines.push(
4188
+ ` Index embedder: ${indexEmbedder.provider} (${indexEmbedder.model}, ${indexEmbedder.dimensions} dims)`
4189
+ );
4190
+ }
4191
+ } else if (hasConfig) {
4192
+ const config = output.config;
4193
+ if (!config) {
4194
+ lines.push(" Config embedder: unknown");
4195
+ } else {
4196
+ lines.push(
4197
+ ` Config embedder: ${config.provider} (${config.model}, ${config.dimensions} dims)`
4198
+ );
4199
+ }
4200
+ }
4201
+ if (output.embedderWarning) {
4202
+ lines.push(` Warning: ${output.embedderWarning}`);
3671
4203
  }
3672
4204
  lines.push("");
3673
4205
  return lines.join("\n");
3674
4206
  }
4207
+ function isSameEmbedder(a, b) {
4208
+ return a.provider === b.provider && a.model === b.model && a.dimensions === b.dimensions;
4209
+ }
3675
4210
  async function runStatus(projectPath) {
3676
4211
  const absoluteRoot = path10.resolve(projectPath);
3677
4212
  const ctxDir = path10.join(absoluteRoot, CTX_DIR6);
@@ -3686,6 +4221,8 @@ async function runStatus(projectPath) {
3686
4221
  lastIndexed: null,
3687
4222
  languages: /* @__PURE__ */ new Map(),
3688
4223
  config: null,
4224
+ indexEmbedder: null,
4225
+ embedderWarning: null,
3689
4226
  text: formatNotInitialized(absoluteRoot)
3690
4227
  };
3691
4228
  return output;
@@ -3698,7 +4235,9 @@ async function runStatus(projectPath) {
3698
4235
  const languages = db.getLanguageBreakdown();
3699
4236
  const lastIndexed = db.getLastIndexed();
3700
4237
  const config = readConfig2(ctxDir);
4238
+ const indexEmbedder = db.getIndexEmbedder();
3701
4239
  const dbSizeBytes = fs10.statSync(dbPath).size;
4240
+ const embedderWarning = config && indexEmbedder && !isSameEmbedder(config, indexEmbedder) ? `Index built with ${indexEmbedder.provider} (${indexEmbedder.dimensions} dims), config requests ${config.provider} (${config.dimensions} dims) \u2014 rebuild needed.` : null;
3702
4241
  const output = {
3703
4242
  initialized: true,
3704
4243
  fileCount,
@@ -3708,6 +4247,8 @@ async function runStatus(projectPath) {
3708
4247
  lastIndexed,
3709
4248
  languages,
3710
4249
  config,
4250
+ indexEmbedder,
4251
+ embedderWarning,
3711
4252
  text: ""
3712
4253
  };
3713
4254
  output.text = formatStatus(absoluteRoot, output);