kontext-engine 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -1
- package/dist/cli/index.js +652 -111
- package/dist/cli/index.js.map +1 -1
- package/dist/index.d.ts +14 -2
- package/dist/index.js +553 -148
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/cli/index.js
CHANGED
|
@@ -779,6 +779,25 @@ function prepareChunkText(filePath, parent, text) {
|
|
|
779
779
|
parts.push(text);
|
|
780
780
|
return parts.join("\n");
|
|
781
781
|
}
|
|
782
|
+
var MAX_RETRIES = 3;
var BASE_DELAY_MS = 500;
/**
 * Calls `fetch(url, init)` with bounded exponential backoff.
 *
 * Up to MAX_RETRIES extra attempts are made, waiting
 * BASE_DELAY_MS * 2^attempt between them, when the failure looks transient:
 * HTTP 429, any HTTP 5xx, or a rejected `fetch` (network-level failure).
 * Any other non-OK status fails immediately.
 *
 * @param {string | URL | Request} url - Request target, passed to `fetch` unchanged.
 * @param {RequestInit} [init] - Request options, passed to `fetch` unchanged.
 * @returns {Promise<Response>} The first response whose `ok` flag is true.
 * @throws {Error} "Embedding API error: HTTP <status> <statusText>" for a
 *   non-retryable status, or for a retryable one once retries are exhausted;
 *   "Embedding API request failed after retries" (original error in `cause`)
 *   when the last attempt failed at the network level. Aborted requests are
 *   rethrown untouched and never retried.
 */
async function fetchWithRetry(url, init) {
  let lastError = null;
  for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
    let response;
    try {
      response = await fetch(url, init);
    } catch (err) {
      // A cancelled request would only fail again; surface it right away.
      if (err?.name === "AbortError" || init?.signal?.aborted) throw err;
      lastError = new Error("Embedding API request failed after retries", { cause: err });
    }
    if (response !== undefined) {
      if (response.ok) return response;
      lastError = new Error(
        `Embedding API error: HTTP ${response.status} ${response.statusText}`
      );
      const isTransient = response.status === 429 || response.status >= 500;
      if (!isTransient) throw lastError;
    }
    // Back off only when another attempt will actually follow.
    if (attempt < MAX_RETRIES) {
      const delay = BASE_DELAY_MS * 2 ** attempt;
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
  // Every iteration either returned, threw, or recorded lastError.
  throw lastError;
}
|
|
782
801
|
var LOCAL_MODEL_ID = "Xenova/all-MiniLM-L6-v2";
|
|
783
802
|
var LOCAL_DIMENSIONS = 384;
|
|
784
803
|
var LOCAL_BATCH_SIZE = 32;
|
|
@@ -836,6 +855,87 @@ async function createLocalEmbedder() {
|
|
|
836
855
|
}
|
|
837
856
|
};
|
|
838
857
|
}
|
|
858
|
+
var VOYAGE_API_URL = "https://api.voyageai.com/v1/embeddings";
var VOYAGE_MODEL = "voyage-code-3";
var VOYAGE_DEFAULT_DIMENSIONS = 1024;
var VOYAGE_BATCH_SIZE = 128;
/**
 * Builds an embedder object backed by `callVoyageAPI`.
 *
 * @param {string} apiKey - Key forwarded to every `callVoyageAPI` call.
 * @param {number} [dimensions=VOYAGE_DEFAULT_DIMENSIONS] - Requested vector size.
 * @returns {{name: string, dimensions: number, embed: Function, embedSingle: Function}}
 *   `embed(texts, onProgress)` sends texts in slices of VOYAGE_BATCH_SIZE with
 *   input type "document" and reports `(done, total)` after each slice;
 *   `embedSingle(text)` sends one text with input type "query".
 */
function createVoyageEmbedder(apiKey, dimensions = VOYAGE_DEFAULT_DIMENSIONS) {
  const requestVectors = (inputs, inputType) =>
    callVoyageAPI(apiKey, inputs, inputType, dimensions);
  return {
    name: VOYAGE_MODEL,
    dimensions,
    async embed(texts, onProgress) {
      const embedded = [];
      let start = 0;
      while (start < texts.length) {
        const slice = texts.slice(start, start + VOYAGE_BATCH_SIZE);
        const sliceVectors = await requestVectors(slice, "document");
        embedded.push(...sliceVectors);
        onProgress?.(Math.min(start + slice.length, texts.length), texts.length);
        start += VOYAGE_BATCH_SIZE;
      }
      return embedded;
    },
    async embedSingle(text) {
      const queryVectors = await requestVectors([text], "query");
      return queryVectors[0];
    }
  };
}
|
|
882
|
+
/**
 * Requests embeddings for `texts` from the Voyage endpoint.
 *
 * The response is validated before use: `data` must be an array with exactly
 * one entry per input text and every entry must carry an `embedding` array.
 * When every entry has an integer `index`, entries are ordered by it so the
 * result lines up with `texts`.
 *
 * @param {string} apiKey - Sent as a Bearer token.
 * @param {string[]} texts - Inputs to embed.
 * @param {string} inputType - Forwarded as `input_type`.
 * @param {number} dimensions - Forwarded as `output_dimension`.
 * @returns {Promise<Array>} One `normalizeVector(Float32Array)` result per text.
 * @throws {Error} When the request fails (see `fetchWithRetry`) or the
 *   response body does not have the expected shape.
 */
async function callVoyageAPI(apiKey, texts, inputType, dimensions) {
  const response = await fetchWithRetry(VOYAGE_API_URL, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`
    },
    body: JSON.stringify({
      model: VOYAGE_MODEL,
      input: texts,
      input_type: inputType,
      output_dimension: dimensions
    })
  });
  const json = await response.json();
  const items = json?.data;
  const wellFormed =
    Array.isArray(items) &&
    items.length === texts.length &&
    items.every((d) => Array.isArray(d?.embedding));
  if (!wellFormed) {
    const received = Array.isArray(items) ? String(items.length) : "none";
    throw new Error(
      `Embedding API error: malformed response from ${VOYAGE_MODEL} (expected ${texts.length} embeddings, got ${received})`
    );
  }
  // Trust the explicit position when the API provides one for every entry.
  const ordered = items.every((d) => Number.isInteger(d.index))
    ? [...items].sort((a, b) => a.index - b.index)
    : items;
  return ordered.map((d) => normalizeVector(new Float32Array(d.embedding)));
}
|
|
899
|
+
var OPENAI_API_URL = "https://api.openai.com/v1/embeddings";
var OPENAI_MODEL = "text-embedding-3-large";
var OPENAI_DEFAULT_DIMENSIONS = 1024;
var OPENAI_BATCH_SIZE = 128;
/**
 * Builds an embedder object backed by `callOpenAIAPI`.
 *
 * @param {string} apiKey - Key forwarded to every `callOpenAIAPI` call.
 * @param {number} [dimensions=OPENAI_DEFAULT_DIMENSIONS] - Requested vector size.
 * @returns {{name: string, dimensions: number, embed: Function, embedSingle: Function}}
 *   `embed(texts, onProgress)` sends texts in slices of OPENAI_BATCH_SIZE and
 *   reports `(done, total)` after each slice; `embedSingle(text)` embeds one
 *   text and returns its vector.
 */
function createOpenAIEmbedder(apiKey, dimensions = OPENAI_DEFAULT_DIMENSIONS) {
  const requestVectors = (inputs) => callOpenAIAPI(apiKey, inputs, dimensions);
  return {
    name: OPENAI_MODEL,
    dimensions,
    async embed(texts, onProgress) {
      const embedded = [];
      let start = 0;
      while (start < texts.length) {
        const slice = texts.slice(start, start + OPENAI_BATCH_SIZE);
        const sliceVectors = await requestVectors(slice);
        embedded.push(...sliceVectors);
        onProgress?.(Math.min(start + slice.length, texts.length), texts.length);
        start += OPENAI_BATCH_SIZE;
      }
      return embedded;
    },
    async embedSingle(text) {
      const singleResult = await requestVectors([text]);
      return singleResult[0];
    }
  };
}
|
|
923
|
+
/**
 * Requests embeddings for `texts` from the OpenAI endpoint.
 *
 * The response is validated before use: `data` must be an array with exactly
 * one entry per input text and every entry must carry an `embedding` array.
 * When every entry has an integer `index`, entries are ordered by it so the
 * result lines up with `texts`.
 *
 * @param {string} apiKey - Sent as a Bearer token.
 * @param {string[]} texts - Inputs to embed.
 * @param {number} dimensions - Forwarded as `dimensions`.
 * @returns {Promise<Array>} One `normalizeVector(Float32Array)` result per text.
 * @throws {Error} When the request fails (see `fetchWithRetry`) or the
 *   response body does not have the expected shape.
 */
async function callOpenAIAPI(apiKey, texts, dimensions) {
  const response = await fetchWithRetry(OPENAI_API_URL, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`
    },
    body: JSON.stringify({
      model: OPENAI_MODEL,
      input: texts,
      dimensions
    })
  });
  const json = await response.json();
  const items = json?.data;
  const wellFormed =
    Array.isArray(items) &&
    items.length === texts.length &&
    items.every((d) => Array.isArray(d?.embedding));
  if (!wellFormed) {
    const received = Array.isArray(items) ? String(items.length) : "none";
    throw new Error(
      `Embedding API error: malformed response from ${OPENAI_MODEL} (expected ${texts.length} embeddings, got ${received})`
    );
  }
  // Trust the explicit position when the API provides one for every entry.
  const ordered = items.every((d) => Number.isInteger(d.index))
    ? [...items].sort((a, b) => a.index - b.index)
    : items;
  return ordered.map((d) => normalizeVector(new Float32Array(d.embedding)));
}
|
|
839
939
|
|
|
840
940
|
// src/utils/errors.ts
|
|
841
941
|
var ErrorCode = {
|
|
@@ -877,6 +977,12 @@ var ConfigError = class extends KontextError {
|
|
|
877
977
|
this.name = "ConfigError";
|
|
878
978
|
}
|
|
879
979
|
};
|
|
980
|
+
/**
 * Error raised by the storage layer.
 *
 * Forwards `message`, `code` and `cause` to KontextError unchanged and tags
 * the instance with the name "DatabaseError".
 */
var DatabaseError = class extends KontextError {
  /**
   * @param {string} message - Human-readable description.
   * @param {string} code - Error code, passed through to KontextError.
   * @param {unknown} [cause] - Underlying error, passed through to KontextError.
   */
  constructor(message, code, cause) {
    super(message, code, cause);
    this.name = "DatabaseError";
  }
};
|
|
880
986
|
|
|
881
987
|
// src/utils/error-boundary.ts
|
|
882
988
|
function handleCommandError(err, logger, verbose) {
|
|
@@ -1051,7 +1157,10 @@ function searchVectors(db, query, limit) {
|
|
|
1051
1157
|
|
|
1052
1158
|
// src/storage/db.ts
|
|
1053
1159
|
var DEFAULT_DIMENSIONS = 384;
|
|
1054
|
-
|
|
1160
|
+
var VECTOR_DIMENSIONS_META_KEY = "vector_dimensions";
|
|
1161
|
+
var INDEX_EMBEDDER_PROVIDER_META_KEY = "index_embedder_provider";
|
|
1162
|
+
var INDEX_EMBEDDER_MODEL_META_KEY = "index_embedder_model";
|
|
1163
|
+
function createDatabase(dbPath, dimensions) {
|
|
1055
1164
|
const dir = path3.dirname(dbPath);
|
|
1056
1165
|
if (!fs4.existsSync(dir)) {
|
|
1057
1166
|
fs4.mkdirSync(dir, { recursive: true });
|
|
@@ -1060,7 +1169,8 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
|
|
|
1060
1169
|
db.pragma("journal_mode = WAL");
|
|
1061
1170
|
db.pragma("foreign_keys = ON");
|
|
1062
1171
|
sqliteVec.load(db);
|
|
1063
|
-
initializeSchema(db, dimensions);
|
|
1172
|
+
initializeSchema(db, dimensions ?? DEFAULT_DIMENSIONS);
|
|
1173
|
+
ensureVectorDimensions(db, dimensions);
|
|
1064
1174
|
const stmtUpsertFile = db.prepare(`
|
|
1065
1175
|
INSERT INTO files (path, language, hash, last_indexed, size)
|
|
1066
1176
|
VALUES (@path, @language, @hash, @lastIndexed, @size)
|
|
@@ -1069,6 +1179,7 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
|
|
|
1069
1179
|
hash = excluded.hash,
|
|
1070
1180
|
last_indexed = excluded.last_indexed,
|
|
1071
1181
|
size = excluded.size
|
|
1182
|
+
RETURNING id
|
|
1072
1183
|
`);
|
|
1073
1184
|
const stmtGetFile = db.prepare(
|
|
1074
1185
|
"SELECT id, path, language, hash, last_indexed as lastIndexed, size FROM files WHERE path = ?"
|
|
@@ -1081,6 +1192,16 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
|
|
|
1081
1192
|
const stmtGetChunksByFile = db.prepare(
|
|
1082
1193
|
"SELECT id, file_id as fileId, line_start as lineStart, line_end as lineEnd, type, name, parent, text, imports, exports, hash FROM chunks WHERE file_id = ? ORDER BY line_start"
|
|
1083
1194
|
);
|
|
1195
|
+
const stmtGetChunksMissingVectors = db.prepare(
|
|
1196
|
+
`SELECT c.id, c.file_id as fileId, f.path as filePath, f.language,
|
|
1197
|
+
c.line_start as lineStart, c.line_end as lineEnd,
|
|
1198
|
+
c.type, c.name, c.parent, c.text, c.exports as exports
|
|
1199
|
+
FROM chunks c
|
|
1200
|
+
JOIN files f ON f.id = c.file_id
|
|
1201
|
+
LEFT JOIN chunk_vectors v ON v.rowid = c.id
|
|
1202
|
+
WHERE v.rowid IS NULL
|
|
1203
|
+
ORDER BY c.id`
|
|
1204
|
+
);
|
|
1084
1205
|
const stmtGetChunkIdsByFile = db.prepare(
|
|
1085
1206
|
"SELECT id FROM chunks WHERE file_id = ?"
|
|
1086
1207
|
);
|
|
@@ -1110,20 +1231,26 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
|
|
|
1110
1231
|
const stmtLastIndexed = db.prepare(
|
|
1111
1232
|
"SELECT MAX(last_indexed) as lastIndexed FROM files"
|
|
1112
1233
|
);
|
|
1234
|
+
const stmtGetMeta = db.prepare("SELECT value FROM meta WHERE key = ?");
|
|
1235
|
+
const stmtSetMeta = db.prepare(
|
|
1236
|
+
"INSERT OR REPLACE INTO meta (key, value) VALUES (?, ?)"
|
|
1237
|
+
);
|
|
1113
1238
|
return {
|
|
1114
1239
|
upsertFile(file) {
|
|
1115
|
-
const
|
|
1240
|
+
const row = stmtUpsertFile.get({
|
|
1116
1241
|
path: file.path,
|
|
1117
1242
|
language: file.language,
|
|
1118
1243
|
hash: file.hash,
|
|
1119
1244
|
lastIndexed: Date.now(),
|
|
1120
1245
|
size: file.size
|
|
1121
1246
|
});
|
|
1122
|
-
if (
|
|
1123
|
-
|
|
1247
|
+
if (!row?.id) {
|
|
1248
|
+
throw new DatabaseError(
|
|
1249
|
+
`Failed to upsert file: ${file.path}`,
|
|
1250
|
+
ErrorCode.DB_WRITE_FAILED
|
|
1251
|
+
);
|
|
1124
1252
|
}
|
|
1125
|
-
|
|
1126
|
-
return existing?.id ?? 0;
|
|
1253
|
+
return row.id;
|
|
1127
1254
|
},
|
|
1128
1255
|
getFile(filePath) {
|
|
1129
1256
|
const row = stmtGetFile.get(filePath);
|
|
@@ -1166,15 +1293,17 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
|
|
|
1166
1293
|
return row.lastIndexed;
|
|
1167
1294
|
},
|
|
1168
1295
|
deleteFile(filePath) {
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1296
|
+
db.transaction(() => {
|
|
1297
|
+
const file = stmtGetFile.get(filePath);
|
|
1298
|
+
if (file) {
|
|
1299
|
+
const chunkRows = stmtGetChunkIdsByFile.all(file.id);
|
|
1300
|
+
const chunkIds = chunkRows.map((r) => r.id);
|
|
1301
|
+
if (chunkIds.length > 0) {
|
|
1302
|
+
deleteVectorsByChunkIds(db, chunkIds);
|
|
1303
|
+
}
|
|
1175
1304
|
}
|
|
1176
|
-
|
|
1177
|
-
|
|
1305
|
+
stmtDeleteFile.run(filePath);
|
|
1306
|
+
})();
|
|
1178
1307
|
},
|
|
1179
1308
|
insertChunks(fileId, chunks) {
|
|
1180
1309
|
const ids = [];
|
|
@@ -1219,6 +1348,13 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
|
|
|
1219
1348
|
exports: r.exports === 1
|
|
1220
1349
|
}));
|
|
1221
1350
|
},
|
|
1351
|
+
getChunksMissingVectors() {
|
|
1352
|
+
const rows = stmtGetChunksMissingVectors.all();
|
|
1353
|
+
return rows.map((r) => ({
|
|
1354
|
+
...r,
|
|
1355
|
+
exports: r.exports === 1
|
|
1356
|
+
}));
|
|
1357
|
+
},
|
|
1222
1358
|
searchChunks(filters, limit) {
|
|
1223
1359
|
const conditions = [];
|
|
1224
1360
|
const params = [];
|
|
@@ -1269,12 +1405,14 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
|
|
|
1269
1405
|
}));
|
|
1270
1406
|
},
|
|
1271
1407
|
deleteChunksByFile(fileId) {
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1408
|
+
db.transaction(() => {
|
|
1409
|
+
const chunkRows = stmtGetChunkIdsByFile.all(fileId);
|
|
1410
|
+
const chunkIds = chunkRows.map((r) => r.id);
|
|
1411
|
+
if (chunkIds.length > 0) {
|
|
1412
|
+
deleteVectorsByChunkIds(db, chunkIds);
|
|
1413
|
+
}
|
|
1414
|
+
stmtDeleteChunksByFile.run(fileId);
|
|
1415
|
+
})();
|
|
1278
1416
|
},
|
|
1279
1417
|
insertDependency(sourceChunkId, targetChunkId, type) {
|
|
1280
1418
|
stmtInsertDep.run(sourceChunkId, targetChunkId, type);
|
|
@@ -1314,6 +1452,66 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
|
|
|
1314
1452
|
return Object.values(result[0])[0];
|
|
1315
1453
|
}
|
|
1316
1454
|
return String(result);
|
|
1455
|
+
},
|
|
1456
|
+
getVectorDimensions() {
|
|
1457
|
+
const row = stmtGetMeta.get(VECTOR_DIMENSIONS_META_KEY);
|
|
1458
|
+
if (!row) return null;
|
|
1459
|
+
const dimensions2 = Number.parseInt(row.value, 10);
|
|
1460
|
+
if (!Number.isInteger(dimensions2) || dimensions2 <= 0) {
|
|
1461
|
+
throw new DatabaseError(
|
|
1462
|
+
`Invalid stored vector dimensions metadata: ${row.value}`,
|
|
1463
|
+
ErrorCode.DB_CORRUPTED
|
|
1464
|
+
);
|
|
1465
|
+
}
|
|
1466
|
+
return dimensions2;
|
|
1467
|
+
},
|
|
1468
|
+
getIndexEmbedder() {
|
|
1469
|
+
const providerRow = stmtGetMeta.get(INDEX_EMBEDDER_PROVIDER_META_KEY);
|
|
1470
|
+
const modelRow = stmtGetMeta.get(INDEX_EMBEDDER_MODEL_META_KEY);
|
|
1471
|
+
if (!providerRow && !modelRow) return null;
|
|
1472
|
+
if (!providerRow || !modelRow) {
|
|
1473
|
+
throw new DatabaseError(
|
|
1474
|
+
"Corrupted index embedder metadata: provider/model keys are incomplete.",
|
|
1475
|
+
ErrorCode.DB_CORRUPTED
|
|
1476
|
+
);
|
|
1477
|
+
}
|
|
1478
|
+
const dimensions2 = this.getVectorDimensions();
|
|
1479
|
+
if (dimensions2 === null) {
|
|
1480
|
+
throw new DatabaseError(
|
|
1481
|
+
"Corrupted index embedder metadata: vector dimensions are missing.",
|
|
1482
|
+
ErrorCode.DB_CORRUPTED
|
|
1483
|
+
);
|
|
1484
|
+
}
|
|
1485
|
+
return {
|
|
1486
|
+
provider: providerRow.value,
|
|
1487
|
+
model: modelRow.value,
|
|
1488
|
+
dimensions: dimensions2
|
|
1489
|
+
};
|
|
1490
|
+
},
|
|
1491
|
+
setIndexEmbedder(metadata) {
|
|
1492
|
+
if (!metadata.provider || !metadata.model) {
|
|
1493
|
+
throw new DatabaseError(
|
|
1494
|
+
"Invalid index embedder metadata: provider and model are required.",
|
|
1495
|
+
ErrorCode.DB_WRITE_FAILED
|
|
1496
|
+
);
|
|
1497
|
+
}
|
|
1498
|
+
if (!Number.isInteger(metadata.dimensions) || metadata.dimensions <= 0) {
|
|
1499
|
+
throw new DatabaseError(
|
|
1500
|
+
`Invalid index embedder metadata dimensions: ${String(metadata.dimensions)}`,
|
|
1501
|
+
ErrorCode.DB_WRITE_FAILED
|
|
1502
|
+
);
|
|
1503
|
+
}
|
|
1504
|
+
const vectorDimensions = this.getVectorDimensions();
|
|
1505
|
+
if (vectorDimensions !== null && vectorDimensions !== metadata.dimensions) {
|
|
1506
|
+
throw new DatabaseError(
|
|
1507
|
+
`Index embedder metadata dimensions (${metadata.dimensions}) do not match vector table dimensions (${vectorDimensions}).`,
|
|
1508
|
+
ErrorCode.DB_WRITE_FAILED
|
|
1509
|
+
);
|
|
1510
|
+
}
|
|
1511
|
+
db.transaction(() => {
|
|
1512
|
+
stmtSetMeta.run(INDEX_EMBEDDER_PROVIDER_META_KEY, metadata.provider);
|
|
1513
|
+
stmtSetMeta.run(INDEX_EMBEDDER_MODEL_META_KEY, metadata.model);
|
|
1514
|
+
})();
|
|
1317
1515
|
}
|
|
1318
1516
|
};
|
|
1319
1517
|
}
|
|
@@ -1336,6 +1534,59 @@ function getMetaVersion(db) {
|
|
|
1336
1534
|
return 0;
|
|
1337
1535
|
}
|
|
1338
1536
|
}
|
|
1537
|
+
/**
 * Reconciles three sources of the vector width: the `chunk_vectors` table
 * definition, the value recorded in the meta table, and the width requested
 * by the caller.
 *
 * When the meta table holds no usable value (no row, or a row with an empty
 * value) the width is recorded from the table definition, falling back to
 * `expectedDimensions` and then DEFAULT_DIMENSIONS.
 *
 * @param {object} db - Handle exposing `prepare(sql)` with `get`/`run`.
 * @param {number} [expectedDimensions] - Width requested by configuration;
 *   omit to accept whatever the index already uses.
 * @throws {DatabaseError} DB_CORRUPTED when the recorded value is not a
 *   positive integer or disagrees with the table definition; CONFIG_INVALID
 *   when the index width differs from `expectedDimensions`.
 */
function ensureVectorDimensions(db, expectedDimensions) {
  const actual = getExistingVectorDimensions(db);
  const stored = db.prepare("SELECT value FROM meta WHERE key = ?").get(VECTOR_DIMENSIONS_META_KEY);
  const storedValue = stored?.value;
  const storedDimensions = storedValue ? Number.parseInt(storedValue, 10) : void 0;
  if (storedDimensions !== void 0 && (!Number.isInteger(storedDimensions) || storedDimensions <= 0)) {
    throw new DatabaseError(
      `Invalid stored vector dimensions metadata: ${storedValue ?? "unknown"}`,
      ErrorCode.DB_CORRUPTED
    );
  }
  if (actual !== null && storedDimensions !== void 0 && storedDimensions !== actual) {
    throw new DatabaseError(
      `Vector dimensions metadata mismatch: meta=${storedDimensions}, table=${actual}.`,
      ErrorCode.DB_CORRUPTED
    );
  }
  if (storedDimensions === void 0) {
    // Decide on the parsed value, not on row presence, so a row with an empty
    // value is repaired here instead of reaching the comparison below as
    // "undefined dims".
    const dimensions = actual ?? expectedDimensions ?? DEFAULT_DIMENSIONS;
    db.prepare(
      "INSERT OR REPLACE INTO meta (key, value) VALUES (?, ?)"
    ).run(VECTOR_DIMENSIONS_META_KEY, String(dimensions));
    if (expectedDimensions !== void 0 && actual !== null && actual !== expectedDimensions) {
      throw new DatabaseError(
        `Vector dimension mismatch: index uses ${actual} dims, but config requests ${expectedDimensions} dims. Rebuild the index.`,
        ErrorCode.CONFIG_INVALID
      );
    }
    return;
  }
  if (expectedDimensions !== void 0 && storedDimensions !== expectedDimensions) {
    throw new DatabaseError(
      `Vector dimension mismatch: index uses ${storedDimensions} dims, but config requests ${expectedDimensions} dims. Rebuild the index.`,
      ErrorCode.CONFIG_INVALID
    );
  }
}
|
|
1582
|
+
/**
 * Extracts the declared vector width from the stored SQL definition of the
 * `chunk_vectors` table.
 *
 * @param {object} db - Handle exposing `prepare(sql).get()`.
 * @returns {number | null} The N in `embedding float[N]`, or null when there
 *   is no definition or it does not contain that declaration.
 */
function getExistingVectorDimensions(db) {
  const schemaRow = db
    .prepare("SELECT sql FROM sqlite_master WHERE name = 'chunk_vectors'")
    .get();
  const ddl = schemaRow?.sql;
  const declaration = ddl ? ddl.match(/embedding\s+float\[(\d+)\]/i) : null;
  return declaration ? Number.parseInt(declaration[1], 10) : null;
}
|
|
1339
1590
|
|
|
1340
1591
|
// src/cli/commands/config.ts
|
|
1341
1592
|
import fs5 from "fs";
|
|
@@ -1451,6 +1702,20 @@ function setNestedValue(obj, key, value) {
|
|
|
1451
1702
|
}
|
|
1452
1703
|
current[parts[parts.length - 1]] = value;
|
|
1453
1704
|
}
|
|
1705
|
+
/**
 * Tells whether a dot-separated key path exists in `obj`.
 *
 * Only own properties count at every level, so inherited names such as
 * "constructor", "toString" or "__proto__" are never reported as present.
 *
 * @param {unknown} obj - Root value to walk.
 * @param {string} key - Dot-separated path, e.g. "embedder.dimensions".
 * @returns {boolean} True when every segment is an own property of the
 *   object reached so far; a present key holding null/undefined still counts.
 */
function hasNestedKey(obj, key) {
  let current = obj;
  for (const part of key.split(".")) {
    // typeof also rules out undefined; null needs its own check.
    if (current === null || typeof current !== "object") {
      return false;
    }
    if (!Object.hasOwn(current, part)) {
      return false;
    }
    current = current[part];
  }
  return true;
}
|
|
1454
1719
|
function parseValue(rawValue) {
|
|
1455
1720
|
if (rawValue === "null") return null;
|
|
1456
1721
|
if (rawValue === "true") return true;
|
|
@@ -1489,9 +1754,26 @@ function runConfigSet(projectPath, key, rawValue) {
|
|
|
1489
1754
|
setNestedValue(config, key, value);
|
|
1490
1755
|
writeConfig(ctxDir, config);
|
|
1491
1756
|
}
|
|
1492
|
-
function runConfigReset(projectPath) {
|
|
1757
|
+
/**
 * Resets project configuration to defaults.
 *
 * Without `key`, a deep copy of DEFAULT_CONFIG is written. With `key`, only
 * that dot-separated entry of the current config is replaced by a deep copy
 * of its default.
 *
 * @param {string} projectPath - Project used to resolve the config directory.
 * @param {string} [key] - Dot-separated config key to reset.
 * @throws {ConfigError} CONFIG_INVALID when `key` is not part of DEFAULT_CONFIG.
 */
function runConfigReset(projectPath, key) {
  const ctxDir = resolveCtxDir(projectPath);
  if (key) {
    if (!hasNestedKey(DEFAULT_CONFIG, key)) {
      throw new ConfigError(`Invalid config key: ${key}`, ErrorCode.CONFIG_INVALID);
    }
    const current = readConfig(ctxDir);
    const freshDefault = structuredClone(getNestedValue(DEFAULT_CONFIG, key));
    setNestedValue(current, key, freshDefault);
    writeConfig(ctxDir, current);
    return;
  }
  writeConfig(ctxDir, structuredClone(DEFAULT_CONFIG));
}
|
|
1496
1778
|
function registerConfigCommand(program2) {
|
|
1497
1779
|
const cmd = program2.command("config").description("Show or modify configuration");
|
|
@@ -1526,21 +1808,78 @@ function registerConfigCommand(program2) {
|
|
|
1526
1808
|
configErrorHandler(err);
|
|
1527
1809
|
}
|
|
1528
1810
|
});
|
|
1529
|
-
cmd.command("reset").description("Reset configuration to defaults").action(() => {
|
|
1811
|
+
cmd.command("reset [key]").description("Reset configuration to defaults or reset a specific key").action((key) => {
|
|
1530
1812
|
try {
|
|
1531
|
-
runConfigReset(process.cwd());
|
|
1532
|
-
|
|
1813
|
+
runConfigReset(process.cwd(), key);
|
|
1814
|
+
if (key) {
|
|
1815
|
+
console.log(`Reset ${key} to default.`);
|
|
1816
|
+
} else {
|
|
1817
|
+
console.log("Configuration reset to defaults.");
|
|
1818
|
+
}
|
|
1533
1819
|
} catch (err) {
|
|
1534
1820
|
configErrorHandler(err);
|
|
1535
1821
|
}
|
|
1536
1822
|
});
|
|
1537
1823
|
}
|
|
1538
1824
|
|
|
1825
|
+
// src/cli/embedder.ts
|
|
1826
|
+
/**
 * Returns the `embedder` section of the configuration reported by
 * `runConfigShow` for the given project.
 *
 * @param {string} projectPath - Project whose configuration is read.
 * @returns {object} The `config.embedder` value.
 */
function getProjectEmbedderConfig(projectPath) {
  return runConfigShow(projectPath).config.embedder;
}
|
|
1830
|
+
/**
 * Creates the embedder selected by the project's configuration.
 *
 * The configuration is validated first. "local" uses the local embedder;
 * "voyage" and "openai" require their API key (CTX_VOYAGE_KEY /
 * CTX_OPENAI_KEY) and receive the configured dimensions.
 *
 * @param {string} projectPath - Project whose embedder config is read.
 * @returns {Promise<object>} The embedder instance.
 * @throws {ConfigError} CONFIG_INVALID for an unknown provider, plus anything
 *   raised by validation or by the API-key lookup.
 */
async function createProjectEmbedder(projectPath) {
  const embedderConfig = getProjectEmbedderConfig(projectPath);
  validateProjectEmbedderConfig(embedderConfig);
  const { provider } = embedderConfig;
  if (provider === "local") {
    return await createLocalEmbedder();
  }
  if (provider === "voyage") {
    const voyageKey = requireApiKey("CTX_VOYAGE_KEY", "voyage");
    return createVoyageEmbedder(voyageKey, embedderConfig.dimensions);
  }
  if (provider === "openai") {
    const openAiKey = requireApiKey("CTX_OPENAI_KEY", "openai");
    return createOpenAIEmbedder(openAiKey, embedderConfig.dimensions);
  }
  throw new ConfigError(
    `Unsupported embedder provider "${embedderConfig.provider}". Use local, voyage, or openai.`,
    ErrorCode.CONFIG_INVALID
  );
}
|
|
1851
|
+
/**
 * Reads an API key from the environment.
 *
 * Surrounding whitespace (for example a trailing newline left by a shell
 * export) is stripped, and a value that is empty after trimming counts as
 * missing, so a blank key is rejected here instead of failing later when it
 * is placed in a request header.
 *
 * @param {string} envVar - Name of the environment variable to read.
 * @param {string} provider - Provider name used in the error message.
 * @returns {string} The trimmed, non-empty key.
 * @throws {ConfigError} CONFIG_INVALID when the variable is unset or blank.
 */
function requireApiKey(envVar, provider) {
  const value = process.env[envVar]?.trim();
  if (value) return value;
  throw new ConfigError(
    `Embedder provider "${provider}" requires ${envVar}. Export ${envVar} before running this command.`,
    ErrorCode.CONFIG_INVALID
  );
}
|
|
1859
|
+
/**
 * Validates the `embedder` section of a project configuration.
 *
 * @param {{provider: string, dimensions: number}} config - Embedder settings.
 * @throws {ConfigError} CONFIG_INVALID when the section is missing, when
 *   `dimensions` is not a positive integer, or when the local provider is
 *   paired with a width other than LOCAL_DIMENSIONS.
 */
function validateProjectEmbedderConfig(config) {
  // A config file without an embedder section should be reported as a config
  // problem, not surface as a TypeError on property access.
  if (config === null || typeof config !== "object") {
    throw new ConfigError(
      'Missing "embedder" configuration. Run "ctx config reset embedder" to restore defaults.',
      ErrorCode.CONFIG_INVALID
    );
  }
  if (!Number.isInteger(config.dimensions) || config.dimensions <= 0) {
    throw new ConfigError(
      `Invalid embedder.dimensions (${String(config.dimensions)}). Must be a positive integer.`,
      ErrorCode.CONFIG_INVALID
    );
  }
  // Compare against the local model's declared width rather than a repeated literal.
  if (config.provider === "local" && config.dimensions !== LOCAL_DIMENSIONS) {
    throw new ConfigError(
      `Local embedder requires "embedder.dimensions" = ${LOCAL_DIMENSIONS}. Update config or switch provider.`,
      ErrorCode.CONFIG_INVALID
    );
  }
}
|
|
1873
|
+
|
|
1539
1874
|
// src/cli/commands/init.ts
|
|
1875
|
+
/**
 * Compares two embedder descriptions by provider, model and dimensions
 * using strict equality.
 *
 * @param {{provider: *, model: *, dimensions: *}} a
 * @param {{provider: *, model: *, dimensions: *}} b
 * @returns {boolean} True when all three fields match.
 */
function isSameEmbedderConfig(a, b) {
  const comparedFields = ["provider", "model", "dimensions"];
  return comparedFields.every((field) => a[field] === b[field]);
}
|
|
1540
1878
|
var CTX_DIR2 = ".ctx";
|
|
1541
1879
|
var DB_FILENAME = "index.db";
|
|
1542
1880
|
var CONFIG_FILENAME2 = "config.json";
|
|
1543
1881
|
var GITIGNORE_ENTRY = ".ctx/";
|
|
1882
|
+
var EMBEDDING_SAVE_BATCH_SIZE = 128;
|
|
1544
1883
|
function ensureGitignore(projectRoot) {
|
|
1545
1884
|
const gitignorePath = path5.join(projectRoot, ".gitignore");
|
|
1546
1885
|
if (fs6.existsSync(gitignorePath)) {
|
|
@@ -1575,6 +1914,25 @@ function formatLanguageSummary(counts) {
|
|
|
1575
1914
|
const entries = [...counts.entries()].sort((a, b) => b[1] - a[1]).map(([lang, count]) => `${lang}: ${count}`);
|
|
1576
1915
|
return entries.join(", ");
|
|
1577
1916
|
}
|
|
1917
|
+
/**
 * Embeds chunks in slices of EMBEDDING_SAVE_BATCH_SIZE and stores each
 * slice's vectors before moving on to the next one.
 *
 * @param {object} db - Storage handle exposing `transaction(fn)` and
 *   `insertVector(chunkId, vector)`.
 *   NOTE(review): assumes `db.transaction(fn)` runs `fn` itself rather than
 *   returning a function to invoke; confirm against the storage wrapper.
 * @param {{embed: Function}} embedder - `embed(texts)` must resolve to one
 *   vector per text, in order.
 * @param {Array<{id: number, filePath: string, parent: *, text: string}>} chunks
 * @param {Function} log - Receives a progress line after each slice.
 * @returns {Promise<number>} Number of vectors stored.
 * @throws {Error} When the embedder returns a different number of vectors
 *   than it was given texts; nothing from that slice is stored.
 */
async function embedAndPersistInBatches(db, embedder, chunks, log) {
  let vectorsCreated = 0;
  const total = chunks.length;
  for (let i = 0; i < chunks.length; i += EMBEDDING_SAVE_BATCH_SIZE) {
    const batch = chunks.slice(i, i + EMBEDDING_SAVE_BATCH_SIZE);
    const texts = batch.map(
      (chunk) => prepareChunkText(chunk.filePath, chunk.parent, chunk.text)
    );
    const vectors = await embedder.embed(texts);
    // Vectors are paired with chunks by position; a short or malformed result
    // would attach undefined vectors to chunk ids.
    if (!Array.isArray(vectors) || vectors.length !== batch.length) {
      const received = Array.isArray(vectors) ? vectors.length : 0;
      throw new Error(
        `Embedder returned ${received} vectors for ${batch.length} chunks`
      );
    }
    db.transaction(() => {
      for (let j = 0; j < batch.length; j++) {
        db.insertVector(batch[j].id, vectors[j]);
      }
    });
    vectorsCreated += vectors.length;
    log(` Embedding... ${vectorsCreated}/${total}`);
  }
  return vectorsCreated;
}
|
|
1578
1936
|
async function runInit(projectPath, options = {}) {
|
|
1579
1937
|
const log = options.log ?? console.log;
|
|
1580
1938
|
const absoluteRoot = path5.resolve(projectPath);
|
|
@@ -1584,9 +1942,28 @@ async function runInit(projectPath, options = {}) {
|
|
|
1584
1942
|
if (!fs6.existsSync(ctxDir)) fs6.mkdirSync(ctxDir, { recursive: true });
|
|
1585
1943
|
ensureGitignore(absoluteRoot);
|
|
1586
1944
|
ensureConfig(ctxDir);
|
|
1945
|
+
const embedderConfig = getProjectEmbedderConfig(absoluteRoot);
|
|
1587
1946
|
const dbPath = path5.join(ctxDir, DB_FILENAME);
|
|
1588
|
-
const db = createDatabase(dbPath);
|
|
1947
|
+
const db = createDatabase(dbPath, embedderConfig.dimensions);
|
|
1589
1948
|
try {
|
|
1949
|
+
const existingEmbedder = db.getIndexEmbedder();
|
|
1950
|
+
if (existingEmbedder) {
|
|
1951
|
+
if (!isSameEmbedderConfig(existingEmbedder, embedderConfig)) {
|
|
1952
|
+
throw new IndexError(
|
|
1953
|
+
`Index embedder mismatch: index uses ${existingEmbedder.provider} (${existingEmbedder.model}, ${existingEmbedder.dimensions} dims) but config requests ${embedderConfig.provider} (${embedderConfig.model}, ${embedderConfig.dimensions} dims). Rebuild the index.`,
|
|
1954
|
+
ErrorCode.CONFIG_INVALID
|
|
1955
|
+
);
|
|
1956
|
+
}
|
|
1957
|
+
} else {
|
|
1958
|
+
const isEmptyIndex = db.getFileCount() === 0 && db.getChunkCount() === 0 && db.getVectorCount() === 0;
|
|
1959
|
+
if (isEmptyIndex) {
|
|
1960
|
+
db.setIndexEmbedder({
|
|
1961
|
+
provider: embedderConfig.provider,
|
|
1962
|
+
model: embedderConfig.model,
|
|
1963
|
+
dimensions: embedderConfig.dimensions
|
|
1964
|
+
});
|
|
1965
|
+
}
|
|
1966
|
+
}
|
|
1590
1967
|
const discovered = await discoverFiles({
|
|
1591
1968
|
root: absoluteRoot,
|
|
1592
1969
|
extraIgnore: [".ctx/"]
|
|
@@ -1672,21 +2049,34 @@ async function runInit(projectPath, options = {}) {
|
|
|
1672
2049
|
}
|
|
1673
2050
|
log(` ${allChunksWithMeta.length} chunks created`);
|
|
1674
2051
|
let vectorsCreated = 0;
|
|
1675
|
-
if (!options.skipEmbedding
|
|
1676
|
-
const
|
|
1677
|
-
|
|
1678
|
-
|
|
1679
|
-
|
|
1680
|
-
|
|
1681
|
-
|
|
1682
|
-
|
|
1683
|
-
|
|
1684
|
-
|
|
1685
|
-
|
|
1686
|
-
|
|
2052
|
+
if (!options.skipEmbedding) {
|
|
2053
|
+
const chunksMissingVectors = db.getChunksMissingVectors().map((chunk) => ({
|
|
2054
|
+
id: chunk.id,
|
|
2055
|
+
filePath: chunk.filePath,
|
|
2056
|
+
parent: chunk.parent,
|
|
2057
|
+
text: chunk.text
|
|
2058
|
+
}));
|
|
2059
|
+
if (chunksMissingVectors.length > 0) {
|
|
2060
|
+
log(` ${chunksMissingVectors.length} chunks need embeddings`);
|
|
2061
|
+
}
|
|
2062
|
+
if (chunksMissingVectors.length > 0) {
|
|
2063
|
+
const embedder = await createEmbedder(absoluteRoot);
|
|
2064
|
+
try {
|
|
2065
|
+
vectorsCreated = await embedAndPersistInBatches(
|
|
2066
|
+
db,
|
|
2067
|
+
embedder,
|
|
2068
|
+
chunksMissingVectors,
|
|
2069
|
+
log
|
|
2070
|
+
);
|
|
2071
|
+
} catch (err) {
|
|
2072
|
+
const total = chunksMissingVectors.length;
|
|
2073
|
+
throw new IndexError(
|
|
2074
|
+
`Embedding failed after saving ${vectorsCreated}/${total} vectors. Run "ctx init" again to resume. ${err instanceof Error ? err.message : String(err)}`,
|
|
2075
|
+
ErrorCode.EMBEDDER_FAILED,
|
|
2076
|
+
err instanceof Error ? err : void 0
|
|
2077
|
+
);
|
|
1687
2078
|
}
|
|
1688
|
-
}
|
|
1689
|
-
vectorsCreated = vectors.length;
|
|
2079
|
+
}
|
|
1690
2080
|
}
|
|
1691
2081
|
const durationMs = performance.now() - start;
|
|
1692
2082
|
const dbSize = fs6.existsSync(dbPath) ? fs6.statSync(dbPath).size : 0;
|
|
@@ -1711,8 +2101,8 @@ async function runInit(projectPath, options = {}) {
|
|
|
1711
2101
|
db.close();
|
|
1712
2102
|
}
|
|
1713
2103
|
}
|
|
1714
|
-
async function createEmbedder() {
|
|
1715
|
-
return
|
|
2104
|
+
/**
 * Creates the embedder for a project by delegating to `createProjectEmbedder`.
 *
 * @param {string} projectPath - Project whose configuration selects the embedder.
 * @returns {Promise<object>} Whatever `createProjectEmbedder` resolves to.
 */
async function createEmbedder(projectPath) {
  const embedder = await createProjectEmbedder(projectPath);
  return embedder;
}
|
|
1717
2107
|
function registerInitCommand(program2) {
|
|
1718
2108
|
program2.command("init [path]").description("Index current directory or specified path").action(async (inputPath) => {
|
|
@@ -2031,6 +2421,7 @@ var PATH_BOOST_PARTIAL = 1.2;
|
|
|
2031
2421
|
var IMPORT_PENALTY = 0.5;
|
|
2032
2422
|
var TEST_FILE_PENALTY = 0.65;
|
|
2033
2423
|
var SMALL_SNIPPET_PENALTY = 0.75;
|
|
2424
|
+
var DATA_LITERAL_PENALTY = 0.7;
|
|
2034
2425
|
var PUBLIC_API_BOOST = 1.12;
|
|
2035
2426
|
var TEST_FILE_DIRECTORY_PATTERN = /(?:^|\/)(?:tests|__tests__)(?:\/|$)/;
|
|
2036
2427
|
var TEST_FILE_NAME_PATTERN = /(?:^|\/)[^/]*\.(?:test|spec)\.[cm]?[jt]sx?$/;
|
|
@@ -2045,7 +2436,8 @@ function fusionMergeWithPathBoost(strategyResults, limit, pathBoostTerms) {
|
|
|
2045
2436
|
const importAdjusted = applyImportDeprioritization(boosted);
|
|
2046
2437
|
const testAdjusted = applyTestFileDeprioritization(importAdjusted);
|
|
2047
2438
|
const snippetAdjusted = applySmallSnippetDeprioritization(testAdjusted);
|
|
2048
|
-
const
|
|
2439
|
+
const dataLiteralAdjusted = applyDataLiteralDeprioritization(snippetAdjusted);
|
|
2440
|
+
const boostedApi = applyPublicApiBoost(dataLiteralAdjusted);
|
|
2049
2441
|
const adjusted = applyFileDiversityDiminishingReturns(boostedApi);
|
|
2050
2442
|
adjusted.sort((a, b) => b.score - a.score);
|
|
2051
2443
|
const sliced = adjusted.slice(0, limit);
|
|
@@ -2126,6 +2518,21 @@ function applySmallSnippetDeprioritization(results) {
|
|
|
2126
2518
|
return r;
|
|
2127
2519
|
});
|
|
2128
2520
|
}
|
|
2521
|
+
/**
 * Scales down the score of data-literal results by DATA_LITERAL_PENALTY.
 *
 * The input array is returned untouched when every result is a data literal
 * (including the empty case) or when the best non-data score, floored at 0,
 * is exactly 0. Otherwise a new array is returned in which data-literal
 * entries are copies with the reduced score and the rest are the same objects.
 *
 * @param {Array<{score: number}>} results - Ranked search results.
 * @returns {Array<{score: number}>} Adjusted results, in the same order.
 */
function applyDataLiteralDeprioritization(results) {
  const dataFlags = results.map((r) => isDataLiteralChunk(r));
  if (dataFlags.every(Boolean)) {
    return results;
  }
  let maxNonDataScore = 0;
  results.forEach((r, idx) => {
    if (!dataFlags[idx]) {
      maxNonDataScore = Math.max(maxNonDataScore, r.score);
    }
  });
  if (maxNonDataScore === 0) {
    return results;
  }
  return results.map((r, idx) =>
    dataFlags[idx] ? { ...r, score: r.score * DATA_LITERAL_PENALTY } : r
  );
}
|
|
2129
2536
|
function applyPublicApiBoost(results) {
|
|
2130
2537
|
return results.map((r) => {
|
|
2131
2538
|
if (isPublicApiSymbol(r)) {
|
|
@@ -2160,6 +2567,23 @@ function isPublicApiSymbol(result) {
|
|
|
2160
2567
|
const textStart = result.text.trimStart().toLowerCase();
|
|
2161
2568
|
return textStart.startsWith("export ");
|
|
2162
2569
|
}
|
|
2570
|
+
/**
 * Heuristic: does this result look like a constant holding a data literal
 * (an object/array of key-value pairs) rather than logic?
 *
 * All of the following must hold: the type is "constant"; the text has at
 * least one non-blank line; it contains a `{`…`}` or `[`…`]` pair; at least
 * two non-blank lines, and at least 35% of them, contain a `key:` shape; and
 * quotes plus colons make up at least 4% of the non-whitespace characters.
 *
 * @param {{type: string, text: string}} result - Search result to classify.
 * @returns {boolean} True when every condition above is met.
 */
function isDataLiteralChunk(result) {
  if (result.type !== "constant") return false;
  const { text } = result;
  const lines = [];
  for (const rawLine of text.split("\n")) {
    const trimmed = rawLine.trim();
    if (trimmed.length > 0) lines.push(trimmed);
  }
  if (lines.length === 0) return false;

  const hasBraces = text.includes("{") && text.includes("}");
  const hasBrackets = text.includes("[") && text.includes("]");
  if (!hasBraces && !hasBrackets) return false;

  const keyValuePattern = /['"`]?[A-Za-z0-9_-]+['"`]?\s*:\s*/;
  const keyValueLineCount = lines.filter((line) => keyValuePattern.test(line)).length;
  if (keyValueLineCount < 2) return false;
  if (keyValueLineCount / lines.length < 0.35) return false;

  const countMatches = (pattern) => (text.match(pattern) ?? []).length;
  const structuralCount = countMatches(/["'`]/g) + countMatches(/:/g);
  const nonWhitespaceLength = text.replace(/\s+/g, "").length;
  return structuralCount / Math.max(nonWhitespaceLength, 1) >= 0.04;
}
|
|
2163
2587
|
function getFileDiversityFactor(fileOccurrence) {
|
|
2164
2588
|
if (fileOccurrence <= 1) return 1;
|
|
2165
2589
|
if (fileOccurrence === 2) return 0.9;
|
|
@@ -2340,9 +2764,13 @@ async function runQuery(projectPath, query, options) {
|
|
|
2340
2764
|
);
|
|
2341
2765
|
}
|
|
2342
2766
|
const start = performance.now();
|
|
2343
|
-
const
|
|
2767
|
+
const embedderConfig = getProjectEmbedderConfig(absoluteRoot);
|
|
2768
|
+
const db = createDatabase(dbPath, embedderConfig.dimensions);
|
|
2344
2769
|
try {
|
|
2345
|
-
const strategyResults = await runStrategies(db, query, {
|
|
2770
|
+
const strategyResults = await runStrategies(db, absoluteRoot, query, {
|
|
2771
|
+
...options,
|
|
2772
|
+
limit
|
|
2773
|
+
});
|
|
2346
2774
|
const pathBoostTerms = extractPathBoostTerms(query);
|
|
2347
2775
|
const fused = fusionMergeWithPathBoost(strategyResults, limit, pathBoostTerms);
|
|
2348
2776
|
const outputResults = fused.map(toOutputResult);
|
|
@@ -2362,7 +2790,7 @@ async function runQuery(projectPath, query, options) {
|
|
|
2362
2790
|
db.close();
|
|
2363
2791
|
}
|
|
2364
2792
|
}
|
|
2365
|
-
async function runStrategies(db, query, options) {
|
|
2793
|
+
async function runStrategies(db, projectPath, query, options) {
|
|
2366
2794
|
const results = [];
|
|
2367
2795
|
const filters = options.language ? { language: options.language } : void 0;
|
|
2368
2796
|
const limit = options.limit * 3;
|
|
@@ -2371,6 +2799,7 @@ async function runStrategies(db, query, options) {
|
|
|
2371
2799
|
const weight = effectiveWeights[strategy];
|
|
2372
2800
|
const searchResults = await executeStrategy(
|
|
2373
2801
|
db,
|
|
2802
|
+
projectPath,
|
|
2374
2803
|
strategy,
|
|
2375
2804
|
query,
|
|
2376
2805
|
limit,
|
|
@@ -2382,10 +2811,10 @@ async function runStrategies(db, query, options) {
|
|
|
2382
2811
|
}
|
|
2383
2812
|
return results;
|
|
2384
2813
|
}
|
|
2385
|
-
async function executeStrategy(db, strategy, query, limit, filters) {
|
|
2814
|
+
async function executeStrategy(db, projectPath, strategy, query, limit, filters) {
|
|
2386
2815
|
switch (strategy) {
|
|
2387
2816
|
case "vector": {
|
|
2388
|
-
const embedder = await loadEmbedder();
|
|
2817
|
+
const embedder = await loadEmbedder(projectPath);
|
|
2389
2818
|
return vectorSearch(db, embedder, query, limit, filters);
|
|
2390
2819
|
}
|
|
2391
2820
|
case "fts":
|
|
@@ -2418,9 +2847,16 @@ async function executeStrategy(db, strategy, query, limit, filters) {
|
|
|
2418
2847
|
}
|
|
2419
2848
|
}
|
|
2420
2849
|
var embedderInstance = null;
|
|
2421
|
-
|
|
2422
|
-
|
|
2423
|
-
|
|
2850
|
+
var embedderKey = null;
|
|
2851
|
+
/**
 * Builds the key that identifies the cached embedder for a project.
 *
 * The key joins the project path with the provider, model and dimensions
 * returned by `getProjectEmbedderConfig`, separated by colons.
 *
 * @param {string} projectPath - Project root passed to the config lookup.
 * @returns {string} Key of the form `path:provider:model:dimensions`.
 */
function getCacheKey(projectPath) {
  const { provider, model, dimensions } = getProjectEmbedderConfig(projectPath);
  return `${projectPath}:${provider}:${model}:${dimensions}`;
}
|
|
2855
|
+
/**
 * Returns the embedder for a project, reusing the module-level instance when
 * it was created for the same cache key.
 *
 * When there is no cached instance, or the key from `getCacheKey` differs
 * from the stored one, a new embedder is created with
 * `createProjectEmbedder` and stored together with its key.
 *
 * @param {string} projectPath - Project root used for the key and creation.
 * @returns {Promise<object>} The cached or newly created embedder.
 */
async function loadEmbedder(projectPath) {
  const wantedKey = getCacheKey(projectPath);
  const cacheIsFresh = Boolean(embedderInstance) && embedderKey === wantedKey;
  if (!cacheIsFresh) {
    embedderInstance = await createProjectEmbedder(projectPath);
    // Record the key only after creation succeeded.
    embedderKey = wantedKey;
  }
  return embedderInstance;
}
|
|
2426
2862
|
function registerQueryCommand(program2) {
|
|
@@ -2838,7 +3274,9 @@ var COMMON_STEMS = {
|
|
|
2838
3274
|
transformer: "transform",
|
|
2839
3275
|
transformation: "transform",
|
|
2840
3276
|
connection: "connect",
|
|
3277
|
+
connecting: "connect",
|
|
2841
3278
|
connector: "connect",
|
|
3279
|
+
migrating: "migrate",
|
|
2842
3280
|
migration: "migrate",
|
|
2843
3281
|
scheduling: "schedule",
|
|
2844
3282
|
scheduler: "schedule",
|
|
@@ -2847,7 +3285,8 @@ var COMMON_STEMS = {
|
|
|
2847
3285
|
routing: "route",
|
|
2848
3286
|
router: "route",
|
|
2849
3287
|
indexing: "index",
|
|
2850
|
-
indexer: "index"
|
|
3288
|
+
indexer: "index",
|
|
3289
|
+
subscribing: "subscribe"
|
|
2851
3290
|
};
|
|
2852
3291
|
var STEM_SUFFIXES = [
|
|
2853
3292
|
"tion",
|
|
@@ -3127,13 +3566,13 @@ function formatTextOutput2(output) {
|
|
|
3127
3566
|
);
|
|
3128
3567
|
return lines.join("\n");
|
|
3129
3568
|
}
|
|
3130
|
-
function createSearchExecutor(db, query) {
|
|
3569
|
+
function createSearchExecutor(db, projectPath, query) {
|
|
3131
3570
|
const pathBoostTerms = extractPathBoostTerms(query);
|
|
3132
3571
|
return async (strategies, limit) => {
|
|
3133
3572
|
const strategyResults = [];
|
|
3134
3573
|
const fetchLimit = limit * 3;
|
|
3135
3574
|
for (const plan of strategies) {
|
|
3136
|
-
const results = await executeStrategy2(db, plan, fetchLimit);
|
|
3575
|
+
const results = await executeStrategy2(db, projectPath, plan, fetchLimit);
|
|
3137
3576
|
if (results.length > 0) {
|
|
3138
3577
|
strategyResults.push({
|
|
3139
3578
|
strategy: plan.strategy,
|
|
@@ -3152,10 +3591,10 @@ function extractSymbolNames2(query) {
|
|
|
3152
3591
|
/**
 * Reports whether a query contains any character treated as path-like:
 * a slash, an asterisk, or a dot.
 *
 * @param {string} query - Raw query text.
 * @returns {boolean} `true` when `query` includes `/`, `*` or `.`.
 */
function isPathLike2(query) {
  const pathMarkers = ["/", "*", "."];
  return pathMarkers.some((marker) => query.includes(marker));
}
|
|
3155
|
-
async function executeStrategy2(db, plan, limit) {
|
|
3594
|
+
async function executeStrategy2(db, projectPath, plan, limit) {
|
|
3156
3595
|
switch (plan.strategy) {
|
|
3157
3596
|
case "vector": {
|
|
3158
|
-
const embedder = await loadEmbedder2();
|
|
3597
|
+
const embedder = await loadEmbedder2(projectPath);
|
|
3159
3598
|
return vectorSearch(db, embedder, plan.query, limit);
|
|
3160
3599
|
}
|
|
3161
3600
|
case "fts":
|
|
@@ -3184,13 +3623,20 @@ async function executeStrategy2(db, plan, limit) {
|
|
|
3184
3623
|
}
|
|
3185
3624
|
}
|
|
3186
3625
|
var embedderInstance2 = null;
|
|
3187
|
-
|
|
3188
|
-
|
|
3189
|
-
|
|
3626
|
+
var embedderKey2 = null;
|
|
3627
|
+
/**
 * Builds the key that identifies the cached embedder for a project.
 *
 * The key joins the project path with the provider, model and dimensions
 * returned by `getProjectEmbedderConfig`, separated by colons.
 *
 * @param {string} projectPath - Project root passed to the config lookup.
 * @returns {string} Key of the form `path:provider:model:dimensions`.
 */
function getCacheKey2(projectPath) {
  const { provider, model, dimensions } = getProjectEmbedderConfig(projectPath);
  return `${projectPath}:${provider}:${model}:${dimensions}`;
}
|
|
3631
|
+
/**
 * Returns the embedder for a project, reusing the module-level instance when
 * it was created for the same cache key.
 *
 * When there is no cached instance, or the key from `getCacheKey2` differs
 * from the stored one, a new embedder is created with
 * `createProjectEmbedder` and stored together with its key.
 *
 * @param {string} projectPath - Project root used for the key and creation.
 * @returns {Promise<object>} The cached or newly created embedder.
 */
async function loadEmbedder2(projectPath) {
  const wantedKey = getCacheKey2(projectPath);
  const cacheIsFresh = Boolean(embedderInstance2) && embedderKey2 === wantedKey;
  if (!cacheIsFresh) {
    embedderInstance2 = await createProjectEmbedder(projectPath);
    // Record the key only after creation succeeded.
    embedderKey2 = wantedKey;
  }
  return embedderInstance2;
}
|
|
3192
|
-
async function fallbackSearch(db, query, limit) {
|
|
3193
|
-
const executor = createSearchExecutor(db, query);
|
|
3638
|
+
async function fallbackSearch(db, projectPath, query, limit) {
|
|
3639
|
+
const executor = createSearchExecutor(db, projectPath, query);
|
|
3194
3640
|
const fallbackStrategies = buildFallbackStrategies(query);
|
|
3195
3641
|
const results = await executor(fallbackStrategies, limit);
|
|
3196
3642
|
return {
|
|
@@ -3217,18 +3663,19 @@ async function runAsk(projectPath, query, options) {
|
|
|
3217
3663
|
ErrorCode.NOT_INITIALIZED
|
|
3218
3664
|
);
|
|
3219
3665
|
}
|
|
3220
|
-
const
|
|
3666
|
+
const embedderConfig = getProjectEmbedderConfig(absoluteRoot);
|
|
3667
|
+
const db = createDatabase(dbPath, embedderConfig.dimensions);
|
|
3221
3668
|
try {
|
|
3222
3669
|
const provider = options.provider ?? null;
|
|
3223
3670
|
if (!provider) {
|
|
3224
|
-
const output = await fallbackSearch(db, query, limit);
|
|
3671
|
+
const output = await fallbackSearch(db, absoluteRoot, query, limit);
|
|
3225
3672
|
output.warning = FALLBACK_NOTICE;
|
|
3226
3673
|
if (options.format === "text") {
|
|
3227
3674
|
output.text = formatTextOutput2(output);
|
|
3228
3675
|
}
|
|
3229
3676
|
return output;
|
|
3230
3677
|
}
|
|
3231
|
-
const executor = createSearchExecutor(db, query);
|
|
3678
|
+
const executor = createSearchExecutor(db, absoluteRoot, query);
|
|
3232
3679
|
if (options.noExplain) {
|
|
3233
3680
|
return await runNoExplain(provider, query, limit, options, executor);
|
|
3234
3681
|
}
|
|
@@ -3403,6 +3850,7 @@ function createWatcher(options, events) {
|
|
|
3403
3850
|
// src/cli/commands/watch.ts
|
|
3404
3851
|
var CTX_DIR5 = ".ctx";
|
|
3405
3852
|
var DB_FILENAME4 = "index.db";
|
|
3853
|
+
var EMBEDDING_SAVE_BATCH_SIZE2 = 128;
|
|
3406
3854
|
/**
 * Formats the current time for log prefixes.
 *
 * @returns {string} The current local time rendered by
 *   `toLocaleTimeString("en-GB", { hour12: false })`.
 */
function timestamp() {
  const now = new Date();
  return now.toLocaleTimeString("en-GB", { hour12: false });
}
|
|
@@ -3419,6 +3867,9 @@ async function hashFile(absolutePath) {
|
|
|
3419
3867
|
const content = fs9.readFileSync(absolutePath);
|
|
3420
3868
|
return createHash3("sha256").update(content).digest("hex");
|
|
3421
3869
|
}
|
|
3870
|
+
/**
 * Compares two embedder descriptions field by field.
 *
 * @param {{ provider: *, model: *, dimensions: * }} a - First description.
 * @param {{ provider: *, model: *, dimensions: * }} b - Second description.
 * @returns {boolean} `true` when `provider`, `model` and `dimensions` are
 *   all strictly equal between `a` and `b`.
 */
function isSameEmbedderConfig2(a, b) {
  const comparedFields = ["provider", "model", "dimensions"];
  // `every` stops at the first mismatch, in the listed order.
  return comparedFields.every((field) => a[field] === b[field]);
}
|
|
3422
3873
|
async function reindexChanges(db, changes, projectPath, options) {
|
|
3423
3874
|
const start = performance.now();
|
|
3424
3875
|
const log = options.log;
|
|
@@ -3430,8 +3881,8 @@ async function reindexChanges(db, changes, projectPath, options) {
|
|
|
3430
3881
|
const language = detectLanguage(change.path);
|
|
3431
3882
|
if (change.type === "unlink") {
|
|
3432
3883
|
log(`[${timestamp()}] Deleted: ${change.path}`);
|
|
3433
|
-
const
|
|
3434
|
-
if (
|
|
3884
|
+
const existingFile = db.getFile(change.path);
|
|
3885
|
+
if (existingFile) {
|
|
3435
3886
|
db.deleteFile(change.path);
|
|
3436
3887
|
}
|
|
3437
3888
|
filesProcessed++;
|
|
@@ -3441,10 +3892,6 @@ async function reindexChanges(db, changes, projectPath, options) {
|
|
|
3441
3892
|
if (!fs9.existsSync(absolutePath)) continue;
|
|
3442
3893
|
const label = change.type === "add" ? "Added" : "Changed";
|
|
3443
3894
|
log(`[${timestamp()}] ${label}: ${change.path}`);
|
|
3444
|
-
const existingFile = db.getFile(change.path);
|
|
3445
|
-
if (existingFile) {
|
|
3446
|
-
db.deleteChunksByFile(existingFile.id);
|
|
3447
|
-
}
|
|
3448
3895
|
let nodes;
|
|
3449
3896
|
try {
|
|
3450
3897
|
nodes = await parseFile(absolutePath, language);
|
|
@@ -3455,26 +3902,31 @@ async function reindexChanges(db, changes, projectPath, options) {
|
|
|
3455
3902
|
const chunks = chunkFile(nodes, change.path);
|
|
3456
3903
|
const hash = await hashFile(absolutePath);
|
|
3457
3904
|
const size = fs9.statSync(absolutePath).size;
|
|
3458
|
-
const
|
|
3459
|
-
|
|
3460
|
-
|
|
3461
|
-
|
|
3462
|
-
|
|
3905
|
+
const chunkRows = chunks.map((c) => ({
|
|
3906
|
+
lineStart: c.lineStart,
|
|
3907
|
+
lineEnd: c.lineEnd,
|
|
3908
|
+
type: c.type,
|
|
3909
|
+
name: c.name,
|
|
3910
|
+
parent: c.parent,
|
|
3911
|
+
text: c.text,
|
|
3912
|
+
imports: c.imports,
|
|
3913
|
+
exports: c.exports,
|
|
3914
|
+
hash: c.hash
|
|
3915
|
+
}));
|
|
3916
|
+
let chunkIds = [];
|
|
3917
|
+
db.transaction(() => {
|
|
3918
|
+
const existingFile = db.getFile(change.path);
|
|
3919
|
+
if (existingFile) {
|
|
3920
|
+
db.deleteChunksByFile(existingFile.id);
|
|
3921
|
+
}
|
|
3922
|
+
const fileId = db.upsertFile({
|
|
3923
|
+
path: change.path,
|
|
3924
|
+
language,
|
|
3925
|
+
hash,
|
|
3926
|
+
size
|
|
3927
|
+
});
|
|
3928
|
+
chunkIds = db.insertChunks(fileId, chunkRows);
|
|
3463
3929
|
});
|
|
3464
|
-
const chunkIds = db.insertChunks(
|
|
3465
|
-
fileId,
|
|
3466
|
-
chunks.map((c) => ({
|
|
3467
|
-
lineStart: c.lineStart,
|
|
3468
|
-
lineEnd: c.lineEnd,
|
|
3469
|
-
type: c.type,
|
|
3470
|
-
name: c.name,
|
|
3471
|
-
parent: c.parent,
|
|
3472
|
-
text: c.text,
|
|
3473
|
-
imports: c.imports,
|
|
3474
|
-
exports: c.exports,
|
|
3475
|
-
hash: c.hash
|
|
3476
|
-
}))
|
|
3477
|
-
);
|
|
3478
3930
|
for (let i = 0; i < chunks.length; i++) {
|
|
3479
3931
|
allChunksWithMeta.push({
|
|
3480
3932
|
fileRelPath: change.path,
|
|
@@ -3485,25 +3937,46 @@ async function reindexChanges(db, changes, projectPath, options) {
|
|
|
3485
3937
|
filesProcessed++;
|
|
3486
3938
|
}
|
|
3487
3939
|
if (!options.skipEmbedding && allChunksWithMeta.length > 0) {
|
|
3488
|
-
const embedder = await loadEmbedder3();
|
|
3489
|
-
|
|
3490
|
-
|
|
3491
|
-
|
|
3492
|
-
|
|
3493
|
-
|
|
3494
|
-
|
|
3495
|
-
|
|
3496
|
-
|
|
3940
|
+
const embedder = await loadEmbedder3(projectPath);
|
|
3941
|
+
let vectorsCreated = 0;
|
|
3942
|
+
const total = allChunksWithMeta.length;
|
|
3943
|
+
try {
|
|
3944
|
+
for (let i = 0; i < allChunksWithMeta.length; i += EMBEDDING_SAVE_BATCH_SIZE2) {
|
|
3945
|
+
const batch = allChunksWithMeta.slice(i, i + EMBEDDING_SAVE_BATCH_SIZE2);
|
|
3946
|
+
const texts = batch.map(
|
|
3947
|
+
(cm) => prepareChunkText(cm.fileRelPath, cm.chunk.parent, cm.chunk.text)
|
|
3948
|
+
);
|
|
3949
|
+
const vectors = await embedder.embed(texts);
|
|
3950
|
+
db.transaction(() => {
|
|
3951
|
+
for (let j = 0; j < batch.length; j++) {
|
|
3952
|
+
const chunkDbId = parseInt(batch[j].chunk.id, 10);
|
|
3953
|
+
db.insertVector(chunkDbId, vectors[j]);
|
|
3954
|
+
}
|
|
3955
|
+
});
|
|
3956
|
+
vectorsCreated += vectors.length;
|
|
3497
3957
|
}
|
|
3498
|
-
})
|
|
3958
|
+
} catch (err) {
|
|
3959
|
+
throw new IndexError(
|
|
3960
|
+
`Embedding failed after saving ${vectorsCreated}/${total} vectors. Continue watching, then run "ctx init" to backfill missing vectors. ${err instanceof Error ? err.message : String(err)}`,
|
|
3961
|
+
ErrorCode.EMBEDDER_FAILED,
|
|
3962
|
+
err instanceof Error ? err : void 0
|
|
3963
|
+
);
|
|
3964
|
+
}
|
|
3499
3965
|
}
|
|
3500
3966
|
const durationMs = performance.now() - start;
|
|
3501
3967
|
return { filesProcessed, chunksUpdated, durationMs };
|
|
3502
3968
|
}
|
|
3503
3969
|
var embedderInstance3 = null;
|
|
3504
|
-
|
|
3505
|
-
|
|
3506
|
-
|
|
3970
|
+
var embedderKey3 = null;
|
|
3971
|
+
/**
 * Builds the key that identifies the cached embedder for a project.
 *
 * The key joins the project path with the provider, model and dimensions
 * returned by `getProjectEmbedderConfig`, separated by colons.
 *
 * @param {string} projectPath - Project root passed to the config lookup.
 * @returns {string} Key of the form `path:provider:model:dimensions`.
 */
function getCacheKey3(projectPath) {
  const { provider, model, dimensions } = getProjectEmbedderConfig(projectPath);
  return `${projectPath}:${provider}:${model}:${dimensions}`;
}
|
|
3975
|
+
/**
 * Returns the embedder for a project, reusing the module-level instance when
 * it was created for the same cache key.
 *
 * When there is no cached instance, or the key from `getCacheKey3` differs
 * from the stored one, a new embedder is created with
 * `createProjectEmbedder` and stored together with its key.
 *
 * @param {string} projectPath - Project root used for the key and creation.
 * @returns {Promise<object>} The cached or newly created embedder.
 */
async function loadEmbedder3(projectPath) {
  const wantedKey = getCacheKey3(projectPath);
  const cacheIsFresh = Boolean(embedderInstance3) && embedderKey3 === wantedKey;
  if (!cacheIsFresh) {
    embedderInstance3 = await createProjectEmbedder(projectPath);
    // Record the key only after creation succeeded.
    embedderKey3 = wantedKey;
  }
  return embedderInstance3;
}
|
|
3509
3982
|
async function runWatch(projectPath, options = {}) {
|
|
@@ -3520,8 +3993,29 @@ async function runWatch(projectPath, options = {}) {
|
|
|
3520
3993
|
);
|
|
3521
3994
|
}
|
|
3522
3995
|
await initParser();
|
|
3523
|
-
const
|
|
3996
|
+
const embedderConfig = getProjectEmbedderConfig(absoluteRoot);
|
|
3997
|
+
const db = createDatabase(dbPath, embedderConfig.dimensions);
|
|
3998
|
+
const existingEmbedder = db.getIndexEmbedder();
|
|
3999
|
+
if (existingEmbedder) {
|
|
4000
|
+
if (!isSameEmbedderConfig2(existingEmbedder, embedderConfig)) {
|
|
4001
|
+
db.close();
|
|
4002
|
+
throw new IndexError(
|
|
4003
|
+
`Index embedder mismatch: index uses ${existingEmbedder.provider} (${existingEmbedder.model}, ${existingEmbedder.dimensions} dims) but config requests ${embedderConfig.provider} (${embedderConfig.model}, ${embedderConfig.dimensions} dims). Rebuild the index.`,
|
|
4004
|
+
ErrorCode.CONFIG_INVALID
|
|
4005
|
+
);
|
|
4006
|
+
}
|
|
4007
|
+
} else {
|
|
4008
|
+
const isEmptyIndex = db.getFileCount() === 0 && db.getChunkCount() === 0 && db.getVectorCount() === 0;
|
|
4009
|
+
if (isEmptyIndex) {
|
|
4010
|
+
db.setIndexEmbedder({
|
|
4011
|
+
provider: embedderConfig.provider,
|
|
4012
|
+
model: embedderConfig.model,
|
|
4013
|
+
dimensions: embedderConfig.dimensions
|
|
4014
|
+
});
|
|
4015
|
+
}
|
|
4016
|
+
}
|
|
3524
4017
|
let watcherHandle = null;
|
|
4018
|
+
let reindexQueue = Promise.resolve();
|
|
3525
4019
|
const watcher = createWatcher(
|
|
3526
4020
|
{
|
|
3527
4021
|
projectPath: absoluteRoot,
|
|
@@ -3530,7 +4024,7 @@ async function runWatch(projectPath, options = {}) {
|
|
|
3530
4024
|
},
|
|
3531
4025
|
{
|
|
3532
4026
|
onChange: (changes) => {
|
|
3533
|
-
|
|
4027
|
+
reindexQueue = reindexQueue.then(async () => {
|
|
3534
4028
|
try {
|
|
3535
4029
|
const result = await reindexChanges(db, changes, absoluteRoot, {
|
|
3536
4030
|
skipEmbedding: options.skipEmbedding,
|
|
@@ -3546,7 +4040,7 @@ async function runWatch(projectPath, options = {}) {
|
|
|
3546
4040
|
`[${timestamp()}] Error: ${err instanceof Error ? err.message : String(err)}`
|
|
3547
4041
|
);
|
|
3548
4042
|
}
|
|
3549
|
-
})
|
|
4043
|
+
});
|
|
3550
4044
|
},
|
|
3551
4045
|
onError: (err) => {
|
|
3552
4046
|
log(`[${timestamp()}] Watcher error: ${err.message}`);
|
|
@@ -3562,6 +4056,7 @@ async function runWatch(projectPath, options = {}) {
|
|
|
3562
4056
|
await watcherHandle.stop();
|
|
3563
4057
|
watcherHandle = null;
|
|
3564
4058
|
}
|
|
4059
|
+
await reindexQueue;
|
|
3565
4060
|
db.close();
|
|
3566
4061
|
log("Stopped watching. Database saved.");
|
|
3567
4062
|
}
|
|
@@ -3623,6 +4118,7 @@ function readConfig2(ctxDir) {
|
|
|
3623
4118
|
const parsed = JSON.parse(raw);
|
|
3624
4119
|
const embedder = parsed.embedder;
|
|
3625
4120
|
return {
|
|
4121
|
+
provider: embedder?.provider ?? parsed.provider ?? "unknown",
|
|
3626
4122
|
model: embedder?.model ?? parsed.model ?? "unknown",
|
|
3627
4123
|
dimensions: embedder?.dimensions ?? parsed.dimensions ?? 0
|
|
3628
4124
|
};
|
|
@@ -3663,15 +4159,54 @@ function formatStatus(projectPath, output) {
|
|
|
3663
4159
|
lines.push(` ${label}${count} file${count !== 1 ? "s" : ""}`);
|
|
3664
4160
|
}
|
|
3665
4161
|
}
|
|
3666
|
-
|
|
3667
|
-
|
|
3668
|
-
|
|
3669
|
-
|
|
3670
|
-
|
|
4162
|
+
const hasConfig = output.config !== null;
|
|
4163
|
+
const hasIndexEmbedder = output.indexEmbedder !== null;
|
|
4164
|
+
if (hasConfig || hasIndexEmbedder) lines.push("");
|
|
4165
|
+
if (hasConfig && hasIndexEmbedder) {
|
|
4166
|
+
const config = output.config;
|
|
4167
|
+
const indexEmbedder = output.indexEmbedder;
|
|
4168
|
+
if (!config || !indexEmbedder) {
|
|
4169
|
+
lines.push(" Embedder: unknown");
|
|
4170
|
+
} else if (isSameEmbedder(config, indexEmbedder)) {
|
|
4171
|
+
lines.push(
|
|
4172
|
+
` Embedder: ${indexEmbedder.provider} (${indexEmbedder.model}, ${indexEmbedder.dimensions} dims)`
|
|
4173
|
+
);
|
|
4174
|
+
} else {
|
|
4175
|
+
lines.push(
|
|
4176
|
+
` Index embedder: ${indexEmbedder.provider} (${indexEmbedder.model}, ${indexEmbedder.dimensions} dims)`
|
|
4177
|
+
);
|
|
4178
|
+
lines.push(
|
|
4179
|
+
` Config embedder: ${config.provider} (${config.model}, ${config.dimensions} dims)`
|
|
4180
|
+
);
|
|
4181
|
+
}
|
|
4182
|
+
} else if (hasIndexEmbedder) {
|
|
4183
|
+
const indexEmbedder = output.indexEmbedder;
|
|
4184
|
+
if (!indexEmbedder) {
|
|
4185
|
+
lines.push(" Index embedder: unknown");
|
|
4186
|
+
} else {
|
|
4187
|
+
lines.push(
|
|
4188
|
+
` Index embedder: ${indexEmbedder.provider} (${indexEmbedder.model}, ${indexEmbedder.dimensions} dims)`
|
|
4189
|
+
);
|
|
4190
|
+
}
|
|
4191
|
+
} else if (hasConfig) {
|
|
4192
|
+
const config = output.config;
|
|
4193
|
+
if (!config) {
|
|
4194
|
+
lines.push(" Config embedder: unknown");
|
|
4195
|
+
} else {
|
|
4196
|
+
lines.push(
|
|
4197
|
+
` Config embedder: ${config.provider} (${config.model}, ${config.dimensions} dims)`
|
|
4198
|
+
);
|
|
4199
|
+
}
|
|
4200
|
+
}
|
|
4201
|
+
if (output.embedderWarning) {
|
|
4202
|
+
lines.push(` Warning: ${output.embedderWarning}`);
|
|
3671
4203
|
}
|
|
3672
4204
|
lines.push("");
|
|
3673
4205
|
return lines.join("\n");
|
|
3674
4206
|
}
|
|
4207
|
+
/**
 * Compares two embedder descriptions field by field.
 *
 * @param {{ provider: *, model: *, dimensions: * }} a - First description.
 * @param {{ provider: *, model: *, dimensions: * }} b - Second description.
 * @returns {boolean} `true` when `provider`, `model` and `dimensions` are
 *   all strictly equal between `a` and `b`.
 */
function isSameEmbedder(a, b) {
  const comparedFields = ["provider", "model", "dimensions"];
  // `every` stops at the first mismatch, in the listed order.
  return comparedFields.every((field) => a[field] === b[field]);
}
|
|
3675
4210
|
async function runStatus(projectPath) {
|
|
3676
4211
|
const absoluteRoot = path10.resolve(projectPath);
|
|
3677
4212
|
const ctxDir = path10.join(absoluteRoot, CTX_DIR6);
|
|
@@ -3686,6 +4221,8 @@ async function runStatus(projectPath) {
|
|
|
3686
4221
|
lastIndexed: null,
|
|
3687
4222
|
languages: /* @__PURE__ */ new Map(),
|
|
3688
4223
|
config: null,
|
|
4224
|
+
indexEmbedder: null,
|
|
4225
|
+
embedderWarning: null,
|
|
3689
4226
|
text: formatNotInitialized(absoluteRoot)
|
|
3690
4227
|
};
|
|
3691
4228
|
return output;
|
|
@@ -3698,7 +4235,9 @@ async function runStatus(projectPath) {
|
|
|
3698
4235
|
const languages = db.getLanguageBreakdown();
|
|
3699
4236
|
const lastIndexed = db.getLastIndexed();
|
|
3700
4237
|
const config = readConfig2(ctxDir);
|
|
4238
|
+
const indexEmbedder = db.getIndexEmbedder();
|
|
3701
4239
|
const dbSizeBytes = fs10.statSync(dbPath).size;
|
|
4240
|
+
const embedderWarning = config && indexEmbedder && !isSameEmbedder(config, indexEmbedder) ? `Index built with ${indexEmbedder.provider} (${indexEmbedder.dimensions} dims), config requests ${config.provider} (${config.dimensions} dims) \u2014 rebuild needed.` : null;
|
|
3702
4241
|
const output = {
|
|
3703
4242
|
initialized: true,
|
|
3704
4243
|
fileCount,
|
|
@@ -3708,6 +4247,8 @@ async function runStatus(projectPath) {
|
|
|
3708
4247
|
lastIndexed,
|
|
3709
4248
|
languages,
|
|
3710
4249
|
config,
|
|
4250
|
+
indexEmbedder,
|
|
4251
|
+
embedderWarning,
|
|
3711
4252
|
text: ""
|
|
3712
4253
|
};
|
|
3713
4254
|
output.text = formatStatus(absoluteRoot, output);
|