@neuralsea/workspace-indexer 0.2.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +43 -0
- package/dist/chunk-GGL3XTMV.js +4487 -0
- package/dist/cli.cjs +4551 -0
- package/dist/cli.d.cts +1 -0
- package/dist/cli.js +62 -59
- package/dist/index.cjs +4924 -0
- package/dist/index.d.cts +1326 -0
- package/dist/index.d.ts +757 -22
- package/dist/index.js +404 -3
- package/package.json +12 -6
- package/dist/chunk-Z3BRYSQM.js +0 -2481
|
@@ -0,0 +1,4487 @@
|
|
|
1
|
+
// src/progress.ts
|
|
2
|
+
// Normalise a caller-supplied progress option into a plain event handler.
// Accepts a bare function or an object exposing `emit`; anything else
// (including null/undefined) yields null so reporting can be skipped.
function toHandler(progress) {
  if (progress) {
    if (typeof progress === "function") return progress;
    if (typeof progress.emit === "function") {
      return (event) => progress.emit(event);
    }
  }
  return null;
}
|
|
8
|
+
// Minimal multicast progress-event bus: subscribers receive every emitted
// event; `on` narrows a subscription to one event type.
var IndexerProgressObservable = class {
  // Registered handlers; a Set makes duplicate adds harmless.
  handlers = /* @__PURE__ */ new Set();
  // Register `handler` for all events; returns an unsubscribe thunk.
  subscribe(handler) {
    this.handlers.add(handler);
    return () => this.handlers.delete(handler);
  }
  // Register `handler` for events whose `.type` equals `type` only.
  on(type, handler) {
    return this.subscribe((event) => {
      if (event.type === type) handler(event);
    });
  }
  // Deliver `event` to every handler; a throwing handler never blocks
  // delivery to the remaining ones.
  emit(event) {
    for (const listener of this.handlers) {
      try {
        listener(event);
      } catch {
      }
    }
  }
};
|
|
30
|
+
// Wrap any accepted progress option into a uniform `{ emit }` sink,
// or null when progress reporting is disabled/unrecognised.
function asProgressSink(progress) {
  const emit = toHandler(progress);
  return emit ? { emit } : null;
}
|
|
35
|
+
|
|
36
|
+
// src/util.ts
|
|
37
|
+
import crypto from "crypto";
|
|
38
|
+
import os from "os";
|
|
39
|
+
import path from "path";
|
|
40
|
+
// Hex-encoded SHA-256 digest of `data`.
function sha256Hex(data) {
  const hasher = crypto.createHash("sha256");
  hasher.update(data);
  return hasher.digest("hex");
}
|
|
43
|
+
// Default on-disk cache location: <home>/.cache/petri
function defaultCacheDir() {
  return path.join(os.homedir(), ".cache", "petri");
}
|
|
47
|
+
// Convert a POSIX-style path to the host platform's separator.
function fromPosixPath(p) {
  const segments = p.split("/");
  return segments.join(path.sep);
}
|
|
50
|
+
// Cheap token-count heuristic: ~4 characters per token, minimum 1.
function estimateTokens(text) {
  const approx = Math.ceil(text.length / 4);
  return approx > 1 ? approx : 1;
}
|
|
53
|
+
// Return a unit-length Float32Array copy of `vec` (L2 normalisation).
// A zero vector divides by the fallback norm 1, i.e. it comes back as zeros.
function normalise(vec) {
  let sumSq = 0;
  for (let i = 0; i < vec.length; i++) {
    sumSq += vec[i] * vec[i];
  }
  // Division (not multiplication by 1/norm) keeps rounding identical
  // across implementations.
  const norm = Math.sqrt(sumSq) || 1;
  return Float32Array.from(vec, (component) => component / norm);
}
|
|
61
|
+
// Dot product of `a` and `b`, truncated to the shorter vector.
// Forward accumulation order is preserved (float summation is order-sensitive).
function dot(a, b) {
  const len = Math.min(a.length, b.length);
  let acc = 0;
  for (let i = 0; i < len; i += 1) {
    acc += a[i] * b[i];
  }
  return acc;
}
|
|
67
|
+
// Constrain `x` to [lo, hi]. Implemented as min(hi, max(lo, x)) so the
// original behaviour for NaN and inverted bounds (returns hi) is preserved.
function clamp(x, lo, hi) {
  const floored = Math.max(lo, x);
  return Math.min(hi, floored);
}
|
|
70
|
+
// Collapse all whitespace runs to single spaces and cap the result at
// `maxLen` characters; truncation spends the final slot on an ellipsis.
function makePreview(text, maxLen = 240) {
  const oneLine = text.replace(/\s+/g, " ").trim();
  if (oneLine.length <= maxLen) return oneLine;
  return `${oneLine.slice(0, maxLen - 1)}\u2026`;
}
|
|
74
|
+
// Current wall-clock time in milliseconds. Kept as an indirection point so
// time-dependent code (e.g. recencyScore) has a single seam to stub.
function nowMs() {
  return Date.now();
}
|
|
77
|
+
// Exponential-decay freshness score in [0, 1]: 1 for "just modified",
// halving for every `halfLifeDays` of file age. Future mtimes score 1.
function recencyScore(fileMtimeMs, halfLifeDays = 14) {
  const ageMs = Math.max(0, nowMs() - fileMtimeMs);
  const halfLifeMs = halfLifeDays * 24 * 60 * 60 * 1e3;
  // Math.max(1, ...) guards against a zero/negative half-life divisor.
  const decayed = Math.pow(0.5, ageMs / Math.max(1, halfLifeMs));
  return clamp(decayed, 0, 1);
}
|
|
83
|
+
|
|
84
|
+
// src/optionalTypescript.ts
|
|
85
|
+
import { createRequire } from "module";
|
|
86
|
+
// Lazily resolved optional `typescript` module (null when unavailable).
var cached = null;
// Ensures resolution is attempted at most once per process.
var didTryLoad = false;
// Resolve the optional `typescript` dependency on first call and memoise the
// outcome. Returns the module namespace, or null when it cannot be loaded.
function getTypeScript() {
  if (!didTryLoad) {
    try {
      const require2 = createRequire(import.meta.url);
      cached = require2("typescript");
    } catch {
      cached = null;
    } finally {
      didTryLoad = true;
    }
  }
  return cached;
}
|
|
101
|
+
|
|
102
|
+
// src/chunker.ts
|
|
103
|
+
// Map a POSIX file path to a coarse language tag via its (lowercased)
// final extension; unknown extensions fall back to "text".
function languageFromPath(posixPath) {
  const ext = posixPath.toLowerCase().split(".").pop() ?? "";
  switch (ext) {
    case "ts":
    case "tsx":
      return "typescript";
    case "js":
    case "jsx":
    case "mjs":
    case "cjs":
      return "javascript";
    case "py":
      return "python";
    case "go":
      return "go";
    // Spec languages keep their own extension as the tag.
    case "ctl":
    case "ltl":
    case "isl":
      return ext;
    case "ispec":
      return "isl";
    case "ca":
    case "cpnexpr":
      return "colour-algebra";
    case "rs":
      return "rust";
    case "java":
      return "java";
    case "kt":
    case "kts":
      return "kotlin";
    case "cs":
      return "csharp";
    case "md":
      return "markdown";
    case "json":
    case "yaml":
    case "yml":
    case "toml":
      return "config";
    default:
      return "text";
  }
}
|
|
120
|
+
// Split `sourceText` into sliding line windows of up to `cfg.maxLines` lines,
// consecutive windows overlapping by `cfg.overlapLines`. Whitespace-only
// windows are dropped. startLine/endLine are 1-based and inclusive.
function chunkByLines(sourceText, cfg) {
  const lines = sourceText.split(/\r?\n/);
  // Stride is clamped to >= 1 so a degenerate config cannot loop forever.
  const stride = Math.max(1, cfg.maxLines - cfg.overlapLines);
  const chunks = [];
  for (let start = 0; start < lines.length; start += stride) {
    const end = Math.min(lines.length, start + cfg.maxLines);
    const text = lines.slice(start, end).join("\n");
    if (text.trim() === "") continue;
    chunks.push({
      startLine: start + 1,
      endLine: end,
      text,
      contentHash: sha256Hex(text),
      tokens: estimateTokens(text)
    });
  }
  return chunks;
}
|
|
138
|
+
// If a chunk's text exceeds cfg.maxChars, re-chunk it line-wise and shift
// the sub-chunks' line numbers back into the parent's coordinate space;
// otherwise return the chunk untouched as a singleton.
function splitIfTooLarge(ch, cfg) {
  if (ch.text.length <= cfg.maxChars) return [ch];
  const offset = ch.startLine - 1;
  return chunkByLines(ch.text, cfg).map((sub) => ({
    ...sub,
    startLine: offset + sub.startLine,
    endLine: offset + sub.endLine
  }));
}
|
|
146
|
+
// True when `stmt` is a top-level TS/JS construct worth indexing as its own
// chunk. Predicates are checked in the original short-circuit order.
function isTopLevelChunkable(tsMod, stmt) {
  const predicates = [
    tsMod.isFunctionDeclaration,
    tsMod.isClassDeclaration,
    tsMod.isInterfaceDeclaration,
    tsMod.isTypeAliasDeclaration,
    tsMod.isEnumDeclaration,
    tsMod.isModuleDeclaration,
    tsMod.isVariableStatement,
    tsMod.isExportAssignment,
    tsMod.isExportDeclaration
  ];
  return predicates.some((pred) => pred(stmt));
}
|
|
149
|
+
// Chunk TS/JS source by top-level statements using the optional `typescript`
// parser. Falls back to plain line-window chunking when the parser is absent
// or produces no chunkable statements. Oversized statements are re-split.
function chunkTypeScriptLike(sourceText, virtualFileName, cfg) {
  const tsMod = getTypeScript();
  if (!tsMod) return chunkByLines(sourceText, cfg);
  const sf = tsMod.createSourceFile(virtualFileName, sourceText, tsMod.ScriptTarget.Latest, true);
  const chunks = [];
  for (const stmt of sf.statements) {
    if (!isTopLevelChunkable(tsMod, stmt)) continue;
    // getFullStart() includes leading trivia (comments/blank lines), so a
    // statement's doc comment travels with its chunk.
    const startPos = stmt.getFullStart();
    const endPos = stmt.end;
    const text = sourceText.slice(startPos, endPos).trimEnd();
    if (!text.trim()) continue;
    const startLC = tsMod.getLineAndCharacterOfPosition(sf, startPos);
    const endLC = tsMod.getLineAndCharacterOfPosition(sf, endPos);
    const base = {
      startLine: startLC.line + 1,
      endLine: endLC.line + 1,
      text,
      contentHash: sha256Hex(text),
      tokens: estimateTokens(text)
    };
    chunks.push(...splitIfTooLarge(base, cfg));
  }
  return chunks.length > 0 ? chunks : chunkByLines(sourceText, cfg);
}
|
|
179
|
+
// Chunk one file: AST-aware chunking for TS/JS, line windows for everything
// else. Returns the detected language tag alongside the chunks.
function chunkSource(posixPath, sourceText, cfg) {
  const language = languageFromPath(posixPath);
  const astAware = language === "typescript" || language === "javascript";
  const chunks = astAware ? chunkTypeScriptLike(sourceText, posixPath, cfg) : chunkByLines(sourceText, cfg);
  return { language, chunks };
}
|
|
186
|
+
|
|
187
|
+
// src/vector/factory.ts
|
|
188
|
+
import path4 from "path";
|
|
189
|
+
import { pathToFileURL } from "url";
|
|
190
|
+
|
|
191
|
+
// src/vector/bruteforce.ts
|
|
192
|
+
// Exact in-memory kNN index: vectors are stored L2-normalised and every
// query is a linear scan. No persistence, no native dependencies — the
// always-available fallback provider.
var BruteForceVectorIndex = class {
  kind = "bruteforce";
  metric = "cosine";
  dimension = 0;
  // Parallel arrays: ids[i] owns vecs[i]; pos maps id -> array slot.
  ids = [];
  vecs = [];
  pos = /* @__PURE__ */ new Map();
  async init(init) {
    this.metric = init.metric;
    this.dimension = init.dimension;
  }
  async upsert(points) {
    for (const { id, vector } of points) this.upsertOne(id, vector);
  }
  // Insert or overwrite a single vector (stored normalised).
  upsertOne(id, vec) {
    const unit = normalise(vec);
    const slot = this.pos.get(id);
    if (slot !== void 0) {
      this.vecs[slot] = unit;
    } else {
      this.pos.set(id, this.ids.length);
      this.ids.push(id);
      this.vecs.push(unit);
    }
  }
  async remove(ids) {
    for (const id of ids) this.removeOne(id);
  }
  // O(1) delete: move the last entry into the vacated slot, then pop.
  removeOne(id) {
    const slot = this.pos.get(id);
    if (slot === void 0) return;
    const last = this.ids.length - 1;
    if (slot !== last) {
      const movedId = this.ids[last];
      this.ids[slot] = movedId;
      this.vecs[slot] = this.vecs[last];
      this.pos.set(movedId, slot);
    }
    this.ids.pop();
    this.vecs.pop();
    this.pos.delete(id);
  }
  async rebuild(points) {
    this.ids = [];
    this.vecs = [];
    this.pos = /* @__PURE__ */ new Map();
    for (const { id, vector } of points) this.upsertOne(id, vector);
  }
  // Exact top-k by dot product over normalised vectors (cosine similarity):
  // scan everything, maintaining a small score-descending candidate list.
  async search(query, k) {
    const q = normalise(query);
    const top = [];
    for (let i = 0; i < this.ids.length; i++) {
      const score = dot(q, this.vecs[i]);
      if (top.length < k) {
        top.push({ id: this.ids[i], score });
        top.sort((a, b) => b.score - a.score);
      } else if (score > top[top.length - 1].score) {
        top[top.length - 1] = { id: this.ids[i], score };
        top.sort((a, b) => b.score - a.score);
      }
    }
    return top;
  }
  async count() {
    return this.ids.length;
  }
  async flush() {
  }
  async close() {
  }
};
|
|
263
|
+
|
|
264
|
+
// src/vector/faiss.ts
|
|
265
|
+
import fs from "fs";
|
|
266
|
+
import path2 from "path";
|
|
267
|
+
// Dynamically load the optional `faiss-node` dependency. The module name is
// kept in a variable so bundlers do not resolve it statically. Any failure
// (missing package or missing Index export) is rethrown with an install hint.
async function importFaiss() {
  const modName = "faiss-node";
  try {
    const mod = await import(modName);
    const ns = mod?.Index ? mod : mod?.default;
    if (ns?.Index) return ns;
    throw new Error("faiss-node did not export Index");
  } catch (e) {
    const hint = "To use the 'faiss' provider, install the optional dependency: npm i faiss-node";
    throw new Error(`${String(e?.message ?? e)}
${hint}`);
  }
}
|
|
280
|
+
// Best-effort JSON file read: returns the parsed value, or null when the
// file is missing, unreadable, or not valid JSON.
function safeReadJson(p) {
  try {
    const raw = fs.readFileSync(p, "utf8");
    return JSON.parse(raw);
  } catch {
    return null;
  }
}
|
|
287
|
+
// FAISS-backed ANN index (optional `faiss-node` dependency). FAISS offers no
// cheap incremental delete here, so the authoritative data lives in
// `this.points` and the native index is rebuilt from it — eagerly on each
// write or lazily at the next search, per cfg.rebuildStrategy.
var FaissVectorIndex = class {
  kind = "faiss";
  metric = "cosine";
  dimension = 0;
  // Effective config; all defaults resolved in the constructor.
  cfg;
  // Native FAISS index handle; null until rebuilt or loaded from disk.
  index = null;
  // FAISS labels are dense insertion positions; labelToId[label] -> chunk id.
  labelToId = [];
  // Authoritative id -> raw vector map; source of truth for rebuilds.
  points = /* @__PURE__ */ new Map();
  // True when `points` has diverged from the native index.
  dirty = false;
  // Incremental upsert/remove are only legal after an initial rebuild().
  readyForIncremental = false;
  indexPath = "";
  mapPath = "";
  // NOTE(review): persistDebounceMs is accepted but never used in this class.
  constructor(cfg = {}) {
    this.cfg = {
      descriptor: cfg.descriptor ?? "HNSW,Flat",
      persist: cfg.persist ?? true,
      persistDebounceMs: cfg.persistDebounceMs ?? 2e3,
      rebuildStrategy: cfg.rebuildStrategy ?? "lazy"
    };
  }
  // Resolve on-disk paths under <cacheDir>/vector/<repoId>/<commit>.* and,
  // when persistence is enabled, reload a previously written index whose
  // sidecar map matches the current dimension/metric.
  async init(init) {
    this.metric = init.metric;
    this.dimension = init.dimension;
    const baseDir = path2.join(init.cacheDir, "vector", init.repoId);
    fs.mkdirSync(baseDir, { recursive: true });
    this.indexPath = path2.join(baseDir, `${init.commit}.faiss.idx`);
    this.mapPath = path2.join(baseDir, `${init.commit}.faiss.map.json`);
    if (this.cfg.persist && fs.existsSync(this.indexPath) && fs.existsSync(this.mapPath)) {
      const map = safeReadJson(this.mapPath);
      if (map && map.version === 1 && map.dimension === this.dimension && map.metric === this.metric) {
        const { Index } = await importFaiss();
        // NOTE(review): Index.read is not wrapped in try/catch — a corrupted
        // .idx file will make init() throw rather than fall back; confirm intended.
        this.index = Index.read(this.indexPath);
        this.labelToId = map.labelToId;
      }
    }
  }
  // Replace the authoritative point set and build the native index from it.
  async rebuild(points) {
    this.points = new Map(points.map((p) => [p.id, p.vector]));
    this.readyForIncremental = true;
    await this.rebuildFromPoints();
  }
  // Build a fresh native index from `this.points`, then persist index + label
  // map when persistence is enabled. Cosine vectors are normalised so inner
  // product equals cosine similarity.
  async rebuildFromPoints() {
    const { Index, MetricType } = await importFaiss();
    const metric = this.metric === "l2" ? MetricType.METRIC_L2 : MetricType.METRIC_INNER_PRODUCT;
    const idx = Index.fromFactory(this.dimension, this.cfg.descriptor, metric);
    const ids = [];
    const flat = [];
    for (const [id, v] of this.points.entries()) {
      ids.push(id);
      const vec = this.metric === "cosine" ? normalise(v) : v;
      for (let i = 0; i < vec.length; i++) flat.push(vec[i]);
    }
    if (flat.length > 0) {
      // Some factory descriptors require training before vectors can be added.
      if (typeof idx.isTrained === "function" && !idx.isTrained()) {
        idx.train(flat);
      }
      idx.add(flat);
    }
    this.index = idx;
    this.labelToId = ids;
    this.dirty = false;
    if (this.cfg.persist) {
      // Sidecar map lets init() validate dimension/metric before reloading.
      const map = {
        version: 1,
        dimension: this.dimension,
        metric: this.metric,
        labelToId: ids
      };
      fs.writeFileSync(this.mapPath, JSON.stringify(map));
      idx.write(this.indexPath);
    }
  }
  // Stage new/updated vectors; the native index is refreshed eagerly or at
  // the next search depending on rebuildStrategy.
  async upsert(points) {
    if (!this.readyForIncremental) {
      throw new Error("FAISS provider requires an initial rebuild() before incremental writes.");
    }
    for (const p of points) this.points.set(p.id, p.vector);
    this.dirty = true;
    if (this.cfg.rebuildStrategy === "eager") await this.rebuildFromPoints();
  }
  // Stage deletions; same rebuild semantics as upsert().
  async remove(ids) {
    if (!this.readyForIncremental) {
      throw new Error("FAISS provider requires an initial rebuild() before incremental writes.");
    }
    for (const id of ids) this.points.delete(id);
    this.dirty = true;
    if (this.cfg.rebuildStrategy === "eager") await this.rebuildFromPoints();
  }
  // Top-k query. Lazily rebuilds first when staged writes are pending.
  // L2 distances are negated so "higher score is better" holds uniformly.
  async search(query, k) {
    if (this.dirty && this.cfg.rebuildStrategy === "lazy") {
      await this.rebuildFromPoints();
    }
    if (!this.index) return [];
    const q = this.metric === "cosine" ? Array.from(normalise(query)) : Array.from(query);
    const res = this.index.search(q, k);
    const out = [];
    for (let i = 0; i < res.labels.length; i++) {
      const label = res.labels[i];
      // FAISS pads missing neighbours with label -1.
      if (label < 0) continue;
      const id = this.labelToId[label];
      if (!id) continue;
      const d = res.distances[i];
      const score = this.metric === "l2" ? -d : d;
      out.push({ id, score });
    }
    out.sort((a, b) => b.score - a.score);
    return out;
  }
  // Number of vectors in the native index (falls back to the label map).
  async count() {
    if (this.index && typeof this.index.ntotal === "function") return this.index.ntotal();
    return this.labelToId.length;
  }
  // No-op: rebuildFromPoints() persists synchronously when enabled.
  async flush() {
  }
  // Best-effort final rebuild (which also persists), then drop the handle.
  async close() {
    if (this.dirty && this.cfg.rebuildStrategy === "lazy") {
      try {
        await this.rebuildFromPoints();
      } catch {
      }
    }
    this.index = null;
  }
};
|
|
411
|
+
|
|
412
|
+
// src/vector/hnswlib.ts
|
|
413
|
+
import fs2 from "fs";
|
|
414
|
+
import path3 from "path";
|
|
415
|
+
// Dynamically load the optional `hnswlib-node` dependency. The module name
// is kept in a variable so bundlers do not resolve it statically. Failures
// are rethrown with an install hint appended.
async function importHnswlib() {
  const modName = "hnswlib-node";
  try {
    const mod = await import(modName);
    const ns = mod?.HierarchicalNSW ? mod : mod?.default;
    if (ns?.HierarchicalNSW) return ns;
    throw new Error("hnswlib-node did not export HierarchicalNSW");
  } catch (e) {
    const hint = "To use the 'hnswlib' provider, install the optional dependency: npm i hnswlib-node";
    throw new Error(`${String(e?.message ?? e)}
${hint}`);
  }
}
|
|
430
|
+
// Best-effort JSON file read (hnswlib sidecar maps): null on a missing/
// unreadable file or invalid JSON content.
function safeReadJson2(p) {
  let text;
  try {
    text = fs2.readFileSync(p, "utf8");
  } catch {
    return null;
  }
  try {
    return JSON.parse(text);
  } catch {
    return null;
  }
}
|
|
437
|
+
// HNSW ANN index backed by the optional `hnswlib-node` dependency. Supports
// true incremental upsert/remove (via label reuse and delete-marking) plus
// synchronous persistence of the graph and an id<->label sidecar map.
var HnswlibVectorIndex = class {
  kind = "hnswlib";
  metric = "cosine";
  dimension = 0;
  // Effective config; all defaults resolved in the constructor.
  cfg;
  // Native HierarchicalNSW handle; null until init() completes.
  index = null;
  // Chunk id -> integer hnswlib label, and the inverse.
  idToLabel = /* @__PURE__ */ new Map();
  labelToId = /* @__PURE__ */ new Map();
  // Next unused label for newly seen ids.
  nextLabel = 0;
  indexPath = "";
  mapPath = "";
  // True when in-memory state has not yet been persisted by flush().
  dirty = false;
  // NOTE(review): persistDebounceMs is accepted but never used in this class.
  constructor(cfg = {}) {
    this.cfg = {
      maxElements: cfg.maxElements ?? 5e4,
      m: cfg.m ?? 16,
      efConstruction: cfg.efConstruction ?? 200,
      efSearch: cfg.efSearch ?? 64,
      persist: cfg.persist ?? true,
      persistDebounceMs: cfg.persistDebounceMs ?? 2e3
    };
  }
  // Resolve on-disk paths under <cacheDir>/vector/<repoId>/<commit>.hnsw.*;
  // reload a persisted index when its sidecar map matches dimension/metric,
  // otherwise start from an empty index.
  async init(init) {
    this.metric = init.metric;
    this.dimension = init.dimension;
    const baseDir = path3.join(init.cacheDir, "vector", init.repoId);
    fs2.mkdirSync(baseDir, { recursive: true });
    this.indexPath = path3.join(baseDir, `${init.commit}.hnsw.dat`);
    this.mapPath = path3.join(baseDir, `${init.commit}.hnsw.map.json`);
    if (this.cfg.persist && fs2.existsSync(this.indexPath) && fs2.existsSync(this.mapPath)) {
      const map = safeReadJson2(this.mapPath);
      if (map && map.version === 1 && map.dimension === this.dimension && map.metric === this.metric) {
        const { HierarchicalNSW } = await importHnswlib();
        const space = this.toHnswSpace(this.metric);
        const idx = new HierarchicalNSW(space, this.dimension);
        // second arg true — presumably allowReplaceDeleted on read; TODO confirm
        idx.readIndexSync(this.indexPath, true);
        if (typeof idx.setEf === "function") idx.setEf(this.cfg.efSearch);
        this.index = idx;
        // JSON object keys are strings; labels stay numeric via the values.
        this.idToLabel = new Map(Object.entries(map.idToLabel).map(([id, label]) => [id, label]));
        this.labelToId = new Map(Array.from(this.idToLabel.entries()).map(([id, label]) => [label, id]));
        // Resume labelling after the highest persisted label (-1 => empty map).
        this.nextLabel = Math.max(-1, ...Array.from(this.labelToId.keys())) + 1;
        return;
      }
    }
    await this.resetEmpty();
  }
  // Discard all state and allocate a fresh empty index at default capacity.
  async resetEmpty() {
    const { HierarchicalNSW } = await importHnswlib();
    const idx = new HierarchicalNSW(this.toHnswSpace(this.metric), this.dimension);
    idx.initIndex({
      maxElements: this.cfg.maxElements,
      m: this.cfg.m,
      efConstruction: this.cfg.efConstruction,
      allowReplaceDeleted: true
    });
    if (typeof idx.setEf === "function") idx.setEf(this.cfg.efSearch);
    this.index = idx;
    this.idToLabel = /* @__PURE__ */ new Map();
    this.labelToId = /* @__PURE__ */ new Map();
    this.nextLabel = 0;
    this.dirty = false;
  }
  // Rebuild from scratch with headroom (10% + 1024) over the point count,
  // inserting every vector L2-normalised, then persist immediately.
  async rebuild(points) {
    const desired = Math.max(this.cfg.maxElements, Math.ceil(points.length * 1.1) + 1024);
    const { HierarchicalNSW } = await importHnswlib();
    const idx = new HierarchicalNSW(this.toHnswSpace(this.metric), this.dimension);
    idx.initIndex({
      maxElements: desired,
      m: this.cfg.m,
      efConstruction: this.cfg.efConstruction,
      allowReplaceDeleted: true
    });
    if (typeof idx.setEf === "function") idx.setEf(this.cfg.efSearch);
    this.index = idx;
    this.idToLabel = /* @__PURE__ */ new Map();
    this.labelToId = /* @__PURE__ */ new Map();
    this.nextLabel = 0;
    for (const p of points) {
      const label = this.nextLabel++;
      this.idToLabel.set(p.id, label);
      this.labelToId.set(label, p.id);
      idx.addPoint(Array.from(normalise(p.vector)), label, true);
    }
    this.markDirty();
    await this.flush();
  }
  // Incremental insert/overwrite. Known ids reuse their label (un-deleting it
  // if needed); the index is grown by ~20% when capacity would be exceeded.
  async upsert(points) {
    if (!this.index) throw new Error("HNSW index not initialised");
    for (const p of points) {
      let label = this.idToLabel.get(p.id);
      if (label === void 0) {
        label = this.nextLabel++;
        this.idToLabel.set(p.id, label);
        this.labelToId.set(label, p.id);
      } else {
        if (typeof this.index.unmarkDelete === "function") {
          try {
            this.index.unmarkDelete(label);
          } catch {
          }
        }
      }
      if (typeof this.index.getMaxElements === "function" && typeof this.index.getCurrentCount === "function") {
        const max = this.index.getMaxElements();
        const cur = this.index.getCurrentCount();
        if (cur + 1 > max && typeof this.index.resizeIndex === "function") {
          this.index.resizeIndex(Math.ceil((cur + 1) * 1.2) + 1024);
        }
      }
      this.index.addPoint(Array.from(normalise(p.vector)), label, true);
    }
    this.markDirty();
  }
  // Soft delete: points are mark-deleted in the graph; the id<->label
  // mappings are intentionally retained so the label can be reused later.
  async remove(ids) {
    if (!this.index) throw new Error("HNSW index not initialised");
    for (const id of ids) {
      const label = this.idToLabel.get(id);
      if (label === void 0) continue;
      if (typeof this.index.markDelete === "function") {
        try {
          this.index.markDelete(label);
        } catch {
        }
      }
    }
    this.markDirty();
  }
  // Top-k query over normalised vectors. Cosine distance d maps to
  // similarity 1 - d; l2/ip distances are negated so higher is better.
  async search(query, k) {
    if (!this.index) throw new Error("HNSW index not initialised");
    const q = Array.from(normalise(query));
    const res = this.index.searchKnn(q, k);
    const out = [];
    for (let i = 0; i < res.neighbors.length; i++) {
      const label = res.neighbors[i];
      if (label < 0) continue;
      const id = this.labelToId.get(label);
      // NOTE(review): falsy check also skips an empty-string id, not just
      // unknown labels — confirm chunk ids are always non-empty.
      if (!id) continue;
      const d = res.distances[i];
      const score = this.metric === "cosine" ? 1 - d : -d;
      out.push({ id, score });
    }
    out.sort((a, b) => b.score - a.score);
    return out;
  }
  // Element count as reported by the native index (includes mark-deleted
  // entries per hnswlib semantics — TODO confirm), else the id map size.
  async count() {
    if (!this.index) return 0;
    if (typeof this.index.getCurrentCount === "function") return this.index.getCurrentCount();
    return this.idToLabel.size;
  }
  // Record pending changes; only meaningful when persistence is enabled.
  markDirty() {
    if (!this.cfg.persist) return;
    this.dirty = true;
  }
  // Synchronously write the sidecar map and the graph when dirty.
  async flush() {
    if (!this.cfg.persist) return;
    if (!this.dirty) return;
    if (!this.index) return;
    this.dirty = false;
    const map = {
      version: 1,
      dimension: this.dimension,
      metric: this.metric,
      idToLabel: Object.fromEntries(this.idToLabel.entries())
    };
    fs2.writeFileSync(this.mapPath, JSON.stringify(map));
    this.index.writeIndexSync(this.indexPath);
  }
  // Persist pending changes, then release the native handle.
  async close() {
    await this.flush();
    this.index = null;
  }
  // Translate the provider metric to hnswlib's space name.
  toHnswSpace(metric) {
    if (metric === "l2") return "l2";
    if (metric === "ip") return "ip";
    return "cosine";
  }
};
|
|
614
|
+
|
|
615
|
+
// src/vector/qdrant.ts
|
|
616
|
+
import crypto2 from "crypto";
|
|
617
|
+
// Dynamically load the optional `@qdrant/js-client-rest` dependency. The
// module name is kept in a variable so bundlers do not resolve it statically.
// Failures are rethrown with an install hint appended.
async function importQdrant() {
  const modName = "@qdrant/js-client-rest";
  try {
    const mod = await import(modName);
    const ns = mod?.QdrantClient ? mod : mod?.default;
    if (ns?.QdrantClient) return ns;
    throw new Error("@qdrant/js-client-rest did not export QdrantClient");
  } catch (e) {
    const hint = "To use the 'qdrant' provider, install the optional dependency: npm i @qdrant/js-client-rest";
    throw new Error(`${String(e?.message ?? e)}
${hint}`);
  }
}
|
|
630
|
+
// Fixed namespace for deriving deterministic Qdrant point ids (RFC 4122 v5).
var PETRI_UUID_NAMESPACE = "b0f67f3b-2c75-44b8-9b4d-8f71a8a2f3f2";
// Decode a canonical UUID string into its 16 raw bytes.
function uuidToBytes(uuid) {
  const hex = uuid.replace(/-/g, "");
  if (hex.length !== 32) throw new Error(`Invalid UUID: ${uuid}`);
  return Uint8Array.from({ length: 16 }, (_, i) => parseInt(hex.slice(i * 2, i * 2 + 2), 16));
}
// Format 16 bytes as a canonical 8-4-4-4-12 UUID string.
function bytesToUuid(b) {
  const hex = Array.from(b, (x) => x.toString(16).padStart(2, "0")).join("");
  const groups = [hex.slice(0, 8), hex.slice(8, 12), hex.slice(12, 16), hex.slice(16, 20), hex.slice(20)];
  return groups.join("-");
}
// Deterministic name-based UUID: SHA-1 of namespace bytes + name, with the
// version nibble forced to 5 and the RFC 4122 variant bits set.
function uuidv5(name, namespace) {
  const digest = crypto2.createHash("sha1").update(uuidToBytes(namespace)).update(Buffer.from(name, "utf8")).digest();
  const out = new Uint8Array(digest.subarray(0, 16));
  out[6] = out[6] & 15 | 80;
  out[8] = out[8] & 63 | 128;
  return bytesToUuid(out);
}
|
|
651
|
+
// Reduce an arbitrary string to a Qdrant-safe collection name: lowercase
// [a-z0-9_-] only, runs of '_' collapsed, leading/trailing '_' stripped,
// capped at 60 characters.
function sanitizeCollection(s) {
  const lowered = s.toLowerCase();
  const safe = lowered.replace(/[^a-z0-9_\-]/g, "_");
  const collapsed = safe.replace(/_+/g, "_");
  const trimmed = collapsed.replace(/^_+|_+$/g, "");
  return trimmed.slice(0, 60);
}
|
|
654
|
+
var QdrantVectorIndex = class {
|
|
655
|
+
kind = "qdrant";
|
|
656
|
+
metric = "cosine";
|
|
657
|
+
dimension = 0;
|
|
658
|
+
cfg;
|
|
659
|
+
client = null;
|
|
660
|
+
collection = "";
|
|
661
|
+
commit = "";
|
|
662
|
+
collectionMode = "commit";
|
|
663
|
+
constructor(cfg = {}) {
|
|
664
|
+
this.cfg = {
|
|
665
|
+
url: cfg.url ?? "",
|
|
666
|
+
host: cfg.host ?? "localhost",
|
|
667
|
+
port: cfg.port ?? 6333,
|
|
668
|
+
apiKey: cfg.apiKey ?? "",
|
|
669
|
+
collectionPrefix: cfg.collectionPrefix ?? "petri",
|
|
670
|
+
collectionMode: cfg.collectionMode ?? "commit",
|
|
671
|
+
recreateOnRebuild: cfg.recreateOnRebuild ?? true
|
|
672
|
+
};
|
|
673
|
+
}
|
|
674
|
+
async init(init) {
|
|
675
|
+
this.metric = init.metric;
|
|
676
|
+
this.dimension = init.dimension;
|
|
677
|
+
this.commit = init.commit;
|
|
678
|
+
this.collectionMode = this.cfg.collectionMode;
|
|
679
|
+
const { QdrantClient } = await importQdrant();
|
|
680
|
+
const args = this.cfg.url ? { url: this.cfg.url, apiKey: this.cfg.apiKey || void 0 } : { host: this.cfg.host, port: this.cfg.port, apiKey: this.cfg.apiKey || void 0 };
|
|
681
|
+
this.client = new QdrantClient(args);
|
|
682
|
+
const repoPart = sanitizeCollection(init.repoId);
|
|
683
|
+
const metricPart = this.metric === "cosine" ? "cos" : this.metric;
|
|
684
|
+
const dimPart = String(this.dimension);
|
|
685
|
+
const commitPart = init.commit.slice(0, 8);
|
|
686
|
+
const prefix = sanitizeCollection(this.cfg.collectionPrefix);
|
|
687
|
+
this.collection = this.collectionMode === "commit" ? sanitizeCollection(`${prefix}_${repoPart}_${commitPart}_${dimPart}_${metricPart}`) : sanitizeCollection(`${prefix}_${repoPart}_${dimPart}_${metricPart}`);
|
|
688
|
+
await this.ensureCollection();
|
|
689
|
+
}
|
|
690
|
+
pointId(chunkId) {
|
|
691
|
+
const name = this.collectionMode === "commit" ? chunkId : `${this.commit}:${chunkId}`;
|
|
692
|
+
return uuidv5(name, PETRI_UUID_NAMESPACE);
|
|
693
|
+
}
|
|
694
|
+
commitFilter() {
|
|
695
|
+
return {
|
|
696
|
+
must: [{ key: "commit", match: { value: this.commit } }]
|
|
697
|
+
};
|
|
698
|
+
}
|
|
699
|
+
distanceName(metric) {
|
|
700
|
+
if (metric === "l2") return "Euclid";
|
|
701
|
+
if (metric === "ip") return "Dot";
|
|
702
|
+
return "Cosine";
|
|
703
|
+
}
|
|
704
|
+
async ensureCollection() {
|
|
705
|
+
if (!this.client) throw new Error("Qdrant client not initialised");
|
|
706
|
+
const existing = await this.client.getCollections();
|
|
707
|
+
const names = (existing?.collections ?? existing?.result?.collections ?? []).map((c) => c.name);
|
|
708
|
+
if (names.includes(this.collection)) return;
|
|
709
|
+
await this.client.createCollection(this.collection, {
|
|
710
|
+
vectors: { size: this.dimension, distance: this.distanceName(this.metric) }
|
|
711
|
+
});
|
|
712
|
+
}
|
|
713
|
+
async rebuild(points) {
|
|
714
|
+
if (!this.client) throw new Error("Qdrant client not initialised");
|
|
715
|
+
if (this.collectionMode === "commit") {
|
|
716
|
+
if (this.cfg.recreateOnRebuild) {
|
|
717
|
+
try {
|
|
718
|
+
await this.client.deleteCollection(this.collection);
|
|
719
|
+
} catch {
|
|
720
|
+
}
|
|
721
|
+
} else {
|
|
722
|
+
try {
|
|
723
|
+
await this.client.delete(this.collection, { filter: {} });
|
|
724
|
+
} catch {
|
|
725
|
+
}
|
|
726
|
+
}
|
|
727
|
+
await this.ensureCollection();
|
|
728
|
+
} else {
|
|
729
|
+
try {
|
|
730
|
+
await this.client.delete(this.collection, { filter: this.commitFilter() });
|
|
731
|
+
} catch {
|
|
732
|
+
}
|
|
733
|
+
}
|
|
734
|
+
const batchSize = 256;
|
|
735
|
+
for (let i = 0; i < points.length; i += batchSize) {
|
|
736
|
+
const batch = points.slice(i, i + batchSize);
|
|
737
|
+
await this.upsert(batch);
|
|
738
|
+
}
|
|
739
|
+
}
|
|
740
|
+
async upsert(points) {
|
|
741
|
+
if (!this.client) throw new Error("Qdrant client not initialised");
|
|
742
|
+
if (points.length === 0) return;
|
|
743
|
+
const qPoints = points.map((p) => {
|
|
744
|
+
const payload = { ...p.payload ?? {}, cid: p.id };
|
|
745
|
+
if (this.collectionMode === "repo") payload.commit = this.commit;
|
|
746
|
+
return {
|
|
747
|
+
id: this.pointId(p.id),
|
|
748
|
+
vector: Array.from(normalise(p.vector)),
|
|
749
|
+
payload
|
|
750
|
+
};
|
|
751
|
+
});
|
|
752
|
+
await this.client.upsert(this.collection, { points: qPoints });
|
|
753
|
+
}
|
|
754
|
+
async remove(ids) {
|
|
755
|
+
if (!this.client) throw new Error("Qdrant client not initialised");
|
|
756
|
+
if (ids.length === 0) return;
|
|
757
|
+
const pointIds = ids.map((id) => this.pointId(id));
|
|
758
|
+
await this.client.delete(this.collection, { points: pointIds });
|
|
759
|
+
}
|
|
760
|
+
  /**
   * Vector similarity search. Prefers the newer `client.query` API and
   * falls back to the legacy `client.search` API; in "repo" mode results
   * are filtered to the current commit. Hits are mapped back to chunk ids
   * via the `cid` payload field and returned sorted by score descending.
   * @param {Float32Array|number[]} query - query embedding (normalised here).
   * @param {number} k - maximum number of results.
   * @returns {Promise<Array<{id: string, score: number}>>}
   * @throws {Error} if no client, or the client has neither query nor search.
   */
  async search(query, k) {
    if (!this.client) throw new Error("Qdrant client not initialised");
    const q = Array.from(normalise(query));
    const req = {
      query: q,
      limit: k,
      with_payload: true
    };
    if (this.collectionMode === "repo") req.filter = this.commitFilter();
    let res;
    if (typeof this.client.query === "function") {
      res = await this.client.query(this.collection, req);
    } else if (typeof this.client.search === "function") {
      // Legacy API shape: vector goes in `vector`, not `query`.
      res = await this.client.search(this.collection, { vector: q, limit: k, with_payload: true, filter: req.filter });
    } else {
      throw new Error("Qdrant client missing query/search method");
    }
    // Different client versions wrap hits differently: `points`, `result`,
    // or a bare array.
    const hits = res?.points ?? res?.result ?? res ?? [];
    const out = [];
    for (const h of hits) {
      const payload = h.payload ?? h?.payloads;
      const cid = payload?.cid;
      // Skip hits whose payload lost the chunk id — they can't be resolved.
      if (!cid) continue;
      const score = typeof h.score === "number" ? h.score : typeof h?.result?.score === "number" ? h.result.score : 0;
      out.push({ id: String(cid), score });
    }
    // Defensive re-sort: hit order is not guaranteed across API variants.
    out.sort((a, b) => b.score - a.score);
    return out;
  }
|
|
789
|
+
  /**
   * Best-effort point count. In "repo" mode uses a (possibly approximate,
   * `exact: false`) filtered count of the current commit's points; otherwise
   * reads `points_count` from collection info. Returns 0 on any failure or
   * when no client is attached — never throws.
   * @returns {Promise<number>}
   */
  async count() {
    if (!this.client) return 0;
    try {
      if (this.collectionMode === "repo" && typeof this.client.count === "function") {
        const res = await this.client.count(this.collection, { filter: this.commitFilter(), exact: false });
        // Result shape varies by client version.
        return Number(res?.count ?? res?.result?.count ?? 0);
      }
      const info = await this.client.getCollection(this.collection);
      const c = info?.points_count ?? info?.result?.points_count;
      if (typeof c === "number") return c;
    } catch {
      // deliberate best-effort: fall through to 0
    }
    return 0;
  }
|
|
803
|
+
  /**
   * No-op: this backend performs no local buffering, so there is nothing
   * to persist here (writes go to the server in `upsert`/`remove`/`rebuild`).
   */
  async flush() {
  }
|
|
805
|
+
  /**
   * Drop the client reference. Subsequent mutating calls will throw the
   * "not initialised" error; `count()` will return 0.
   */
  async close() {
    this.client = null;
  }
|
|
808
|
+
};
|
|
809
|
+
|
|
810
|
+
// src/vector/factory.ts
|
|
811
|
+
// src/vector/factory.ts
/**
 * Structural check that `obj` implements the VectorIndex interface
 * (init/rebuild/upsert/remove/search/count/flush/close all functions).
 *
 * Fix: the original returned `obj && …`, which leaks `null`/`undefined`
 * through for falsy inputs instead of a boolean. Callers only test
 * truthiness, so coercing to a real boolean is backward-compatible.
 * @param {unknown} obj - candidate implementation.
 * @returns {boolean} true iff all eight required methods are present.
 */
function isVectorIndex(obj) {
  if (!obj) return false;
  return typeof obj.init === "function" && typeof obj.rebuild === "function" && typeof obj.upsert === "function" && typeof obj.remove === "function" && typeof obj.search === "function" && typeof obj.count === "function" && typeof obj.flush === "function" && typeof obj.close === "function";
}
|
|
814
|
+
/**
 * Instantiate a vector index backend from config.
 * Known providers: bruteforce (default), hnswlib, qdrant, faiss, custom
 * (dynamically imported module), and "auto" (probe qdrant client lib if a
 * URL/host is configured, then hnswlib-node, else bruteforce).
 * @param {object|undefined} vector - `vector` section of the indexer config.
 * @returns {Promise<object>} a VectorIndex implementation.
 * @throws {Error} for provider=custom without a module path, or when the
 *   custom module does not yield a VectorIndex.
 */
async function createVectorIndex(vector) {
  const provider = vector?.provider ?? "bruteforce";
  if (provider === "bruteforce") return new BruteForceVectorIndex();
  if (provider === "hnswlib") return new HnswlibVectorIndex(vector?.hnswlib);
  if (provider === "qdrant") return new QdrantVectorIndex(vector?.qdrant);
  if (provider === "faiss") return new FaissVectorIndex(vector?.faiss);
  if (provider === "custom") {
    const c = vector?.custom;
    if (!c?.module) {
      throw new Error("vector.provider=custom requires vector.custom.module");
    }
    // Resolve relative module paths against the current working directory
    // and import via file:// URL so ESM import works on all platforms.
    const abs = path4.isAbsolute(c.module) ? c.module : path4.resolve(process.cwd(), c.module);
    const url = pathToFileURL(abs).href;
    const mod = await import(url);
    const expName = c.export ?? "default";
    const exp = mod[expName] ?? mod.default ?? mod;
    const instance = typeof exp === "function" ? (
      // If it's a class, `new` works; if it's a factory function, it should return a VectorIndex.
      // NOTE(review): a class constructor that legitimately throws is
      // silently retried as a plain call here — confirm that is intended.
      (() => {
        try {
          return new exp(c.options);
        } catch {
          return exp(c.options);
        }
      })()
    ) : exp;
    if (!isVectorIndex(instance)) {
      throw new Error(
        `Custom vector provider '${abs}' export '${expName}' did not produce a VectorIndex implementation.`
      );
    }
    return instance;
  }
  if (provider === "auto") {
    // Prefer Qdrant only when explicitly pointed at a server AND the client
    // library is installed; otherwise try hnswlib-node; else bruteforce.
    if (vector?.qdrant?.url || vector?.qdrant?.host) {
      try {
        const q = "@qdrant/js-client-rest";
        await import(q);
        return new QdrantVectorIndex(vector.qdrant);
      } catch {
        // client lib not installed — fall through to local options
      }
    }
    try {
      const modName = "hnswlib-node";
      await import(modName);
      return new HnswlibVectorIndex(vector?.hnswlib);
    } catch {
      return new BruteForceVectorIndex();
    }
  }
  // Unknown provider strings fall back to the in-memory bruteforce index.
  return new BruteForceVectorIndex();
}
|
|
866
|
+
|
|
867
|
+
// src/symbolGraph/util.ts
|
|
868
|
+
// src/symbolGraph/util.ts
/**
 * Deterministic 32-hex-char id for a symbol, derived from its repo root,
 * path, language, kind, name, and exact source range. Identical inputs
 * always hash to the same id (first 16 bytes of SHA-256).
 * @param {{repoRoot, path, language, kind, name, range}} input
 * @returns {string} 32 lowercase hex characters.
 */
function stableSymbolId(input) {
  const { range } = input;
  const keyLines = [
    `repoRoot:${input.repoRoot}`,
    `path:${input.path}`,
    `lang:${input.language}`,
    `kind:${input.kind}`,
    `name:${input.name}`,
    `range:${range.startLine}:${range.startCharacter}-${range.endLine}:${range.endCharacter}`
  ];
  return sha256Hex(keyLines.join("\n")).slice(0, 32);
}
|
|
878
|
+
|
|
879
|
+
// src/ann/noop.ts
|
|
880
|
+
// src/ann/noop.ts
/**
 * Placeholder ANN index used when no approximate-nearest-neighbour backend
 * is configured: accepts every mutation silently and never returns hits.
 */
var NoopAnnIndex = class {
  id = "noop";
  kind = "noop";
  /** Nothing to initialise. */
  async init(_init) {}
  /** Discards the given points. */
  async upsert(_points) {}
  /** Discards the given ids. */
  async remove(_ids) {}
  /** Always reports zero matches. */
  async search(_query, _k) {
    return [];
  }
  /** Nothing buffered, nothing to flush. */
  async flush() {}
  /** Nothing held open. */
  async close() {}
};
|
|
897
|
+
|
|
898
|
+
// src/ann/factory.ts
|
|
899
|
+
// src/ann/factory.ts
/**
 * Build an ANN index from config. Only the no-op backend exists today;
 * "faiss-pq" is reserved and throws, and any unknown provider string
 * falls back to the no-op index.
 * @param {{provider?: string}|undefined} config
 * @returns {NoopAnnIndex}
 * @throws {Error} when provider is "faiss-pq".
 */
function createAnnIndex(config) {
  const provider = config?.provider ?? "noop";
  switch (provider) {
    case "faiss-pq":
      throw new Error("ANN provider 'faiss-pq' is not implemented yet (use provider=noop for now).");
    case "noop":
    default:
      return new NoopAnnIndex();
  }
}
|
|
907
|
+
|
|
908
|
+
// src/indexer/repoIndexer.ts
|
|
909
|
+
import fs9 from "fs";
|
|
910
|
+
import path12 from "path";
|
|
911
|
+
import pLimit from "p-limit";
|
|
912
|
+
|
|
913
|
+
// src/git.ts
|
|
914
|
+
import { execFile } from "child_process";
|
|
915
|
+
import { promisify } from "util";
|
|
916
|
+
var execFileAsync = promisify(execFile);
|
|
917
|
+
/**
 * Run a git subcommand inside `repoRoot` and return its stdout as UTF-8.
 * Rejects (from execFile) when git exits non-zero or is not installed.
 * @param {string} repoRoot - working directory for the git process.
 * @param {string[]} args - arguments passed to `git`.
 * @returns {Promise<string>} captured stdout.
 */
async function git(repoRoot, args) {
  const result = await execFileAsync("git", args, { cwd: repoRoot, encoding: "utf8" });
  return result.stdout;
}
|
|
921
|
+
/**
 * Resolve the current HEAD commit hash of the repository.
 * @param {string} repoRoot
 * @returns {Promise<string>} full commit SHA, trimmed.
 */
async function getHeadCommit(repoRoot) {
  const out = await git(repoRoot, ["rev-parse", "HEAD"]);
  return out.trim();
}
|
|
924
|
+
/**
 * Resolve the current branch name (or "HEAD" when detached, per
 * `git rev-parse --abbrev-ref HEAD` semantics).
 * @param {string} repoRoot
 * @returns {Promise<string>} branch name, trimmed.
 */
async function getBranchName(repoRoot) {
  const out = await git(repoRoot, ["rev-parse", "--abbrev-ref", "HEAD"]);
  return out.trim();
}
|
|
927
|
+
/**
 * List tracked plus untracked-but-not-ignored files in the working tree,
 * NUL-delimited to be safe for any filename, de-duplicated while keeping
 * first-seen order.
 * @param {string} repoRoot
 * @returns {Promise<string[]>} repo-relative paths.
 */
async function listWorkingFiles(repoRoot) {
  const out = await git(repoRoot, ["ls-files", "-z", "--cached", "--others", "--exclude-standard"]);
  const entries = out.split("\0").map((s) => s.trim()).filter(Boolean);
  // Set preserves insertion order, so dedupe keeps the first occurrence.
  return [...new Set(entries)];
}
|
|
940
|
+
/**
 * List file paths changed between `baseRef` (merge-base, via `...`) and HEAD.
 * @param {string} repoRoot
 * @param {string} [baseRef="HEAD~1"] - comparison base revision.
 * @returns {Promise<string[]>} repo-relative paths, empty entries removed.
 */
async function listChangedFiles(repoRoot, baseRef = "HEAD~1") {
  const raw = await git(repoRoot, ["diff", "--name-only", "-z", `${baseRef}...HEAD`]);
  const names = [];
  for (const part of raw.split("\0")) {
    const trimmed = part.trim();
    if (trimmed) names.push(trimmed);
  }
  return names;
}
|
|
944
|
+
|
|
945
|
+
// src/ignore.ts
|
|
946
|
+
import fs3 from "fs";
|
|
947
|
+
import path5 from "path";
|
|
948
|
+
import ignore from "ignore";
|
|
949
|
+
/**
 * Build a matcher from optional project-level ignore files (e.g.
 * `.petriignore`). Missing files are skipped; patterns from all present
 * files are merged into a single `ignore` instance.
 * @param {string} repoRoot - directory the ignore files live in.
 * @param {string[]} ignoreFiles - file names to look for under repoRoot.
 * @returns {(posixRelPath: string) => boolean} true when the path is ignored.
 */
function loadExtraIgnore(repoRoot, ignoreFiles) {
  const matcher = ignore();
  for (const name of ignoreFiles) {
    const filePath = path5.join(repoRoot, name);
    if (!fs3.existsSync(filePath)) continue;
    const contents = fs3.readFileSync(filePath, "utf8");
    matcher.add(contents.split(/\r?\n/));
  }
  return (posixRelPath) => matcher.ignores(posixRelPath);
}
|
|
959
|
+
|
|
960
|
+
// src/store/embeddingCache.ts
|
|
961
|
+
import fs4 from "fs";
|
|
962
|
+
import path6 from "path";
|
|
963
|
+
import Database from "better-sqlite3";
|
|
964
|
+
// src/store/embeddingCache.ts
/**
 * SQLite-backed cache of embeddings keyed by (provider id, content hash),
 * so unchanged chunks never need re-embedding. Embeddings are stored as
 * raw little-endian float32 BLOBs.
 */
var EmbeddingCache = class {
  // better-sqlite3 Database handle (synchronous API).
  db;
  /**
   * Open (creating parent directories and the file if needed) the cache DB
   * and ensure the schema exists. WAL mode improves concurrent reader
   * behaviour for watch-mode indexing.
   * @param {string} cacheFilePath - path of the sqlite file.
   */
  constructor(cacheFilePath) {
    fs4.mkdirSync(path6.dirname(cacheFilePath), { recursive: true });
    this.db = new Database(cacheFilePath);
    this.db.pragma("journal_mode = WAL");
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS embedding_cache (
        provider_id TEXT NOT NULL,
        content_hash TEXT NOT NULL,
        embedding BLOB NOT NULL,
        dim INTEGER NOT NULL,
        created_at INTEGER NOT NULL,
        PRIMARY KEY(provider_id, content_hash)
      );
    `);
  }
  /**
   * Look up a cached embedding.
   * @returns {Float32Array|null} a private copy (the returned array does not
   *   alias the sqlite buffer), or null on cache miss.
   */
  get(providerId, contentHash) {
    const row = this.db.prepare(
      `SELECT embedding, dim FROM embedding_cache WHERE provider_id = ? AND content_hash = ?`
    ).get(providerId, contentHash);
    if (!row) return null;
    const buf = row.embedding;
    // View over the BLOB bytes, then copy so callers own their memory.
    const view = new Float32Array(buf.buffer, buf.byteOffset, buf.byteLength / 4);
    return new Float32Array(view);
  }
  /**
   * Store (or overwrite) an embedding for the given provider/content pair.
   * @param {Float32Array} embedding - written as raw float32 bytes.
   */
  put(providerId, contentHash, embedding) {
    // Zero-copy Buffer view over the Float32Array's underlying bytes.
    const buf = Buffer.from(embedding.buffer, embedding.byteOffset, embedding.byteLength);
    this.db.prepare(
      `INSERT OR REPLACE INTO embedding_cache(provider_id, content_hash, embedding, dim, created_at)
       VALUES (?, ?, ?, ?, ?)`
    ).run(providerId, contentHash, buf, embedding.length, Date.now());
  }
  /** Close the underlying sqlite handle. */
  close() {
    this.db.close();
  }
};
|
|
1001
|
+
|
|
1002
|
+
// src/store/repoStore.ts
|
|
1003
|
+
import fs5 from "fs";
|
|
1004
|
+
import path7 from "path";
|
|
1005
|
+
import Database2 from "better-sqlite3";
|
|
1006
|
+
// src/store/repoStore.ts
/**
 * Primary on-disk store for an indexed repository: file metadata, chunk
 * rows with embeddings, an import/export edge table, a key/value meta
 * table, and (when the sqlite build supports it) an FTS5 mirror of chunk
 * text for lexical search. All writes that touch chunks happen inside a
 * transaction and bump a monotonically increasing store version so vector
 * indexes can detect staleness.
 */
var RepoStore = class {
  // better-sqlite3 Database handle (synchronous API).
  db;
  /**
   * Open the store (creating directories/file as needed), create the
   * schema, and probe FTS5 availability — the result is recorded in
   * meta key "fts" ("1"/"0") and consulted by searchFts().
   * @param {string} dbPath - path of the sqlite file.
   */
  constructor(dbPath) {
    fs5.mkdirSync(path7.dirname(dbPath), { recursive: true });
    this.db = new Database2(dbPath);
    this.db.pragma("journal_mode = WAL");
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS meta (
        k TEXT PRIMARY KEY,
        v TEXT NOT NULL
      );

      CREATE TABLE IF NOT EXISTS files (
        path TEXT PRIMARY KEY,
        hash TEXT NOT NULL,
        mtime INTEGER NOT NULL,
        language TEXT NOT NULL,
        size INTEGER NOT NULL
      );

      CREATE TABLE IF NOT EXISTS chunks (
        id TEXT PRIMARY KEY,
        path TEXT NOT NULL,
        language TEXT NOT NULL,
        kind TEXT NOT NULL DEFAULT 'chunk',
        start_line INTEGER NOT NULL,
        end_line INTEGER NOT NULL,
        content_hash TEXT NOT NULL,
        tokens INTEGER NOT NULL,
        file_mtime INTEGER NOT NULL,
        text TEXT NOT NULL,
        embedding BLOB NOT NULL
      );

      CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(path);
      CREATE INDEX IF NOT EXISTS idx_chunks_kind_path ON chunks(kind, path);

      CREATE TABLE IF NOT EXISTS edges (
        from_path TEXT NOT NULL,
        kind TEXT NOT NULL,
        value TEXT NOT NULL,
        PRIMARY KEY(from_path, kind, value)
      );

      CREATE INDEX IF NOT EXISTS idx_edges_from ON edges(from_path);
    `);
    try {
      this.db.exec(`
        CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
          id UNINDEXED,
          path,
          language,
          kind,
          text,
          tokenize='unicode61'
        );
      `);
      this.setMeta("fts", "1");
    } catch {
      // FTS5 not compiled into this sqlite build: lexical search disabled.
      this.setMeta("fts", "0");
    }
    if (this.getMeta("storeVersion") === null) this.setMeta("storeVersion", "0");
  }
  /** Monotonically increases whenever the chunk-store is mutated. */
  getStoreVersion() {
    const v = this.getMeta("storeVersion");
    return v ? Number(v) : 0;
  }
  /** Internal: bump store version (call inside the same transaction that mutates chunks). */
  bumpStoreVersion() {
    const next = this.getStoreVersion() + 1;
    this.setMeta("storeVersion", String(next));
  }
  /** Vector index sync marker (per backend kind). */
  getVectorIndexVersion(kind) {
    const v = this.getMeta(`vector.${kind}.storeVersion`);
    return v ? Number(v) : 0;
  }
  /** Record that vector backend `kind` has been synced up to `storeVersion`. */
  setVectorIndexVersion(kind, storeVersion) {
    this.setMeta(`vector.${kind}.storeVersion`, String(storeVersion));
  }
  /** Upsert a key/value pair in the meta table. */
  setMeta(k, v) {
    this.db.prepare(`INSERT OR REPLACE INTO meta(k, v) VALUES (?, ?)`).run(k, v);
  }
  /** Read a meta value; null when the key is absent. */
  getMeta(k) {
    const row = this.db.prepare(`SELECT v FROM meta WHERE k = ?`).get(k);
    return row?.v ?? null;
  }
  /** Last recorded content hash for a file, or null if unknown. */
  getFileHash(posixPath) {
    const row = this.db.prepare(`SELECT hash FROM files WHERE path = ?`).get(posixPath);
    return row?.hash ?? null;
  }
  /** Last recorded mtime for a file, or null if unknown. */
  getFileMtime(posixPath) {
    const row = this.db.prepare(`SELECT mtime FROM files WHERE path = ?`).get(posixPath);
    return row?.mtime ?? null;
  }
  /** Insert or refresh a file's metadata row. */
  upsertFile(posixPath, hash, mtime, language, size) {
    this.db.prepare(`
      INSERT INTO files(path, hash, mtime, language, size)
      VALUES (?, ?, ?, ?, ?)
      ON CONFLICT(path) DO UPDATE SET
        hash = excluded.hash,
        mtime = excluded.mtime,
        language = excluded.language,
        size = excluded.size
    `).run(posixPath, hash, mtime, language, size);
  }
  /**
   * Remove a file and all its derived rows (chunks, FTS entries, edges)
   * atomically, bumping the store version.
   * NOTE(review): the chunks_fts DELETE runs even when meta "fts" is "0"
   * (table creation failed) — confirm that case cannot occur here, since a
   * missing chunks_fts table would make this transaction throw.
   */
  deleteFile(posixPath) {
    const tx = this.db.transaction(() => {
      this.db.prepare(`DELETE FROM chunks WHERE path = ?`).run(posixPath);
      this.db.prepare(`DELETE FROM chunks_fts WHERE path = ?`).run(posixPath);
      this.db.prepare(`DELETE FROM edges WHERE from_path = ?`).run(posixPath);
      this.db.prepare(`DELETE FROM files WHERE path = ?`).run(posixPath);
      this.bumpStoreVersion();
    });
    tx();
  }
  /**
   * Atomically replace every chunk row (and FTS mirror row) for a file
   * with the freshly computed set, bumping the store version.
   * NOTE(review): same chunks_fts caveat as deleteFile when FTS5 is absent.
   * @param {string} posixPath
   * @param {Array} rows - chunk rows with a Float32Array `embedding` and a
   *   separate `ftsText` used for the lexical index.
   */
  replaceChunksForFile(posixPath, rows) {
    const tx = this.db.transaction(() => {
      this.db.prepare(`DELETE FROM chunks WHERE path = ?`).run(posixPath);
      this.db.prepare(`DELETE FROM chunks_fts WHERE path = ?`).run(posixPath);
      const ins = this.db.prepare(`
        INSERT INTO chunks(id, path, language, kind, start_line, end_line, content_hash, tokens, file_mtime, text, embedding)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
      `);
      const insFts = this.db.prepare(`
        INSERT INTO chunks_fts(id, path, language, kind, text)
        VALUES (?, ?, ?, ?, ?)
      `);
      for (const r of rows) {
        // Store the embedding's raw float32 bytes without copying.
        const buf = Buffer.from(r.embedding.buffer, r.embedding.byteOffset, r.embedding.byteLength);
        ins.run(r.id, posixPath, r.language, r.kind, r.startLine, r.endLine, r.contentHash, r.tokens, r.fileMtime, r.text, buf);
        insFts.run(r.id, posixPath, r.language, r.kind, r.ftsText);
      }
      this.bumpStoreVersion();
    });
    tx();
  }
  /**
   * Replace all edges of one kind originating at `fromPath`.
   * NOTE(review): unlike the chunk mutators this does not bump the store
   * version — confirm edges are intentionally excluded from staleness
   * tracking.
   */
  setEdges(fromPath, kind, values) {
    const tx = this.db.transaction(() => {
      this.db.prepare(`DELETE FROM edges WHERE from_path = ? AND kind = ?`).run(fromPath, kind);
      const ins = this.db.prepare(`INSERT OR REPLACE INTO edges(from_path, kind, value) VALUES (?, ?, ?)`);
      for (const v of values) ins.run(fromPath, kind, v);
    });
    tx();
  }
  /** Values of all edges of `kind` originating at `fromPath`. */
  listEdges(fromPath, kind) {
    const rows = this.db.prepare(`SELECT value FROM edges WHERE from_path = ? AND kind = ?`).all(fromPath, kind);
    return rows.map((r) => r.value);
  }
  /** All file paths known to the store. */
  listAllFiles() {
    const rows = this.db.prepare(`SELECT path FROM files`).all();
    return rows.map((r) => r.path);
  }
  /** Total number of chunk rows. */
  countChunks() {
    const row = this.db.prepare(`SELECT COUNT(*) AS c FROM chunks`).get();
    return row.c;
  }
  /**
   * Returns the embedding dimension if any chunks exist, otherwise null.
   * Efficient (doesn't load all embeddings).
   */
  getAnyEmbeddingDimension() {
    const row = this.db.prepare(`SELECT embedding FROM chunks LIMIT 1`).get();
    if (!row) return null;
    // float32 => 4 bytes per component.
    return Math.floor(row.embedding.byteLength / 4);
  }
  /**
   * Load every chunk id with its embedding (copied out of the sqlite
   * buffers). Used to (re)build in-memory/local vector indexes.
   */
  loadAllChunkEmbeddings() {
    const rows = this.db.prepare(`SELECT id, embedding FROM chunks`).all();
    return rows.map((r) => {
      const view = new Float32Array(r.embedding.buffer, r.embedding.byteOffset, r.embedding.byteLength / 4);
      return { id: r.id, embedding: new Float32Array(view) };
    });
  }
  /** Full chunk row by id, or null when absent. */
  getChunkById(id) {
    const row = this.db.prepare(`SELECT * FROM chunks WHERE id = ?`).get(id);
    return row ?? null;
  }
  /**
   * Chunk spans for one file, optionally restricted to a kind. Without a
   * kind filter, rows are ordered kind DESC then start_line ASC.
   */
  listChunksForFile(posixPath, kind) {
    if (kind) {
      return this.db.prepare(`SELECT id, start_line, end_line, kind FROM chunks WHERE path = ? AND kind = ? ORDER BY start_line ASC`).all(posixPath, kind);
    }
    return this.db.prepare(`SELECT id, start_line, end_line, kind FROM chunks WHERE path = ? ORDER BY kind DESC, start_line ASC`).all(posixPath);
  }
  /**
   * Best-effort lexical search using SQLite FTS5.
   * Returns ids with bm25 values (lower is better).
   */
  searchFts(ftq, limit, includePaths) {
    try {
      // FTS5 unavailable in this build: lexical search silently disabled.
      if (this.getMeta("fts") !== "1") return [];
      if (includePaths && includePaths.length > 0) {
        // Paths are bound as parameters; only the placeholder list is
        // interpolated, so this stays injection-safe.
        const placeholders = includePaths.map(() => "?").join(", ");
        const sql2 = `
          SELECT id, bm25(chunks_fts) AS bm25
          FROM chunks_fts
          WHERE chunks_fts MATCH ? AND path IN (${placeholders})
          ORDER BY bm25 ASC
          LIMIT ?
        `;
        const args = [ftq, ...includePaths, limit];
        return this.db.prepare(sql2).all(...args);
      }
      const sql = `
        SELECT id, bm25(chunks_fts) AS bm25
        FROM chunks_fts
        WHERE chunks_fts MATCH ?
        ORDER BY bm25 ASC
        LIMIT ?
      `;
      return this.db.prepare(sql).all(ftq, limit);
    } catch {
      // Malformed MATCH queries (user input) degrade to "no results".
      return [];
    }
  }
  /** Close the underlying sqlite handle. */
  close() {
    this.db.close();
  }
};
|
|
1225
|
+
|
|
1226
|
+
// src/indexer/repoIndexer/config.ts
|
|
1227
|
+
// src/indexer/repoIndexer/config.ts
/**
 * Expand a partial indexer configuration into a fully-populated one,
 * filling every omitted field with its documented default. `??` is used
 * throughout so explicit falsy values (0, "", false) are respected.
 * @param {object} [config={}] - partial user configuration.
 * @returns {object} resolved configuration object.
 */
function resolveIndexerConfig(config = {}) {
  const defaultExtensions = [
    ".ts",
    ".tsx",
    ".js",
    ".jsx",
    ".mjs",
    ".cjs",
    ".py",
    ".go",
    ".ca",
    ".cpnexpr",
    ".ispec",
    ".isl",
    ".ltl",
    ".ctl",
    ".rs",
    ".java",
    ".kt",
    ".kts",
    ".cs",
    ".md",
    ".json",
    ".yml",
    ".yaml",
    ".toml"
  ];
  // Built-in secret-scrubbing patterns applied when redaction is enabled.
  const defaultRedactPatterns = [
    {
      name: "private_key_block",
      regex: /-----BEGIN [A-Z ]+PRIVATE KEY-----[\s\S]*?-----END [A-Z ]+PRIVATE KEY-----/g,
      replaceWith: "[REDACTED_PRIVATE_KEY]"
    },
    {
      name: "aws_access_key_id",
      regex: /\bAKIA[0-9A-Z]{16}\b/g,
      replaceWith: "[REDACTED_AWS_KEY]"
    }
  ];
  return {
    // defaultCacheDir() is only invoked when no cacheDir was supplied.
    cacheDir: config.cacheDir ?? defaultCacheDir(),
    includeExtensions: config.includeExtensions ?? defaultExtensions,
    maxFileBytes: config.maxFileBytes ?? 2e6,
    chunk: {
      maxChars: config.chunk?.maxChars ?? 12e3,
      maxLines: config.chunk?.maxLines ?? 240,
      overlapLines: config.chunk?.overlapLines ?? 40
    },
    embed: {
      batchSize: config.embed?.batchSize ?? 32,
      concurrency: config.embed?.concurrency ?? 4
    },
    watch: {
      debounceMs: config.watch?.debounceMs ?? 250
    },
    ignoreFiles: config.ignoreFiles ?? [".petriignore", ".augmentignore"],
    redact: {
      enabled: config.redact?.enabled ?? true,
      skipPathSubstrings: config.redact?.skipPathSubstrings ?? [".env", "id_rsa", ".pem", ".p12", "secrets"],
      patterns: config.redact?.patterns ?? defaultRedactPatterns
    },
    storage: {
      storeText: config.storage?.storeText ?? true,
      ftsMode: config.storage?.ftsMode ?? "full"
    },
    vector: {
      provider: config.vector?.provider ?? "bruteforce",
      metric: config.vector?.metric ?? "cosine",
      hnswlib: config.vector?.hnswlib,
      qdrant: config.vector?.qdrant,
      faiss: config.vector?.faiss,
      custom: config.vector?.custom
    },
    profiles: config.profiles ?? {}
  };
}
|
|
1302
|
+
|
|
1303
|
+
// src/indexer/repoIndexer/utils.ts
|
|
1304
|
+
import fs6 from "fs";
|
|
1305
|
+
import path8 from "path";
|
|
1306
|
+
// src/indexer/repoIndexer/utils.ts
/**
 * Stable 16-hex-char identifier for a repository, derived from the SHA-256
 * of its absolute root path.
 * @param {string} repoRoot - repository root (relative paths are resolved).
 * @returns {string} 16 lowercase hex characters.
 */
function repoIdFromRoot(repoRoot) {
  const absoluteRoot = path8.resolve(repoRoot);
  return sha256Hex(absoluteRoot).slice(0, 16);
}
|
|
1309
|
+
/**
 * Heuristic binary-file detector: a buffer is treated as binary when more
 * than two NUL bytes appear in its first 4096 bytes.
 * @param {Buffer|Uint8Array} buf - raw file contents.
 * @returns {boolean} true when the content looks binary.
 */
function looksBinary(buf) {
  const limit = Math.min(buf.length, 4096);
  let nulCount = 0;
  for (let i = 0; i < limit; i += 1) {
    if (buf[i] === 0) nulCount += 1;
  }
  return nulCount > 2;
}
|
|
1315
|
+
|
|
1316
|
+
// src/indexer/repoIndexer/vectorManager.ts
|
|
1317
|
+
// src/indexer/repoIndexer/vectorManager.ts
/**
 * Owns the lifecycle of the active vector index backend: lazy creation
 * (recreated when the embedding dimension changes), debounced flushing,
 * and de-duplication of concurrent flush requests.
 */
var VectorManager = class {
  /**
   * @param vectorConfig - `vector` section passed to createVectorIndex.
   * @param vectorMetric - similarity metric forwarded to the backend.
   * @param flushDebounceMs - (backendKind) => ms; per-backend debounce.
   * @param onFlush - async callback invoked with the index to persist it.
   */
  constructor(vectorConfig, vectorMetric, flushDebounceMs, onFlush) {
    this.vectorConfig = vectorConfig;
    this.vectorMetric = vectorMetric;
    this.flushDebounceMs = flushDebounceMs;
    this.onFlush = onFlush;
  }
  // Active backend, or null before ensure()/after close().
  vec = null;
  // Pending debounce timer handle, or null.
  flushTimer = null;
  // In-progress flush promise (shared by concurrent callers), or null.
  flushInFlight = null;
  /** Current backend instance (null when none is open). */
  get() {
    return this.vec;
  }
  /**
   * Cancel any pending flush, close the backend (errors swallowed), and
   * reset all state. Safe to call when nothing is open.
   * NOTE(review): an in-flight flush is not awaited here — confirm callers
   * don't rely on close() draining pending writes.
   */
  async close() {
    if (this.flushTimer) {
      clearTimeout(this.flushTimer);
      this.flushTimer = null;
    }
    if (this.vec) {
      try {
        await this.vec.close();
      } catch {
        // best-effort close
      }
      this.vec = null;
    }
    this.flushInFlight = null;
  }
  /**
   * Return the current backend if it already matches the requested
   * dimension; otherwise close it and create/initialise a fresh one.
   * @param init - { repoId, repoRoot, commit, branch, cacheDir, dimension }.
   */
  async ensure(init) {
    if (this.vec && this.vec.dimension === init.dimension) return this.vec;
    await this.close();
    const vec = await createVectorIndex(this.vectorConfig);
    await vec.init({
      repoId: init.repoId,
      repoRoot: init.repoRoot,
      commit: init.commit,
      branch: init.branch,
      cacheDir: init.cacheDir,
      dimension: init.dimension,
      metric: this.vectorMetric
    });
    this.vec = vec;
    return vec;
  }
  /**
   * Request a flush, debounced per backend kind. A non-positive debounce
   * flushes immediately (fire-and-forget); otherwise any previously
   * scheduled flush is rescheduled.
   */
  scheduleFlush() {
    if (!this.vec) return;
    const ms = this.flushDebounceMs(this.vec.kind);
    if (this.flushTimer) {
      clearTimeout(this.flushTimer);
      this.flushTimer = null;
    }
    if (ms <= 0) {
      void this.flushNow();
      return;
    }
    this.flushTimer = setTimeout(() => {
      this.flushTimer = null;
      void this.flushNow();
    }, ms);
  }
  /**
   * Flush immediately. Cancels any pending debounce; if a flush is already
   * running, returns that same promise instead of starting another.
   * @returns {Promise<void>|undefined} undefined when no backend is open.
   */
  async flushNow() {
    if (!this.vec) return;
    if (this.flushTimer) {
      clearTimeout(this.flushTimer);
      this.flushTimer = null;
    }
    if (this.flushInFlight) return this.flushInFlight;
    const work = (async () => {
      await this.onFlush(this.vec);
    })();
    // Clear the in-flight marker whether the flush succeeds or fails.
    this.flushInFlight = work.finally(() => {
      this.flushInFlight = null;
    });
    return this.flushInFlight;
  }
};
|
|
1392
|
+
|
|
1393
|
+
// src/indexer/repoIndexer/fileIndexer.ts
|
|
1394
|
+
import fs7 from "fs";
|
|
1395
|
+
import path9 from "path";
|
|
1396
|
+
|
|
1397
|
+
// src/relations.ts
|
|
1398
|
+
// src/relations.ts
/**
 * Parse TypeScript/JavaScript source and extract module-level relations:
 * imported module specifiers and exported top-level names. Re-exports via
 * `export { a, b }` contribute their names; `export *` (and other
 * clause-less export declarations) contribute the literal "*".
 * Returns empty lists when the optional `typescript` dependency is not
 * available.
 * @param {string} virtualFileName - name given to the in-memory source file.
 * @param {string} sourceText - file contents to parse.
 * @returns {{imports: string[], exports: string[]}}
 */
function extractTsRelations(virtualFileName, sourceText) {
  const tsMod = getTypeScript();
  if (!tsMod) return { imports: [], exports: [] };
  const sf = tsMod.createSourceFile(virtualFileName, sourceText, tsMod.ScriptTarget.Latest, true);
  const imports = [];
  const exports = [];
  // True when the statement carries an `export` modifier.
  const isExported2 = (node) => {
    if (!tsMod.canHaveModifiers(node)) return false;
    const mods = tsMod.getModifiers(node);
    return !!mods?.some((m) => m.kind === tsMod.SyntaxKind.ExportKeyword);
  };
  // Only top-level statements are inspected (no nested/dynamic imports).
  for (const stmt of sf.statements) {
    if (tsMod.isImportDeclaration(stmt) && tsMod.isStringLiteral(stmt.moduleSpecifier)) {
      imports.push(stmt.moduleSpecifier.text);
    }
    if (tsMod.isExportDeclaration(stmt)) {
      if (stmt.exportClause && tsMod.isNamedExports(stmt.exportClause)) {
        for (const el of stmt.exportClause.elements) exports.push(el.name.text);
      } else {
        exports.push("*");
      }
    }
    if (tsMod.isFunctionDeclaration(stmt) && isExported2(stmt) && stmt.name) {
      exports.push(stmt.name.text);
    }
    if (tsMod.isClassDeclaration(stmt) && isExported2(stmt) && stmt.name) {
      exports.push(stmt.name.text);
    }
    if (tsMod.isVariableStatement(stmt) && isExported2(stmt)) {
      for (const decl of stmt.declarationList.declarations) {
        if (tsMod.isIdentifier(decl.name)) exports.push(decl.name.text);
      }
    }
  }
  return { imports, exports };
}
|
|
1434
|
+
|
|
1435
|
+
// src/synopsis.ts
|
|
1436
|
+
// src/synopsis.ts
/**
 * True when a TS AST statement carries an `export` modifier.
 * @param tsMod - the loaded typescript module.
 * @param node - AST node to inspect.
 * @returns {boolean}
 */
function isExported(tsMod, node) {
  if (!tsMod.canHaveModifiers(node)) return false;
  const modifiers = tsMod.getModifiers(node) ?? [];
  return modifiers.some((m) => m.kind === tsMod.SyntaxKind.ExportKeyword);
}
|
|
1441
|
+
/**
 * Extract the leading comment of a source file, capped at `maxChars`.
 * A leading block comment wins; otherwise consecutive leading `//` lines
 * (trimmed) are joined with newlines. Returns "" when the file does not
 * start with a comment.
 * @param {string} sourceText - full file contents.
 * @param {number} [maxChars=600] - maximum excerpt length.
 * @returns {string}
 */
function leadingCommentExcerpt(sourceText, maxChars = 600) {
  const text = sourceText.trimStart();
  const blockMatch = text.match(/^\/\*[\s\S]*?\*\//);
  if (blockMatch) return blockMatch[0].slice(0, maxChars);
  const collected = [];
  for (const line of text.split(/\r?\n/)) {
    if (!/^\s*\/\/(.*)$/.test(line)) break;
    collected.push(line.trim());
    if (collected.join("\n").length >= maxChars) break;
  }
  return collected.length ? collected.join("\n").slice(0, maxChars) : "";
}
|
|
1456
|
+
/**
 * De-duplicate while preserving first-seen order, dropping falsy entries
 * (empty strings, null, undefined, 0, false).
 * @param {Array} xs - input values.
 * @returns {Array} new array of unique truthy values.
 */
function uniq(xs) {
  const seen = /* @__PURE__ */ new Set();
  const result = [];
  for (const item of xs) {
    if (item && !seen.has(item)) {
      seen.add(item);
      result.push(item);
    }
  }
  return result;
}
|
|
1467
|
+
/**
 * Language-agnostic fallback synopsis: path, language, and (when present)
 * the file's leading comment collapsed to one line. Capped at 1200 chars.
 * @param {{posixPath, language, sourceText}} input
 * @returns {string}
 */
function renderGenericSynopsis(input) {
  const lead = leadingCommentExcerpt(input.sourceText);
  const lines = [
    `File synopsis`,
    `path: ${input.posixPath}`,
    `language: ${input.language}`
  ];
  if (lead) {
    lines.push(`comment: ${lead.replace(/\s+/g, " ").trim()}`);
  }
  return lines.join("\n").slice(0, 1200);
}
|
|
1476
|
+
/**
 * Build a synopsis for TypeScript/JavaScript files by walking top-level
 * statements: collects import specifiers, exported names, and a summary of
 * top-level declarations (fn/class/interface/type/enum/namespace/var),
 * plus the file's leading comment. Falls back to the generic synopsis when
 * the optional `typescript` dependency is unavailable. Output is capped at
 * 1800 characters; each list is truncated (imports 30, exports 40,
 * top-level 60) after de-duplication.
 * @param {{posixPath, language, sourceText}} input
 * @returns {string}
 */
function buildTsLikeSynopsis(input) {
  const tsMod = getTypeScript();
  if (!tsMod) return renderGenericSynopsis(input);
  const lead = leadingCommentExcerpt(input.sourceText);
  const sf = tsMod.createSourceFile(input.posixPath, input.sourceText, tsMod.ScriptTarget.Latest, true);
  const imports = [];
  const exports = [];
  const topLevel = [];
  for (const stmt of sf.statements) {
    if (tsMod.isImportDeclaration(stmt) && tsMod.isStringLiteral(stmt.moduleSpecifier)) {
      imports.push(stmt.moduleSpecifier.text);
    }
    const isExportedStmt = isExported(tsMod, stmt);
    if (tsMod.isFunctionDeclaration(stmt) && stmt.name) topLevel.push(`fn ${stmt.name.text}${isExportedStmt ? " (export)" : ""}`);
    if (tsMod.isClassDeclaration(stmt) && stmt.name) topLevel.push(`class ${stmt.name.text}${isExportedStmt ? " (export)" : ""}`);
    if (tsMod.isInterfaceDeclaration(stmt) && stmt.name) topLevel.push(`interface ${stmt.name.text}${isExportedStmt ? " (export)" : ""}`);
    if (tsMod.isTypeAliasDeclaration(stmt) && stmt.name) topLevel.push(`type ${stmt.name.text}${isExportedStmt ? " (export)" : ""}`);
    if (tsMod.isEnumDeclaration(stmt) && stmt.name) topLevel.push(`enum ${stmt.name.text}${isExportedStmt ? " (export)" : ""}`);
    if (tsMod.isModuleDeclaration(stmt) && stmt.name && tsMod.isIdentifier(stmt.name)) topLevel.push(`namespace ${stmt.name.text}${isExportedStmt ? " (export)" : ""}`);
    if (tsMod.isVariableStatement(stmt)) {
      const isExp = isExported(tsMod, stmt);
      for (const decl of stmt.declarationList.declarations) {
        if (tsMod.isIdentifier(decl.name)) topLevel.push(`var ${decl.name.text}${isExp ? " (export)" : ""}`);
      }
    }
    if (tsMod.isExportDeclaration(stmt)) {
      // Re-exports: named clause contributes names, clause-less form "*".
      if (stmt.exportClause && tsMod.isNamedExports(stmt.exportClause)) {
        for (const el of stmt.exportClause.elements) exports.push(el.name.text);
      } else {
        exports.push("*");
      }
    }
    if ((tsMod.isFunctionDeclaration(stmt) || tsMod.isClassDeclaration(stmt)) && isExportedStmt && stmt.name) {
      exports.push(stmt.name.text);
    }
  }
  const importsU = uniq(imports).slice(0, 30);
  const exportsU = uniq(exports).slice(0, 40);
  const topU = uniq(topLevel).slice(0, 60);
  const parts = [];
  parts.push(`File synopsis`);
  parts.push(`path: ${input.posixPath}`);
  parts.push(`language: ${input.language}`);
  if (importsU.length) parts.push(`imports: ${importsU.join(", ")}`);
  if (exportsU.length) parts.push(`exports: ${exportsU.join(", ")}`);
  if (topU.length) parts.push(`top-level: ${topU.join(" | ")}`);
  if (lead) parts.push(`comment: ${lead.replace(/\s+/g, " ").trim()}`);
  return parts.join("\n").slice(0, 1800);
}
|
|
1525
|
+
// Built-in synopsis strategies, tried in order. Each strategy exposes
// `supports(input)` to claim a file and `build(input)` to render a short
// plain-text synopsis for embedding/search.
var builtInStrategies = [
  {
    // TypeScript/JavaScript sources: delegate to the AST-based builder.
    name: "ts-like",
    supports: (input) => {
      const lang = input.language;
      return lang === "typescript" || lang === "javascript";
    },
    build: buildTsLikeSynopsis
  },
  {
    // Markdown: path/language header plus leading comment and up to 12
    // h1-h3 headings, capped at 1200 characters.
    name: "markdown",
    supports: (input) => input.language === "markdown",
    build: (input) => {
      const { posixPath, language, sourceText } = input;
      const lines = ["File synopsis", `path: ${posixPath}`, `language: ${language}`];
      const lead = leadingCommentExcerpt(sourceText);
      if (lead) lines.push(`comment: ${lead.replace(/\s+/g, " ").trim()}`);
      const headings = [];
      for (const line of sourceText.split(/\r?\n/)) {
        if (headings.length >= 12) break;
        if (/^#{1,3}\s+/.test(line)) headings.push(line.replace(/\s+/g, " ").trim());
      }
      if (headings.length) lines.push(`headings: ${headings.join(" | ")}`);
      return lines.join("\n").slice(0, 1200);
    }
  },
  {
    // Catch-all fallback for every other language.
    name: "generic",
    supports: () => true,
    build: renderGenericSynopsis
  }
];
// User-registered strategies; consulted before the built-ins by buildSynopsis.
var customStrategies = [];
|
|
1553
|
+
// Build a synopsis for one file by picking the first strategy (custom
// strategies take precedence over built-ins) that supports the input.
// Falls back to the last built-in ("generic") when nothing claims the file,
// and to renderGenericSynopsis when the chosen builder throws.
function buildSynopsis(posixPath, language, sourceText) {
  const input = { posixPath, language, sourceText };
  let chosen = null;
  for (const candidate of [...customStrategies, ...builtInStrategies]) {
    if (candidate.supports(input)) {
      chosen = candidate;
      break;
    }
  }
  if (chosen === null) chosen = builtInStrategies[builtInStrategies.length - 1];
  try {
    return chosen.build(input);
  } catch {
    // Builder failed; degrade to the generic renderer rather than aborting.
    return renderGenericSynopsis(input);
  }
}
|
|
1563
|
+
|
|
1564
|
+
// src/retrieval/tokens.ts
|
|
1565
|
+
// Return the input values with falsy entries removed and duplicates
// dropped, preserving first-seen order.
function uniq2(xs) {
  const seen = /* @__PURE__ */ new Set();
  const result = [];
  for (const value of xs) {
    if (!value || seen.has(value)) continue;
    seen.add(value);
    result.push(value);
  }
  return result;
}
|
|
1576
|
+
// Extract up to `maxTokens` distinct identifier-like tokens (2+ chars,
// starting with a letter or underscore) from `text`, joined by spaces,
// in first-occurrence order. Used to build compact FTS documents.
function extractLexicalTokens(text, maxTokens = 2500) {
  const matches = text.match(/[A-Za-z_][A-Za-z0-9_]{1,}/g) ?? [];
  const seen = new Set();
  const unique = [];
  for (const tok of matches) {
    if (unique.length >= maxTokens) break;
    if (!tok || seen.has(tok)) continue;
    seen.add(tok);
    unique.push(tok);
  }
  return unique.join(" ");
}
|
|
1581
|
+
|
|
1582
|
+
// src/indexer/repoIndexer/fileIndexer.ts
|
|
1583
|
+
// Indexes a single repository file end-to-end: filter -> read -> redact ->
// chunk -> embed (with cache) -> persist chunks, edges, symbols, and vector
// points. All collaborators are injected; this class owns no global state.
var RepoFileIndexer = class {
  // repoRoot/repoId identify the repo; embedder produces embedding vectors;
  // store/workspaceStore/graphStore persist results; embeddingCache avoids
  // re-embedding identical content; emit reports progress events;
  // ensureVectorUpToDate applies vector-index deltas (upserts + removals);
  // getHead returns the current { commit, branch } or null.
  constructor(repoRoot, repoId, embedder, config, store, embeddingCache, emit, ensureVectorUpToDate, workspaceStore, symbolGraphProvider, graphStore, getHead, rawConfig) {
    this.repoRoot = repoRoot;
    this.repoId = repoId;
    this.embedder = embedder;
    this.config = config;
    this.store = store;
    this.embeddingCache = embeddingCache;
    this.emit = emit;
    this.ensureVectorUpToDate = ensureVectorUpToDate;
    this.workspaceStore = workspaceStore;
    this.symbolGraphProvider = symbolGraphProvider;
    this.graphStore = graphStore;
    this.getHead = getHead;
    this.rawConfig = rawConfig;
  }
  // Decide whether a POSIX-relative path should be indexed at all:
  // extension allow-list, redaction skip-substrings, then the optional
  // fileIgnore predicate. Matching is case-insensitive.
  shouldIndexPath(posixRelPath, fileIgnore) {
    const lower = posixRelPath.toLowerCase();
    // Note: multi-dot names use only the last segment as the extension.
    const ext = "." + (lower.split(".").pop() ?? "");
    if (!this.config.includeExtensions.includes(ext)) return false;
    if (this.config.redact.enabled) {
      for (const s of this.config.redact.skipPathSubstrings) {
        if (lower.includes(s.toLowerCase())) return false;
      }
    }
    if (fileIgnore && fileIgnore(posixRelPath)) return false;
    return true;
  }
  // Apply every configured redaction pattern in order; no-op when
  // redaction is disabled. Patterns are { regex, replaceWith } pairs.
  applyRedactions(text) {
    if (!this.config.redact.enabled) return text;
    let t = text;
    for (const p of this.config.redact.patterns) t = t.replace(p.regex, p.replaceWith);
    return t;
  }
  // Index one file. Emits skip events for each early-out (filtered, missing,
  // not a file, too large, binary, unchanged hash) and start/done events
  // around the actual work. Synchronous fs calls are used deliberately here.
  async indexFile(posixRelPath, fileIgnore) {
    const startedAt = Date.now();
    if (!this.shouldIndexPath(posixRelPath, fileIgnore)) {
      this.emit({ type: "repo/index/file/skip", repoRoot: this.repoRoot, path: posixRelPath, reason: "filtered" });
      return;
    }
    this.emit({ type: "repo/index/file/start", repoRoot: this.repoRoot, path: posixRelPath });
    const abs = path9.join(this.repoRoot, fromPosixPath(posixRelPath));
    let stat;
    try {
      stat = fs7.statSync(abs);
    } catch {
      // stat failure is treated as "file disappeared", not an error.
      this.emit({ type: "repo/index/file/skip", repoRoot: this.repoRoot, path: posixRelPath, reason: "missing" });
      return;
    }
    if (!stat.isFile()) {
      this.emit({ type: "repo/index/file/skip", repoRoot: this.repoRoot, path: posixRelPath, reason: "not_a_file" });
      return;
    }
    if (stat.size > this.config.maxFileBytes) {
      this.emit({ type: "repo/index/file/skip", repoRoot: this.repoRoot, path: posixRelPath, reason: "too_large" });
      return;
    }
    const buf = fs7.readFileSync(abs);
    if (looksBinary(buf)) {
      this.emit({ type: "repo/index/file/skip", repoRoot: this.repoRoot, path: posixRelPath, reason: "binary" });
      return;
    }
    const raw = buf.toString("utf8");
    // Hash the *redacted* text so toggling redaction config reindexes files.
    const redacted = this.applyRedactions(raw);
    const fileHash = sha256Hex(redacted);
    const prev = this.store.getFileHash(posixRelPath);
    if (prev === fileHash) {
      this.emit({ type: "repo/index/file/skip", repoRoot: this.repoRoot, path: posixRelPath, reason: "unchanged" });
      return;
    }
    const { language, chunks } = chunkSource(posixRelPath, redacted, this.config.chunk);
    let imports = [];
    let exports = [];
    // Import/export edges are only extracted for TS/JS; other languages get
    // their edges cleared so stale data does not linger after a rewrite.
    if (language === "typescript" || language === "javascript") {
      const rel = extractTsRelations(posixRelPath, redacted);
      imports = rel.imports;
      exports = rel.exports;
      this.store.setEdges(posixRelPath, "import", rel.imports);
      this.store.setEdges(posixRelPath, "export", rel.exports);
      this.workspaceStore?.setEdges(this.repoId, posixRelPath, "import", rel.imports);
      this.workspaceStore?.setEdges(this.repoId, posixRelPath, "export", rel.exports);
    } else {
      this.store.setEdges(posixRelPath, "import", []);
      this.store.setEdges(posixRelPath, "export", []);
      this.workspaceStore?.setEdges(this.repoId, posixRelPath, "import", []);
      this.workspaceStore?.setEdges(this.repoId, posixRelPath, "export", []);
    }
    // Synthetic "synopsis" chunk pinned to line 1; token count is a rough
    // chars/4 estimate, floored at 1.
    const synopsisText = buildSynopsis(posixRelPath, language, redacted);
    const synopsis = synopsisText.trim() ? [
      {
        startLine: 1,
        endLine: 1,
        text: synopsisText,
        contentHash: sha256Hex(synopsisText),
        tokens: Math.max(1, Math.ceil(synopsisText.length / 4)),
        kind: "synopsis"
      }
    ] : [];
    // Extra "header" chunk covering the first up-to-80 lines of the file.
    const headerLines = Math.min(80, this.config.chunk.maxLines);
    const headerText = redacted.split(/\r?\n/).slice(0, headerLines).join("\n").trimEnd();
    const headerChunk = headerText.trim() ? [
      {
        startLine: 1,
        endLine: headerLines,
        text: headerText,
        contentHash: sha256Hex(headerText),
        tokens: Math.max(1, Math.ceil(headerText.length / 4)),
        kind: "chunk"
      }
    ] : [];
    const combined = [...synopsis, ...headerChunk, ...chunks.map((c) => ({ ...c, kind: "chunk" }))];
    // Plan embeddings: cache hits fill `embeddings` directly; misses are
    // queued in embedTexts/embedPlan (parallel arrays by push order).
    const embedTexts = [];
    const embedPlan = [];
    const embeddings = combined.map(() => null);
    for (let i = 0; i < combined.length; i++) {
      const ch = combined[i];
      const cached2 = this.embeddingCache.get(this.embedder.id, ch.contentHash);
      if (cached2) {
        embeddings[i] = cached2;
        continue;
      }
      // Embedding input carries repo/path/language/kind/lines metadata above
      // a "---" separator; the newlines inside the template are intentional.
      embedTexts.push(
        `repo:${path9.basename(this.repoRoot)}
path:${posixRelPath}
language:${language}
kind:${ch.kind}
lines:${ch.startLine}-${ch.endLine}
---
${ch.text}`
      );
      embedPlan.push({ chunkIdx: i, contentHash: ch.contentHash });
    }
    // Embed cache misses in sequential batches, emitting progress per batch
    // and writing each fresh vector back into the cache.
    const batchSize = this.config.embed.batchSize;
    for (let start = 0; start < embedTexts.length; start += batchSize) {
      const end = Math.min(embedTexts.length, start + batchSize);
      const batch = embedTexts.slice(start, end);
      this.emit({
        type: "repo/index/embed/batch",
        repoRoot: this.repoRoot,
        batchSize: batch.length,
        batchIndex: Math.floor(start / batchSize),
        batchesTotal: Math.ceil(embedTexts.length / batchSize)
      });
      const vecs = await this.embedder.embed(batch);
      for (let j = 0; j < vecs.length; j++) {
        const plan = embedPlan[start + j];
        embeddings[plan.chunkIdx] = vecs[j];
        this.embeddingCache.put(this.embedder.id, plan.contentHash, vecs[j]);
      }
    }
    const fileMtime = stat.mtimeMs;
    const ftsMode = this.config.storage.ftsMode;
    const storeText = this.config.storage.storeText;
    // Capture the chunk ids being replaced so their vectors can be removed.
    const oldChunkIds = this.store.listChunksForFile(posixRelPath).map((r) => r.id);
    const points = [];
    const rows = combined.map((ch, i) => {
      // Deterministic 32-hex-char chunk id derived from identity + content.
      const id = sha256Hex(
        `${this.repoId}:${posixRelPath}:${ch.kind}:${ch.startLine}:${ch.endLine}:${ch.contentHash}`
      ).slice(0, 32);
      const emb = embeddings[i];
      // Every chunk must have a vector by now (cache hit or fresh embed).
      if (!emb) throw new Error("Embedding missing unexpectedly");
      points.push({ id, vector: emb });
      // Synopsis text is always stored; regular chunk text only when
      // storage.storeText is on (otherwise re-read from disk at query time).
      const textToStore = ch.kind === "synopsis" ? ch.text : storeText ? ch.text : "";
      const ftsText = ftsMode === "off" ? "" : ftsMode === "tokens" ? extractLexicalTokens(ch.text) : ch.text;
      return {
        id,
        language,
        kind: ch.kind,
        startLine: ch.startLine,
        endLine: ch.endLine,
        contentHash: ch.contentHash,
        text: textToStore,
        ftsText,
        tokens: ch.tokens,
        fileMtime,
        embedding: emb
      };
    });
    // Persist to the per-repo store and (when present) the workspace store.
    this.store.upsertFile(posixRelPath, fileHash, fileMtime, language, stat.size);
    this.store.replaceChunksForFile(posixRelPath, rows);
    this.workspaceStore?.upsertFile(this.repoId, posixRelPath, fileHash, fileMtime, language, stat.size);
    this.workspaceStore?.replaceChunksForFile(this.repoId, this.repoRoot, posixRelPath, rows);
    const symbolOut = await this.indexSymbolsIfEnabled(posixRelPath, language, redacted, fileHash);
    const head = this.getHead();
    if (this.graphStore && head) {
      // Graph persistence is best-effort: failures are swallowed so a graph
      // backend outage cannot fail the file index.
      await this.graphStore.replaceFileGraph({
        repoId: this.repoId,
        repoRoot: this.repoRoot,
        commit: head.commit,
        branch: head.branch,
        path: posixRelPath,
        language,
        imports,
        exports,
        symbols: symbolOut?.symbols ?? [],
        symbolEdges: (symbolOut?.edges ?? []).map((e) => ({ fromId: e.fromId, toId: e.toId, kind: e.kind }))
      }).catch(() => void 0);
    }
    // Remove old vectors and upsert new ones in the vector index.
    await this.ensureVectorUpToDate(points, oldChunkIds);
    this.emit({
      type: "repo/index/file/done",
      repoRoot: this.repoRoot,
      path: posixRelPath,
      chunkCount: combined.length,
      ms: Date.now() - startedAt
    });
  }
  // Remove a file from all stores and drop its vectors from the index.
  async deleteFile(posixRelPath) {
    const chunkIds = this.store.listChunksForFile(posixRelPath).map((r) => r.id);
    this.store.deleteFile(posixRelPath);
    this.workspaceStore?.deleteFile(this.repoId, posixRelPath);
    // Best-effort graph cleanup; errors are intentionally ignored.
    await this.graphStore?.deleteFile({ repoId: this.repoId, path: posixRelPath }).catch(() => void 0);
    await this.ensureVectorUpToDate([], chunkIds);
  }
  // Extract symbols + symbol edges via the configured provider, normalise
  // ids, and persist them to the workspace store. Returns null when no
  // provider exists, the language is unsupported, or extraction throws.
  async indexSymbolsIfEnabled(posixRelPath, language, redactedText, contentHash) {
    if (!this.symbolGraphProvider) return null;
    if (!this.symbolGraphProvider.supports(language)) return null;
    let out;
    try {
      out = await this.symbolGraphProvider.indexDocument({
        repoRoot: this.repoRoot,
        path: posixRelPath,
        language,
        text: redactedText,
        contentHash
      });
    } catch {
      // Symbol extraction is optional; a provider failure is not fatal.
      return null;
    }
    const symbols = out.symbols.map((s) => {
      // Providers may omit ids; fall back to a deterministic stable id.
      const id = s.id || stableSymbolId({
        repoRoot: this.repoRoot,
        path: posixRelPath,
        language,
        kind: s.kind,
        name: s.name,
        range: s.range
      });
      return {
        id,
        language,
        name: s.name,
        kind: s.kind,
        range: s.range,
        containerName: s.containerName,
        detail: s.detail
      };
    });
    // Keep only edges whose source symbol is one we actually emitted.
    const knownIds = new Set(symbols.map((s) => s.id));
    const edges = out.edges.filter((e) => knownIds.has(e.fromId)).map((e) => ({ fromId: e.fromId, toId: e.toId, kind: e.kind, toPath: e.toPath }));
    if (this.workspaceStore) {
      const symbolRows = symbols.map((s) => ({
        id: s.id,
        language: s.language,
        name: s.name,
        kind: s.kind,
        startLine: s.range.startLine,
        startCharacter: s.range.startCharacter,
        endLine: s.range.endLine,
        endCharacter: s.range.endCharacter,
        containerName: s.containerName,
        detail: s.detail
      }));
      this.workspaceStore.replaceSymbolsForFile(
        this.repoId,
        this.repoRoot,
        posixRelPath,
        symbolRows,
        // Edges without an explicit target path default to this file.
        edges.map((e) => ({ ...e, fromPath: posixRelPath, toPath: e.toPath ?? posixRelPath }))
      );
    }
    return { symbols, edges };
  }
};
|
|
1857
|
+
|
|
1858
|
+
// src/indexer/repoIndexer/retriever.ts
|
|
1859
|
+
import fs8 from "fs";
|
|
1860
|
+
import path10 from "path";
|
|
1861
|
+
|
|
1862
|
+
// src/retrieval/fts.ts
|
|
1863
|
+
// Turn free text into an FTS prefix query: take up to 24 alphanumeric
// tokens of length >= 2, append "*" to each, and OR them together.
// Returns "" when no usable token is found.
function ftsQueryFromText(input) {
  const matched = input.match(/[A-Za-z0-9_]{2,}/g) ?? [];
  const tokens = matched.slice(0, 24);
  if (tokens.length === 0) return "";
  const prefixed = [];
  for (const tok of tokens) {
    // Escape double quotes per FTS string rules (defensive; the token
    // character class cannot actually produce quotes).
    prefixed.push(`${tok.replace(/"/g, '""')}*`);
  }
  return prefixed.join(" OR ");
}
|
|
1872
|
+
// Map a BM25 rank (lower = better) onto [0, 1] where higher = better,
// via 1 / (1 + max(0, bm25)). Non-finite inputs score 0.
function bm25ToScore01(bm25) {
  if (!Number.isFinite(bm25)) return 0;
  const positive = Math.max(0, bm25);
  return clamp(1 / (1 + positive), 0, 1);
}
|
|
1877
|
+
// Map a cosine similarity in [-1, 1] onto [0, 1] linearly.
// Non-finite inputs score 0.
function vectorCosineToScore01(cosine) {
  if (!Number.isFinite(cosine)) return 0;
  const shifted = (cosine + 1) / 2;
  return clamp(shifted, 0, 1);
}
|
|
1881
|
+
|
|
1882
|
+
// src/indexer/repoIndexer/retriever.ts
|
|
1883
|
+
// Read-side companion to the indexer: vector + lexical candidate retrieval,
// chunk lookup with an in-memory row cache, and context expansion
// (synopsis, adjacent chunks, imported-file headers).
var RepoRetriever = class {
  // getStore/getVectorIndex/getAnnIndex are late-bound accessors because the
  // underlying resources are (re)opened per commit; ensureOpen/ensureVectorReady
  // lazily initialise them; ftsMode() reports the configured FTS storage mode.
  constructor(repoRoot, repoId, getStore, getVectorIndex, getAnnIndex, ensureOpen, ensureVectorReady, ftsMode) {
    this.repoRoot = repoRoot;
    this.repoId = repoId;
    this.getStore = getStore;
    this.getVectorIndex = getVectorIndex;
    this.getAnnIndex = getAnnIndex;
    this.ensureOpen = ensureOpen;
    this.ensureVectorReady = ensureVectorReady;
    this.ftsMode = ftsMode;
  }
  // Cache of chunk rows by id; cleared when the store is reopened.
  chunkCache = /* @__PURE__ */ new Map();
  // Fetch a chunk row by id, memoising hits. Returns null when the store is
  // unavailable or the id is unknown (misses are not cached).
  getChunkRowCached(id) {
    const cached2 = this.chunkCache.get(id);
    if (cached2) return cached2;
    const store = this.getStore();
    if (!store) return null;
    const row = store.getChunkById(id);
    if (row) this.chunkCache.set(id, row);
    return row ?? null;
  }
  clearCache() {
    this.chunkCache.clear();
  }
  // Return up to k { id, score } vector hits. Without a path filter, the ANN
  // index is tried first (best-effort) and the exact index is the fallback.
  // With a path filter, over-fetch from the exact index at escalating sizes
  // (5k, 15k, 40k) until k in-filter hits are found.
  async vectorCandidates(queryEmbedding, k, includePaths) {
    await this.ensureOpen();
    await this.ensureVectorReady();
    const vec = this.getVectorIndex();
    if (!vec) return [];
    if (!includePaths || includePaths.length === 0) {
      try {
        const annHits = await this.getAnnIndex().search(queryEmbedding, k);
        if (annHits.length > 0) return annHits;
      } catch {
        // ANN is an accelerator only; fall through to the exact index.
      }
    }
    if (!includePaths || includePaths.length === 0) {
      return await vec.search(queryEmbedding, k);
    }
    const tries = [k * 5, k * 15, k * 40].map((x) => Math.max(k, x));
    for (const kk of tries) {
      const cand2 = await vec.search(queryEmbedding, kk);
      const filtered2 = [];
      for (const c of cand2) {
        const row = this.getChunkRowCached(c.id);
        if (row && includePaths.includes(row.path)) filtered2.push(c);
        if (filtered2.length >= k) break;
      }
      if (filtered2.length >= k) return filtered2.slice(0, k);
    }
    // Final attempt: take whatever in-filter hits the largest search found.
    const cand = await vec.search(queryEmbedding, tries[tries.length - 1]);
    const filtered = cand.filter((c) => {
      const row = this.getChunkRowCached(c.id);
      return row ? includePaths.includes(row.path) : false;
    });
    return filtered.slice(0, k);
  }
  // Full-text candidates via the store's FTS search. Returns [] when FTS is
  // disabled or the query yields no usable tokens; BM25 ranks are mapped to
  // [0, 1] scores.
  async lexicalCandidates(queryText, k, includePaths) {
    await this.ensureOpen();
    const store = this.getStore();
    if (!store) throw new Error("RepoStore not initialised");
    if (this.ftsMode() === "off") return [];
    const ftq = ftsQueryFromText(queryText);
    if (!ftq) return [];
    const rows = store.searchFts(ftq, k, includePaths);
    return rows.map((r) => ({ id: r.id, score: bm25ToScore01(r.bm25) }));
  }
  // Re-read a chunk's text from the working tree when the store did not
  // persist it (storage.storeText off). Returns "" on any read failure.
  // Lines are 1-based inclusive; row fields use snake_case (SQLite schema).
  readChunkTextFallback(row) {
    const abs = path10.join(this.repoRoot, fromPosixPath(row.path));
    try {
      const raw = fs8.readFileSync(abs, "utf8");
      const lines = raw.split(/\r?\n/);
      const start = Math.max(1, row.start_line);
      const end = Math.max(start, row.end_line);
      return lines.slice(start - 1, end).join("\n");
    } catch {
      return "";
    }
  }
  // Full chunk record (metadata + text) in camelCase form, or null.
  getChunkRecord(id) {
    const row = this.getChunkRowCached(id);
    if (!row) return null;
    const text = row.text && row.text.trim().length > 0 ? row.text : this.readChunkTextFallback(row);
    return {
      id: row.id,
      repoId: this.repoId,
      repoRoot: this.repoRoot,
      path: row.path,
      language: row.language,
      startLine: row.start_line,
      endLine: row.end_line,
      contentHash: row.content_hash,
      text,
      tokens: row.tokens,
      fileMtimeMs: row.file_mtime,
      kind: row.kind === "synopsis" ? "synopsis" : "chunk"
    };
  }
  // Metadata-only variant of getChunkRecord (no text, so no disk fallback).
  getChunkMeta(id) {
    const row = this.getChunkRowCached(id);
    if (!row) return null;
    return {
      id: row.id,
      repoId: this.repoId,
      repoRoot: this.repoRoot,
      path: row.path,
      language: row.language,
      startLine: row.start_line,
      endLine: row.end_line,
      contentHash: row.content_hash,
      tokens: row.tokens,
      fileMtimeMs: row.file_mtime,
      kind: row.kind === "synopsis" ? "synopsis" : "chunk"
    };
  }
  // Chunk text with disk fallback; "" when the chunk id is unknown.
  getChunkText(id) {
    const row = this.getChunkRowCached(id);
    if (!row) return "";
    return row.text && row.text.trim().length > 0 ? row.text : this.readChunkTextFallback(row);
  }
  getChunkPreview(id) {
    const r = this.getChunkRecord(id);
    return r ? makePreview(r.text) : "";
  }
  // Collect related chunk ids (with human-readable reasons) around a hit:
  // the file's synopsis, +-N adjacent chunks, and synopsis/header chunks of
  // up to `followImports` relative imports. Result is deduped, order kept.
  async expandContext(chunkId, opts) {
    await this.ensureOpen();
    const store = this.getStore();
    if (!store) throw new Error("RepoStore not initialised");
    const row = this.getChunkRowCached(chunkId);
    if (!row) return [];
    const out = [];
    if (opts.includeFileSynopsis) {
      const synopsis = store.listChunksForFile(row.path, "synopsis")[0];
      if (synopsis) out.push({ id: synopsis.id, reason: "file synopsis" });
    }
    const adj = Math.max(0, opts.adjacentChunks);
    if (adj > 0) {
      const fileChunks = store.listChunksForFile(row.path).filter((c) => c.kind !== "synopsis");
      const idx = fileChunks.findIndex((c) => c.id === chunkId);
      if (idx >= 0) {
        // Emit neighbours by increasing distance: -1, +1, -2, +2, ...
        for (let d = 1; d <= adj; d++) {
          for (const j of [idx - d, idx + d]) {
            if (j < 0 || j >= fileChunks.length) continue;
            out.push({ id: fileChunks[j].id, reason: `adjacent chunk (\xB1${d})` });
          }
        }
      }
    }
    const follow = Math.max(0, opts.followImports);
    if (follow > 0) {
      // Only relative imports ("./", "../") can be resolved within the repo.
      const imports = store.listEdges(row.path, "import").filter((s) => s.startsWith("."));
      for (const spec of imports.slice(0, follow)) {
        // Resolve like a bundler: bare spec, then common TS/JS extensions,
        // then index files, relative to the importing file's directory.
        const candidates = [
          spec,
          `${spec}.ts`,
          `${spec}.tsx`,
          `${spec}.js`,
          `${spec}.jsx`,
          `${spec}/index.ts`,
          `${spec}/index.tsx`,
          `${spec}/index.js`,
          `${spec}/index.jsx`
        ].map((s) => path10.posix.normalize(path10.posix.join(path10.posix.dirname(row.path), s)));
        for (const c of candidates) {
          const syn = store.listChunksForFile(c, "synopsis")[0];
          if (syn) {
            out.push({ id: syn.id, reason: `imported file synopsis (${spec})` });
            break;
          }
          const header = store.listChunksForFile(c).find((x) => x.kind !== "synopsis");
          if (header) {
            out.push({ id: header.id, reason: `imported file header (${spec})` });
            break;
          }
        }
      }
    }
    // Dedupe by id while preserving first-seen order.
    const seen = /* @__PURE__ */ new Set();
    const deduped = [];
    for (const x of out) {
      if (seen.has(x.id)) continue;
      seen.add(x.id);
      deduped.push(x);
    }
    return deduped;
  }
};
|
|
2070
|
+
|
|
2071
|
+
// src/indexer/repoIndexer/watcher.ts
|
|
2072
|
+
import path11 from "path";
|
|
2073
|
+
import chokidar from "chokidar";
|
|
2074
|
+
// Watches a repository working tree (plus .git/HEAD) via chokidar and
// forwards debounced add/change/unlink/head-change notifications.
var RepoWatcher = class {
  // onWatchEvent (optional) fires immediately for every raw event; the
  // per-kind callbacks fire only after the debounce window settles.
  constructor(repoRoot, debounceMs, ignored, onHeadChanged, onFileChanged, onFileAdded, onFileDeleted, onWatchEvent) {
    this.repoRoot = repoRoot;
    this.debounceMs = debounceMs;
    this.ignored = ignored;
    this.onHeadChanged = onHeadChanged;
    this.onFileChanged = onFileChanged;
    this.onFileAdded = onFileAdded;
    this.onFileDeleted = onFileDeleted;
    this.onWatchEvent = onWatchEvent;
  }
  watcher = null;
  // Start watching; idempotent (no-op when already started).
  async start() {
    if (this.watcher) return;
    // NOTE(review): a single shared timer debounces ALL events — a burst
    // touching several files fires only the last scheduled callback, so
    // earlier events in the burst are dropped. Confirm this is intentional
    // (e.g. a full rescan downstream) before relying on per-file delivery.
    let timer = null;
    const schedule = (fn) => {
      if (timer) clearTimeout(timer);
      timer = setTimeout(fn, this.debounceMs);
    };
    // Watch .git/HEAD explicitly so branch/commit switches are observed.
    const headPath = path11.join(this.repoRoot, ".git", "HEAD");
    this.watcher = chokidar.watch([this.repoRoot, headPath], {
      ignoreInitial: true,
      ignored: this.ignored
    });
    this.watcher.on("change", (p) => {
      // Normalise to a POSIX path relative to the repo root.
      const rel = path11.relative(this.repoRoot, p);
      const posix = rel.split(path11.sep).join("/");
      if (posix === ".git/HEAD") {
        this.onWatchEvent?.("head", posix);
        schedule(() => this.onHeadChanged());
        return;
      }
      this.onWatchEvent?.("change", posix);
      schedule(() => this.onFileChanged(posix));
    });
    this.watcher.on("add", (p) => {
      const rel = path11.relative(this.repoRoot, p);
      const posix = rel.split(path11.sep).join("/");
      this.onWatchEvent?.("add", posix);
      schedule(() => this.onFileAdded(posix));
    });
    this.watcher.on("unlink", (p) => {
      const rel = path11.relative(this.repoRoot, p);
      const posix = rel.split(path11.sep).join("/");
      this.onWatchEvent?.("unlink", posix);
      schedule(() => this.onFileDeleted(posix));
    });
  }
  // Stop watching; close errors are swallowed and the instance can be
  // started again afterwards.
  async close() {
    await this.watcher?.close().catch(() => void 0);
    this.watcher = null;
  }
};
|
|
2127
|
+
|
|
2128
|
+
// src/indexer/repoIndexer.ts
|
|
2129
|
+
var RepoIndexer = class {
|
|
2130
|
+
constructor(repoRoot, embedder, config = {}, workspaceStore, graphStore) {
|
|
2131
|
+
this.embedder = embedder;
|
|
2132
|
+
this.repoRoot = path12.resolve(repoRoot);
|
|
2133
|
+
this.repoId = repoIdFromRoot(this.repoRoot);
|
|
2134
|
+
this.rawConfig = { ...config };
|
|
2135
|
+
if (!this.rawConfig.cacheDir) this.rawConfig.cacheDir = defaultCacheDir();
|
|
2136
|
+
this.config = resolveIndexerConfig(this.rawConfig);
|
|
2137
|
+
this.progress = asProgressSink(this.rawConfig.progress);
|
|
2138
|
+
this.symbolGraphProvider = this.rawConfig.symbolGraphProvider ?? null;
|
|
2139
|
+
this.workspaceStore = workspaceStore ?? null;
|
|
2140
|
+
this.graphStore = graphStore ?? null;
|
|
2141
|
+
this.ann = createAnnIndex(this.rawConfig.ann);
|
|
2142
|
+
this.embeddingCache = new EmbeddingCache(path12.join(this.config.cacheDir, "embedding-cache.sqlite"));
|
|
2143
|
+
this.vector = new VectorManager(
|
|
2144
|
+
this.config.vector,
|
|
2145
|
+
this.vectorMetric(),
|
|
2146
|
+
(kind) => this.vectorFlushDebounceMs(kind),
|
|
2147
|
+
async (vec) => {
|
|
2148
|
+
this.emitProgress({ type: "repo/vector/flush", repoRoot: this.repoRoot, kind: vec.kind });
|
|
2149
|
+
await vec.flush();
|
|
2150
|
+
const sv = this.store.getStoreVersion();
|
|
2151
|
+
this.store.setVectorIndexVersion(vec.kind, sv);
|
|
2152
|
+
}
|
|
2153
|
+
);
|
|
2154
|
+
this.retriever = new RepoRetriever(
|
|
2155
|
+
this.repoRoot,
|
|
2156
|
+
this.repoId,
|
|
2157
|
+
() => this.store,
|
|
2158
|
+
() => this.vector.get(),
|
|
2159
|
+
() => this.ann,
|
|
2160
|
+
async () => this.openForCurrentHead(),
|
|
2161
|
+
async () => {
|
|
2162
|
+
if (!this.store) throw new Error("RepoStore not initialised");
|
|
2163
|
+
if (!this.vector.get()) {
|
|
2164
|
+
const dim = this.embedder.dimension ?? this.store.getAnyEmbeddingDimension();
|
|
2165
|
+
if (dim) await this.ensureVectorIndex(dim);
|
|
2166
|
+
}
|
|
2167
|
+
},
|
|
2168
|
+
() => this.config.storage.ftsMode
|
|
2169
|
+
);
|
|
2170
|
+
}
|
|
2171
|
+
repoRoot;
|
|
2172
|
+
repoId;
|
|
2173
|
+
rawConfig;
|
|
2174
|
+
config;
|
|
2175
|
+
progress;
|
|
2176
|
+
embeddingCache;
|
|
2177
|
+
symbolGraphProvider;
|
|
2178
|
+
store = null;
|
|
2179
|
+
workspaceStore = null;
|
|
2180
|
+
graphStore = null;
|
|
2181
|
+
vector;
|
|
2182
|
+
ann;
|
|
2183
|
+
retriever;
|
|
2184
|
+
fileIndexer = null;
|
|
2185
|
+
watcher = null;
|
|
2186
|
+
currentCommit = null;
|
|
2187
|
+
currentBranch = null;
|
|
2188
|
+
fileIgnore = null;
|
|
2189
|
+
serial = pLimit(1);
|
|
2190
|
+
emitProgress(event) {
|
|
2191
|
+
try {
|
|
2192
|
+
this.progress?.emit(event);
|
|
2193
|
+
} catch {
|
|
2194
|
+
}
|
|
2195
|
+
}
|
|
2196
|
+
getCommit() {
|
|
2197
|
+
return this.currentCommit;
|
|
2198
|
+
}
|
|
2199
|
+
getBranch() {
|
|
2200
|
+
return this.currentBranch;
|
|
2201
|
+
}
|
|
2202
|
+
getStore() {
|
|
2203
|
+
return this.store;
|
|
2204
|
+
}
|
|
2205
|
+
dbPathForCommit(commit) {
|
|
2206
|
+
return path12.join(this.config.cacheDir, "index", this.repoId, `${commit}.sqlite`);
|
|
2207
|
+
}
|
|
2208
|
+
vectorMetric() {
|
|
2209
|
+
return this.config.vector.metric ?? "cosine";
|
|
2210
|
+
}
|
|
2211
|
+
vectorFlushDebounceMs(kind) {
|
|
2212
|
+
if (kind === "hnswlib") return this.config.vector.hnswlib?.persistDebounceMs ?? 2e3;
|
|
2213
|
+
if (kind === "faiss") return this.config.vector.faiss?.persistDebounceMs ?? 2e3;
|
|
2214
|
+
return 0;
|
|
2215
|
+
}
|
|
2216
|
+
async ensureVectorIndex(dimension) {
|
|
2217
|
+
if (!this.currentCommit || !this.currentBranch) return null;
|
|
2218
|
+
if (!this.store) return null;
|
|
2219
|
+
return await this.vector.ensure({
|
|
2220
|
+
repoId: this.repoId,
|
|
2221
|
+
repoRoot: this.repoRoot,
|
|
2222
|
+
commit: this.currentCommit,
|
|
2223
|
+
branch: this.currentBranch,
|
|
2224
|
+
cacheDir: this.config.cacheDir,
|
|
2225
|
+
dimension
|
|
2226
|
+
});
|
|
2227
|
+
}
|
|
2228
|
+
async openForCurrentHead() {
  // (Re)open all per-commit state for the repo's current HEAD. Idempotent:
  // returns early when the store is already open for the same commit.
  const commit = await getHeadCommit(this.repoRoot);
  const branch = await getBranchName(this.repoRoot);
  if (this.currentCommit === commit && this.store) return;
  this.currentCommit = commit;
  this.currentBranch = branch;
  // Commit changed: drop caches and close the previous vector index/store
  // before opening the commit-addressed replacements.
  this.retriever.clearCache();
  await this.vector.close();
  if (this.store) this.store.close();
  this.fileIgnore = loadExtraIgnore(this.repoRoot, this.config.ignoreFiles);
  this.store = new RepoStore(this.dbPathForCommit(commit));
  // Stamp identifying metadata so a store file is self-describing on disk.
  this.store.setMeta("repoRoot", this.repoRoot);
  this.store.setMeta("repoId", this.repoId);
  this.store.setMeta("commit", commit);
  this.store.setMeta("branch", branch);
  this.store.setMeta("embedderId", this.embedder.id);
  this.emitProgress({ type: "repo/open", repoRoot: this.repoRoot, repoId: this.repoId, commit, branch });
  // Workspace/graph head updates and ANN init are best-effort: failures are
  // swallowed so a missing optional backend cannot block opening the repo.
  this.workspaceStore?.setRepoHead(this.repoId, this.repoRoot, commit, branch);
  await this.graphStore?.setRepoHead({ repoId: this.repoId, repoRoot: this.repoRoot, commit, branch }).catch(() => void 0);
  await this.ann.init({
    repoId: this.repoId,
    repoRoot: this.repoRoot,
    commit,
    branch,
    cacheDir: this.config.cacheDir,
    // Fall back to a dimension recorded in the store; 0 when unknown.
    dimension: this.embedder.dimension ?? (this.store.getAnyEmbeddingDimension() ?? 0),
    metric: this.vectorMetric(),
    embedderId: this.embedder.id
  }).catch(() => void 0);
  this.fileIndexer = new RepoFileIndexer(
    this.repoRoot,
    this.repoId,
    this.embedder,
    this.config,
    this.store,
    this.embeddingCache,
    (e) => this.emitProgress(e),
    // Vector-write callback: the file indexer hands us new points and the ids
    // of chunks they replace; we mirror removals/upserts into both the ANN
    // backend (best-effort) and the primary vector index.
    async (points, oldChunkIds) => {
      const vec2 = this.vector.get();
      if (!vec2 && points.length > 0) {
        // Lazily create the vector index sized to the first batch's vectors.
        const dim2 = points[0].vector.length;
        await this.ensureVectorIndex(dim2);
      }
      const v = this.vector.get();
      if (!v) return;
      if (oldChunkIds.length > 0) {
        this.emitProgress({ type: "repo/index/vector/remove", repoRoot: this.repoRoot, ids: oldChunkIds.length });
        await this.ann.remove(oldChunkIds).catch(() => void 0);
        await v.remove(oldChunkIds);
      }
      if (points.length > 0) {
        this.emitProgress({ type: "repo/index/vector/upsert", repoRoot: this.repoRoot, points: points.length });
        await this.ann.upsert(points).catch(() => void 0);
        await v.upsert(points);
      }
      // Batched persistence; flushNow() elsewhere forces it synchronously.
      this.vector.scheduleFlush();
    },
    this.workspaceStore,
    this.symbolGraphProvider,
    this.graphStore,
    () => this.currentCommit && this.currentBranch ? { commit: this.currentCommit, branch: this.currentBranch } : null,
    this.rawConfig
  );
  const dim = this.embedder.dimension ?? this.store.getAnyEmbeddingDimension();
  if (!dim) return;
  const vec = await this.ensureVectorIndex(dim);
  if (!vec) return;
  // Rebuild the vector index from stored embeddings when it is out of sync
  // with the store (version or count mismatch). "faiss" indexes are always
  // rebuilt — presumably they are not persisted incrementally; confirm.
  const storeCount = this.store.countChunks();
  const storeVersion = this.store.getStoreVersion();
  const vecVersion = this.store.getVectorIndexVersion(vec.kind);
  const vecCount = await vec.count();
  if (storeCount > 0 && (vecVersion !== storeVersion || vecCount !== storeCount || vec.kind === "faiss")) {
    const startedAt = Date.now();
    this.emitProgress({ type: "repo/vector/rebuild/start", repoRoot: this.repoRoot, kind: vec.kind, points: storeCount });
    await vec.rebuild(this.store.loadAllChunkEmbeddings().map((e) => ({ id: e.id, vector: e.embedding })));
    await this.vector.flushNow();
    this.emitProgress({ type: "repo/vector/rebuild/done", repoRoot: this.repoRoot, kind: vec.kind, ms: Date.now() - startedAt });
  }
}
|
|
2307
|
+
async indexAll() {
  // Full reconciliation pass: delete store entries for files that no longer
  // exist in the working tree, then (re)index every current file.
  const startedAt = Date.now();
  await this.openForCurrentHead();
  if (!this.store || !this.fileIndexer) throw new Error("RepoStore not initialised");
  const files = (await listWorkingFiles(this.repoRoot)).filter((p) => this.fileIndexer.shouldIndexPath(p, this.fileIgnore));
  this.emitProgress({ type: "repo/index/start", repoRoot: this.repoRoot, fileCount: files.length });
  // Remove stale entries first so vector/FTS state never references files
  // that have disappeared from the working tree.
  const currentSet = new Set(files);
  for (const known of this.store.listAllFiles()) {
    if (!currentSet.has(known)) {
      await this.deleteFile(known);
    }
  }
  // Bound concurrent file indexing by the embedder's configured concurrency.
  const limit = pLimit(this.config.embed.concurrency);
  await Promise.all(files.map((f) => limit(() => this.indexFile(f))));
  if (this.vector.get()) await this.vector.flushNow();
  this.emitProgress({
    type: "repo/index/done",
    repoRoot: this.repoRoot,
    fileCount: files.length,
    ms: Date.now() - startedAt
  });
}
|
|
2329
|
+
async indexFile(posixRelPath) {
|
|
2330
|
+
return this.serial(async () => {
|
|
2331
|
+
await this.openForCurrentHead();
|
|
2332
|
+
if (!this.store || !this.fileIndexer) throw new Error("RepoStore not initialised");
|
|
2333
|
+
const abs = path12.join(this.repoRoot, posixRelPath.split("/").join(path12.sep));
|
|
2334
|
+
if (!fs9.existsSync(abs)) {
|
|
2335
|
+
await this.deleteFile(posixRelPath);
|
|
2336
|
+
return;
|
|
2337
|
+
}
|
|
2338
|
+
await this.fileIndexer.indexFile(posixRelPath, this.fileIgnore);
|
|
2339
|
+
});
|
|
2340
|
+
}
|
|
2341
|
+
async deleteFile(posixRelPath) {
|
|
2342
|
+
return this.serial(async () => {
|
|
2343
|
+
await this.openForCurrentHead();
|
|
2344
|
+
if (!this.store || !this.fileIndexer) return;
|
|
2345
|
+
await this.fileIndexer.deleteFile(posixRelPath);
|
|
2346
|
+
});
|
|
2347
|
+
}
|
|
2348
|
+
async vectorCandidates(queryEmbedding, k, includePaths) {
|
|
2349
|
+
return await this.retriever.vectorCandidates(queryEmbedding, k, includePaths);
|
|
2350
|
+
}
|
|
2351
|
+
async lexicalCandidates(queryText, k, includePaths) {
|
|
2352
|
+
return await this.retriever.lexicalCandidates(queryText, k, includePaths);
|
|
2353
|
+
}
|
|
2354
|
+
getChunkRecord(id) {
|
|
2355
|
+
return this.retriever.getChunkRecord(id);
|
|
2356
|
+
}
|
|
2357
|
+
getChunkMeta(id) {
|
|
2358
|
+
return this.retriever.getChunkMeta(id);
|
|
2359
|
+
}
|
|
2360
|
+
getChunkText(id) {
|
|
2361
|
+
return this.retriever.getChunkText(id);
|
|
2362
|
+
}
|
|
2363
|
+
getChunkPreview(id) {
|
|
2364
|
+
return this.retriever.getChunkPreview(id);
|
|
2365
|
+
}
|
|
2366
|
+
async getRepresentativeChunkIdForFile(posixPath, preferSynopsis = true) {
|
|
2367
|
+
await this.openForCurrentHead();
|
|
2368
|
+
if (!this.store) return null;
|
|
2369
|
+
const syn = preferSynopsis ? this.store.listChunksForFile(posixPath, "synopsis")[0] : void 0;
|
|
2370
|
+
if (syn?.id) return syn.id;
|
|
2371
|
+
const any = this.store.listChunksForFile(posixPath).find((c) => c.kind !== "synopsis");
|
|
2372
|
+
return any?.id ?? null;
|
|
2373
|
+
}
|
|
2374
|
+
async expandContext(chunkId, opts) {
|
|
2375
|
+
return await this.retriever.expandContext(chunkId, opts);
|
|
2376
|
+
}
|
|
2377
|
+
/**
 * Staged symbol-graph expansion: compute expensive cross-file edges only for the specified files.
 * Safe to call repeatedly; provider caching should avoid redundant work.
 * Requires an open store, a workspace store, and a provider exposing
 * expandDocumentEdges; otherwise it is a no-op.
 */
async warmSymbolGraphEdges(posixPaths, opts) {
  await this.openForCurrentHead();
  if (!this.fileIndexer) return;
  if (!this.store) return;
  if (!this.workspaceStore) return;
  const provider = this.symbolGraphProvider;
  if (!provider?.expandDocumentEdges) return;
  // Cap work per call; callers can raise via opts.maxFiles.
  const maxFiles = opts?.maxFiles ?? 6;
  const uniq3 = Array.from(new Set(posixPaths)).slice(0, maxFiles);
  for (const p of uniq3) {
    // Cooperative cancellation between files.
    if (opts?.signal?.aborted) return;
    const abs = path12.join(this.repoRoot, p.split("/").join(path12.sep));
    let text = "";
    try {
      text = fs9.readFileSync(abs, "utf8");
    } catch {
      // Unreadable/missing file: skip rather than abort the whole batch.
      continue;
    }
    const contentHash = this.store.getFileHash(p) ?? void 0;
    const lang = languageFromPath(p);
    // Provider errors degrade to "no edges" for this file.
    const edges = await provider.expandDocumentEdges({ repoRoot: this.repoRoot, path: p, language: lang, text, contentHash }, { signal: opts?.signal }).catch(() => []);
    // Normalise to a plain-record shape; toPath is coerced to a string so
    // downstream SQL bindings never see undefined.
    const normalized = (Array.isArray(edges) ? edges : []).map((e) => ({
      fromId: e.fromId,
      toId: e.toId,
      kind: e.kind,
      toPath: String(e.toPath ?? "")
    }));
    this.workspaceStore.replaceSymbolEdgesFromFile(this.repoId, p, normalized);
    // Optional graph backend mirror; best-effort.
    await this.graphStore?.replaceOutgoingSymbolEdgesFromFile?.({
      repoId: this.repoId,
      fromPath: p,
      edges: normalized.map((e) => ({ fromId: e.fromId, toId: e.toId, kind: e.kind, toPath: e.toPath }))
    }).catch(() => void 0);
  }
}
|
|
2416
|
+
async watch() {
  // Start a filesystem watcher that keeps the index in sync with edits.
  await this.openForCurrentHead();
  this.emitProgress({ type: "repo/watch/start", repoRoot: this.repoRoot });
  // Cheap path-level ignore for the watcher itself (VCS internals, package
  // trees, caches); finer-grained filtering happens in the indexer.
  const ignored = (p) => {
    const rel = path12.relative(this.repoRoot, p);
    if (!rel) return false;
    const posix = rel.split(path12.sep).join("/");
    if (posix.startsWith(".git/")) return true;
    if (posix.includes("node_modules/")) return true;
    if (posix.includes("/.cache/")) return true;
    return false;
  };
  this.watcher = new RepoWatcher(
    this.repoRoot,
    this.config.watch.debounceMs,
    ignored,
    // Watcher callbacks are fire-and-forget; index/delete failures are
    // swallowed so one bad event cannot kill the watch loop.
    () => void this.indexAll().catch(() => void 0),
    (p) => void this.indexFile(p).catch(() => void 0),
    (p) => void this.indexFile(p).catch(() => void 0),
    (p) => void this.deleteFile(p).catch(() => void 0),
    (event, p) => this.emitProgress({ type: "repo/watch/event", repoRoot: this.repoRoot, event, path: p })
  );
  await this.watcher.start();
}
|
|
2440
|
+
async closeAsync() {
  // Orderly shutdown: flush pending vector writes first (best effort) so the
  // on-disk index is not left behind the store, then close everything.
  if (this.vector.get() && this.store) {
    try {
      await this.vector.flushNow();
    } catch {
    }
  }
  await this.vector.close();
  await this.ann.close().catch(() => void 0);
  await this.watcher?.close().catch(() => void 0);
  this.watcher = null;
  this.store?.close();
  this.store = null;
  this.fileIndexer = null;
  this.embeddingCache.close();
  this.retriever.clearCache();
}
|
|
2457
|
+
close() {
  // Fire-and-forget shutdown; callers that need completion (or error
  // reporting) should await closeAsync() directly.
  void this.closeAsync();
}
|
|
2460
|
+
};
|
|
2461
|
+
|
|
2462
|
+
// src/profiles.ts
|
|
2463
|
+
// Built-in retrieval profiles. Each profile fixes the result count (k), the
// score-mixing weights (vector/lexical/recency must be tuned together), the
// context-expansion behaviour, and how many raw candidates to gather before
// merging. "custom" starts as a copy of "search" and is meant to be patched
// via deepMergeProfile.
var DEFAULT_PROFILES = {
  // Quick lookup: small k, no context expansion.
  search: {
    name: "search",
    k: 10,
    weights: { vector: 0.65, lexical: 0.35, recency: 0 },
    expand: { adjacentChunks: 0, followImports: 0, includeFileSynopsis: false },
    candidates: { vectorK: 25, lexicalK: 25, maxMergedCandidates: 60 }
  },
  // Refactoring: follow imports two hops, include file synopses.
  refactor: {
    name: "refactor",
    k: 15,
    weights: { vector: 0.55, lexical: 0.35, recency: 0.1 },
    expand: { adjacentChunks: 1, followImports: 2, includeFileSynopsis: true },
    candidates: { vectorK: 60, lexicalK: 40, maxMergedCandidates: 140 }
  },
  // Code review: recency matters more; broader candidate pool.
  review: {
    name: "review",
    k: 20,
    weights: { vector: 0.45, lexical: 0.35, recency: 0.2 },
    expand: { adjacentChunks: 1, followImports: 1, includeFileSynopsis: true },
    candidates: { vectorK: 80, lexicalK: 60, maxMergedCandidates: 180 }
  },
  // Architecture questions: vector-heavy, deep import following.
  architecture: {
    name: "architecture",
    k: 20,
    weights: { vector: 0.7, lexical: 0.2, recency: 0.1 },
    expand: { adjacentChunks: 0, followImports: 3, includeFileSynopsis: true },
    candidates: { vectorK: 120, lexicalK: 40, maxMergedCandidates: 220 }
  },
  // Root-cause analysis: widest net, strongest recency bias.
  rca: {
    name: "rca",
    k: 25,
    weights: { vector: 0.5, lexical: 0.25, recency: 0.25 },
    expand: { adjacentChunks: 2, followImports: 1, includeFileSynopsis: true },
    candidates: { vectorK: 140, lexicalK: 80, maxMergedCandidates: 260 }
  },
  // User-overridable base; identical to "search" until patched.
  custom: {
    name: "custom",
    k: 10,
    weights: { vector: 0.65, lexical: 0.35, recency: 0 },
    expand: { adjacentChunks: 0, followImports: 0, includeFileSynopsis: false },
    candidates: { vectorK: 25, lexicalK: 25, maxMergedCandidates: 60 }
  }
};
|
|
2507
|
+
function deepMergeProfile(base, patch) {
  // Overlay a partial profile patch onto a base profile. Top-level scalars are
  // replaced; the three nested option groups are merged field-by-field so a
  // patch may override a single weight without restating the others.
  // A missing patch returns the base object itself (no copy is made).
  if (!patch) return base;
  const weights = { ...base.weights, ...patch.weights ?? {} };
  const expand = { ...base.expand, ...patch.expand ?? {} };
  const candidates = { ...base.candidates, ...patch.candidates ?? {} };
  return { ...base, ...patch, weights, expand, candidates };
}
|
|
2518
|
+
|
|
2519
|
+
// src/indexer/repoDiscovery.ts
|
|
2520
|
+
import fs10 from "fs";
|
|
2521
|
+
import path13 from "path";
|
|
2522
|
+
function toPosix(p) {
  // Normalise a platform path to forward slashes (no-op on POSIX systems).
  return p.replaceAll(path13.sep, "/");
}
|
|
2525
|
+
function escapeRegExp(text) {
  // Backslash-escape every RegExp metacharacter so `text` matches literally.
  return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
|
|
2528
|
+
function globToRegExp(glob) {
  // Compile a minimal glob into an anchored RegExp: "**" spans any number of
  // path segments, "*" matches within a single segment; all else is literal.
  const segments = toPosix(glob).split("/").map((segment) =>
    segment === "**" ? "(?:.*)" : escapeRegExp(segment).replace(/\\\*/g, "[^/]*")
  );
  return new RegExp(`^${segments.join("\\/")}$`);
}
|
|
2537
|
+
function globToSearchRegExp(glob) {
  // Like globToRegExp but unanchored and with a leading "./" or "**/" prefix
  // stripped, so the pattern can match anywhere inside a relative path.
  let normalized = toPosix(glob);
  if (normalized.startsWith("./")) normalized = normalized.slice(2);
  if (normalized.startsWith("**/")) normalized = normalized.slice(3);
  const segments = normalized.split("/").map((segment) =>
    segment === "**" ? "(?:.*)" : escapeRegExp(segment).replace(/\\\*/g, "[^/]*")
  );
  return new RegExp(segments.join("\\/"));
}
|
|
2549
|
+
function excludedByAny(globs, relPosix) {
  // True when any exclude glob matches the path. A trailing slash is appended
  // so directory-style patterns like "**/dist/**" match the directory itself.
  if (!globs?.length) return false;
  const candidate = relPosix.endsWith("/") ? relPosix : `${relPosix}/`;
  return globs.some((g) => globToSearchRegExp(g).test(candidate));
}
|
|
2557
|
+
function defaultExcludes() {
  // Directories never worth scanning for nested git repositories: VCS
  // internals, dependency/vendor trees and common build-output folders.
  // A fresh array is returned on every call so callers may mutate freely.
  const patterns = [
    "**/.git/**",
    "**/node_modules/**",
    "**/dist/**",
    "**/build/**",
    "**/.cache/**",
    "**/.next/**",
    "**/target/**",
    "**/.venv/**",
    "**/vendor/**"
  ];
  return patterns;
}
|
|
2570
|
+
function isGitRoot(dir) {
  // True when `dir` is the root of a git checkout: it contains a `.git`
  // entry, either a directory (normal clone) or a regular file (worktree /
  // submodule gitlink).
  const gitPath = path13.join(dir, ".git");
  // Single statSync with throwIfNoEntry replaces the previous existsSync +
  // statSync pair, which could throw if `.git` was removed between the two
  // calls (TOCTOU). Non-ENOENT errors (e.g. EACCES) are treated as "not a
  // repo", matching what existsSync would have reported.
  let st;
  try {
    st = fs10.statSync(gitPath, { throwIfNoEntry: false });
  } catch {
    return false;
  }
  if (!st) return false;
  return st.isDirectory() || st.isFile();
}
|
|
2576
|
+
function parseGitmodules(repoRoot) {
  // Extract submodule checkout paths (the `path = …` entries) from the repo's
  // .gitmodules file, resolved to absolute paths. A missing or unreadable
  // file yields an empty list.
  const gmPath = path13.join(repoRoot, ".gitmodules");
  if (!fs10.existsSync(gmPath)) return [];
  let raw;
  try {
    raw = fs10.readFileSync(gmPath, "utf8");
  } catch {
    return [];
  }
  const result = [];
  for (const line of raw.split(/\r?\n/)) {
    const match = /^\s*path\s*=\s*(.+)\s*$/.exec(line);
    const rel = match?.[1].trim();
    if (rel) result.push(path13.resolve(repoRoot, rel));
  }
  return result;
}
|
|
2595
|
+
function discoverGitRepos(workspaceRoot, opts = {}) {
  // Depth-limited traversal of the workspace looking for git checkouts.
  // Returns absolute repo roots sorted shallowest-first, then alphabetically.
  const root = path13.resolve(workspaceRoot);
  // NOTE(review): `include` is computed but never consulted below — looks
  // like dead code or an unfinished feature; confirm before relying on
  // opts.include having any effect.
  const include = opts.include && opts.include.length > 0 ? opts.include : ["**"];
  const exclude = [...defaultExcludes(), ...opts.exclude ?? []];
  const maxDepth = opts.maxDepth ?? 6;
  // stopAtRepoRoot: do not descend into a found repo looking for nested ones.
  const stopAtRepoRoot = opts.stopAtRepoRoot ?? true;
  const includeSubmodules = opts.includeSubmodules ?? true;
  const repos = /* @__PURE__ */ new Set();
  // Explicit stack = iterative depth-first walk; avoids recursion limits.
  const stack = [{ dir: root, depth: 0 }];
  while (stack.length) {
    const { dir, depth } = stack.pop();
    if (depth > maxDepth) continue;
    const rel = path13.relative(root, dir) || ".";
    const relPosix = toPosix(rel);
    // The workspace root itself ("." → "") is never excluded by globs.
    if (excludedByAny(exclude, relPosix === "." ? "" : relPosix)) continue;
    if (isGitRoot(dir)) {
      repos.add(dir);
      if (includeSubmodules) {
        // Submodules come from .gitmodules, not directory scanning, so they
        // are found even when stopAtRepoRoot prevents descending.
        for (const sub of parseGitmodules(dir)) {
          if (fs10.existsSync(sub) && isGitRoot(sub)) repos.add(sub);
        }
      }
      if (stopAtRepoRoot) continue;
    }
    let entries;
    try {
      entries = fs10.readdirSync(dir, { withFileTypes: true });
    } catch {
      // Unreadable directory (permissions, races): skip silently.
      continue;
    }
    for (const e of entries) {
      if (!e.isDirectory()) continue;
      if (e.name === ".git") continue;
      // Hidden directories are skipped wholesale.
      if (e.name.startsWith(".")) continue;
      const child = path13.join(dir, e.name);
      const childRel = path13.relative(root, child) || ".";
      const childRelPosix = toPosix(childRel);
      if (excludedByAny(exclude, childRelPosix === "." ? "" : childRelPosix)) continue;
      stack.push({ dir: child, depth: depth + 1 });
    }
  }
  const arr = Array.from(repos);
  // Stable ordering: fewer path segments (shallower) first, ties by locale.
  arr.sort((a, b) => {
    const da = path13.relative(root, a).split(path13.sep).length;
    const db = path13.relative(root, b).split(path13.sep).length;
    if (da !== db) return da - db;
    return a.localeCompare(b);
  });
  return arr;
}
|
|
2645
|
+
function pickRepoOverride(base, repoRoot, workspaceRoot, overrides) {
  // Apply every override whose `match` glob matches this repo's workspace-
  // relative path, in declaration order, onto a deep clone of the base
  // config. The base object itself is never mutated; it is returned as-is
  // when nothing matches.
  if (!overrides?.length) return base;
  const wsAbs = path13.resolve(workspaceRoot);
  const repoAbs = path13.resolve(repoRoot);
  const rel = toPosix(path13.relative(wsAbs, repoAbs) || ".");
  const applicable = overrides.filter((o) => globToRegExp(o.match).test(rel));
  if (applicable.length === 0) return base;
  const merged = deepClone(base);
  for (const override of applicable) {
    mergeIndexerConfig(merged, override.config);
  }
  return merged;
}
|
|
2656
|
+
function isObj(x) {
  // Plain-object check: a non-null, non-array object value.
  if (x === null || x === void 0) return false;
  return typeof x === "object" && !Array.isArray(x);
}
|
|
2659
|
+
function deepClone(value) {
  // Structural clone for config-style data. Primitives pass through; RegExps
  // are shared by reference (immutable for this use); arrays and plain
  // objects are copied recursively. Fix: Date, Map and Set previously
  // degraded to empty plain objects (their contents are not enumerable own
  // properties) — they now clone to equivalent instances.
  // NOTE: cyclic structures are not supported (unchanged from before).
  if (value === null || value === void 0) return value;
  if (value instanceof RegExp) return value;
  if (value instanceof Date) return new Date(value.getTime());
  if (Array.isArray(value)) return value.map((v) => deepClone(v));
  if (value instanceof Map) {
    const out = new Map();
    for (const [k, v] of value) out.set(deepClone(k), deepClone(v));
    return out;
  }
  if (value instanceof Set) {
    const out = new Set();
    for (const v of value) out.add(deepClone(v));
    return out;
  }
  if (typeof value === "object") {
    const out = {};
    for (const [k, v] of Object.entries(value)) out[k] = deepClone(v);
    return out;
  }
  return value;
}
|
|
2670
|
+
function mergeIndexerConfig(target, patch) {
  // Recursive in-place merge: plain objects merge key-by-key, everything else
  // (arrays, scalars) replaces the target value wholesale. `undefined`
  // entries in the patch are skipped so they cannot erase configured values.
  const entries = Object.entries(patch ?? {});
  for (const [key, value] of entries) {
    if (value === void 0) continue;
    if (isObj(value) && isObj(target[key])) {
      mergeIndexerConfig(target[key], value);
    } else {
      target[key] = value;
    }
  }
}
|
|
2680
|
+
|
|
2681
|
+
// src/store/workspaceStore.ts
|
|
2682
|
+
import fs12 from "fs";
|
|
2683
|
+
import path15 from "path";
|
|
2684
|
+
|
|
2685
|
+
// src/store/workspace/unitOfWork.ts
|
|
2686
|
+
var UnitOfWork = class {
  // Wraps the DB adapter's `transaction` helper: run(fn) executes fn inside a
  // single transaction and returns fn's result.
  constructor(db) {
    this.db = db;
  }
  run(fn) {
    return this.db.transaction(fn)();
  }
};
|
|
2695
|
+
|
|
2696
|
+
// src/store/workspace/repositories/meta.ts
|
|
2697
|
+
var MetaRepository = class {
  // Key/value access to the `meta` table, plus typed helpers for the numeric
  // schema-version entry stored under "schema_version".
  constructor(db) {
    this.db = db;
  }
  set(k, v) {
    this.db.prepare(`INSERT OR REPLACE INTO meta(k, v) VALUES (?, ?)`).run(k, v);
  }
  get(k) {
    // Missing keys read back as null rather than undefined.
    const row = this.db.prepare(`SELECT v FROM meta WHERE k = ?`).get(k);
    return row ? row.v ?? null : null;
  }
  getSchemaVersion() {
    // Absent, empty or non-numeric values all map to version 0.
    const raw = this.get("schema_version");
    if (!raw) return 0;
    const parsed = Number(raw);
    return Number.isFinite(parsed) ? parsed : 0;
  }
  setSchemaVersion(v) {
    this.set("schema_version", String(v));
  }
};
|
|
2717
|
+
|
|
2718
|
+
// src/store/workspace/repositories/repoHeads.ts
|
|
2719
|
+
var RepoHeadsRepository = class {
  // Tracks the last-seen HEAD (commit + branch) per repository id in `repos`.
  constructor(db) {
    this.db = db;
  }
  // Returns the recorded head commit for repoId, or null when unknown.
  getHeadCommit(repoId) {
    const row = this.db.prepare(`SELECT head_commit FROM repos WHERE repo_id = ?`).get(repoId);
    return row?.head_commit ?? null;
  }
  // Insert-or-update the head row; updated_at is a wall-clock ms timestamp.
  upsertHead(repoId, repoRoot, commit, branch) {
    this.db.prepare(`
      INSERT INTO repos(repo_id, repo_root, head_commit, head_branch, updated_at)
      VALUES (?, ?, ?, ?, ?)
      ON CONFLICT(repo_id) DO UPDATE SET
        repo_root = excluded.repo_root,
        head_commit = excluded.head_commit,
        head_branch = excluded.head_branch,
        updated_at = excluded.updated_at
    `).run(repoId, repoRoot, commit, branch, Date.now());
  }
};
|
|
2739
|
+
|
|
2740
|
+
// src/store/workspace/repositories/files.ts
|
|
2741
|
+
var FilesRepository = class {
  // Per-file bookkeeping rows (hash/mtime/language/size) keyed by
  // (repo_id, path); used for change detection.
  constructor(db) {
    this.db = db;
  }
  // Insert-or-update a file row; all tracked attributes are refreshed.
  upsert(repoId, posixPath, hash, mtime, language, size) {
    this.db.prepare(`
      INSERT INTO files(repo_id, path, hash, mtime, language, size)
      VALUES (?, ?, ?, ?, ?, ?)
      ON CONFLICT(repo_id, path) DO UPDATE SET
        hash = excluded.hash,
        mtime = excluded.mtime,
        language = excluded.language,
        size = excluded.size
    `).run(repoId, posixPath, hash, mtime, language, size);
  }
  delete(repoId, posixPath) {
    this.db.prepare(`DELETE FROM files WHERE repo_id = ? AND path = ?`).run(repoId, posixPath);
  }
  deleteAllForRepo(repoId) {
    this.db.prepare(`DELETE FROM files WHERE repo_id = ?`).run(repoId);
  }
};
|
|
2763
|
+
|
|
2764
|
+
// src/store/workspace/repositories/chunks.ts
|
|
2765
|
+
var ChunksRepository = class {
  // Chunk rows (text + embedding blob) plus their mirror entries in the FTS
  // strategy; every write keeps the two in lockstep.
  constructor(db, fts) {
    this.db = db;
    this.fts = fts;
  }
  deleteFile(repoId, posixPath) {
    this.db.prepare(`DELETE FROM chunks WHERE repo_id = ? AND path = ?`).run(repoId, posixPath);
    this.fts.clearFile(repoId, posixPath);
  }
  deleteAllForRepo(repoId) {
    this.db.prepare(`DELETE FROM chunks WHERE repo_id = ?`).run(repoId);
    this.fts.clearRepo(repoId);
  }
  // Replace semantics: clear all existing chunks for the file, then insert
  // the new set, mirroring each into FTS with its search text (ftsText).
  replaceForFile(repoId, repoRoot, posixPath, rows) {
    this.deleteFile(repoId, posixPath);
    const ins = this.db.prepare(`
      INSERT INTO chunks(id, repo_id, repo_root, path, language, kind, start_line, end_line, content_hash, tokens, file_mtime, text, embedding)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `);
    for (const r of rows) {
      // View over the embedding's bytes without copying — assumes r.embedding
      // is a typed array (buffer/byteOffset/byteLength); TODO confirm.
      const buf = Buffer.from(r.embedding.buffer, r.embedding.byteOffset, r.embedding.byteLength);
      ins.run(r.id, repoId, repoRoot, posixPath, r.language, r.kind, r.startLine, r.endLine, r.contentHash, r.tokens, r.fileMtime, r.text, buf);
      this.fts.insertChunk({ id: r.id, repoId, repoRoot, path: posixPath, language: r.language, kind: r.kind, text: r.ftsText });
    }
  }
  getById(id) {
    const row = this.db.prepare(`SELECT * FROM chunks WHERE id = ?`).get(id);
    return row ?? null;
  }
  // Lists chunk spans for a file. Without a kind filter, rows are ordered by
  // kind DESC then start_line — presumably so "synopsis" rows sort ahead of
  // other kinds lexicographically; confirm against consumers.
  listForFile(repoId, posixPath, kind) {
    if (kind) {
      return this.db.prepare(`SELECT id, start_line, end_line, kind FROM chunks WHERE repo_id = ? AND path = ? AND kind = ? ORDER BY start_line ASC`).all(repoId, posixPath, kind);
    }
    return this.db.prepare(`SELECT id, start_line, end_line, kind FROM chunks WHERE repo_id = ? AND path = ? ORDER BY kind DESC, start_line ASC`).all(repoId, posixPath);
  }
};
|
|
2801
|
+
|
|
2802
|
+
// src/store/workspace/repositories/edges.ts
|
|
2803
|
+
var EdgesRepository = class {
  // File-level edges (e.g. import relations) keyed by
  // (repo_id, from_path, kind), each with a string value.
  constructor(db) {
    this.db = db;
  }
  // Replace semantics: drop the previous edge set for the key, then insert.
  set(repoId, fromPath, kind, values) {
    this.db.prepare(`DELETE FROM edges WHERE repo_id = ? AND from_path = ? AND kind = ?`).run(repoId, fromPath, kind);
    const insert = this.db.prepare(`INSERT OR REPLACE INTO edges(repo_id, from_path, kind, value) VALUES (?, ?, ?, ?)`);
    for (const value of values) {
      insert.run(repoId, fromPath, kind, value);
    }
  }
  list(repoId, fromPath, kind) {
    const rows = this.db.prepare(`SELECT value FROM edges WHERE repo_id = ? AND from_path = ? AND kind = ?`).all(repoId, fromPath, kind);
    return rows.map((row) => row.value);
  }
  deleteFile(repoId, posixPath) {
    this.db.prepare(`DELETE FROM edges WHERE repo_id = ? AND from_path = ?`).run(repoId, posixPath);
  }
  deleteAllForRepo(repoId) {
    this.db.prepare(`DELETE FROM edges WHERE repo_id = ?`).run(repoId);
  }
};
|
|
2823
|
+
|
|
2824
|
+
// src/store/workspace/repositories/symbols.ts
|
|
2825
|
+
var SymbolsRepository = class {
  // Symbol declarations (`symbols`) and symbol-to-symbol edges
  // (`symbol_edges`), both scoped per file for replace-style updates.
  constructor(db) {
    this.db = db;
  }
  // Replace both the symbols declared in a file and the edges originating
  // from it. Edges pointing INTO the file from elsewhere are left untouched
  // here (contrast deleteFile below, which removes both directions).
  replaceForFile(repoId, repoRoot, posixPath, rows, edges) {
    this.db.prepare(`DELETE FROM symbols WHERE repo_id = ? AND path = ?`).run(repoId, posixPath);
    this.db.prepare(`DELETE FROM symbol_edges WHERE repo_id = ? AND from_path = ?`).run(repoId, posixPath);
    const ins = this.db.prepare(`
      INSERT OR REPLACE INTO symbols(
        id, repo_id, repo_root, path, language, name, kind,
        start_line, start_char, end_line, end_char, container_name, detail
      )
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `);
    for (const r of rows) {
      ins.run(
        r.id,
        repoId,
        repoRoot,
        posixPath,
        r.language,
        r.name,
        r.kind,
        r.startLine,
        r.startCharacter,
        r.endLine,
        r.endCharacter,
        // Optional fields are normalised to empty strings for NOT NULL columns.
        r.containerName ?? "",
        r.detail ?? ""
      );
    }
    const insE = this.db.prepare(`
      INSERT OR REPLACE INTO symbol_edges(repo_id, from_id, to_id, kind, from_path, to_path)
      VALUES (?, ?, ?, ?, ?, ?)
    `);
    for (const e of edges) {
      insE.run(repoId, e.fromId, e.toId, e.kind, e.fromPath, e.toPath);
    }
  }
  // Replace only the outgoing edges of a file (used by the staged
  // symbol-graph warm-up); declared symbols are not touched.
  replaceEdgesFromFile(repoId, fromPath, edges) {
    this.db.prepare(`DELETE FROM symbol_edges WHERE repo_id = ? AND from_path = ?`).run(repoId, fromPath);
    const insE = this.db.prepare(`
      INSERT OR REPLACE INTO symbol_edges(repo_id, from_id, to_id, kind, from_path, to_path)
      VALUES (?, ?, ?, ?, ?, ?)
    `);
    for (const e of edges) {
      insE.run(repoId, e.fromId, e.toId, e.kind, fromPath, e.toPath);
    }
  }
  // Full removal for a deleted file: its symbols plus edges in BOTH
  // directions (incoming edges would otherwise dangle).
  deleteFile(repoId, posixPath) {
    this.db.prepare(`DELETE FROM symbols WHERE repo_id = ? AND path = ?`).run(repoId, posixPath);
    this.db.prepare(`DELETE FROM symbol_edges WHERE repo_id = ? AND (from_path = ? OR to_path = ?)`).run(repoId, posixPath, posixPath);
  }
  deleteAllForRepo(repoId) {
    this.db.prepare(`DELETE FROM symbols WHERE repo_id = ?`).run(repoId);
    this.db.prepare(`DELETE FROM symbol_edges WHERE repo_id = ?`).run(repoId);
  }
};
|
|
2883
|
+
|
|
2884
|
+
// src/store/workspace/repositories/repoLinks.ts
|
|
2885
|
+
var RepoLinksRepository = class {
  // Cross-repository dependency links. Writes use replace-from-repo
  // semantics: wipe everything originating from a repo, then re-insert.
  constructor(db) {
    this.db = db;
  }
  replaceFromRepo(fromRepoId, links) {
    this.db.prepare(`DELETE FROM repo_links WHERE from_repo_id = ?`).run(fromRepoId);
    const insert = this.db.prepare(`INSERT OR REPLACE INTO repo_links(from_repo_id, to_repo_id, kind, detail) VALUES (?, ?, ?, ?)`);
    for (const link of links) {
      // detail is optional on input but stored as a NOT NULL string.
      insert.run(fromRepoId, link.toRepoId, link.kind, link.detail ?? "");
    }
  }
  listFromRepo(fromRepoId) {
    // Deterministic ordering: kind first, then target repo id.
    return this.db.prepare(`SELECT to_repo_id AS toRepoId, kind, detail FROM repo_links WHERE from_repo_id = ? ORDER BY kind ASC, to_repo_id ASC`).all(fromRepoId);
  }
};
|
|
2901
|
+
|
|
2902
|
+
// src/store/workspace/factory.ts
|
|
2903
|
+
import fs11 from "fs";
|
|
2904
|
+
import path14 from "path";
|
|
2905
|
+
|
|
2906
|
+
// src/store/workspace/db.ts
|
|
2907
|
+
import Database3 from "better-sqlite3";
|
|
2908
|
+
var BetterSqlite3Adapter = class {
  // Thin adapter exposing the minimal DB surface the workspace store needs
  // (pragma/exec/prepare/transaction/close) over a better-sqlite3 handle.
  db;
  constructor(dbPath) {
    this.db = new Database3(dbPath);
  }
  pragma(sql) {
    this.db.pragma(sql);
  }
  exec(sql) {
    this.db.exec(sql);
  }
  prepare(sql) {
    return this.db.prepare(sql);
  }
  transaction(fn) {
    return this.db.transaction(fn);
  }
  close() {
    this.db.close();
  }
};
|
|
2929
|
+
|
|
2930
|
+
// src/store/workspace/fts.ts
|
|
2931
|
+
var NoopFtsStrategy = class {
  // Full-text search disabled: every mutation is a no-op and every search
  // returns an empty result set. `enabled` lets callers skip FTS entirely.
  enabled = false;
  init(_db) {}
  clearRepo(_repoId) {}
  clearFile(_repoId, _posixPath) {}
  insertChunk(_args) {}
  search(_ftq, _limit, _repoIds) {
    return [];
  }
};
|
|
2945
|
+
var Fts5Strategy = class {
  // SQLite FTS5-backed full-text search over chunk text. repo/path/kind
  // columns ride along; id/repo_id/repo_root are UNINDEXED (stored, not
  // tokenised).
  constructor(db) {
    this.db = db;
  }
  enabled = true;
  // Lazily-prepared insert statement, cached after first insertChunk call.
  ins = null;
  init(db) {
    db.exec(`
      CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
        id UNINDEXED,
        repo_id UNINDEXED,
        repo_root UNINDEXED,
        path,
        language,
        kind,
        text,
        tokenize='unicode61'
      );
    `);
  }
  clearRepo(repoId) {
    this.db.prepare(`DELETE FROM chunks_fts WHERE repo_id = ?`).run(repoId);
  }
  clearFile(repoId, posixPath) {
    this.db.prepare(`DELETE FROM chunks_fts WHERE repo_id = ? AND path = ?`).run(repoId, posixPath);
  }
  insertChunk(args) {
    if (!this.ins) {
      this.ins = this.db.prepare(`INSERT INTO chunks_fts(id, repo_id, repo_root, path, language, kind, text) VALUES (?, ?, ?, ?, ?, ?, ?)`);
    }
    this.ins.run(args.id, args.repoId, args.repoRoot, args.path, args.language, args.kind, args.text);
  }
  // MATCH query returning chunk ids with their bm25 rank. SQLite's bm25() is
  // lower-is-better, hence ORDER BY ... ASC puts the best matches first.
  // Optional repoIds narrows results to specific repositories.
  search(ftq, limit, repoIds) {
    if (repoIds && repoIds.length > 0) {
      const placeholders = repoIds.map(() => "?").join(", ");
      const sql2 = `
        SELECT id, bm25(chunks_fts) AS bm25
        FROM chunks_fts
        WHERE chunks_fts MATCH ? AND repo_id IN (${placeholders})
        ORDER BY bm25 ASC
        LIMIT ?
      `;
      return this.db.prepare(sql2).all(ftq, ...repoIds, limit);
    }
    const sql = `
      SELECT id, bm25(chunks_fts) AS bm25
      FROM chunks_fts
      WHERE chunks_fts MATCH ?
      ORDER BY bm25 ASC
      LIMIT ?
    `;
    return this.db.prepare(sql).all(ftq, limit);
  }
};
|
|
2999
|
+
|
|
3000
|
+
// src/store/workspace/migrations.ts
|
|
3001
|
+
// Highest schema version this build of the workspace store understands.
var WORKSPACE_SCHEMA_LATEST = 2;
// Ordered list of incremental migrations. Each entry upgrades the database
// from any version below `version` up to `version` via its `up(db)` step.
var WORKSPACE_MIGRATIONS = [
  {
    version: 2,
    name: "repo_links",
    up: (db) => {
      db.exec(`
        CREATE TABLE IF NOT EXISTS repo_links (
          from_repo_id TEXT NOT NULL,
          to_repo_id TEXT NOT NULL,
          kind TEXT NOT NULL,
          detail TEXT NOT NULL DEFAULT '',
          PRIMARY KEY(from_repo_id, to_repo_id, kind)
        );
        CREATE INDEX IF NOT EXISTS idx_repo_links_from ON repo_links(from_repo_id);
        CREATE INDEX IF NOT EXISTS idx_repo_links_to ON repo_links(to_repo_id);
      `);
    }
  }
];
// Applies pending workspace-schema migrations and keeps the recorded version
// in sync via the meta repository.
var WorkspaceMigrator = class {
  constructor(db, meta) {
    this.db = db;
    this.meta = meta;
  }
  // Currently recorded schema version.
  get version() {
    return this.meta.getSchemaVersion();
  }
  // Brings the database up to WORKSPACE_SCHEMA_LATEST, recording progress
  // after each individual migration so a crash leaves a consistent marker.
  migrateToLatest() {
    let current = this.meta.getSchemaVersion();
    if (current === 0) {
      // A fresh database gets stamped with the base schema version first.
      this.meta.setSchemaVersion(1);
      current = 1;
    }
    const ordered = WORKSPACE_MIGRATIONS.slice().sort((x, y) => x.version - y.version);
    for (const migration of ordered) {
      if (migration.version <= current) continue;
      migration.up(this.db);
      this.meta.setSchemaVersion(migration.version);
      current = migration.version;
    }
    if (current !== WORKSPACE_SCHEMA_LATEST) {
      this.meta.setSchemaVersion(WORKSPACE_SCHEMA_LATEST);
    }
  }
};
|
|
3046
|
+
|
|
3047
|
+
// src/store/workspace/factory.ts
|
|
3048
|
+
// Opens (creating parent directories as needed) the workspace SQLite database
// and switches it to WAL journaling for concurrent reader friendliness.
function createWorkspaceDb(dbPath) {
  const parentDir = path14.dirname(dbPath);
  fs11.mkdirSync(parentDir, { recursive: true });
  const handle = new BetterSqlite3Adapter(dbPath);
  handle.pragma("journal_mode = WAL");
  return handle;
}
|
|
3054
|
+
// Creates the base (version 1) workspace schema, idempotently:
//   meta          - key/value store (schema version, fts flag, ...)
//   repos         - one row per indexed repo with its current head
//   files         - per-file hash/mtime/language/size for change detection
//   chunks        - chunk text + embedding blob, addressed by stable id
//   edges         - per-file import/export style edges to external values
//   symbols       - per-file symbol table with ranges
//   symbol_edges  - symbol-to-symbol relations with their file paths
// All DDL uses IF NOT EXISTS so this is safe to run on every open; later
// additions (e.g. repo_links) are handled by WorkspaceMigrator.
function createWorkspaceBaseSchema(db) {
  db.exec(`
    CREATE TABLE IF NOT EXISTS meta (
      k TEXT PRIMARY KEY,
      v TEXT NOT NULL
    );

    CREATE TABLE IF NOT EXISTS repos (
      repo_id TEXT PRIMARY KEY,
      repo_root TEXT NOT NULL,
      head_commit TEXT NOT NULL,
      head_branch TEXT NOT NULL,
      updated_at INTEGER NOT NULL
    );

    CREATE UNIQUE INDEX IF NOT EXISTS idx_repos_root ON repos(repo_root);

    CREATE TABLE IF NOT EXISTS files (
      repo_id TEXT NOT NULL,
      path TEXT NOT NULL,
      hash TEXT NOT NULL,
      mtime INTEGER NOT NULL,
      language TEXT NOT NULL,
      size INTEGER NOT NULL,
      PRIMARY KEY(repo_id, path)
    );

    CREATE INDEX IF NOT EXISTS idx_files_repo ON files(repo_id);

    CREATE TABLE IF NOT EXISTS chunks (
      id TEXT PRIMARY KEY,
      repo_id TEXT NOT NULL,
      repo_root TEXT NOT NULL,
      path TEXT NOT NULL,
      language TEXT NOT NULL,
      kind TEXT NOT NULL DEFAULT 'chunk',
      start_line INTEGER NOT NULL,
      end_line INTEGER NOT NULL,
      content_hash TEXT NOT NULL,
      tokens INTEGER NOT NULL,
      file_mtime INTEGER NOT NULL,
      text TEXT NOT NULL,
      embedding BLOB NOT NULL
    );

    CREATE INDEX IF NOT EXISTS idx_chunks_repo_path ON chunks(repo_id, path);
    CREATE INDEX IF NOT EXISTS idx_chunks_kind_repo_path ON chunks(kind, repo_id, path);

    CREATE TABLE IF NOT EXISTS edges (
      repo_id TEXT NOT NULL,
      from_path TEXT NOT NULL,
      kind TEXT NOT NULL,
      value TEXT NOT NULL,
      PRIMARY KEY(repo_id, from_path, kind, value)
    );

    CREATE INDEX IF NOT EXISTS idx_edges_repo_from ON edges(repo_id, from_path);

    CREATE TABLE IF NOT EXISTS symbols (
      id TEXT PRIMARY KEY,
      repo_id TEXT NOT NULL,
      repo_root TEXT NOT NULL,
      path TEXT NOT NULL,
      language TEXT NOT NULL,
      name TEXT NOT NULL,
      kind TEXT NOT NULL,
      start_line INTEGER NOT NULL,
      start_char INTEGER NOT NULL,
      end_line INTEGER NOT NULL,
      end_char INTEGER NOT NULL,
      container_name TEXT NOT NULL DEFAULT '',
      detail TEXT NOT NULL DEFAULT ''
    );

    CREATE INDEX IF NOT EXISTS idx_symbols_repo_path ON symbols(repo_id, path);
    CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);

    CREATE TABLE IF NOT EXISTS symbol_edges (
      repo_id TEXT NOT NULL,
      from_id TEXT NOT NULL,
      to_id TEXT NOT NULL,
      kind TEXT NOT NULL,
      from_path TEXT NOT NULL,
      to_path TEXT NOT NULL,
      PRIMARY KEY(repo_id, from_id, to_id, kind)
    );

    CREATE INDEX IF NOT EXISTS idx_symbol_edges_from ON symbol_edges(repo_id, from_id);
    CREATE INDEX IF NOT EXISTS idx_symbol_edges_paths ON symbol_edges(repo_id, from_path);
  `);
}
|
|
3145
|
+
// Chooses the FTS strategy for a workspace database. Records the outcome in
// meta under "fts" ("1" = FTS5 active, "0" = disabled/unavailable) so readers
// can tell whether full-text search results exist without probing SQLite.
function createWorkspaceFts(db, meta, opts = {}) {
  const disabled = () => {
    meta.set("fts", "0");
    return new NoopFtsStrategy();
  };
  if (opts.fts === "off") {
    return disabled();
  }
  try {
    const strategy = new Fts5Strategy(db);
    strategy.init(db);
    meta.set("fts", "1");
    return strategy;
  } catch {
    // FTS5 may be missing from this SQLite build; degrade to the noop strategy.
    return disabled();
  }
}
|
|
3160
|
+
// Convenience wrapper: runs all pending workspace schema migrations.
function migrateWorkspaceDb(db, meta) {
  new WorkspaceMigrator(db, meta).migrateToLatest();
}
|
|
3164
|
+
|
|
3165
|
+
// src/store/workspaceStore.ts
|
|
3166
|
+
// Workspace-wide SQLite-backed index store. Owns the database handle, runs
// schema creation + migration on open, selects the FTS strategy, and exposes
// one repository object per table. Multi-table mutations go through the
// UnitOfWork so each public mutator is atomic.
var WorkspaceStore = class {
  constructor(dbPath, opts = {}) {
    this.dbPath = dbPath;
    this.opts = opts;
    // Order matters: open DB, then base schema, then meta, then migrations,
    // then FTS (which records its availability in meta), then repositories.
    this.db = createWorkspaceDb(dbPath);
    this.uow = new UnitOfWork(this.db);
    createWorkspaceBaseSchema(this.db);
    this.meta = new MetaRepository(this.db);
    migrateWorkspaceDb(this.db, this.meta);
    const fts = createWorkspaceFts(this.db, this.meta, opts);
    this.repoHeads = new RepoHeadsRepository(this.db);
    this.files = new FilesRepository(this.db);
    this.edges = new EdgesRepository(this.db);
    this.repoLinks = new RepoLinksRepository(this.db);
    this.chunks = new ChunksRepository(this.db, fts);
    this.symbols = new SymbolsRepository(this.db);
  }
  db;
  uow;
  meta;
  repoHeads;
  files;
  edges;
  repoLinks;
  symbols;
  chunks;
  opts;
  // Key/value metadata helpers backed by the meta table.
  setMeta(k, v) {
    this.meta.set(k, v);
  }
  getMeta(k) {
    return this.meta.get(k);
  }
  /**
   * Set repo head. If commit changes, clears prior rows for that repo_id to keep the workspace index "current".
   */
  setRepoHead(repoId, repoRoot, commit, branch) {
    const prevCommit = this.repoHeads.getHeadCommit(repoId);
    this.uow.run(() => {
      if (prevCommit && prevCommit !== commit) {
        // Commit moved: drop all derived data for the repo before recording the new head.
        this.chunks.deleteAllForRepo(repoId);
        this.edges.deleteAllForRepo(repoId);
        this.files.deleteAllForRepo(repoId);
        this.symbols.deleteAllForRepo(repoId);
      }
      this.repoHeads.upsertHead(repoId, repoRoot, commit, branch);
    });
  }
  // Atomically removes every row associated with one file.
  deleteFile(repoId, posixPath) {
    this.uow.run(() => {
      this.chunks.deleteFile(repoId, posixPath);
      this.edges.deleteFile(repoId, posixPath);
      this.files.delete(repoId, posixPath);
      this.symbols.deleteFile(repoId, posixPath);
    });
  }
  upsertFile(repoId, posixPath, hash, mtime, language, size) {
    this.files.upsert(repoId, posixPath, hash, mtime, language, size);
  }
  // Replaces the chunk rows (and their FTS entries, via ChunksRepository) for one file.
  replaceChunksForFile(repoId, repoRoot, posixPath, rows) {
    this.uow.run(() => {
      this.chunks.replaceForFile(repoId, repoRoot, posixPath, rows);
    });
  }
  setEdges(repoId, fromPath, kind, values) {
    this.uow.run(() => {
      this.edges.set(repoId, fromPath, kind, values);
    });
  }
  replaceSymbolsForFile(repoId, repoRoot, posixPath, rows, edges) {
    this.uow.run(() => {
      this.symbols.replaceForFile(repoId, repoRoot, posixPath, rows, edges);
    });
  }
  replaceSymbolEdgesFromFile(repoId, fromPath, edges) {
    this.uow.run(() => {
      this.symbols.replaceEdgesFromFile(repoId, fromPath, edges);
    });
  }
  replaceRepoLinks(fromRepoId, links) {
    this.uow.run(() => {
      this.repoLinks.replaceFromRepo(fromRepoId, links);
    });
  }
  listRepoLinks(fromRepoId) {
    return this.repoLinks.listFromRepo(fromRepoId);
  }
  listChunksForFile(repoId, posixPath, kind) {
    return this.chunks.listForFile(repoId, posixPath, kind);
  }
  listEdges(repoId, fromPath, kind) {
    return this.edges.list(repoId, fromPath, kind);
  }
  getChunkById(id) {
    return this.chunks.getById(id);
  }
  // Full-text search; returns [] when FTS is disabled (meta "fts" !== "1")
  // or when the underlying query throws (e.g. malformed MATCH syntax).
  searchFts(ftq, limit, repoIds) {
    try {
      const fts = this.getMeta("fts");
      if (fts !== "1") return [];
      return this.chunks.fts.search(ftq, limit, repoIds);
    } catch {
      return [];
    }
  }
  /**
   * If text is omitted from storage, fall back to slicing from disk.
   * The chunk boundaries are approximate; the stored row includes start/end line.
   */
  getChunkTextFallback(row) {
    // row.path is posix-style; convert separators for the local filesystem.
    const abs = path15.join(row.repo_root, row.path.split("/").join(path15.sep));
    try {
      const raw = fs12.readFileSync(abs, "utf8");
      const lines = raw.split(/\r?\n/);
      const start = Math.max(1, row.start_line);
      const end = Math.max(start, row.end_line);
      return lines.slice(start - 1, end).join("\n");
    } catch {
      // Missing/unreadable file: return empty text rather than failing the query.
      return "";
    }
  }
  close() {
    this.db.close();
  }
};
|
|
3291
|
+
|
|
3292
|
+
// src/graph/neo4j.ts
|
|
3293
|
+
import { createRequire as createRequire2 } from "module";
|
|
3294
|
+
// Opens a Neo4j session (optionally against a named database), hands it to
// `fn`, and guarantees the session is closed afterwards — even when `fn`
// throws. Failures while closing are deliberately swallowed.
async function runSession(driver, database, fn) {
  const sessionOpts = database ? { database } : undefined;
  const session = driver.session(sessionOpts);
  try {
    await fn(session);
  } finally {
    await session.close().catch(() => undefined);
  }
}
|
|
3302
|
+
// Builds a namespaced Neo4j label: sanitizes the prefix down to
// [A-Za-z0-9_] (falling back to "Petri" when nothing survives) and joins it
// to the label with an underscore.
function lp(prefix, label) {
  const cleaned = prefix.replace(/[^A-Za-z0-9_]/g, "");
  const base = cleaned === "" ? "Petri" : cleaned;
  return [base, label].join("_");
}
|
|
3306
|
+
// Neo4j-backed code graph store. All node labels are namespaced with a
// configurable prefix (default "Petri") so several tools can share one DB.
// Relationship types used: HAS_FILE, CONTAINS, FILE_EDGE {kind},
// SYMBOL_EDGE {kind}, REPO_LINK {kind}. Most write queries are best-effort
// (.catch(() => void 0)) so a partially-capable server degrades gracefully.
var Neo4jGraphStore = class {
  constructor(driver, cfg) {
    this.driver = driver;
    this.cfg = cfg;
    this.labelPrefix = cfg.labelPrefix ?? "Petri";
  }
  id = "neo4j";
  labelPrefix;
  // Highest graph-schema version this implementation knows how to reach.
  schemaVersionLatest = 2;
  // Prefixed label names interpolated into every Cypher query below.
  // lp() sanitizes the prefix, so interpolation cannot inject Cypher.
  labels() {
    return {
      Repo: lp(this.labelPrefix, "Repo"),
      File: lp(this.labelPrefix, "File"),
      Symbol: lp(this.labelPrefix, "Symbol"),
      External: lp(this.labelPrefix, "External"),
      Schema: lp(this.labelPrefix, "Schema")
    };
  }
  // Ensures the schema marker node exists (version 0 on create), then runs
  // migrations best-effort.
  async init() {
    const { Schema } = this.labels();
    await runSession(this.driver, this.cfg.database, async (s) => {
      await s.run(
        `MERGE (m:${Schema} {key:$key})
         ON CREATE SET m.version=0, m.created_at=timestamp(), m.updated_at=timestamp()
         RETURN m.version AS version`,
        { key: "schema" }
      );
    });
    await this.runMigrations().catch(() => void 0);
  }
  // Reads the schema marker's version, coercing neo4j Integer objects
  // (which expose toNumber()) or plain values; defaults to 0.
  async getSchemaVersion() {
    const { Schema } = this.labels();
    let version = 0;
    await runSession(this.driver, this.cfg.database, async (s) => {
      const res = await s.run(`MATCH (m:${Schema} {key:$key}) RETURN m.version AS version`, { key: "schema" });
      const v = res?.records?.[0]?.get?.("version");
      const n = typeof v?.toNumber === "function" ? v.toNumber() : Number(v);
      version = Number.isFinite(n) ? n : 0;
    });
    return version;
  }
  async setSchemaVersion(v) {
    const { Schema } = this.labels();
    await runSession(this.driver, this.cfg.database, async (s) => {
      await s.run(
        `MERGE (m:${Schema} {key:$key})
         SET m.version=$v, m.updated_at=timestamp()`,
        { key: "schema", v }
      );
    });
  }
  // Incremental migrations: v1 creates uniqueness/node-key constraints,
  // v2 adds lookup indexes. Each statement is tried individually and
  // failures are ignored (older servers may not support a given syntax).
  async runMigrations() {
    const { Repo, File, Symbol, External } = this.labels();
    let v = await this.getSchemaVersion();
    if (v < 1) {
      await runSession(this.driver, this.cfg.database, async (s) => {
        const stmts = [
          `CREATE CONSTRAINT ${Repo}_repo_id IF NOT EXISTS FOR (r:${Repo}) REQUIRE r.repo_id IS UNIQUE`,
          `CREATE CONSTRAINT ${Symbol}_id IF NOT EXISTS FOR (s:${Symbol}) REQUIRE s.id IS UNIQUE`,
          `CREATE CONSTRAINT ${External}_key IF NOT EXISTS FOR (e:${External}) REQUIRE (e.repo_id, e.kind, e.value) IS NODE KEY`,
          `CREATE CONSTRAINT ${File}_key IF NOT EXISTS FOR (f:${File}) REQUIRE (f.repo_id, f.path) IS NODE KEY`
        ];
        for (const q of stmts) {
          try {
            await s.run(q);
          } catch {
          }
        }
      });
      await this.setSchemaVersion(1);
      v = 1;
    }
    if (v < 2) {
      await runSession(this.driver, this.cfg.database, async (s) => {
        const stmts = [
          `CREATE INDEX ${Symbol}_repo_path IF NOT EXISTS FOR (s:${Symbol}) ON (s.repo_id, s.path)`,
          `CREATE INDEX ${File}_repo IF NOT EXISTS FOR (f:${File}) ON (f.repo_id)`,
          `CREATE INDEX ${External}_repo IF NOT EXISTS FOR (e:${External}) ON (e.repo_id)`
        ];
        for (const q of stmts) {
          try {
            await s.run(q);
          } catch {
          }
        }
      });
      await this.setSchemaVersion(2);
      v = 2;
    }
    if (v !== this.schemaVersionLatest) {
      await this.setSchemaVersion(this.schemaVersionLatest);
    }
  }
  // Upserts the repo node. If the stored commit differs from the new one,
  // deletes all Symbol/File nodes for the repo (and orphaned External nodes)
  // before recording the new commit — mirrors WorkspaceStore.setRepoHead.
  async setRepoHead(args) {
    const { Repo, File, Symbol, External } = this.labels();
    const { repoId, repoRoot, commit, branch } = args;
    await runSession(this.driver, this.cfg.database, async (s) => {
      // Note: ON MATCH deliberately leaves r.commit untouched so the RETURN
      // reflects the previous commit for comparison below.
      const prev = await s.run(
        `MERGE (r:${Repo} {repo_id:$repoId})
         ON CREATE SET r.repo_root=$repoRoot, r.commit=$commit, r.branch=$branch, r.updated_at=timestamp()
         ON MATCH SET r.repo_root=$repoRoot, r.branch=$branch, r.updated_at=timestamp()
         RETURN r.commit AS commit`,
        { repoId, repoRoot, commit, branch }
      );
      const prevCommit = prev?.records?.[0]?.get?.("commit");
      const prevCommitStr = typeof prevCommit === "string" ? prevCommit : `${prevCommit ?? ""}`;
      if (prevCommitStr && prevCommitStr !== commit) {
        await s.run(`MATCH (s:${Symbol} {repo_id:$repoId}) DETACH DELETE s`, { repoId });
        await s.run(
          `MATCH (f:${File} {repo_id:$repoId})
           DETACH DELETE f`,
          { repoId }
        );
        // Best-effort: remove External nodes no longer referenced by any file.
        await s.run(
          `MATCH (e:${External} {repo_id:$repoId})
           WHERE NOT (()-[:FILE_EDGE]->(e))
           DELETE e`,
          { repoId }
        ).catch(() => void 0);
        await s.run(`MATCH (r:${Repo} {repo_id:$repoId}) SET r.commit=$commit`, { repoId, commit });
      } else {
        await s.run(`MATCH (r:${Repo} {repo_id:$repoId}) SET r.commit=$commit`, { repoId, commit });
      }
    });
  }
  // Deletes one file's Symbol nodes and its File node (args: {repoId, path}).
  async deleteFile(args) {
    const { File, Symbol } = this.labels();
    await runSession(this.driver, this.cfg.database, async (s) => {
      await s.run(`MATCH (s:${Symbol} {repo_id:$repoId, path:$path}) DETACH DELETE s`, args);
      await s.run(`MATCH (f:${File} {repo_id:$repoId, path:$path}) DETACH DELETE f`, args);
    });
  }
  // Replaces the outgoing SYMBOL_EDGEs of symbols in one file: merges the new
  // edges (creating target Symbol placeholders where needed), then prunes
  // edges of the managed kinds that are not in the keep-list. With no edges
  // given, all four default kinds are pruned.
  async replaceOutgoingSymbolEdgesFromFile(args) {
    const { Symbol } = this.labels();
    const kinds = args.edges.length > 0 ? Array.from(new Set(args.edges.map((e) => e.kind))) : ["definition", "reference", "implementation", "typeDefinition"];
    // keep entries use the same "fromId|kind|toId" encoding the prune query rebuilds.
    const keep = args.edges.map((e) => `${e.fromId}|${e.kind}|${e.toId}`);
    await runSession(this.driver, this.cfg.database, async (s) => {
      await s.run(
        `UNWIND $edges AS e
         MERGE (a:${Symbol} {id:e.fromId})
         MERGE (b:${Symbol} {id:e.toId})
         ON CREATE SET b.repo_id=$repoId, b.path=coalesce(e.toPath, '')
         SET b.repo_id = coalesce(b.repo_id, $repoId),
             b.path = CASE WHEN coalesce(b.path, '') = '' AND e.toPath IS NOT NULL THEN e.toPath ELSE b.path END
         MERGE (a)-[:SYMBOL_EDGE {kind:e.kind}]->(b)`,
        { repoId: args.repoId, edges: args.edges }
      ).catch(() => void 0);
      if (keep.length === 0) {
        await s.run(
          `MATCH (a:${Symbol} {repo_id:$repoId, path:$fromPath})-[r:SYMBOL_EDGE]->()
           WHERE r.kind IN $kinds
           DELETE r`,
          { repoId: args.repoId, fromPath: args.fromPath, kinds }
        ).catch(() => void 0);
        return;
      }
      await s.run(
        `MATCH (a:${Symbol} {repo_id:$repoId, path:$fromPath})-[r:SYMBOL_EDGE]->(b:${Symbol})
         WHERE r.kind IN $kinds
           AND NOT (a.id + '|' + r.kind + '|' + b.id) IN $keep
         DELETE r`,
        { repoId: args.repoId, fromPath: args.fromPath, kinds, keep }
      ).catch(() => void 0);
    });
  }
  // Full per-file sync: upserts repo + file nodes, replaces FILE_EDGEs
  // (imports/exports), replaces the file's symbols and CONTAINS links, and
  // reconciles SYMBOL_EDGEs of the kinds present in the update.
  async replaceFileGraph(update) {
    const { Repo, File, Symbol, External } = this.labels();
    // Flatten symbol ranges into the property bag stored on Symbol nodes.
    const symbols = update.symbols.map((s) => ({
      id: s.id,
      name: s.name,
      kind: s.kind,
      language: s.language,
      start_line: s.range.startLine,
      start_char: s.range.startCharacter,
      end_line: s.range.endLine,
      end_char: s.range.endCharacter,
      container_name: s.containerName ?? "",
      detail: s.detail ?? ""
    }));
    const symbolEdges = update.symbolEdges.map((e) => ({ fromId: e.fromId, toId: e.toId, kind: e.kind }));
    const managedSymbolEdgeKinds = Array.from(new Set(symbolEdges.map((e) => e.kind)));
    const symbolEdgeKeep = symbolEdges.map((e) => `${e.fromId}|${e.kind}|${e.toId}`);
    await runSession(this.driver, this.cfg.database, async (s) => {
      await s.run(
        `MERGE (r:${Repo} {repo_id:$repoId})
         SET r.repo_root=$repoRoot, r.commit=$commit, r.branch=$branch, r.updated_at=timestamp()
         MERGE (f:${File} {repo_id:$repoId, path:$path})
         SET f.repo_root=$repoRoot, f.language=$language, f.updated_at=timestamp()
         MERGE (r)-[:HAS_FILE]->(f)`,
        { repoId: update.repoId, repoRoot: update.repoRoot, commit: update.commit, branch: update.branch, path: update.path, language: update.language }
      );
      const fileEdges = [
        ...update.imports.map((v) => ({ kind: "import", value: v })),
        ...update.exports.map((v) => ({ kind: "export", value: v }))
      ];
      const fileEdgeKeep = fileEdges.map((e) => `${e.kind}|${e.value}`);
      await s.run(
        `MATCH (f:${File} {repo_id:$repoId, path:$path})
         UNWIND $edges AS e
         MERGE (x:${External} {repo_id:$repoId, kind:e.kind, value:e.value})
         MERGE (f)-[:FILE_EDGE {kind:e.kind}]->(x)`,
        { repoId: update.repoId, path: update.path, edges: fileEdges }
      ).catch(() => void 0);
      // Prune FILE_EDGEs not present in this update.
      await s.run(
        `MATCH (f:${File} {repo_id:$repoId, path:$path})-[r:FILE_EDGE]->(x:${External})
         WHERE NOT (r.kind + '|' + x.value) IN $keep
         DELETE r`,
        { repoId: update.repoId, path: update.path, keep: fileEdgeKeep }
      ).catch(() => void 0);
      if (symbols.length) {
        await s.run(
          `MATCH (f:${File} {repo_id:$repoId, path:$path})
           UNWIND $symbols AS s
           MERGE (n:${Symbol} {id:s.id})
           ON CREATE SET n.repo_id=$repoId
           SET n.repo_root=$repoRoot, n.path=$path,
               n.language=s.language, n.name=s.name, n.kind=s.kind,
               n.start_line=s.start_line, n.start_char=s.start_char, n.end_line=s.end_line, n.end_char=s.end_char,
               n.container_name=s.container_name, n.detail=s.detail
           MERGE (f)-[:CONTAINS]->(n)`,
          { repoId: update.repoId, repoRoot: update.repoRoot, path: update.path, symbols }
        );
        const ids = symbols.map((s2) => s2.id);
        // Detach then delete symbols of this file that are no longer present.
        await s.run(
          `MATCH (f:${File} {repo_id:$repoId, path:$path})-[r:CONTAINS]->(n:${Symbol})
           WHERE NOT n.id IN $ids
           DELETE r`,
          { repoId: update.repoId, path: update.path, ids }
        ).catch(() => void 0);
        await s.run(
          `MATCH (n:${Symbol} {repo_id:$repoId, path:$path})
           WHERE NOT n.id IN $ids
           DETACH DELETE n`,
          { repoId: update.repoId, path: update.path, ids }
        ).catch(() => void 0);
      } else {
        // File now has no symbols: remove all CONTAINS links and Symbol nodes.
        await s.run(
          `MATCH (f:${File} {repo_id:$repoId, path:$path})-[r:CONTAINS]->() DELETE r`,
          { repoId: update.repoId, path: update.path }
        ).catch(() => void 0);
        await s.run(
          `MATCH (n:${Symbol} {repo_id:$repoId, path:$path}) DETACH DELETE n`,
          { repoId: update.repoId, path: update.path }
        ).catch(() => void 0);
      }
      if (symbolEdges.length) {
        await s.run(
          `UNWIND $edges AS e
           MERGE (a:${Symbol} {id:e.fromId})
           MERGE (b:${Symbol} {id:e.toId})
           MERGE (a)-[:SYMBOL_EDGE {kind:e.kind}]->(b)`,
          { edges: symbolEdges }
        ).catch(() => void 0);
        if (managedSymbolEdgeKinds.length > 0) {
          // Only edges of kinds present in this update are reconciled; other
          // kinds (e.g. edges written by replaceOutgoingSymbolEdgesFromFile)
          // are left alone.
          await s.run(
            `MATCH (a:${Symbol} {repo_id:$repoId, path:$path})-[r:SYMBOL_EDGE]->(b:${Symbol})
             WHERE r.kind IN $kinds
               AND NOT (a.id + '|' + r.kind + '|' + b.id) IN $keep
             DELETE r`,
            { repoId: update.repoId, path: update.path, kinds: managedSymbolEdgeKinds, keep: symbolEdgeKeep }
          ).catch(() => void 0);
        }
      }
    });
  }
  // Replaces REPO_LINK relationships grouped per source repo: merges the new
  // links, then prunes links from that repo not present in the keep-list.
  // Note: repos with zero links in args.links never appear in byFrom, so
  // their existing links are not cleared here.
  async replaceRepoLinks(args) {
    const { Repo } = this.labels();
    const byFrom = /* @__PURE__ */ new Map();
    for (const l of args.links) {
      const arr = byFrom.get(l.fromRepoId) ?? [];
      arr.push({ toRepoId: l.toRepoId, kind: l.kind, detail: l.detail });
      byFrom.set(l.fromRepoId, arr);
    }
    for (const [fromRepoId, links] of byFrom) {
      const keep = links.map((l) => `${l.toRepoId}|${l.kind}`);
      await runSession(this.driver, this.cfg.database, async (s) => {
        await s.run(
          `UNWIND $links AS l
           MERGE (a:${Repo} {repo_id:$fromRepoId})
           MERGE (b:${Repo} {repo_id:l.toRepoId})
           MERGE (a)-[r:REPO_LINK {kind:l.kind}]->(b)
           SET r.detail = coalesce(l.detail, '')`,
          { fromRepoId, links }
        ).catch(() => void 0);
        if (keep.length === 0) {
          // Defensive branch; byFrom groups always carry at least one link.
          await s.run(`MATCH (a:${Repo} {repo_id:$fromRepoId})-[r:REPO_LINK]->() DELETE r`, { fromRepoId }).catch(() => void 0);
          return;
        }
        await s.run(
          `MATCH (a:${Repo} {repo_id:$fromRepoId})-[r:REPO_LINK]->(b:${Repo})
           WHERE NOT (b.repo_id + '|' + r.kind) IN $keep
           DELETE r`,
          { fromRepoId, keep }
        ).catch(() => void 0);
      });
    }
  }
  // Files whose symbols are referenced by symbols of the seed files, ranked by
  // number of connecting SYMBOL_EDGEs. Optional kinds filter; seed files
  // themselves are excluded. Returns [{repoId, path, weight}].
  async neighborFiles(args) {
    const { File, Symbol } = this.labels();
    const limit = args.limit ?? 20;
    const kinds = args.kinds && args.kinds.length > 0 ? args.kinds : null;
    const rows = [];
    await runSession(this.driver, this.cfg.database, async (s) => {
      const res = await s.run(
        `UNWIND $seeds AS seed
         MATCH (f:${File} {repo_id:seed.repoId, path:seed.path})-[:CONTAINS]->(a:${Symbol})-[e:SYMBOL_EDGE]->(b:${Symbol})<-[:CONTAINS]-(g:${File})
         WHERE (g.repo_id <> seed.repoId OR g.path <> seed.path)
           AND ($kinds IS NULL OR e.kind IN $kinds)
         RETURN g.repo_id AS repoId, g.path AS path, count(*) AS weight
         ORDER BY weight DESC
         LIMIT $limit`,
        { seeds: args.seeds, kinds, limit }
      );
      for (const r of res?.records ?? []) {
        const repoId = String(r.get("repoId"));
        const p = String(r.get("path"));
        const wRaw = r.get("weight");
        // neo4j counts come back as Integer objects; coerce defensively.
        const w = typeof wRaw?.toNumber === "function" ? wRaw.toNumber() : Number(wRaw);
        rows.push({ repoId, path: p, weight: Number.isFinite(w) ? w : 0 });
      }
    });
    return rows;
  }
  // Shortest undirected path between two files through CONTAINS/SYMBOL_EDGE
  // relationships (bounded by maxRels hops). Returns the File nodes along the
  // path as [{repoId, path}], or null when no path exists / the query fails.
  async shortestFilePath(args) {
    const { File } = this.labels();
    const maxRels = args.maxRels ?? 16;
    let out = null;
    await runSession(this.driver, this.cfg.database, async (s) => {
      const res = await s.run(
        `MATCH (a:${File} {repo_id:$aRepoId, path:$aPath})
         MATCH (b:${File} {repo_id:$bRepoId, path:$bPath})
         MATCH p = shortestPath((a)-[:CONTAINS|SYMBOL_EDGE*..${maxRels}]-(b))
         RETURN [n IN nodes(p) WHERE n:${File} | {repoId:n.repo_id, path:n.path}] AS files`,
        { aRepoId: args.from.repoId, aPath: args.from.path, bRepoId: args.to.repoId, bPath: args.to.path }
      ).catch(() => null);
      const files = res?.records?.[0]?.get?.("files");
      if (!Array.isArray(files)) return;
      out = files.map((x) => ({ repoId: String(x.repoId), path: String(x.path) }));
    });
    return out;
  }
  // File-level subgraph induced by symbol edges leaving the seed files.
  // Returns deduplicated file nodes plus up to limitEdges raw edges.
  async extractFileSubgraph(args) {
    const { File, Symbol } = this.labels();
    const limitEdges = args.limitEdges ?? 200;
    const nodes = /* @__PURE__ */ new Map();
    const edges = [];
    await runSession(this.driver, this.cfg.database, async (s) => {
      const res = await s.run(
        `UNWIND $seeds AS seed
         MATCH (f:${File} {repo_id:seed.repoId, path:seed.path})-[:CONTAINS]->(a:${Symbol})-[e:SYMBOL_EDGE]->(b:${Symbol})<-[:CONTAINS]-(g:${File})
         RETURN f.repo_id AS fromRepoId, f.path AS fromPath, g.repo_id AS toRepoId, g.path AS toPath, e.kind AS kind
         LIMIT $limit`,
        { seeds: args.seeds, limit: limitEdges }
      );
      for (const r of res?.records ?? []) {
        const from = { repoId: String(r.get("fromRepoId")), path: String(r.get("fromPath")) };
        const to = { repoId: String(r.get("toRepoId")), path: String(r.get("toPath")) };
        const kind = String(r.get("kind"));
        nodes.set(`${from.repoId}:${from.path}`, from);
        nodes.set(`${to.repoId}:${to.path}`, to);
        edges.push({ from, to, kind });
      }
    });
    return { nodes: Array.from(nodes.values()), edges };
  }
  async close() {
    await this.driver.close().catch(() => void 0);
  }
};
|
|
3675
|
+
// Factory for the Neo4j graph store. Loads neo4j-driver lazily via
// createRequire so it stays an optional dependency; any failure (missing
// package, bad credentials, init error) is re-thrown with an install hint.
async function createNeo4jGraphStore(cfg) {
  try {
    const requireFromHere = createRequire2(import.meta.url);
    const neo4j = requireFromHere("neo4j-driver");
    const credentials = neo4j.auth.basic(cfg.user, cfg.password);
    const driver = neo4j.driver(cfg.uri, credentials);
    const store = new Neo4jGraphStore(driver, cfg);
    await store.init();
    return store;
  } catch (e) {
    const hint = "To enable Neo4j graph storage, install: npm i neo4j-driver";
    const reason = String(e?.message ?? e);
    throw new Error(`${reason}\n${hint}`);
  }
}
|
|
3689
|
+
|
|
3690
|
+
// src/indexer/workspaceLinker.ts
|
|
3691
|
+
import fs13 from "fs";
|
|
3692
|
+
import path16 from "path";
|
|
3693
|
+
// Reads a file as UTF-8, returning null instead of throwing when the file is
// missing or unreadable.
function readText(absPath) {
  let contents = null;
  try {
    contents = fs13.readFileSync(absPath, "utf8");
  } catch {
    // Missing/unreadable file: treated as absent.
  }
  return contents;
}
|
|
3700
|
+
// Reads and parses a JSON file; returns null for missing, empty, or
// malformed content.
function readJson(absPath) {
  const raw = readText(absPath);
  if (raw) {
    try {
      return JSON.parse(raw);
    } catch {
      // Malformed JSON falls through to null.
    }
  }
  return null;
}
|
|
3709
|
+
// Reduces an npm import specifier to its package name:
// "@scope/pkg/sub" -> "@scope/pkg", "lodash/fp" -> "lodash".
// Falsy input yields "", and a bare "@scope" is returned unchanged.
function normalizeWorkspacePackageName(spec) {
  if (!spec) return "";
  const segments = spec.split("/");
  if (spec.startsWith("@")) {
    if (segments.length < 2) return spec;
    return segments.slice(0, 2).join("/");
  }
  return segments[0];
}
|
|
3717
|
+
var NestedRepoLinkStrategy = class {
  id = "nested";
  // Links each repo whose root directory lives inside another repo's root.
  // Candidates are scanned shortest-root-first, so the first containing root
  // found is the outermost ancestor; at most one link is emitted per repo.
  collect(ctx) {
    const links = [];
    const byRootLength = ctx.repos.slice().sort((x, y) => x.absRoot.length - y.absRoot.length);
    for (const candidate of byRootLength) {
      const ancestor = byRootLength.find(
        (other) =>
          other.repoId !== candidate.repoId &&
          candidate.absRoot.startsWith(other.absRoot + path16.sep)
      );
      if (!ancestor) continue;
      links.push({
        fromRepoId: candidate.repoId,
        toRepoId: ancestor.repoId,
        kind: "nested",
        detail: "repo root is nested under another repo root"
      });
    }
    return links;
  }
};
|
|
3739
|
+
// Links repos via npm dependencies: if a repo's package.json names a
// package whose source lives in another workspace repo, emit an "npm" link.
var NpmDependencyLinkStrategy = class {
  id = "npm";
  collect(ctx) {
    const sections = ["dependencies", "devDependencies", "peerDependencies", "optionalDependencies"];
    const links = [];
    for (const repo of ctx.repos) {
      const manifest = readJson(path16.join(repo.absRoot, "package.json"));
      if (!manifest) continue;
      for (const section of sections) {
        const deps = manifest?.[section];
        if (!deps || typeof deps !== "object") continue;
        for (const depKey of Object.keys(deps)) {
          const pkgName = normalizeWorkspacePackageName(depKey);
          const targetRepoId = ctx.npmNameToRepoId.get(pkgName);
          // Skip unknown packages and self-references.
          if (!targetRepoId) continue;
          if (targetRepoId === repo.repoId) continue;
          links.push({ fromRepoId: repo.repoId, toRepoId: targetRepoId, kind: "npm", detail: `${section}:${pkgName}` });
        }
      }
    }
    return links;
  }
};
|
|
3762
|
+
// Extract the module path from a repo's go.mod (the "module <path>" line),
// or null when go.mod is absent or has no module directive.
function parseGoModule(absRepoRoot) {
  const goMod = readText(path16.join(absRepoRoot, "go.mod"));
  if (!goMod) return null;
  const match = goMod.match(/^\s*module\s+(.+)\s*$/m);
  if (!match) return null;
  return String(match[1]).trim();
}
|
|
3768
|
+
// List the module paths required by a repo's go.mod.
//
// Handles both directive forms of go.mod:
//   require example.com/mod v1.2.3          (single line)
//   require ( example.com/mod v1.2.3 ... )  (block form, one module per line)
// The previous implementation only matched single-line directives, so the
// common gofmt block form produced no requires at all.
//
// Returns [] when go.mod is missing or empty. Comment-only lines are
// skipped; "// indirect" requirements are still included, matching the
// single-line behavior.
function parseGoRequires(absRepoRoot) {
  const raw = readText(path16.join(absRepoRoot, "go.mod"));
  if (!raw) return [];
  const out = [];
  let inRequireBlock = false;
  for (const line of raw.split(/\r?\n/)) {
    const t = line.trim();
    if (!t || t.startsWith("//")) continue;
    if (inRequireBlock) {
      if (t.startsWith(")")) {
        inRequireBlock = false;
        continue;
      }
      // Inside a require block each entry is "<module> <version> [// indirect]".
      const m = t.match(/^([^\s()]+)\s+\S+/);
      if (m) out.push(m[1]);
      continue;
    }
    if (/^require\s*\(/.test(t)) {
      inRequireBlock = true;
      continue;
    }
    // Single-line form: "require <module> <version>".
    const m = t.match(/^require\s+([^\s]+)\s+/);
    if (m) out.push(m[1]);
  }
  return out;
}
|
|
3780
|
+
// Links repos via Go modules: if a repo's go.mod requires a module whose
// source lives in another workspace repo, emit a "go" link.
var GoModuleLinkStrategy = class {
  id = "go";
  collect(ctx) {
    const links = [];
    for (const repo of ctx.repos) {
      for (const modulePath of parseGoRequires(repo.absRoot)) {
        const targetRepoId = ctx.goModuleToRepoId.get(modulePath);
        // Skip modules not hosted in the workspace and self-references.
        if (!targetRepoId) continue;
        if (targetRepoId === repo.repoId) continue;
        links.push({ fromRepoId: repo.repoId, toRepoId: targetRepoId, kind: "go", detail: `require:${modulePath}` });
      }
    }
    return links;
  }
};
|
|
3797
|
+
// Directory names that file walks should never descend into
// (VCS metadata, dependencies, build output, caches, editor state).
function isSkippableDir(name) {
  const skippable = new Set([
    ".git",
    "node_modules",
    "dist",
    "out",
    "build",
    "coverage",
    ".cache",
    ".vscode"
  ]);
  return skippable.has(name);
}
|
|
3800
|
+
// Depth-first file walk under `root`, invoking `onFile(absPath)` for every
// regular file, bounded by opts.maxDepth (root is depth 0) and
// opts.maxFiles. An optional opts.shouldVisit(abs, dirent) predicate can
// veto any entry. Unreadable directories are skipped silently, as are the
// well-known dependency/build/cache directories.
function walkFiles(root, opts, onFile) {
  const depthLimit = Math.max(0, opts.maxDepth);
  const fileLimit = Math.max(1, opts.maxFiles);
  const skipDirs = new Set([".git", "node_modules", "dist", "out", "build", "coverage", ".cache", ".vscode"]);
  let fileCount = 0;
  const recurse = (dir, depth) => {
    if (fileCount >= fileLimit) return;
    if (depth > depthLimit) return;
    let entries;
    try {
      entries = fs13.readdirSync(dir, { withFileTypes: true });
    } catch {
      // Unreadable directory: prune this subtree.
      return;
    }
    for (const entry of entries) {
      if (fileCount >= fileLimit) return;
      const abs = path16.join(dir, entry.name);
      if (opts.shouldVisit && !opts.shouldVisit(abs, entry)) continue;
      if (entry.isDirectory()) {
        if (!skipDirs.has(entry.name)) recurse(abs, depth + 1);
      } else if (entry.isFile()) {
        fileCount += 1;
        onFile(abs);
      }
    }
  };
  recurse(root, 0);
}
|
|
3828
|
+
// Scan a repo for VS Code extension manifests (package.json files with a
// contributes.languages section) and return the declared languages as
// { id, extensions } entries, deduplicated by id + sorted extension list.
// The walk is bounded (depth 6, 20k files) to keep scans cheap.
function collectVsCodeLanguagesForRepo(absRepoRoot) {
  const results = [];
  const dedupe = new Set();
  const visitManifest = (absPath) => {
    if (path16.basename(absPath) !== "package.json") return;
    const manifest = readJson(absPath);
    const contributed = manifest?.contributes?.languages;
    if (!Array.isArray(contributed)) return;
    for (const lang of contributed) {
      const id = typeof lang?.id === "string" ? lang.id : null;
      if (!id) continue;
      const rawExts = Array.isArray(lang?.extensions) ? lang.extensions : [];
      // Only keep well-formed ".ext" strings.
      const extensions = rawExts.filter((x) => typeof x === "string" && x.startsWith("."));
      const dedupeKey = `${id}::${extensions.sort().join(",")}`;
      if (dedupe.has(dedupeKey)) continue;
      dedupe.add(dedupeKey);
      results.push({ id, extensions });
    }
  };
  walkFiles(
    absRepoRoot,
    {
      maxDepth: 6,
      maxFiles: 2e4,
      shouldVisit: (_abs, dirent) => !(dirent.isDirectory() && isSkippableDir(dirent.name))
    },
    visitManifest
  );
  return results;
}
|
|
3857
|
+
// Return the subset of `exts` (a Set of lowercase ".ext" strings) that
// actually appear as file extensions somewhere in the repo. The walk is
// bounded (depth 7, 50k files); an empty `exts` short-circuits.
function repoUsedExtensions(absRepoRoot, exts) {
  const found = new Set();
  if (exts.size === 0) return found;
  walkFiles(
    absRepoRoot,
    {
      maxDepth: 7,
      maxFiles: 5e4,
      shouldVisit: (_abs, dirent) => !(dirent.isDirectory() && isSkippableDir(dirent.name))
    },
    (absPath) => {
      const fileExt = path16.extname(absPath).toLowerCase();
      if (!fileExt) return;
      if (exts.has(fileExt)) found.add(fileExt);
    }
  );
  return found;
}
|
|
3875
|
+
// Links a repo that USES a file extension to the repo that PROVIDES the
// VS Code language for that extension (via contributes.languages in an
// extension manifest). Optionally restricted to opts.targetLanguageIds.
var VsCodeContributesLanguageLinkStrategy = class {
  constructor(opts = {}) {
    this.opts = opts;
  }
  id = "vscode-language";
  collect(ctx) {
    // Optional allow-list of language ids; null means "all languages".
    const target = this.opts.targetLanguageIds ? new Set(this.opts.targetLanguageIds) : null;
    // languageId -> list of { repoId, languageId, extensions } providers.
    const providers = /* @__PURE__ */ new Map();
    for (const r of ctx.repos) {
      const langs = collectVsCodeLanguagesForRepo(r.absRoot);
      for (const l of langs) {
        if (target && !target.has(l.id)) continue;
        const arr = providers.get(l.id) ?? [];
        arr.push({ repoId: r.repoId, languageId: l.id, extensions: l.extensions });
        providers.set(l.id, arr);
      }
    }
    // Resolve each extension to exactly one provider. Providers are sorted
    // by repoId and the first claim wins, so the mapping is deterministic
    // even when several repos contribute the same extension.
    const extToProvider = /* @__PURE__ */ new Map();
    const allExts = /* @__PURE__ */ new Set();
    for (const [languageId, provs] of providers) {
      const sorted = provs.slice().sort((a, b) => a.repoId.localeCompare(b.repoId));
      for (const p of sorted) {
        for (const ext of p.extensions) {
          allExts.add(ext.toLowerCase());
          if (!extToProvider.has(ext.toLowerCase())) {
            extToProvider.set(ext.toLowerCase(), { languageId, repoId: p.repoId });
          }
        }
      }
    }
    // Emit a link from every repo that uses one of the claimed extensions
    // to the provider repo (skipping self-links).
    const out = [];
    for (const r of ctx.repos) {
      const usedExts = repoUsedExtensions(r.absRoot, allExts);
      for (const ext of usedExts) {
        const provider = extToProvider.get(ext);
        if (!provider) continue;
        if (provider.repoId === r.repoId) continue;
        out.push({
          fromRepoId: r.repoId,
          toRepoId: provider.repoId,
          kind: "vscode-language",
          detail: `${provider.languageId}:${ext}`
        });
      }
    }
    return out;
  }
};
|
|
3923
|
+
// Runs a set of link strategies over the workspace's repos and aggregates
// the cross-repo links they discover.
var WorkspaceLinker = class _WorkspaceLinker {
  constructor(strategies = _WorkspaceLinker.defaultStrategies()) {
    this.strategies = strategies;
  }
  // Default strategy set. The VS Code strategy covers the Petri language
  // repos (ctl/ltl/isl/colour-algebra) and any other VS Code language
  // extension repos present in the workspace.
  static defaultStrategies() {
    const vscode = new VsCodeContributesLanguageLinkStrategy({ targetLanguageIds: ["ctl", "ltl", "isl", "colour-algebra"] });
    return [
      new NestedRepoLinkStrategy(),
      new NpmDependencyLinkStrategy(),
      new GoModuleLinkStrategy(),
      vscode
    ];
  }
  // Build the shared strategy context: repo descriptors plus lookup tables
  // from npm package name / Go module path to the owning repoId.
  buildContext(workspaceRoot, repoRoots) {
    const repos = [];
    for (const repoRoot of repoRoots) {
      repos.push({
        repoRoot,
        repoId: repoIdFromRoot(repoRoot),
        absRoot: path16.resolve(repoRoot)
      });
    }
    const npmNameToRepoId = new Map();
    const goModuleToRepoId = new Map();
    for (const repo of repos) {
      const manifest = readJson(path16.join(repo.absRoot, "package.json"));
      const pkgName = typeof manifest?.name === "string" ? manifest.name : null;
      if (pkgName) npmNameToRepoId.set(pkgName, repo.repoId);
      const goModule = parseGoModule(repo.absRoot);
      if (goModule) goModuleToRepoId.set(goModule, repo.repoId);
    }
    return { workspaceRoot, repos, npmNameToRepoId, goModuleToRepoId };
  }
  // Collect links from every strategy; one failing strategy never aborts
  // the overall collection.
  buildLinks(ctx) {
    const links = [];
    for (const strategy of this.strategies) {
      try {
        links.push(...strategy.collect(ctx));
      } catch {
      }
    }
    return links;
  }
};
|
|
3964
|
+
// Group links by their source repo: fromRepoId -> [{ toRepoId, kind, detail }],
// preserving the input order within each group.
function groupByFrom(links) {
  const grouped = new Map();
  for (const link of links) {
    const entry = { toRepoId: link.toRepoId, kind: link.kind, detail: link.detail };
    const existing = grouped.get(link.fromRepoId);
    if (existing) {
      existing.push(entry);
    } else {
      grouped.set(link.fromRepoId, [entry]);
    }
  }
  return grouped;
}
|
|
3973
|
+
// Discover cross-repo links for the workspace and persist them.
// Writes per-repo link lists to args.workspaceStore (when provided) and
// the full link set to args.graphStore (when it supports replaceRepoLinks).
// Returns the repo descriptors and the collected links.
async function linkWorkspaceRepos(args) {
  const strategies = args.strategies ?? WorkspaceLinker.defaultStrategies();
  const linker = new WorkspaceLinker(strategies);
  const ctx = linker.buildContext(args.workspaceRoot, args.repoRoots);
  const links = linker.buildLinks(ctx);
  if (args.workspaceStore) {
    const byFrom = groupByFrom(links);
    // Every repo is written, including ones with no outgoing links, so
    // stale links are cleared.
    for (const repo of ctx.repos) {
      args.workspaceStore.replaceRepoLinks(repo.repoId, byFrom.get(repo.repoId) ?? []);
    }
  }
  if (args.graphStore?.replaceRepoLinks) {
    await args.graphStore.replaceRepoLinks({ links });
  }
  return { repos: ctx.repos, links };
}
|
|
3988
|
+
|
|
3989
|
+
// src/indexer/workspaceIndexer.ts
|
|
3990
|
+
import path17 from "path";
|
|
3991
|
+
// Recency half-life (days) used for scoring, per retrieval profile.
// Root-cause analysis favors very recent changes; unknown profiles get
// the generic 30-day default.
function halfLifeDaysForProfile(profileName) {
  switch (profileName) {
    case "rca":
      return 7;
    case "review":
      return 14;
    case "refactor":
      return 21;
    default:
      return 30;
  }
}
|
|
3997
|
+
// Orchestrates indexing and retrieval across every git repo in a workspace.
// Owns one RepoIndexer per discovered repo, a shared SQLite WorkspaceStore
// (keyed by a hash of the workspace root), and an optional Neo4j graph store.
var WorkspaceIndexer = class {
  constructor(workspaceRoot, embedder, config = {}) {
    this.workspaceRoot = workspaceRoot;
    this.embedder = embedder;
    this.config = { ...config };
    if (!this.config.cacheDir) this.config.cacheDir = defaultCacheDir();
    this.progress = asProgressSink(this.config.progress);
    // Workspace identity is a truncated hash of the absolute root, so the
    // same workspace always maps to the same on-disk store.
    const wsId = sha256Hex(path17.resolve(this.workspaceRoot)).slice(0, 16);
    const dbPath = path17.join(this.config.cacheDir, "workspace", wsId, "workspace.sqlite");
    this.workspaceStore = new WorkspaceStore(dbPath);
    this.workspaceStore.setMeta("workspaceRoot", path17.resolve(this.workspaceRoot));
  }
  // Class fields initialize before the constructor body runs, so the
  // constructor's assignments above take precedence over these defaults.
  repos = [];
  config;
  progress = asProgressSink();
  workspaceStore = null;
  graphStore = null;
  // Best-effort progress emission: a throwing subscriber never breaks indexing.
  emitProgress(event) {
    try {
      this.progress?.emit(event);
    } catch {
    }
  }
  // Discover repos, (optionally) connect the Neo4j graph store, build one
  // RepoIndexer per repo, and refresh cross-repo links. Graph-store and
  // linking failures are reported via progress but are non-fatal.
  async open() {
    if (!this.graphStore && this.config.workspace?.graph?.provider === "neo4j") {
      try {
        const n = this.config.workspace.graph.neo4j;
        this.graphStore = await createNeo4jGraphStore({
          uri: n.uri,
          user: n.user,
          password: n.password,
          database: n.database,
          labelPrefix: n.labelPrefix
        });
      } catch (e) {
        // Degrade gracefully to no graph store; surface the error as an event.
        this.emitProgress({
          type: "error",
          scope: "workspace",
          message: String(e?.message ?? e),
          stack: e?.stack ? String(e.stack) : void 0
        });
        this.graphStore = null;
      }
    }
    const d = this.config.workspace?.discovery ?? {};
    const repoRoots = discoverGitRepos(this.workspaceRoot, {
      include: d.include,
      exclude: d.exclude,
      maxDepth: d.maxDepth,
      stopAtRepoRoot: d.stopAtRepoRoot,
      includeSubmodules: d.includeSubmodules
    });
    this.emitProgress({ type: "workspace/open", workspaceRoot: this.workspaceRoot, repoRoots });
    this.repos = repoRoots.map((r) => {
      // Per-repo config overrides are resolved before constructing each indexer.
      const cfg = pickRepoOverride(this.config, r, this.workspaceRoot, this.config.workspace?.repoOverrides);
      return new RepoIndexer(r, this.embedder, cfg, this.workspaceStore ?? void 0, this.graphStore ?? void 0);
    });
    try {
      await linkWorkspaceRepos({
        workspaceRoot: this.workspaceRoot,
        repoRoots,
        workspaceStore: this.workspaceStore,
        graphStore: this.graphStore
      });
    } catch {
      // Linking is best-effort; retrieval still works without links.
    }
  }
  // Index every repo concurrently, emitting start/done progress events.
  async indexAll() {
    if (this.repos.length === 0) await this.open();
    const startedAt = Date.now();
    this.emitProgress({ type: "workspace/index/start", workspaceRoot: this.workspaceRoot, repoCount: this.repos.length });
    await Promise.all(this.repos.map((r) => r.indexAll()));
    this.emitProgress({
      type: "workspace/index/done",
      workspaceRoot: this.workspaceRoot,
      repoCount: this.repos.length,
      ms: Date.now() - startedAt
    });
  }
  // Start watch mode on every repo concurrently, with progress events.
  async watch() {
    if (this.repos.length === 0) await this.open();
    const startedAt = Date.now();
    this.emitProgress({ type: "workspace/watch/start", workspaceRoot: this.workspaceRoot, repoCount: this.repos.length });
    await Promise.all(this.repos.map((r) => r.watch()));
    this.emitProgress({
      type: "workspace/watch/done",
      workspaceRoot: this.workspaceRoot,
      repoCount: this.repos.length,
      ms: Date.now() - startedAt
    });
  }
  // Defensive copy so callers cannot mutate the internal repo list.
  getRepoIndexers() {
    return this.repos.slice();
  }
  // Resolve the effective retrieval profile: built-in defaults, then the
  // config's per-profile patch, then per-call overrides; finally normalize
  // the vector/lexical/recency weights to sum to 1 (epsilon-guarded).
  resolveProfile(opts) {
    const name = opts?.profile ?? "search";
    const base = DEFAULT_PROFILES[name] ?? DEFAULT_PROFILES.search;
    const configPatch = this.config.profiles?.[name] ?? {};
    const merged1 = deepMergeProfile(base, configPatch);
    const merged2 = deepMergeProfile(merged1, opts?.profileOverrides);
    const w = merged2.weights;
    const sum = Math.max(1e-6, w.vector + w.lexical + w.recency);
    merged2.weights = { vector: w.vector / sum, lexical: w.lexical / sum, recency: w.recency / sum };
    return merged2;
  }
  // Hybrid retrieval across all repos:
  //   1. embed the query; gather vector and lexical candidates per repo
  //      (lexical may come from the shared workspace FTS index when no
  //      path scoping is requested),
  //   2. score candidates with normalized vector/lexical/recency weights
  //      plus a kind factor for synopsis chunks, apply filters, rank,
  //   3. expand the top hits with symbol-graph neighbors and per-repo
  //      context expansion into deduplicated context blocks.
  // Returns { hits, context, stats }.
  async retrieve(query, opts = {}) {
    if (this.repos.length === 0) await this.open();
    const profile = this.resolveProfile(opts);
    const startedAt = Date.now();
    this.emitProgress({ type: "workspace/retrieve/start", workspaceRoot: this.workspaceRoot, profile: profile.name, query });
    const qVec = (await this.embedder.embed([query]))[0];
    // Candidate pool sizes scale with k unless pinned by the profile.
    const vectorK = profile.candidates?.vectorK ?? Math.max(profile.k * 3, 30);
    const lexicalK = profile.candidates?.lexicalK ?? Math.max(profile.k * 3, 30);
    const maxMerged = profile.candidates?.maxMergedCandidates ?? Math.max(profile.k * 8, 120);
    const repoFilters = opts.filters?.repoRoots;
    const langFilter = opts.filters?.language;
    const pathPrefix = opts.filters?.pathPrefix;
    const candidates = [];
    let vecCount = 0;
    let lexCount = 0;
    // The workspace-wide FTS index can only be used when the query is not
    // scoped to specific paths or to changed files.
    const canUseWorkspaceLex = !!this.workspaceStore && this.config.storage?.ftsMode !== "off" && !opts.scope?.includePaths && !opts.scope?.changedOnly;
    const workspaceLexByRepoRoot = /* @__PURE__ */ new Map();
    if (canUseWorkspaceLex && profile.weights.lexical > 0) {
      const ftq = ftsQueryFromText(query);
      const allowRoots = repoFilters ? new Set(repoFilters.map((r) => path17.resolve(r))) : null;
      const repoIds = allowRoots ? this.repos.filter((r) => allowRoots.has(path17.resolve(r.repoRoot))).map((r) => r.repoId) : void 0;
      if (ftq) {
        const rows = this.workspaceStore.searchFts(ftq, lexicalK, repoIds);
        lexCount += rows.length;
        // Bucket FTS hits by resolved repo root so the per-repo loop below
        // can pick them up instead of querying each repo's own index.
        for (const r of rows) {
          const row = this.workspaceStore.getChunkById(r.id);
          if (!row) continue;
          const rootKey = path17.resolve(row.repo_root);
          const arr = workspaceLexByRepoRoot.get(rootKey) ?? [];
          arr.push({ id: r.id, score: bm25ToScore01(r.bm25) });
          workspaceLexByRepoRoot.set(rootKey, arr);
        }
      }
    }
    for (const repo of this.repos) {
      if (repoFilters && !repoFilters.includes(repo.repoRoot)) continue;
      let includePaths = opts.scope?.includePaths?.slice();
      if (opts.scope?.changedOnly) {
        try {
          const changed = await listChangedFiles(repo.repoRoot, opts.scope.baseRef ?? "HEAD~1");
          includePaths = includePaths ? includePaths.filter((p) => changed.includes(p)) : changed;
        } catch {
          // If git diff fails, fall back to the unfiltered scope.
        }
      }
      const [vHits, lHits] = await Promise.all([
        repo.vectorCandidates(qVec, vectorK, includePaths),
        canUseWorkspaceLex ? Promise.resolve(workspaceLexByRepoRoot.get(path17.resolve(repo.repoRoot)) ?? []) : repo.lexicalCandidates(query, lexicalK, includePaths)
      ]);
      vecCount += vHits.length;
      // Workspace-level lexical hits were already counted above.
      if (!canUseWorkspaceLex) lexCount += lHits.length;
      // Merge vector and lexical hits per chunk id.
      const m = /* @__PURE__ */ new Map();
      for (const vh of vHits) {
        const id = vh.id;
        const vector01 = vectorCosineToScore01(vh.score);
        m.set(id, { repo, id, vector01, combined: 0 });
      }
      for (const lh of lHits) {
        const id = lh.id;
        const prev = m.get(id);
        if (prev) prev.lexical01 = lh.score;
        else m.set(id, { repo, id, lexical01: lh.score, combined: 0 });
      }
      const halfLife = halfLifeDaysForProfile(profile.name);
      for (const c of m.values()) {
        const meta = repo.getChunkMeta(c.id);
        if (!meta) continue;
        if (langFilter && meta.language !== langFilter) continue;
        if (pathPrefix && !meta.path.startsWith(pathPrefix)) continue;
        c.recency01 = profile.weights.recency > 0 ? recencyScore(meta.fileMtimeMs, halfLife) : 0;
        // Synopsis chunks are demoted for plain search and slightly
        // promoted for architecture queries.
        let kindFactor = 1;
        if (meta.kind === "synopsis" && profile.name === "search") kindFactor = 0.85;
        if (meta.kind === "synopsis" && profile.name === "architecture") kindFactor = 1.05;
        const v = c.vector01 ?? 0;
        const l = c.lexical01 ?? 0;
        const r = c.recency01 ?? 0;
        c.combined = clamp(
          kindFactor * (profile.weights.vector * v + profile.weights.lexical * l + profile.weights.recency * r),
          0,
          1
        );
        candidates.push(c);
      }
    }
    candidates.sort((a, b) => b.combined - a.combined);
    const merged = candidates.slice(0, maxMerged);
    const top = merged.slice(0, profile.k);
    const hits = top.map((c) => {
      const meta = c.repo.getChunkMeta(c.id);
      const preview = makePreview(c.repo.getChunkText(c.id));
      return {
        score: c.combined,
        scoreBreakdown: { vector: c.vector01, lexical: c.lexical01, recency: c.recency01 },
        chunk: { ...meta, preview }
      };
    });
    // Best-effort: pre-warm symbol-graph edges for the files behind the hits.
    try {
      const byRepo = /* @__PURE__ */ new Map();
      for (const h of hits) {
        const s = byRepo.get(h.chunk.repoRoot) ?? /* @__PURE__ */ new Set();
        s.add(h.chunk.path);
        byRepo.set(h.chunk.repoRoot, s);
      }
      for (const [repoRoot, paths] of byRepo) {
        const repo = this.repos.find((r) => r.repoRoot === repoRoot);
        if (!repo) continue;
        await repo.warmSymbolGraphEdges(Array.from(paths), { maxFiles: 6 });
      }
    } catch {
    }
    // Best-effort: ask the graph store for files related to the top hits
    // (at most 4 seed files, deduplicated by repoId:path).
    let graphNeighborFiles = [];
    try {
      if (this.graphStore?.neighborFiles) {
        const seeds = [];
        const seen = /* @__PURE__ */ new Set();
        for (const h of hits) {
          const repo = this.repos.find((r) => r.repoRoot === h.chunk.repoRoot);
          if (!repo) continue;
          const key = `${repo.repoId}:${h.chunk.path}`;
          if (seen.has(key)) continue;
          seen.add(key);
          seeds.push({ repoId: repo.repoId, path: h.chunk.path });
          if (seeds.length >= 4) break;
        }
        if (seeds.length > 0) {
          graphNeighborFiles = await this.graphStore.neighborFiles({
            seeds,
            limit: profile.name === "architecture" ? 16 : 10,
            kinds: ["definition", "reference", "implementation", "typeDefinition"]
          });
        }
      }
    } catch {
      graphNeighborFiles = [];
    }
    const contextBlocks = [];
    const seenKey = /* @__PURE__ */ new Set();
    // Append a context block unless an identical one (same location, text
    // length, and reason) was already added, or the text is blank.
    const addBlock = (repoRoot, path19, startLine, endLine, text, reason) => {
      const key = `${repoRoot}:${path19}:${startLine}:${endLine}:${text.length}:${reason}`;
      if (seenKey.has(key)) return;
      seenKey.add(key);
      if (!text.trim()) return;
      contextBlocks.push({ repoRoot, path: path19, startLine, endLine, text, reason });
    };
    // Context from graph neighbors (capped at 10 files), best-effort.
    try {
      const byRepoId = /* @__PURE__ */ new Map();
      for (const r of this.repos) byRepoId.set(r.repoId, r);
      for (const n of graphNeighborFiles.slice(0, 10)) {
        const repo = byRepoId.get(n.repoId);
        if (!repo) continue;
        const chunkId = await repo.getRepresentativeChunkIdForFile(n.path, true);
        if (!chunkId) continue;
        const meta = repo.getChunkMeta(chunkId);
        if (!meta) continue;
        const text = repo.getChunkText(chunkId);
        addBlock(meta.repoRoot, meta.path, meta.startLine, meta.endLine, text, `graph neighbor (${n.weight})`);
      }
    } catch {
    }
    // Context from the hits themselves plus profile-driven expansion
    // (adjacent chunks, imported files, file synopsis).
    for (const h of hits) {
      const repo = this.repos.find((r) => r.repoRoot === h.chunk.repoRoot);
      if (!repo) continue;
      const text = repo.getChunkText(h.chunk.id);
      addBlock(h.chunk.repoRoot, h.chunk.path, h.chunk.startLine, h.chunk.endLine, text, "primary hit");
      const expanded = await repo.expandContext(h.chunk.id, {
        adjacentChunks: profile.expand.adjacentChunks ?? 0,
        followImports: profile.expand.followImports ?? 0,
        includeFileSynopsis: profile.expand.includeFileSynopsis ?? false
      });
      for (const ex of expanded) {
        const meta = repo.getChunkMeta(ex.id);
        if (!meta) continue;
        const t = repo.getChunkText(ex.id);
        addBlock(meta.repoRoot, meta.path, meta.startLine, meta.endLine, t, ex.reason);
      }
    }
    const bundle = {
      hits,
      context: contextBlocks,
      stats: {
        profile: profile.name,
        reposSearched: this.repos.length,
        candidates: {
          vector: vecCount,
          lexical: lexCount,
          merged: merged.length,
          returned: hits.length
        }
      }
    };
    this.emitProgress({
      type: "workspace/retrieve/done",
      workspaceRoot: this.workspaceRoot,
      profile: profile.name,
      ms: Date.now() - startedAt,
      hits: hits.length,
      candidates: { vector: vecCount, lexical: lexCount, merged: merged.length }
    });
    return bundle;
  }
  // Convenience wrapper: run the "search" profile and return only the hits.
  async search(query, k = 10) {
    const bundle = await this.retrieve(query, { profile: "search", profileOverrides: { k } });
    return bundle.hits;
  }
  // Close all repo indexers and both stores; individual close failures are
  // ignored so teardown always completes.
  async closeAsync() {
    for (const r of this.repos) {
      await r.closeAsync().catch(() => void 0);
    }
    this.repos = [];
    this.workspaceStore?.close();
    this.workspaceStore = null;
    await this.graphStore?.close().catch(() => void 0);
    this.graphStore = null;
  }
  // Synchronous fire-and-forget variant of closeAsync.
  close() {
    void this.closeAsync();
  }
};
|
|
4319
|
+
|
|
4320
|
+
// src/embeddings/ollama.ts
|
|
4321
|
+
import pLimit2 from "p-limit";
|
|
4322
|
+
// Embeddings provider backed by a local Ollama server.
// Prefers the batch /api/embed endpoint and transparently falls back to
// per-text /api/embeddings calls (bounded by `concurrency`) when the batch
// endpoint is unavailable or returns an unusable body.
var OllamaEmbeddingsProvider = class {
  id;
  // Embedding dimension; learned from the first successful response.
  dimension = null;
  baseUrl;
  model;
  concurrency;
  constructor(opts) {
    this.model = opts.model;
    this.baseUrl = opts.baseUrl ?? "http://localhost:11434";
    this.concurrency = opts.concurrency ?? 4;
    this.id = `ollama:${this.model}`;
  }
  // Attempt the batch endpoint. Returns null (never throws) on network
  // failure, a non-OK status, a non-JSON body, or a response without an
  // embeddings array — any of which triggers the per-text fallback.
  // Fix: previously only the fetch itself was guarded; a 200 response with
  // a malformed body made embed() throw instead of falling back.
  async tryBatchEndpoint(texts) {
    const res = await fetch(`${this.baseUrl}/api/embed`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ model: this.model, input: texts })
    }).catch(() => null);
    if (!res || !res.ok) return null;
    const json = await res.json().catch(() => null);
    const embeddings = json?.embeddings;
    if (!Array.isArray(embeddings)) return null;
    const out = embeddings.map((v) => Float32Array.from(v));
    if (out.length > 0) this.dimension = out[0].length;
    return out;
  }
  // Embed a single text via the legacy per-text endpoint; throws on any
  // non-OK response (the error body is included best-effort).
  async embedOne(text) {
    const res = await fetch(`${this.baseUrl}/api/embeddings`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ model: this.model, prompt: text })
    });
    if (!res.ok) {
      const errText = await res.text().catch(() => "");
      throw new Error(`Ollama embeddings failed: ${res.status} ${res.statusText} ${errText}`);
    }
    const json = await res.json();
    const emb = Float32Array.from(json.embedding);
    this.dimension = emb.length;
    return emb;
  }
  // Embed all texts, preserving input order.
  async embed(texts) {
    const batch = await this.tryBatchEndpoint(texts);
    if (batch) return batch;
    const limit = pLimit2(this.concurrency);
    const out = await Promise.all(texts.map((t) => limit(() => this.embedOne(t))));
    return out;
  }
};
|
|
4371
|
+
|
|
4372
|
+
// src/embeddings/openai.ts
|
|
4373
|
+
// Embeddings provider backed by the OpenAI embeddings REST API
// (or an API-compatible server via opts.baseUrl).
var OpenAIEmbeddingsProvider = class {
  id;
  // Embedding dimension; learned from the first successful response.
  dimension = null;
  constructor(opts) {
    this.opts = opts;
    this.id = `openai:${opts.model}`;
  }
  // Embed all texts in one request, preserving input order.
  // Throws on any non-OK response (the error body is included best-effort).
  async embed(texts) {
    const baseUrl = this.opts.baseUrl ?? "https://api.openai.com";
    const payload = { model: this.opts.model, input: texts };
    const res = await fetch(`${baseUrl}/v1/embeddings`, {
      method: "POST",
      headers: {
        "Authorization": `Bearer ${this.opts.apiKey}`,
        "Content-Type": "application/json"
      },
      body: JSON.stringify(payload)
    });
    if (!res.ok) {
      const errText = await res.text().catch(() => "");
      throw new Error(`OpenAI embeddings failed: ${res.status} ${res.statusText} ${errText}`);
    }
    const json = await res.json();
    const vectors = json.data.map((d) => Float32Array.from(d.embedding));
    if (vectors.length > 0) this.dimension = vectors[0].length;
    return vectors;
  }
};
|
|
4403
|
+
|
|
4404
|
+
// src/embeddings/hash.ts
|
|
4405
|
+
import crypto3 from "crypto";
|
|
4406
|
+
// Deterministic, dependency-free embeddings via the hashing trick:
// each token is SHA-256 hashed into a bucket with a ±1 sign, then the
// vector is L2-normalized. Useful for tests and offline operation.
var HashEmbeddingsProvider = class {
  id;
  dimension;
  constructor(dimension = 384) {
    this.dimension = dimension;
    this.id = `hash:${dimension}`;
  }
  // Embed every text, preserving input order.
  async embed(texts) {
    const vectors = [];
    for (const text of texts) vectors.push(this.embedOne(text));
    return vectors;
  }
  // Embed one text. Tokenizes on non-word characters, caps at 6000 tokens,
  // hashes each token into a bucket (sign taken from hash byte 4), then
  // normalizes; an all-zero vector stays all-zero (norm falls back to 1).
  embedOne(text) {
    const vec = new Float32Array(this.dimension);
    const tokens = text.split(/[^A-Za-z0-9_]+/).filter(Boolean).slice(0, 6e3);
    for (const token of tokens) {
      const digest = crypto3.createHash("sha256").update(token).digest();
      const bucket = digest.readUInt32LE(0) % this.dimension;
      vec[bucket] += digest[4] & 1 ? 1 : -1;
    }
    let squared = 0;
    for (const value of vec) squared += value * value;
    const norm = Math.sqrt(squared) || 1;
    for (let i = 0; i < vec.length; i++) vec[i] /= norm;
    return vec;
  }
};
|
|
4432
|
+
|
|
4433
|
+
// src/config.ts
|
|
4434
|
+
import fs14 from "fs";
|
|
4435
|
+
import path18 from "path";
|
|
4436
|
+
// Load an indexer config from a JSON file. Redaction patterns given as
// strings are compiled to RegExp (flags default to "g"); patterns that are
// already RegExp instances are passed through unchanged. Throws if the
// file is unreadable or not valid JSON.
function loadConfigFile(filePath) {
  const abs = path18.resolve(filePath);
  const json = JSON.parse(fs14.readFileSync(abs, "utf8"));
  const cfg = { ...json };
  const rawPatterns = json.redact?.patterns;
  if (rawPatterns && Array.isArray(rawPatterns)) {
    cfg.redact = cfg.redact ?? {};
    cfg.redact.patterns = rawPatterns.map((p) => {
      const regex = p.regex instanceof RegExp ? p.regex : new RegExp(p.regex, p.flags ?? "g");
      return { name: p.name, regex, replaceWith: p.replaceWith };
    });
  }
  return cfg;
}
|
|
4457
|
+
|
|
4458
|
+
export {
|
|
4459
|
+
IndexerProgressObservable,
|
|
4460
|
+
asProgressSink,
|
|
4461
|
+
languageFromPath,
|
|
4462
|
+
chunkSource,
|
|
4463
|
+
createVectorIndex,
|
|
4464
|
+
stableSymbolId,
|
|
4465
|
+
NoopAnnIndex,
|
|
4466
|
+
createAnnIndex,
|
|
4467
|
+
RepoIndexer,
|
|
4468
|
+
DEFAULT_PROFILES,
|
|
4469
|
+
deepMergeProfile,
|
|
4470
|
+
discoverGitRepos,
|
|
4471
|
+
pickRepoOverride,
|
|
4472
|
+
mergeIndexerConfig,
|
|
4473
|
+
WorkspaceStore,
|
|
4474
|
+
Neo4jGraphStore,
|
|
4475
|
+
createNeo4jGraphStore,
|
|
4476
|
+
NestedRepoLinkStrategy,
|
|
4477
|
+
NpmDependencyLinkStrategy,
|
|
4478
|
+
GoModuleLinkStrategy,
|
|
4479
|
+
VsCodeContributesLanguageLinkStrategy,
|
|
4480
|
+
WorkspaceLinker,
|
|
4481
|
+
linkWorkspaceRepos,
|
|
4482
|
+
WorkspaceIndexer,
|
|
4483
|
+
OllamaEmbeddingsProvider,
|
|
4484
|
+
OpenAIEmbeddingsProvider,
|
|
4485
|
+
HashEmbeddingsProvider,
|
|
4486
|
+
loadConfigFile
|
|
4487
|
+
};
|