@comfanion/usethis_search 0.2.0-dev.0 → 3.0.0-dev.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/file-indexer.ts +13 -0
- package/index.ts +7 -1
- package/package.json +12 -3
- package/tools/codeindex.ts +155 -6
- package/tools/read-interceptor.ts +127 -0
- package/tools/search.ts +14 -1
- package/vectorizer/analyzers/lsp-analyzer.ts +293 -0
- package/vectorizer/analyzers/lsp-client.ts +369 -0
- package/vectorizer/analyzers/regex-analyzer.ts +255 -0
- package/vectorizer/graph-builder.ts +198 -0
- package/vectorizer/graph-db.ts +289 -0
- package/vectorizer/index.js +167 -9
- package/vectorizer/usage-tracker.ts +204 -0
- package/vectorizer.yaml +14 -0
package/vectorizer/graph-db.ts
ADDED
@@ -0,0 +1,289 @@
+import levelgraph from "levelgraph"
+import { Level } from "level"
+
+export interface Triple {
+  subject: string
+  predicate: string
+  object: string
+  weight: number
+  source: string
+  file: string
+  line?: number
+}
+
+export class GraphDB {
+  private db: any
+  private initialized: boolean = false
+
+  constructor(private dbPath: string) {}
+
+  async init(): Promise<this> {
+    const levelDb = new Level(this.dbPath)
+    this.db = levelgraph(levelDb)
+    this.initialized = true
+    return this
+  }
+
+  async putEdges(triples: Triple[]): Promise<void> {
+    if (!this.initialized) {
+      throw new Error("GraphDB not initialized. Call init() first.")
+    }
+    await new Promise<void>((resolve, reject) => {
+      this.db.put(triples, (err: Error | undefined) => {
+        if (err) reject(err)
+        else resolve()
+      })
+    })
+  }
+
+  async getOutgoing(chunkId: string): Promise<Triple[]> {
+    if (!this.initialized) {
+      throw new Error("GraphDB not initialized. Call init() first.")
+    }
+    return new Promise<Triple[]>((resolve, reject) => {
+      this.db.get({ subject: chunkId }, (err: Error | undefined, triples: Triple[]) => {
+        if (err) reject(err)
+        else resolve(triples || [])
+      })
+    })
+  }
+
+  async getIncoming(chunkId: string): Promise<Triple[]> {
+    if (!this.initialized) {
+      throw new Error("GraphDB not initialized. Call init() first.")
+    }
+    return new Promise<Triple[]>((resolve, reject) => {
+      this.db.get({ object: chunkId }, (err: Error | undefined, triples: Triple[]) => {
+        if (err) reject(err)
+        else resolve(triples || [])
+      })
+    })
+  }
+
+  async deleteByFile(filePath: string): Promise<void> {
+    if (!this.initialized) {
+      throw new Error("GraphDB not initialized. Call init() first.")
+    }
+    const allTriples = await new Promise<Triple[]>((resolve, reject) => {
+      this.db.get({}, (err: Error | undefined, triples: Triple[]) => {
+        if (err) reject(err)
+        else resolve(triples || [])
+      })
+    })
+
+    const toDelete = allTriples.filter(t => t.file === filePath)
+
+    for (const t of toDelete) {
+      await new Promise<void>((resolve, reject) => {
+        this.db.del(t, (err: Error | undefined) => {
+          if (err) reject(err)
+          else resolve()
+        })
+      })
+    }
+  }
+
+  async close(): Promise<void> {
+    if (this.initialized && this.db) {
+      await new Promise<void>((resolve, reject) => {
+        this.db.close((err: Error | undefined) => {
+          if (err) reject(err)
+          else resolve()
+        })
+      })
+      this.initialized = false
+    }
+  }
+
+  // ---- FR-054: File metadata triples for incremental updates -----------------
+
+  /**
+   * Store graph build metadata for a file as a special triple.
+   * Subject: `meta:<filePath>`, Predicate: `graph_built`, Object: `<hash>`.
+   * Weight encodes the timestamp (seconds since epoch).
+   */
+  async setFileMeta(filePath: string, hash: string, timestamp: number): Promise<void> {
+    if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
+
+    // Remove old meta triple for this file first
+    await this.deleteFileMeta(filePath)
+
+    const triple: Triple = {
+      subject: `meta:${filePath}`,
+      predicate: "graph_built",
+      object: hash,
+      weight: Math.floor(timestamp / 1000), // seconds since epoch fits in weight
+      source: "meta",
+      file: filePath,
+    }
+    await this.putEdges([triple])
+  }
+
+  /**
+   * Get the stored graph build metadata for a file.
+   * Returns { hash, timestamp } or null if not found.
+   */
+  async getFileMeta(filePath: string): Promise<{ hash: string; timestamp: number } | null> {
+    if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
+
+    const triples = await new Promise<Triple[]>((resolve, reject) => {
+      this.db.get(
+        { subject: `meta:${filePath}`, predicate: "graph_built" },
+        (err: Error | undefined, result: Triple[]) => {
+          if (err) reject(err)
+          else resolve(result || [])
+        },
+      )
+    })
+
+    if (triples.length === 0) return null
+    return {
+      hash: triples[0].object,
+      timestamp: triples[0].weight * 1000, // back to ms
+    }
+  }
+
+  /**
+   * Delete file meta triple.
+   */
+  async deleteFileMeta(filePath: string): Promise<void> {
+    if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
+
+    const triples = await new Promise<Triple[]>((resolve, reject) => {
+      this.db.get(
+        { subject: `meta:${filePath}`, predicate: "graph_built" },
+        (err: Error | undefined, result: Triple[]) => {
+          if (err) reject(err)
+          else resolve(result || [])
+        },
+      )
+    })
+
+    for (const t of triples) {
+      await new Promise<void>((resolve, reject) => {
+        this.db.del(t, (err: Error | undefined) => {
+          if (err) reject(err)
+          else resolve()
+        })
+      })
+    }
+  }
+
+  /**
+   * Get all file metadata triples (for validation / stats).
+   */
+  async getAllFileMeta(): Promise<Array<{ filePath: string; hash: string; timestamp: number }>> {
+    if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
+
+    const triples = await new Promise<Triple[]>((resolve, reject) => {
+      this.db.get({ predicate: "graph_built" }, (err: Error | undefined, result: Triple[]) => {
+        if (err) reject(err)
+        else resolve(result || [])
+      })
+    })
+
+    return triples.map((t) => ({
+      filePath: t.subject.replace(/^meta:/, ""),
+      hash: t.object,
+      timestamp: t.weight * 1000,
+    }))
+  }
+
+  /**
+   * Get all triples in the graph (for validation/stats).
+   * Excludes meta triples (predicate === "graph_built").
+   */
+  async getAllTriples(): Promise<Triple[]> {
+    if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
+
+    const allTriples = await new Promise<Triple[]>((resolve, reject) => {
+      this.db.get({}, (err: Error | undefined, triples: Triple[]) => {
+        if (err) reject(err)
+        else resolve(triples || [])
+      })
+    })
+
+    return allTriples.filter(t => t.predicate !== "graph_built")
+  }
+
+  async getRelatedFiles(chunkId: string, maxDepth: number = 1): Promise<{path: string, relation: string, weight: number}[]> {
+    if (!this.initialized) {
+      throw new Error("GraphDB not initialized. Call init() first.")
+    }
+
+    const relatedFiles: Map<string, {relation: string, weight: number}> = new Map()
+    const visited = new Set<string>()
+
+    const traverse = async (currentId: string, currentDepth: number, currentRelation: string) => {
+      if (currentDepth > maxDepth || visited.has(currentId)) {
+        return
+      }
+
+      visited.add(currentId)
+
+      try {
+        const outgoing = await new Promise<Triple[]>((resolve, reject) => {
+          this.db.get({ subject: currentId }, (err: Error | undefined, triples: Triple[]) => {
+            if (err) reject(err)
+            else resolve(triples || [])
+          })
+        })
+
+        for (const triple of outgoing) {
+          const fileId = triple.object
+
+          // Aggregate relations and weights
+          const existing = relatedFiles.get(fileId)
+          if (existing) {
+            existing.weight = Math.max(existing.weight, triple.weight)
+          } else {
+            relatedFiles.set(fileId, {
+              relation: currentRelation || triple.predicate,
+              weight: triple.weight
+            })
+          }
+
+          // Recurse for imports/extends relations
+          if (triple.predicate === "imports" || triple.predicate === "extends") {
+            await traverse(fileId, currentDepth + 1, triple.predicate)
+          }
+        }
+
+        const incoming = await new Promise<Triple[]>((resolve, reject) => {
+          this.db.get({ object: currentId }, (err: Error | undefined, triples: Triple[]) => {
+            if (err) reject(err)
+            else resolve(triples || [])
+          })
+        })
+
+        for (const triple of incoming) {
+          const fileId = triple.subject
+
+          const existing = relatedFiles.get(fileId)
+          if (existing) {
+            existing.weight = Math.max(existing.weight, triple.weight)
+          } else {
+            relatedFiles.set(fileId, {
+              relation: `used_by`,
+              weight: triple.weight
+            })
+          }
+        }
+      } catch (error) {
+        console.error(`Error traversing graph for ${currentId}:`, error)
+      }
+    }
+
+    await traverse(chunkId, 0, "")
+
+    const result = Array.from(relatedFiles.entries())
+      .map(([path, data]) => ({
+        path,
+        relation: data.relation,
+        weight: data.weight
+      }))
+      .sort((a, b) => b.weight - a.weight)
+
+    return result
+  }
+}
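For orientation, a minimal usage sketch of the GraphDB API added above; it is not part of the package, and the database path, chunk-ID format, and triple values are illustrative assumptions:

    import { GraphDB, Triple } from "./graph-db.ts"

    // Hypothetical edge between two chunk IDs; all values are made up.
    const edge: Triple = {
      subject: "src/indexer.ts#chunk-0",
      predicate: "imports",
      object: "src/graph-db.ts#chunk-0",
      weight: 1.0,
      source: "lsp",
      file: "src/indexer.ts",
    }

    const graph = await new GraphDB(".opencode/graph/code_graph").init()
    await graph.putEdges([edge])

    // One-hop traversal from the chunk, strongest relationships first.
    const related = await graph.getRelatedFiles("src/indexer.ts#chunk-0", 1)
    // → [{ path: "src/graph-db.ts#chunk-0", relation: "imports", weight: 1 }]

    await graph.close()

Note that the FR-054 meta triples store Math.floor(timestamp / 1000) in the weight field, so getFileMeta returns timestamps truncated to whole seconds.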
package/vectorizer/index.js
CHANGED
@@ -15,6 +15,9 @@ import { BM25Index } from "./bm25-index.ts";
 import { mergeResults, DEFAULT_HYBRID_CONFIG } from "./hybrid-search.ts";
 import { QueryCache, DEFAULT_CACHE_CONFIG } from "./query-cache.ts";
 import { SearchMetrics } from "./search-metrics.ts";
+import { GraphDB } from "./graph-db.ts";
+import { GraphBuilder } from "./graph-builder.ts";
+import { UsageTracker } from "./usage-tracker.ts";
 
 // Suppress transformers.js logs unless DEBUG is set
 const DEBUG = process.env.DEBUG?.includes("vectorizer") || process.env.DEBUG === "*";
@@ -350,6 +353,10 @@ class CodebaseIndexer {
     this.configLoaded = false;
     this.bm25 = null; // lazy-built BM25 index
     this.metrics = null; // lazy-loaded SearchMetrics
+    this.graphDB = null; // Graph DB for relationships
+    this.graphBuilder = null; // Graph builder orchestrator
+    this._chunkCache = null; // Lazy Map<chunk_id, row> for findChunkById
+    this.usageTracker = null; // Usage tracking & provenance (v3)
   }
 
   async init() {
@@ -360,6 +367,17 @@ class CodebaseIndexer {
     await fs.mkdir(this.cacheDir, { recursive: true });
     this.db = await lancedb.connect(path.join(this.cacheDir, "lancedb"));
     await this.loadHashes();
+
+    const graphType = this.indexName === "docs" ? "doc_graph" : "code_graph";
+    const graphPath = path.join(this.root, ".opencode", "graph", graphType);
+    await fs.mkdir(path.dirname(graphPath), { recursive: true });
+    this.graphDB = await new GraphDB(graphPath).init();
+    this.graphBuilder = new GraphBuilder(this.graphDB, this.root);
+
+    // Usage tracker — provenance & usage stats
+    this.usageTracker = new UsageTracker(this.cacheDir);
+    await this.usageTracker.load();
+
     return this;
   }
 
@@ -388,6 +406,18 @@ class CodebaseIndexer {
     }
     this._bm25Rows = null;
     this.metrics = null;
+    // Close graph DB to release LevelDB lock
+    if (this.graphDB) {
+      try { await this.graphDB.close(); } catch { /* best effort */ }
+      this.graphDB = null;
+      this.graphBuilder = null;
+    }
+    // Save & release usage tracker
+    if (this.usageTracker) {
+      try { await this.usageTracker.save(); } catch { /* best effort */ }
+      this.usageTracker = null;
+    }
+    this._chunkCache = null;
     clearQueryCache();
     if (global.gc) global.gc();
   }
@@ -503,13 +533,28 @@ class CodebaseIndexer {
     // Semantic chunking
     const chunks = chunkContent(cleaned, fileMeta.file_type, fileMeta.language, CHUNKING_CONFIG);
 
+    // v3: Assign chunk IDs for graph tracking
+    const chunksWithIds = this.graphBuilder.assignChunkIds(relPath, chunks);
+
+    // v3: Delete old edges for this file and build new ones
+    await this.graphDB.deleteByFile(relPath);
+    await this.graphBuilder.buildEdges(relPath, content, chunksWithIds, fileMeta.file_type);
+
+    // FR-054: Store graph build timestamp + file hash as metadata triple
+    try {
+      await this.graphDB.setFileMeta(relPath, hash, Date.now());
+    } catch {
+      // non-fatal — metadata is advisory
+    }
+
     const data = [];
-    for (let i = 0; i < chunks.length; i++) {
-      const embedding = await this.embed(chunks[i].content);
+    for (let i = 0; i < chunksWithIds.length; i++) {
+      const embedding = await this.embed(chunksWithIds[i].content);
       data.push({
+        chunk_id: chunksWithIds[i].chunk_id,
         file: relPath,
         chunk_index: i,
-        content: chunks[i].content,
+        content: chunksWithIds[i].content,
         vector: embedding,
         archived: archived,
         // v2 metadata
@@ -517,9 +562,9 @@ class CodebaseIndexer {
         language: fileMeta.language,
         last_modified: fileMeta.last_modified,
         file_size: fileMeta.file_size,
-        heading_context: chunks[i].heading_context || "",
-        function_name: chunks[i].function_name || "",
-        class_name: chunks[i].class_name || "",
+        heading_context: chunksWithIds[i].heading_context || "",
+        function_name: chunksWithIds[i].function_name || "",
+        class_name: chunksWithIds[i].class_name || "",
         tags: (fileMeta.tags || []).join(","),
       });
     }
@@ -720,9 +765,97 @@ class CodebaseIndexer {
       }
     }
 
+    // ── Graph context expansion (v3) ───────────────────────────────────────
+    if (this.graphDB) {
+      for (const result of finalResults) {
+        if (!result.chunk_id) continue;
+
+        const outgoing = await this.graphDB.getOutgoing(result.chunk_id);
+        const incoming = await this.graphDB.getIncoming(result.chunk_id);
+        const allEdges = [...outgoing, ...incoming];
+
+        const neighbors = [];
+        for (const edge of allEdges) {
+          const neighborId = edge.subject === result.chunk_id ? edge.object : edge.subject;
+          const neighborChunk = await this.findChunkById(neighborId);
+          if (!neighborChunk) continue;
+
+          const similarity = this.cosineSimilarity(neighborChunk.vector, queryEmbedding);
+          const score = edge.weight * similarity;
+
+          neighbors.push({
+            chunk_id: neighborId,
+            file: neighborChunk.file,
+            content: neighborChunk.content,
+            relation: edge.predicate,
+            score,
+            via: edge.source
+          });
+        }
+
+        neighbors.sort((a, b) => b.score - a.score);
+        result.relatedContext = neighbors.slice(0, 3);
+
+        // FR-060: Record provenance for each attached chunk
+        if (this.usageTracker) {
+          for (const n of result.relatedContext) {
+            this.usageTracker.recordProvenance(query, result.chunk_id, n.chunk_id, n.relation);
+          }
+        }
+      }
+    }
+
+    // FR-061: Record usage counts for all returned chunks (main + attached)
+    if (this.usageTracker) {
+      const allChunkIds = [];
+      for (const r of finalResults) {
+        if (r.chunk_id) allChunkIds.push(r.chunk_id);
+        if (r.relatedContext) {
+          for (const rc of r.relatedContext) {
+            if (rc.chunk_id) allChunkIds.push(rc.chunk_id);
+          }
+        }
+      }
+      this.usageTracker.recordSearchResults(allChunkIds);
+      // Save asynchronously (non-blocking)
+      this.usageTracker.save().catch(() => {});
+    }
+
     return finalResults;
   }
 
+  async findChunkById(chunkId) {
+    // Lazy-build an in-memory Map keyed by chunk_id on first call.
+    // The cache lives until unloadModel() clears it.
+    if (!this._chunkCache) {
+      const tableName = "chunks";
+      const tables = await this.db.tableNames();
+      if (!tables.includes(tableName)) return null;
+
+      const table = await this.db.openTable(tableName);
+      const rows = await table.search([0]).limit(100000).execute();
+      this._chunkCache = new Map();
+      for (const row of rows) {
+        if (row.chunk_id) {
+          this._chunkCache.set(row.chunk_id, row);
+        }
+      }
+    }
+    return this._chunkCache.get(chunkId) || null;
+  }
+
+  cosineSimilarity(vecA, vecB) {
+    let dotProduct = 0;
+    let normA = 0;
+    let normB = 0;
+    for (let i = 0; i < vecA.length; i++) {
+      dotProduct += vecA[i] * vecB[i];
+      normA += vecA[i] * vecA[i];
+      normB += vecB[i] * vecB[i];
+    }
+    return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
+  }
+
   async checkHealth(extraIgnore = []) {
     const { glob } = await import("glob");
     const preset = INDEX_PRESETS[this.indexName] || DEFAULT_PRESETS.code;
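The graph context expansion above scores each neighbor as edge weight times the cosine similarity between the neighbor's stored vector and the query embedding. A self-contained sketch of that scoring with made-up vectors and weight:

    // Mirrors the cosineSimilarity helper added above; all inputs are illustrative.
    function cosineSimilarity(vecA: number[], vecB: number[]): number {
      let dot = 0, normA = 0, normB = 0
      for (let i = 0; i < vecA.length; i++) {
        dot += vecA[i] * vecB[i]
        normA += vecA[i] * vecA[i]
        normB += vecB[i] * vecB[i]
      }
      return dot / (Math.sqrt(normA) * Math.sqrt(normB))
    }

    const queryEmbedding = [0.6, 0.8, 0.0]   // unit-length toy vectors
    const neighborVector = [0.8, 0.6, 0.0]
    const edgeWeight = 0.9                   // e.g. an "imports" edge

    const similarity = cosineSimilarity(neighborVector, queryEmbedding) // 0.96
    const score = edgeWeight * similarity                               // 0.864

The top three neighbors by this score are attached to each search result as relatedContext.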
@@ -806,14 +939,17 @@ class CodebaseIndexer {
 
     let indexed = 0;
     let skipped = 0;
+    const total = files.length;
 
-    for (const relPath of files) {
+    for (let i = 0; i < files.length; i++) {
+      const relPath = files[i];
       const filePath = path.join(this.root, relPath);
       try {
         const wasIndexed = await this.indexFile(filePath);
         if (wasIndexed) {
           indexed++;
-
+          // FR-053: progress indicator includes graph building phase
+          if (onProgress) onProgress(indexed, total, relPath, i + 1);
         } else {
           skipped++;
         }
@@ -822,7 +958,29 @@ class CodebaseIndexer {
       }
     }
 
-
+    // FR-005: Build semantic similarity edges as post-pass
+    // Only if we actually indexed new files and have a graph builder
+    let semanticEdges = 0;
+    if (indexed > 0 && this.graphBuilder && this.graphDB) {
+      try {
+        const tableName = "chunks";
+        const tables = await this.db.tableNames();
+        if (tables.includes(tableName)) {
+          const table = await this.db.openTable(tableName);
+          const allRows = await table.search([0]).limit(100000).execute();
+          const chunkData = allRows
+            .filter(r => r.chunk_id && r.vector)
+            .map(r => ({ chunk_id: r.chunk_id, vector: Array.from(r.vector), file: r.file }));
+          semanticEdges = await this.graphBuilder.buildSemanticEdges(chunkData, 0.8, 3);
+          if (DEBUG) console.log(`[vectorizer] Built ${semanticEdges} semantic similarity edges`);
+        }
+      } catch (e) {
+        if (DEBUG) console.log(`[vectorizer] Semantic edge building failed:`, e.message);
+        // non-fatal — explicit edges still work
+      }
+    }
+
+    return { indexed, skipped, total, semanticEdges };
   }
 
   async indexSingleFile(filePath) {
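The indexing loop above reports progress through an onProgress callback and now resolves to { indexed, skipped, total, semanticEdges }. A sketch of a compatible callback follows; the parameter names are inferred from the call site onProgress(indexed, total, relPath, i + 1), and the surrounding caller is not part of this diff:

    // Assumed signature, matching the call `onProgress(indexed, total, relPath, i + 1)` above.
    const onProgress = (indexed: number, total: number, relPath: string, processed: number): void => {
      const pct = total > 0 ? Math.round((processed / total) * 100) : 100
      console.log(`[${pct}%] indexed ${indexed}/${total}: ${relPath}`)
    }

    // The method's resolved value can then be summarized, e.g.:
    //   const { indexed, skipped, total, semanticEdges } = await indexer.indexAll(onProgress)
    // where `indexAll` is a placeholder name; the enclosing method is not named in this hunk.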
|