@comfanion/usethis_search 0.2.0-dev.0 → 3.0.0-dev.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,289 @@
1
+ import levelgraph from "levelgraph"
2
+ import { Level } from "level"
3
+
4
/**
 * One weighted, labelled edge stored in the graph.
 *
 * Regular triples connect two node ids. Metadata triples (written by
 * `GraphDB.setFileMeta`) reuse the same shape with a `meta:<filePath>` subject,
 * the `graph_built` predicate, and the file hash as the object.
 */
export interface Triple {
  /** Node the edge starts from; metadata triples use `meta:<filePath>`. */
  subject: string
  /** Edge label, e.g. "imports", "extends", or "graph_built" for metadata triples. */
  predicate: string
  /** Node the edge points to; for metadata triples this is the file hash. */
  object: string
  /** Edge weight; metadata triples store the build time in whole seconds here. */
  weight: number
  /** Tag describing what produced the triple ("meta" for metadata triples). */
  source: string
  /** File the triple belongs to; `deleteByFile` matches on this field. */
  file: string
  /** Optional line number associated with the edge. */
  line?: number
}

/**
 * Thin promise-based wrapper around a levelgraph store.
 *
 * Every public method except `close()` rejects with
 * "GraphDB not initialized. Call init() first." until `init()` has completed.
 */
export class GraphDB {
  // levelgraph handle; typed loosely because only its callback-style
  // get/put/del/close methods are used here.
  private db: any
  private initialized: boolean = false

  constructor(private dbPath: string) {}

  /**
   * Open the underlying Level store at `dbPath` and wrap it with levelgraph.
   * Calling it again while already initialized is a no-op, so a second handle
   * is never opened on the same path.
   *
   * @returns this instance, to allow `await new GraphDB(path).init()`.
   */
  async init(): Promise<this> {
    if (this.initialized) return this
    const levelDb = new Level(this.dbPath)
    this.db = levelgraph(levelDb)
    this.initialized = true
    return this
  }

  /** Throw the shared "not initialized" error unless `init()` has run. */
  private ensureInitialized(): void {
    if (!this.initialized) {
      throw new Error("GraphDB not initialized. Call init() first.")
    }
  }

  /** Promise wrapper around `db.get`; resolves to `[]` when nothing is returned. */
  private find(pattern: Partial<Triple>): Promise<Triple[]> {
    return new Promise<Triple[]>((resolve, reject) => {
      this.db.get(pattern, (err: Error | null | undefined, triples?: Triple[]) => {
        if (err) reject(err)
        else resolve(triples || [])
      })
    })
  }

  /** Promise wrapper around `db.del` for a single triple. */
  private removeTriple(triple: Triple): Promise<void> {
    return new Promise<void>((resolve, reject) => {
      this.db.del(triple, (err: Error | null | undefined) => {
        if (err) reject(err)
        else resolve()
      })
    })
  }

  /**
   * Store a batch of triples.
   *
   * @param triples edges to write; passed to levelgraph in a single `put` call.
   * @throws Error if the database has not been initialized.
   */
  async putEdges(triples: Triple[]): Promise<void> {
    this.ensureInitialized()
    await new Promise<void>((resolve, reject) => {
      this.db.put(triples, (err: Error | null | undefined) => {
        if (err) reject(err)
        else resolve()
      })
    })
  }

  /**
   * Get every triple whose subject is `chunkId`.
   *
   * @throws Error if the database has not been initialized.
   */
  async getOutgoing(chunkId: string): Promise<Triple[]> {
    this.ensureInitialized()
    return this.find({ subject: chunkId })
  }

  /**
   * Get every triple whose object is `chunkId`.
   *
   * @throws Error if the database has not been initialized.
   */
  async getIncoming(chunkId: string): Promise<Triple[]> {
    this.ensureInitialized()
    return this.find({ object: chunkId })
  }

  /**
   * Delete every triple whose `file` field equals `filePath`.
   *
   * This scans the whole store and filters in memory. Metadata triples written
   * by `setFileMeta` carry the same `file` value, so they are removed as well.
   *
   * @throws Error if the database has not been initialized.
   */
  async deleteByFile(filePath: string): Promise<void> {
    this.ensureInitialized()
    const allTriples = await this.find({})
    for (const triple of allTriples) {
      if (triple.file === filePath) {
        await this.removeTriple(triple)
      }
    }
  }

  /**
   * Close the underlying store and mark this instance as uninitialized.
   * Does nothing if the database was never opened.
   */
  async close(): Promise<void> {
    if (this.initialized && this.db) {
      await new Promise<void>((resolve, reject) => {
        this.db.close((err: Error | null | undefined) => {
          if (err) reject(err)
          else resolve()
        })
      })
      this.initialized = false
    }
  }

  // ---- FR-054: File metadata triples for incremental updates -----------------

  /**
   * Store graph build metadata for a file as a special triple.
   * Subject: `meta:<filePath>`, Predicate: `graph_built`, Object: `<hash>`.
   * Weight encodes the timestamp (seconds since epoch), so sub-second
   * precision of `timestamp` is not preserved.
   *
   * @param filePath file the metadata belongs to.
   * @param hash content hash recorded for the file.
   * @param timestamp build time in milliseconds since epoch.
   * @throws Error if the database has not been initialized.
   */
  async setFileMeta(filePath: string, hash: string, timestamp: number): Promise<void> {
    this.ensureInitialized()

    // Remove old meta triple for this file first
    await this.deleteFileMeta(filePath)

    const triple: Triple = {
      subject: `meta:${filePath}`,
      predicate: "graph_built",
      object: hash,
      weight: Math.floor(timestamp / 1000), // seconds since epoch fits in weight
      source: "meta",
      file: filePath,
    }
    await this.putEdges([triple])
  }

  /**
   * Get the stored graph build metadata for a file.
   *
   * @returns `{ hash, timestamp }` (timestamp in milliseconds, rounded down to
   *          a whole second) or null if no metadata triple exists.
   * @throws Error if the database has not been initialized.
   */
  async getFileMeta(filePath: string): Promise<{ hash: string; timestamp: number } | null> {
    this.ensureInitialized()

    const triples = await this.find({ subject: `meta:${filePath}`, predicate: "graph_built" })
    if (triples.length === 0) return null

    const [first] = triples
    return {
      hash: first.object,
      timestamp: first.weight * 1000, // back to ms
    }
  }

  /**
   * Delete the metadata triple(s) stored for a file.
   *
   * @throws Error if the database has not been initialized.
   */
  async deleteFileMeta(filePath: string): Promise<void> {
    this.ensureInitialized()

    const triples = await this.find({ subject: `meta:${filePath}`, predicate: "graph_built" })
    for (const triple of triples) {
      await this.removeTriple(triple)
    }
  }

  /**
   * Get all file metadata triples (for validation / stats).
   *
   * @returns one entry per `graph_built` triple, with the `meta:` prefix
   *          stripped from the subject and the timestamp converted to ms.
   * @throws Error if the database has not been initialized.
   */
  async getAllFileMeta(): Promise<Array<{ filePath: string; hash: string; timestamp: number }>> {
    this.ensureInitialized()

    const triples = await this.find({ predicate: "graph_built" })
    return triples.map((t) => ({
      filePath: t.subject.replace(/^meta:/, ""),
      hash: t.object,
      timestamp: t.weight * 1000,
    }))
  }

  /**
   * Get all triples in the graph (for validation/stats).
   * Excludes meta triples (predicate === "graph_built").
   *
   * @throws Error if the database has not been initialized.
   */
  async getAllTriples(): Promise<Triple[]> {
    this.ensureInitialized()

    const allTriples = await this.find({})
    return allTriples.filter((t) => t.predicate !== "graph_built")
  }

  /**
   * Collect the nodes connected to `chunkId`, strongest edge first.
   *
   * Outgoing edges are recorded under their predicate (or, below the first
   * level, under the predicate that led there) and followed recursively only
   * through "imports" / "extends" edges. Incoming edges are recorded as
   * "used_by" and are not followed. A node reached more than once keeps its
   * first relation and the largest weight seen. The start node itself is never
   * reported.
   *
   * Lookup failures for a node are logged with `console.error` and that node
   * is skipped; they do not reject the returned promise.
   *
   * @param chunkId node to start from.
   * @param maxDepth deepest recursion level that is still expanded; the start
   *                 node is level 0.
   * @returns related node ids as `path`, sorted by descending weight.
   * @throws Error if the database has not been initialized.
   */
  async getRelatedFiles(chunkId: string, maxDepth: number = 1): Promise<{path: string, relation: string, weight: number}[]> {
    this.ensureInitialized()

    const relatedFiles = new Map<string, { relation: string; weight: number }>()
    const visited = new Set<string>()

    // Merge one neighbour into the result map, keeping the strongest weight.
    const record = (nodeId: string, relation: string, weight: number): void => {
      // Without this, the start node comes back as "used_by" when a recursed
      // neighbour's incoming edges are scanned.
      if (nodeId === chunkId) return

      const existing = relatedFiles.get(nodeId)
      if (existing) {
        existing.weight = Math.max(existing.weight, weight)
      } else {
        relatedFiles.set(nodeId, { relation, weight })
      }
    }

    // Arrow function on purpose: a plain `function` has no `this` here, so the
    // database lookups would throw and be swallowed by the catch below.
    const traverse = async (currentId: string, currentDepth: number, currentRelation: string): Promise<void> => {
      if (currentDepth > maxDepth || visited.has(currentId)) {
        return
      }

      visited.add(currentId)

      try {
        const outgoing = await this.find({ subject: currentId })
        for (const triple of outgoing) {
          record(triple.object, currentRelation || triple.predicate, triple.weight)

          // Recurse for imports/extends relations
          if (triple.predicate === "imports" || triple.predicate === "extends") {
            await traverse(triple.object, currentDepth + 1, triple.predicate)
          }
        }

        const incoming = await this.find({ object: currentId })
        for (const triple of incoming) {
          record(triple.subject, `used_by`, triple.weight)
        }
      } catch (error) {
        console.error(`Error traversing graph for ${currentId}:`, error)
      }
    }

    await traverse(chunkId, 0, "")

    return Array.from(relatedFiles.entries())
      .map(([path, data]) => ({
        path,
        relation: data.relation,
        weight: data.weight,
      }))
      .sort((a, b) => b.weight - a.weight)
  }
}
@@ -15,6 +15,9 @@ import { BM25Index } from "./bm25-index.ts";
15
15
  import { mergeResults, DEFAULT_HYBRID_CONFIG } from "./hybrid-search.ts";
16
16
  import { QueryCache, DEFAULT_CACHE_CONFIG } from "./query-cache.ts";
17
17
  import { SearchMetrics } from "./search-metrics.ts";
18
+ import { GraphDB } from "./graph-db.ts";
19
+ import { GraphBuilder } from "./graph-builder.ts";
20
+ import { UsageTracker } from "./usage-tracker.ts";
18
21
 
19
22
  // Suppress transformers.js logs unless DEBUG is set
20
23
  const DEBUG = process.env.DEBUG?.includes("vectorizer") || process.env.DEBUG === "*";
@@ -350,6 +353,10 @@ class CodebaseIndexer {
350
353
  this.configLoaded = false;
351
354
  this.bm25 = null; // lazy-built BM25 index
352
355
  this.metrics = null; // lazy-loaded SearchMetrics
356
+ this.graphDB = null; // Graph DB for relationships
357
+ this.graphBuilder = null; // Graph builder orchestrator
358
+ this._chunkCache = null; // Lazy Map<chunk_id, row> for findChunkById
359
+ this.usageTracker = null; // Usage tracking & provenance (v3)
353
360
  }
354
361
 
355
362
  async init() {
@@ -360,6 +367,17 @@ class CodebaseIndexer {
360
367
  await fs.mkdir(this.cacheDir, { recursive: true });
361
368
  this.db = await lancedb.connect(path.join(this.cacheDir, "lancedb"));
362
369
  await this.loadHashes();
370
+
371
+ const graphType = this.indexName === "docs" ? "doc_graph" : "code_graph";
372
+ const graphPath = path.join(this.root, ".opencode", "graph", graphType);
373
+ await fs.mkdir(path.dirname(graphPath), { recursive: true });
374
+ this.graphDB = await new GraphDB(graphPath).init();
375
+ this.graphBuilder = new GraphBuilder(this.graphDB, this.root);
376
+
377
+ // Usage tracker — provenance & usage stats
378
+ this.usageTracker = new UsageTracker(this.cacheDir);
379
+ await this.usageTracker.load();
380
+
363
381
  return this;
364
382
  }
365
383
 
@@ -388,6 +406,18 @@ class CodebaseIndexer {
388
406
  }
389
407
  this._bm25Rows = null;
390
408
  this.metrics = null;
409
+ // Close graph DB to release LevelDB lock
410
+ if (this.graphDB) {
411
+ try { await this.graphDB.close(); } catch { /* best effort */ }
412
+ this.graphDB = null;
413
+ this.graphBuilder = null;
414
+ }
415
+ // Save & release usage tracker
416
+ if (this.usageTracker) {
417
+ try { await this.usageTracker.save(); } catch { /* best effort */ }
418
+ this.usageTracker = null;
419
+ }
420
+ this._chunkCache = null;
391
421
  clearQueryCache();
392
422
  if (global.gc) global.gc();
393
423
  }
@@ -503,13 +533,28 @@ class CodebaseIndexer {
503
533
  // Semantic chunking
504
534
  const chunks = chunkContent(cleaned, fileMeta.file_type, fileMeta.language, CHUNKING_CONFIG);
505
535
 
536
+ // v3: Assign chunk IDs for graph tracking
537
+ const chunksWithIds = this.graphBuilder.assignChunkIds(relPath, chunks);
538
+
539
+ // v3: Delete old edges for this file and build new ones
540
+ await this.graphDB.deleteByFile(relPath);
541
+ await this.graphBuilder.buildEdges(relPath, content, chunksWithIds, fileMeta.file_type);
542
+
543
+ // FR-054: Store graph build timestamp + file hash as metadata triple
544
+ try {
545
+ await this.graphDB.setFileMeta(relPath, hash, Date.now());
546
+ } catch {
547
+ // non-fatal — metadata is advisory
548
+ }
549
+
506
550
  const data = [];
507
- for (let i = 0; i < chunks.length; i++) {
508
- const embedding = await this.embed(chunks[i].content);
551
+ for (let i = 0; i < chunksWithIds.length; i++) {
552
+ const embedding = await this.embed(chunksWithIds[i].content);
509
553
  data.push({
554
+ chunk_id: chunksWithIds[i].chunk_id,
510
555
  file: relPath,
511
556
  chunk_index: i,
512
- content: chunks[i].content,
557
+ content: chunksWithIds[i].content,
513
558
  vector: embedding,
514
559
  archived: archived,
515
560
  // v2 metadata
@@ -517,9 +562,9 @@ class CodebaseIndexer {
517
562
  language: fileMeta.language,
518
563
  last_modified: fileMeta.last_modified,
519
564
  file_size: fileMeta.file_size,
520
- heading_context: chunks[i].heading_context || "",
521
- function_name: chunks[i].function_name || "",
522
- class_name: chunks[i].class_name || "",
565
+ heading_context: chunksWithIds[i].heading_context || "",
566
+ function_name: chunksWithIds[i].function_name || "",
567
+ class_name: chunksWithIds[i].class_name || "",
523
568
  tags: (fileMeta.tags || []).join(","),
524
569
  });
525
570
  }
@@ -720,9 +765,97 @@ class CodebaseIndexer {
720
765
  }
721
766
  }
722
767
 
768
+ // ── Graph context expansion (v3) ───────────────────────────────────────
769
+ if (this.graphDB) {
770
+ for (const result of finalResults) {
771
+ if (!result.chunk_id) continue;
772
+
773
+ const outgoing = await this.graphDB.getOutgoing(result.chunk_id);
774
+ const incoming = await this.graphDB.getIncoming(result.chunk_id);
775
+ const allEdges = [...outgoing, ...incoming];
776
+
777
+ const neighbors = [];
778
+ for (const edge of allEdges) {
779
+ const neighborId = edge.subject === result.chunk_id ? edge.object : edge.subject;
780
+ const neighborChunk = await this.findChunkById(neighborId);
781
+ if (!neighborChunk) continue;
782
+
783
+ const similarity = this.cosineSimilarity(neighborChunk.vector, queryEmbedding);
784
+ const score = edge.weight * similarity;
785
+
786
+ neighbors.push({
787
+ chunk_id: neighborId,
788
+ file: neighborChunk.file,
789
+ content: neighborChunk.content,
790
+ relation: edge.predicate,
791
+ score,
792
+ via: edge.source
793
+ });
794
+ }
795
+
796
+ neighbors.sort((a, b) => b.score - a.score);
797
+ result.relatedContext = neighbors.slice(0, 3);
798
+
799
+ // FR-060: Record provenance for each attached chunk
800
+ if (this.usageTracker) {
801
+ for (const n of result.relatedContext) {
802
+ this.usageTracker.recordProvenance(query, result.chunk_id, n.chunk_id, n.relation);
803
+ }
804
+ }
805
+ }
806
+ }
807
+
808
+ // FR-061: Record usage counts for all returned chunks (main + attached)
809
+ if (this.usageTracker) {
810
+ const allChunkIds = [];
811
+ for (const r of finalResults) {
812
+ if (r.chunk_id) allChunkIds.push(r.chunk_id);
813
+ if (r.relatedContext) {
814
+ for (const rc of r.relatedContext) {
815
+ if (rc.chunk_id) allChunkIds.push(rc.chunk_id);
816
+ }
817
+ }
818
+ }
819
+ this.usageTracker.recordSearchResults(allChunkIds);
820
+ // Save asynchronously (non-blocking)
821
+ this.usageTracker.save().catch(() => {});
822
+ }
823
+
723
824
  return finalResults;
724
825
  }
725
826
 
827
+ async findChunkById(chunkId) {
828
+ // Lazy-build an in-memory Map keyed by chunk_id on first call.
829
+ // The cache lives until unloadModel() clears it.
830
+ if (!this._chunkCache) {
831
+ const tableName = "chunks";
832
+ const tables = await this.db.tableNames();
833
+ if (!tables.includes(tableName)) return null;
834
+
835
+ const table = await this.db.openTable(tableName);
836
+ const rows = await table.search([0]).limit(100000).execute();
837
+ this._chunkCache = new Map();
838
+ for (const row of rows) {
839
+ if (row.chunk_id) {
840
+ this._chunkCache.set(row.chunk_id, row);
841
+ }
842
+ }
843
+ }
844
+ return this._chunkCache.get(chunkId) || null;
845
+ }
846
+
847
+ cosineSimilarity(vecA, vecB) {
848
+ let dotProduct = 0;
849
+ let normA = 0;
850
+ let normB = 0;
851
+ for (let i = 0; i < vecA.length; i++) {
852
+ dotProduct += vecA[i] * vecB[i];
853
+ normA += vecA[i] * vecA[i];
854
+ normB += vecB[i] * vecB[i];
855
+ }
856
+ return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
857
+ }
858
+
726
859
  async checkHealth(extraIgnore = []) {
727
860
  const { glob } = await import("glob");
728
861
  const preset = INDEX_PRESETS[this.indexName] || DEFAULT_PRESETS.code;
@@ -806,14 +939,17 @@ class CodebaseIndexer {
806
939
 
807
940
  let indexed = 0;
808
941
  let skipped = 0;
942
+ const total = files.length;
809
943
 
810
- for (const relPath of files) {
944
+ for (let i = 0; i < files.length; i++) {
945
+ const relPath = files[i];
811
946
  const filePath = path.join(this.root, relPath);
812
947
  try {
813
948
  const wasIndexed = await this.indexFile(filePath);
814
949
  if (wasIndexed) {
815
950
  indexed++;
816
- if (onProgress) onProgress(indexed, files.length, relPath);
951
+ // FR-053: progress indicator includes graph building phase
952
+ if (onProgress) onProgress(indexed, total, relPath, i + 1);
817
953
  } else {
818
954
  skipped++;
819
955
  }
@@ -822,7 +958,29 @@ class CodebaseIndexer {
822
958
  }
823
959
  }
824
960
 
825
- return { indexed, skipped, total: files.length };
961
+ // FR-005: Build semantic similarity edges as post-pass
962
+ // Only if we actually indexed new files and have a graph builder
963
+ let semanticEdges = 0;
964
+ if (indexed > 0 && this.graphBuilder && this.graphDB) {
965
+ try {
966
+ const tableName = "chunks";
967
+ const tables = await this.db.tableNames();
968
+ if (tables.includes(tableName)) {
969
+ const table = await this.db.openTable(tableName);
970
+ const allRows = await table.search([0]).limit(100000).execute();
971
+ const chunkData = allRows
972
+ .filter(r => r.chunk_id && r.vector)
973
+ .map(r => ({ chunk_id: r.chunk_id, vector: Array.from(r.vector), file: r.file }));
974
+ semanticEdges = await this.graphBuilder.buildSemanticEdges(chunkData, 0.8, 3);
975
+ if (DEBUG) console.log(`[vectorizer] Built ${semanticEdges} semantic similarity edges`);
976
+ }
977
+ } catch (e) {
978
+ if (DEBUG) console.log(`[vectorizer] Semantic edge building failed:`, e.message);
979
+ // non-fatal — explicit edges still work
980
+ }
981
+ }
982
+
983
+ return { indexed, skipped, total, semanticEdges };
826
984
  }
827
985
 
828
986
  async indexSingleFile(filePath) {