@comfanion/usethis_search 3.0.0-dev.8 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.ts +263 -0
- package/file-indexer.ts +1 -1
- package/index.ts +0 -8
- package/package.json +12 -5
- package/tools/codeindex.ts +2 -2
- package/tools/search.ts +254 -66
- package/vectorizer/analyzers/lsp-analyzer.ts +7 -7
- package/vectorizer/analyzers/regex-analyzer.ts +358 -61
- package/vectorizer/chunk-store.ts +207 -0
- package/vectorizer/chunkers/code-chunker.ts +74 -24
- package/vectorizer/chunkers/markdown-chunker.ts +69 -7
- package/vectorizer/graph-builder.ts +207 -15
- package/vectorizer/graph-db.ts +161 -164
- package/vectorizer/hybrid-search.ts +1 -1
- package/vectorizer/{index.js → index.ts} +796 -160
- package/vectorizer.yaml +20 -2
package/vectorizer/graph-db.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import
|
|
2
|
-
import {
|
|
1
|
+
import { Database } from "bun:sqlite"
|
|
2
|
+
import { filePathFromNodeId, isStructuralPredicate } from "./graph-builder"
|
|
3
3
|
|
|
4
4
|
export interface Triple {
|
|
5
5
|
subject: string
|
|
@@ -12,98 +12,137 @@ export interface Triple {
|
|
|
12
12
|
}
|
|
13
13
|
|
|
14
14
|
export class GraphDB {
|
|
15
|
-
private db:
|
|
15
|
+
private db: Database | null = null
|
|
16
16
|
private initialized: boolean = false
|
|
17
17
|
|
|
18
|
+
// Prepared statements (cached for performance)
|
|
19
|
+
private _stmtInsert: any = null
|
|
20
|
+
private _stmtBySubject: any = null
|
|
21
|
+
private _stmtByObject: any = null
|
|
22
|
+
private _stmtByFile: any = null
|
|
23
|
+
private _stmtDeleteByFile: any = null
|
|
24
|
+
private _stmtBySubjectPredicate: any = null
|
|
25
|
+
private _stmtByPredicate: any = null
|
|
26
|
+
private _stmtAll: any = null
|
|
27
|
+
|
|
18
28
|
constructor(private dbPath: string) {}
|
|
19
29
|
|
|
20
30
|
async init(): Promise<this> {
|
|
21
|
-
|
|
22
|
-
this.db
|
|
31
|
+
// bun:sqlite uses a file path; append .db if not already
|
|
32
|
+
const fullPath = this.dbPath.endsWith(".db") ? this.dbPath : this.dbPath + ".db"
|
|
33
|
+
this.db = new Database(fullPath)
|
|
34
|
+
|
|
35
|
+
// WAL mode for concurrent readers
|
|
36
|
+
this.db.exec("PRAGMA journal_mode = WAL")
|
|
37
|
+
this.db.exec("PRAGMA synchronous = NORMAL") // faster writes, safe with WAL
|
|
38
|
+
this.db.exec("PRAGMA cache_size = -2000") // 2MB cache
|
|
39
|
+
|
|
40
|
+
// Create triples table
|
|
41
|
+
this.db.exec(`
|
|
42
|
+
CREATE TABLE IF NOT EXISTS triples (
|
|
43
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
44
|
+
subject TEXT NOT NULL,
|
|
45
|
+
predicate TEXT NOT NULL,
|
|
46
|
+
object TEXT NOT NULL,
|
|
47
|
+
weight REAL NOT NULL DEFAULT 0,
|
|
48
|
+
source TEXT NOT NULL DEFAULT '',
|
|
49
|
+
file TEXT NOT NULL DEFAULT '',
|
|
50
|
+
line INTEGER
|
|
51
|
+
)
|
|
52
|
+
`)
|
|
53
|
+
|
|
54
|
+
// Indexes for fast lookups
|
|
55
|
+
this.db.exec("CREATE INDEX IF NOT EXISTS idx_subject ON triples(subject)")
|
|
56
|
+
this.db.exec("CREATE INDEX IF NOT EXISTS idx_object ON triples(object)")
|
|
57
|
+
this.db.exec("CREATE INDEX IF NOT EXISTS idx_file ON triples(file)")
|
|
58
|
+
this.db.exec("CREATE INDEX IF NOT EXISTS idx_predicate ON triples(predicate)")
|
|
59
|
+
this.db.exec("CREATE INDEX IF NOT EXISTS idx_subject_predicate ON triples(subject, predicate)")
|
|
60
|
+
|
|
61
|
+
// Prepare statements
|
|
62
|
+
this._stmtInsert = this.db.prepare(
|
|
63
|
+
"INSERT INTO triples (subject, predicate, object, weight, source, file, line) VALUES (?, ?, ?, ?, ?, ?, ?)"
|
|
64
|
+
)
|
|
65
|
+
this._stmtBySubject = this.db.prepare("SELECT * FROM triples WHERE subject = ?")
|
|
66
|
+
this._stmtByObject = this.db.prepare("SELECT * FROM triples WHERE object = ?")
|
|
67
|
+
this._stmtByFile = this.db.prepare("SELECT * FROM triples WHERE file = ?")
|
|
68
|
+
this._stmtDeleteByFile = this.db.prepare("DELETE FROM triples WHERE file = ?")
|
|
69
|
+
this._stmtBySubjectPredicate = this.db.prepare("SELECT * FROM triples WHERE subject = ? AND predicate = ?")
|
|
70
|
+
this._stmtByPredicate = this.db.prepare("SELECT * FROM triples WHERE predicate = ?")
|
|
71
|
+
this._stmtAll = this.db.prepare("SELECT * FROM triples")
|
|
72
|
+
|
|
23
73
|
this.initialized = true
|
|
24
74
|
return this
|
|
25
75
|
}
|
|
26
76
|
|
|
77
|
+
private toTriple(row: any): Triple {
|
|
78
|
+
return {
|
|
79
|
+
subject: row.subject,
|
|
80
|
+
predicate: row.predicate,
|
|
81
|
+
object: row.object,
|
|
82
|
+
weight: row.weight,
|
|
83
|
+
source: row.source,
|
|
84
|
+
file: row.file,
|
|
85
|
+
line: row.line ?? undefined,
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
|
|
27
89
|
async putEdges(triples: Triple[]): Promise<void> {
|
|
28
|
-
if (!this.initialized) {
|
|
90
|
+
if (!this.initialized || !this.db) {
|
|
29
91
|
throw new Error("GraphDB not initialized. Call init() first.")
|
|
30
92
|
}
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
93
|
+
|
|
94
|
+
// Batch insert in a single transaction — much faster than individual inserts
|
|
95
|
+
const insertMany = this.db.transaction((items: Triple[]) => {
|
|
96
|
+
for (const t of items) {
|
|
97
|
+
this._stmtInsert.run(t.subject, t.predicate, t.object, t.weight, t.source, t.file, t.line ?? null)
|
|
98
|
+
}
|
|
36
99
|
})
|
|
100
|
+
insertMany(triples)
|
|
37
101
|
}
|
|
38
102
|
|
|
39
103
|
async getOutgoing(chunkId: string): Promise<Triple[]> {
|
|
40
|
-
if (!this.initialized) {
|
|
104
|
+
if (!this.initialized || !this.db) {
|
|
41
105
|
throw new Error("GraphDB not initialized. Call init() first.")
|
|
42
106
|
}
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
if (err) reject(err)
|
|
46
|
-
else resolve(triples || [])
|
|
47
|
-
})
|
|
48
|
-
})
|
|
107
|
+
const rows = this._stmtBySubject.all(chunkId)
|
|
108
|
+
return rows.map((r: any) => this.toTriple(r))
|
|
49
109
|
}
|
|
50
110
|
|
|
51
111
|
async getIncoming(chunkId: string): Promise<Triple[]> {
|
|
52
|
-
if (!this.initialized) {
|
|
112
|
+
if (!this.initialized || !this.db) {
|
|
53
113
|
throw new Error("GraphDB not initialized. Call init() first.")
|
|
54
114
|
}
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
if (err) reject(err)
|
|
58
|
-
else resolve(triples || [])
|
|
59
|
-
})
|
|
60
|
-
})
|
|
115
|
+
const rows = this._stmtByObject.all(chunkId)
|
|
116
|
+
return rows.map((r: any) => this.toTriple(r))
|
|
61
117
|
}
|
|
62
118
|
|
|
63
119
|
async deleteByFile(filePath: string): Promise<void> {
|
|
64
|
-
if (!this.initialized) {
|
|
120
|
+
if (!this.initialized || !this.db) {
|
|
65
121
|
throw new Error("GraphDB not initialized. Call init() first.")
|
|
66
122
|
}
|
|
67
|
-
|
|
68
|
-
this.db.get({}, (err: Error | undefined, triples: Triple[]) => {
|
|
69
|
-
if (err) reject(err)
|
|
70
|
-
else resolve(triples || [])
|
|
71
|
-
})
|
|
72
|
-
})
|
|
73
|
-
|
|
74
|
-
const toDelete = allTriples.filter(t => t.file === filePath)
|
|
75
|
-
|
|
76
|
-
for (const t of toDelete) {
|
|
77
|
-
await new Promise<void>((resolve, reject) => {
|
|
78
|
-
this.db.del(t, (err: Error | undefined) => {
|
|
79
|
-
if (err) reject(err)
|
|
80
|
-
else resolve()
|
|
81
|
-
})
|
|
82
|
-
})
|
|
83
|
-
}
|
|
123
|
+
this._stmtDeleteByFile.run(filePath)
|
|
84
124
|
}
|
|
85
125
|
|
|
86
126
|
async close(): Promise<void> {
|
|
87
127
|
if (this.initialized && this.db) {
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
128
|
+
this.db.close()
|
|
129
|
+
this.db = null
|
|
130
|
+
this._stmtInsert = null
|
|
131
|
+
this._stmtBySubject = null
|
|
132
|
+
this._stmtByObject = null
|
|
133
|
+
this._stmtByFile = null
|
|
134
|
+
this._stmtDeleteByFile = null
|
|
135
|
+
this._stmtBySubjectPredicate = null
|
|
136
|
+
this._stmtByPredicate = null
|
|
137
|
+
this._stmtAll = null
|
|
94
138
|
this.initialized = false
|
|
95
139
|
}
|
|
96
140
|
}
|
|
97
141
|
|
|
98
142
|
// ---- FR-054: File metadata triples for incremental updates -----------------
|
|
99
143
|
|
|
100
|
-
/**
|
|
101
|
-
* Store graph build metadata for a file as a special triple.
|
|
102
|
-
* Subject: `meta:<filePath>`, Predicate: `graph_built`, Object: `<hash>`.
|
|
103
|
-
* Weight encodes the timestamp (seconds since epoch).
|
|
104
|
-
*/
|
|
105
144
|
async setFileMeta(filePath: string, hash: string, timestamp: number): Promise<void> {
|
|
106
|
-
if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
|
|
145
|
+
if (!this.initialized || !this.db) throw new Error("GraphDB not initialized. Call init() first.")
|
|
107
146
|
|
|
108
147
|
// Remove old meta triple for this file first
|
|
109
148
|
await this.deleteFileMeta(filePath)
|
|
@@ -112,159 +151,117 @@ export class GraphDB {
|
|
|
112
151
|
subject: `meta:${filePath}`,
|
|
113
152
|
predicate: "graph_built",
|
|
114
153
|
object: hash,
|
|
115
|
-
weight: Math.floor(timestamp / 1000),
|
|
154
|
+
weight: Math.floor(timestamp / 1000),
|
|
116
155
|
source: "meta",
|
|
117
156
|
file: filePath,
|
|
118
157
|
}
|
|
119
158
|
await this.putEdges([triple])
|
|
120
159
|
}
|
|
121
160
|
|
|
122
|
-
/**
|
|
123
|
-
* Get the stored graph build metadata for a file.
|
|
124
|
-
* Returns { hash, timestamp } or null if not found.
|
|
125
|
-
*/
|
|
126
161
|
async getFileMeta(filePath: string): Promise<{ hash: string; timestamp: number } | null> {
|
|
127
|
-
if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
|
|
128
|
-
|
|
129
|
-
const triples = await new Promise<Triple[]>((resolve, reject) => {
|
|
130
|
-
this.db.get(
|
|
131
|
-
{ subject: `meta:${filePath}`, predicate: "graph_built" },
|
|
132
|
-
(err: Error | undefined, result: Triple[]) => {
|
|
133
|
-
if (err) reject(err)
|
|
134
|
-
else resolve(result || [])
|
|
135
|
-
},
|
|
136
|
-
)
|
|
137
|
-
})
|
|
162
|
+
if (!this.initialized || !this.db) throw new Error("GraphDB not initialized. Call init() first.")
|
|
138
163
|
|
|
139
|
-
|
|
164
|
+
const rows = this._stmtBySubjectPredicate.all(`meta:${filePath}`, "graph_built")
|
|
165
|
+
if (rows.length === 0) return null
|
|
140
166
|
return {
|
|
141
|
-
hash:
|
|
142
|
-
timestamp:
|
|
167
|
+
hash: rows[0].object,
|
|
168
|
+
timestamp: rows[0].weight * 1000,
|
|
143
169
|
}
|
|
144
170
|
}
|
|
145
171
|
|
|
146
|
-
/**
|
|
147
|
-
* Delete file meta triple.
|
|
148
|
-
*/
|
|
149
172
|
async deleteFileMeta(filePath: string): Promise<void> {
|
|
150
|
-
if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
|
|
151
|
-
|
|
152
|
-
const triples = await new Promise<Triple[]>((resolve, reject) => {
|
|
153
|
-
this.db.get(
|
|
154
|
-
{ subject: `meta:${filePath}`, predicate: "graph_built" },
|
|
155
|
-
(err: Error | undefined, result: Triple[]) => {
|
|
156
|
-
if (err) reject(err)
|
|
157
|
-
else resolve(result || [])
|
|
158
|
-
},
|
|
159
|
-
)
|
|
160
|
-
})
|
|
173
|
+
if (!this.initialized || !this.db) throw new Error("GraphDB not initialized. Call init() first.")
|
|
161
174
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
})
|
|
168
|
-
})
|
|
175
|
+
try {
|
|
176
|
+
this.db!.prepare("DELETE FROM triples WHERE subject = ? AND predicate = ?")
|
|
177
|
+
.run(`meta:${filePath}`, "graph_built")
|
|
178
|
+
} catch {
|
|
179
|
+
// Silently ignore errors
|
|
169
180
|
}
|
|
170
181
|
}
|
|
171
182
|
|
|
172
|
-
/**
|
|
173
|
-
* Get all file metadata triples (for validation / stats).
|
|
174
|
-
*/
|
|
175
183
|
async getAllFileMeta(): Promise<Array<{ filePath: string; hash: string; timestamp: number }>> {
|
|
176
|
-
if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
|
|
177
|
-
|
|
178
|
-
const triples = await new Promise<Triple[]>((resolve, reject) => {
|
|
179
|
-
this.db.get({ predicate: "graph_built" }, (err: Error | undefined, result: Triple[]) => {
|
|
180
|
-
if (err) reject(err)
|
|
181
|
-
else resolve(result || [])
|
|
182
|
-
})
|
|
183
|
-
})
|
|
184
|
+
if (!this.initialized || !this.db) throw new Error("GraphDB not initialized. Call init() first.")
|
|
184
185
|
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
186
|
+
const rows = this._stmtByPredicate.all("graph_built")
|
|
187
|
+
return rows.map((r: any) => ({
|
|
188
|
+
filePath: r.subject.replace(/^meta:/, ""),
|
|
189
|
+
hash: r.object,
|
|
190
|
+
timestamp: r.weight * 1000,
|
|
189
191
|
}))
|
|
190
192
|
}
|
|
191
193
|
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
* Excludes meta triples (predicate === "graph_built").
|
|
195
|
-
*/
|
|
196
|
-
async getAllTriples(): Promise<Triple[]> {
|
|
197
|
-
if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
|
|
198
|
-
|
|
199
|
-
const allTriples = await new Promise<Triple[]>((resolve, reject) => {
|
|
200
|
-
this.db.get({}, (err: Error | undefined, triples: Triple[]) => {
|
|
201
|
-
if (err) reject(err)
|
|
202
|
-
else resolve(triples || [])
|
|
203
|
-
})
|
|
204
|
-
})
|
|
194
|
+
async getAllTriples(includeStructural: boolean = false): Promise<Triple[]> {
|
|
195
|
+
if (!this.initialized || !this.db) throw new Error("GraphDB not initialized. Call init() first.")
|
|
205
196
|
|
|
206
|
-
|
|
197
|
+
const allRows = this._stmtAll.all()
|
|
198
|
+
return allRows
|
|
199
|
+
.map((r: any) => this.toTriple(r))
|
|
200
|
+
.filter((t: Triple) => {
|
|
201
|
+
if (t.predicate === "graph_built" || t.predicate === "belongs_to") return false
|
|
202
|
+
if (!includeStructural && isStructuralPredicate(t.predicate)) return false
|
|
203
|
+
return true
|
|
204
|
+
})
|
|
207
205
|
}
|
|
208
206
|
|
|
209
207
|
async getRelatedFiles(chunkId: string, maxDepth: number = 1): Promise<{path: string, relation: string, weight: number}[]> {
|
|
210
|
-
if (!this.initialized) {
|
|
208
|
+
if (!this.initialized || !this.db) {
|
|
211
209
|
throw new Error("GraphDB not initialized. Call init() first.")
|
|
212
210
|
}
|
|
213
211
|
|
|
214
212
|
const relatedFiles: Map<string, {relation: string, weight: number}> = new Map()
|
|
215
213
|
const visited = new Set<string>()
|
|
216
|
-
|
|
214
|
+
const self = this
|
|
215
|
+
|
|
216
|
+
const callerFile = filePathFromNodeId(chunkId)
|
|
217
|
+
|
|
217
218
|
async function traverse(currentId: string, currentDepth: number, currentRelation: string) {
|
|
218
|
-
if (currentDepth
|
|
219
|
+
if (currentDepth >= maxDepth || visited.has(currentId)) {
|
|
219
220
|
return
|
|
220
221
|
}
|
|
221
|
-
|
|
222
|
+
|
|
222
223
|
visited.add(currentId)
|
|
223
|
-
|
|
224
|
+
|
|
224
225
|
try {
|
|
225
|
-
const outgoing =
|
|
226
|
-
|
|
227
|
-
if (err) reject(err)
|
|
228
|
-
else resolve(triples || [])
|
|
229
|
-
})
|
|
230
|
-
})
|
|
231
|
-
|
|
226
|
+
const outgoing = self._stmtBySubject.all(currentId).map((r: any) => self.toTriple(r))
|
|
227
|
+
|
|
232
228
|
for (const triple of outgoing) {
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
const
|
|
229
|
+
if (triple.predicate === "graph_built" || triple.predicate === "belongs_to") continue
|
|
230
|
+
if (isStructuralPredicate(triple.predicate)) continue
|
|
231
|
+
|
|
232
|
+
const targetFile = filePathFromNodeId(triple.object)
|
|
233
|
+
if (!targetFile) continue
|
|
234
|
+
|
|
235
|
+
const existing = relatedFiles.get(targetFile)
|
|
237
236
|
if (existing) {
|
|
238
237
|
existing.weight = Math.max(existing.weight, triple.weight)
|
|
239
238
|
} else {
|
|
240
|
-
relatedFiles.set(
|
|
239
|
+
relatedFiles.set(targetFile, {
|
|
241
240
|
relation: currentRelation || triple.predicate,
|
|
242
241
|
weight: triple.weight
|
|
243
242
|
})
|
|
244
243
|
}
|
|
245
|
-
|
|
246
|
-
// Recurse for imports/extends relations
|
|
244
|
+
|
|
247
245
|
if (triple.predicate === "imports" || triple.predicate === "extends") {
|
|
248
|
-
await traverse(
|
|
246
|
+
await traverse(triple.object, currentDepth + 1, triple.predicate)
|
|
249
247
|
}
|
|
250
248
|
}
|
|
251
|
-
|
|
252
|
-
const incoming =
|
|
253
|
-
|
|
254
|
-
if (err) reject(err)
|
|
255
|
-
else resolve(triples || [])
|
|
256
|
-
})
|
|
257
|
-
})
|
|
258
|
-
|
|
249
|
+
|
|
250
|
+
const incoming = self._stmtByObject.all(currentId).map((r: any) => self.toTriple(r))
|
|
251
|
+
|
|
259
252
|
for (const triple of incoming) {
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
253
|
+
if (triple.predicate === "graph_built" || triple.predicate === "belongs_to") continue
|
|
254
|
+
if (isStructuralPredicate(triple.predicate)) continue
|
|
255
|
+
|
|
256
|
+
const sourceFile = filePathFromNodeId(triple.subject)
|
|
257
|
+
if (!sourceFile) continue
|
|
258
|
+
|
|
259
|
+
const existing = relatedFiles.get(sourceFile)
|
|
263
260
|
if (existing) {
|
|
264
261
|
existing.weight = Math.max(existing.weight, triple.weight)
|
|
265
262
|
} else {
|
|
266
|
-
relatedFiles.set(
|
|
267
|
-
relation:
|
|
263
|
+
relatedFiles.set(sourceFile, {
|
|
264
|
+
relation: "used_by",
|
|
268
265
|
weight: triple.weight
|
|
269
266
|
})
|
|
270
267
|
}
|
|
@@ -273,17 +270,17 @@ export class GraphDB {
|
|
|
273
270
|
console.error(`Error traversing graph for ${currentId}:`, error)
|
|
274
271
|
}
|
|
275
272
|
}
|
|
276
|
-
|
|
273
|
+
|
|
277
274
|
await traverse(chunkId, 0, "")
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
275
|
+
|
|
276
|
+
if (callerFile) relatedFiles.delete(callerFile)
|
|
277
|
+
|
|
278
|
+
return Array.from(relatedFiles.entries())
|
|
279
|
+
.map(([filePath, data]) => ({
|
|
280
|
+
path: filePath,
|
|
282
281
|
relation: data.relation,
|
|
283
282
|
weight: data.weight
|
|
284
283
|
}))
|
|
285
284
|
.sort((a, b) => b.weight - a.weight)
|
|
286
|
-
|
|
287
|
-
return result
|
|
288
285
|
}
|
|
289
286
|
}
|