@comfanion/usethis_search 3.0.0-dev.0 → 3.0.0-dev.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/api.ts +92 -0
- package/file-indexer.ts +14 -1
- package/index.ts +20 -6
- package/package.json +5 -3
- package/tools/codeindex.ts +173 -7
- package/tools/search.ts +1 -1
- package/vectorizer/analyzers/lsp-analyzer.ts +225 -94
- package/vectorizer/analyzers/lsp-client.ts +369 -0
- package/vectorizer/graph-builder.ts +106 -3
- package/vectorizer/graph-db.ts +192 -0
- package/vectorizer/{index.js → index.ts} +114 -11
- package/vectorizer/usage-tracker.ts +204 -0
- package/tools/read-interceptor.ts +0 -54
|
@@ -0,0 +1,369 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lightweight LSP client over JSON-RPC / stdio.
|
|
3
|
+
*
|
|
4
|
+
* Spawns a language server as a child process, speaks the Language Server
|
|
5
|
+
* Protocol, and exposes high-level helpers used by LSPAnalyzer.
|
|
6
|
+
*
|
|
7
|
+
* Zero external dependencies — implements JSON-RPC framing inline.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { spawn, ChildProcess } from "child_process"
|
|
11
|
+
import path from "path"
|
|
12
|
+
import fs from "fs"
|
|
13
|
+
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
// LSP types (minimal subset we need)
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
|
|
18
|
+
export interface LSPPosition {
  // Zero-based line index (LSP convention).
  line: number
  // Zero-based character offset within the line.
  character: number
}

export interface LSPRange {
  start: LSPPosition
  end: LSPPosition
}

// A range inside a document, identified by its file:// URI.
export interface LSPLocation {
  uri: string
  range: LSPRange
}

// Minimal subset of the LSP DocumentSymbol shape returned by
// textDocument/documentSymbol (hierarchical mode).
export interface LSPSymbolInformation {
  name: string
  // Numeric SymbolKind code as defined by the LSP specification.
  kind: number
  // Full extent of the symbol, including its body.
  range: LSPRange
  // Range covering just the symbol's identifier.
  selectionRange: LSPRange
  // Nested symbols, present when the server supports hierarchical symbols.
  children?: LSPSymbolInformation[]
}
|
|
40
|
+
|
|
41
|
+
// SymbolKind constants we care about.
// Numeric codes come from the LSP specification's SymbolKind enum;
// only the kinds this indexer inspects are listed here.
export const SymbolKind = {
  File: 1,
  Module: 2,
  Namespace: 3,
  Package: 4,
  Class: 5,
  Method: 6,
  Property: 7,
  Field: 8,
  Constructor: 9,
  Enum: 10,
  Interface: 11,
  Function: 12,
  Variable: 13,
  Constant: 14,
  TypeParameter: 26,
} as const
|
|
59
|
+
|
|
60
|
+
// ---------------------------------------------------------------------------
|
|
61
|
+
// Server binary resolution
|
|
62
|
+
// ---------------------------------------------------------------------------
|
|
63
|
+
|
|
64
|
+
// How to launch one language server: binary name, CLI arguments, and the
// LSP language ids it serves.
interface ServerConfig {
  command: string
  args: string[]
  languages: string[]
}

// Known language servers, keyed by server id.
// Each `command` must be discoverable on PATH (see findBinary below).
const SERVER_CONFIGS: Record<string, ServerConfig> = {
  typescript: {
    command: "typescript-language-server",
    args: ["--stdio"],
    languages: ["typescript", "javascript", "typescriptreact", "javascriptreact"],
  },
  python: {
    command: "pylsp",
    args: [],
    languages: ["python"],
  },
  go: {
    command: "gopls",
    args: ["serve"],
    languages: ["go"],
  },
  rust: {
    command: "rust-analyzer",
    args: [],
    languages: ["rust"],
  },
}
|
|
92
|
+
|
|
93
|
+
function languageToServerId(language: string): string | null {
|
|
94
|
+
for (const [id, cfg] of Object.entries(SERVER_CONFIGS)) {
|
|
95
|
+
if (cfg.languages.includes(language)) return id
|
|
96
|
+
}
|
|
97
|
+
return null
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
function findBinary(name: string): string | null {
|
|
101
|
+
// 1. Check PATH via which-style lookup
|
|
102
|
+
const pathEnv = process.env.PATH || ""
|
|
103
|
+
const dirs = pathEnv.split(path.delimiter)
|
|
104
|
+
for (const dir of dirs) {
|
|
105
|
+
const full = path.join(dir, name)
|
|
106
|
+
try {
|
|
107
|
+
fs.accessSync(full, fs.constants.X_OK)
|
|
108
|
+
return full
|
|
109
|
+
} catch { /* not found */ }
|
|
110
|
+
}
|
|
111
|
+
return null
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
// ---------------------------------------------------------------------------
|
|
115
|
+
// JSON-RPC framing
|
|
116
|
+
// ---------------------------------------------------------------------------
|
|
117
|
+
|
|
118
|
+
function encodeMessage(body: object): Buffer {
|
|
119
|
+
const json = JSON.stringify(body)
|
|
120
|
+
const content = Buffer.from(json, "utf-8")
|
|
121
|
+
const header = `Content-Length: ${content.byteLength}\r\n\r\n`
|
|
122
|
+
return Buffer.concat([Buffer.from(header, "ascii"), content])
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
// ---------------------------------------------------------------------------
|
|
126
|
+
// LSPClient
|
|
127
|
+
// ---------------------------------------------------------------------------
|
|
128
|
+
|
|
129
|
+
export class LSPClient {
|
|
130
|
+
private proc: ChildProcess | null = null
|
|
131
|
+
private requestId = 0
|
|
132
|
+
private pending = new Map<number, { resolve: (v: any) => void; reject: (e: Error) => void }>()
|
|
133
|
+
private buffer = Buffer.alloc(0)
|
|
134
|
+
private initialized = false
|
|
135
|
+
private serverConfig: ServerConfig | null = null
|
|
136
|
+
private rootUri: string
|
|
137
|
+
|
|
138
|
+
constructor(
|
|
139
|
+
private projectRoot: string,
|
|
140
|
+
private timeoutMs: number = 10_000,
|
|
141
|
+
) {
|
|
142
|
+
this.rootUri = `file://${this.projectRoot}`
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
// ---- lifecycle ----------------------------------------------------------
|
|
146
|
+
|
|
147
|
+
/** Check if a server binary exists for the given language. */
|
|
148
|
+
static isAvailable(language: string): boolean {
|
|
149
|
+
const serverId = languageToServerId(language)
|
|
150
|
+
if (!serverId) return false
|
|
151
|
+
const cfg = SERVER_CONFIGS[serverId]
|
|
152
|
+
return findBinary(cfg.command) !== null
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
/** Start the language server for `language`. Rejects if unavailable. */
|
|
156
|
+
async start(language: string): Promise<void> {
|
|
157
|
+
if (this.initialized) return
|
|
158
|
+
|
|
159
|
+
const serverId = languageToServerId(language)
|
|
160
|
+
if (!serverId) throw new Error(`No LSP server config for language: ${language}`)
|
|
161
|
+
|
|
162
|
+
const cfg = SERVER_CONFIGS[serverId]
|
|
163
|
+
const bin = findBinary(cfg.command)
|
|
164
|
+
if (!bin) throw new Error(`LSP server binary not found: ${cfg.command}`)
|
|
165
|
+
|
|
166
|
+
this.serverConfig = cfg
|
|
167
|
+
|
|
168
|
+
this.proc = spawn(bin, cfg.args, {
|
|
169
|
+
cwd: this.projectRoot,
|
|
170
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
171
|
+
env: { ...process.env },
|
|
172
|
+
})
|
|
173
|
+
|
|
174
|
+
this.proc.stdout!.on("data", (chunk: Buffer) => this.onData(chunk))
|
|
175
|
+
this.proc.stderr!.on("data", (chunk: Buffer) => {
|
|
176
|
+
// Silently consume stderr — language servers are chatty
|
|
177
|
+
})
|
|
178
|
+
this.proc.on("error", (err) => {
|
|
179
|
+
// Reject all pending
|
|
180
|
+
for (const p of this.pending.values()) p.reject(err)
|
|
181
|
+
this.pending.clear()
|
|
182
|
+
})
|
|
183
|
+
this.proc.on("exit", () => {
|
|
184
|
+
for (const p of this.pending.values()) p.reject(new Error("LSP server exited"))
|
|
185
|
+
this.pending.clear()
|
|
186
|
+
this.initialized = false
|
|
187
|
+
})
|
|
188
|
+
|
|
189
|
+
// LSP initialize handshake
|
|
190
|
+
const initResult = await this.sendRequest("initialize", {
|
|
191
|
+
processId: process.pid,
|
|
192
|
+
rootUri: this.rootUri,
|
|
193
|
+
capabilities: {
|
|
194
|
+
textDocument: {
|
|
195
|
+
documentSymbol: { hierarchicalDocumentSymbolSupport: true },
|
|
196
|
+
definition: { linkSupport: false },
|
|
197
|
+
references: {},
|
|
198
|
+
implementation: {},
|
|
199
|
+
},
|
|
200
|
+
},
|
|
201
|
+
})
|
|
202
|
+
|
|
203
|
+
// Send initialized notification (no response expected)
|
|
204
|
+
this.sendNotification("initialized", {})
|
|
205
|
+
this.initialized = true
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
/** Shut down gracefully. */
|
|
209
|
+
async stop(): Promise<void> {
|
|
210
|
+
if (!this.proc || !this.initialized) return
|
|
211
|
+
try {
|
|
212
|
+
await this.sendRequest("shutdown", null)
|
|
213
|
+
this.sendNotification("exit", null)
|
|
214
|
+
} catch { /* best effort */ }
|
|
215
|
+
this.proc.kill()
|
|
216
|
+
this.proc = null
|
|
217
|
+
this.initialized = false
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
// ---- LSP helpers --------------------------------------------------------
|
|
221
|
+
|
|
222
|
+
async openDocument(filePath: string, content: string, languageId: string): Promise<void> {
|
|
223
|
+
const uri = `file://${path.resolve(this.projectRoot, filePath)}`
|
|
224
|
+
this.sendNotification("textDocument/didOpen", {
|
|
225
|
+
textDocument: {
|
|
226
|
+
uri,
|
|
227
|
+
languageId,
|
|
228
|
+
version: 1,
|
|
229
|
+
text: content,
|
|
230
|
+
},
|
|
231
|
+
})
|
|
232
|
+
// Brief pause to let the notification reach the server.
|
|
233
|
+
// Callers that need full type info should wait separately.
|
|
234
|
+
await new Promise(r => setTimeout(r, 200))
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
async closeDocument(filePath: string): Promise<void> {
|
|
238
|
+
const uri = `file://${path.resolve(this.projectRoot, filePath)}`
|
|
239
|
+
this.sendNotification("textDocument/didClose", {
|
|
240
|
+
textDocument: { uri },
|
|
241
|
+
})
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
async documentSymbol(filePath: string): Promise<LSPSymbolInformation[]> {
|
|
245
|
+
const uri = `file://${path.resolve(this.projectRoot, filePath)}`
|
|
246
|
+
const result = await this.sendRequest("textDocument/documentSymbol", {
|
|
247
|
+
textDocument: { uri },
|
|
248
|
+
})
|
|
249
|
+
return result || []
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
async definition(filePath: string, line: number, character: number): Promise<LSPLocation[]> {
|
|
253
|
+
const uri = `file://${path.resolve(this.projectRoot, filePath)}`
|
|
254
|
+
const result = await this.sendRequest("textDocument/definition", {
|
|
255
|
+
textDocument: { uri },
|
|
256
|
+
position: { line, character },
|
|
257
|
+
})
|
|
258
|
+
return this.normalizeLocations(result)
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
async references(filePath: string, line: number, character: number): Promise<LSPLocation[]> {
|
|
262
|
+
const uri = `file://${path.resolve(this.projectRoot, filePath)}`
|
|
263
|
+
const result = await this.sendRequest("textDocument/references", {
|
|
264
|
+
textDocument: { uri },
|
|
265
|
+
position: { line, character },
|
|
266
|
+
context: { includeDeclaration: false },
|
|
267
|
+
})
|
|
268
|
+
return this.normalizeLocations(result)
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
async implementation(filePath: string, line: number, character: number): Promise<LSPLocation[]> {
|
|
272
|
+
const uri = `file://${path.resolve(this.projectRoot, filePath)}`
|
|
273
|
+
const result = await this.sendRequest("textDocument/implementation", {
|
|
274
|
+
textDocument: { uri },
|
|
275
|
+
position: { line, character },
|
|
276
|
+
})
|
|
277
|
+
return this.normalizeLocations(result)
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
// ---- transport ----------------------------------------------------------
|
|
281
|
+
|
|
282
|
+
private sendRequest(method: string, params: any): Promise<any> {
|
|
283
|
+
return new Promise((resolve, reject) => {
|
|
284
|
+
if (!this.proc?.stdin?.writable) {
|
|
285
|
+
return reject(new Error("LSP server not running"))
|
|
286
|
+
}
|
|
287
|
+
const id = ++this.requestId
|
|
288
|
+
const msg = { jsonrpc: "2.0", id, method, params }
|
|
289
|
+
|
|
290
|
+
const timer = setTimeout(() => {
|
|
291
|
+
this.pending.delete(id)
|
|
292
|
+
reject(new Error(`LSP request timed out: ${method} (${this.timeoutMs}ms)`))
|
|
293
|
+
}, this.timeoutMs)
|
|
294
|
+
|
|
295
|
+
this.pending.set(id, {
|
|
296
|
+
resolve: (v: any) => { clearTimeout(timer); resolve(v) },
|
|
297
|
+
reject: (e: Error) => { clearTimeout(timer); reject(e) },
|
|
298
|
+
})
|
|
299
|
+
|
|
300
|
+
this.proc.stdin.write(encodeMessage(msg))
|
|
301
|
+
})
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
private sendNotification(method: string, params: any): void {
|
|
305
|
+
if (!this.proc?.stdin?.writable) return
|
|
306
|
+
const msg = { jsonrpc: "2.0", method, params }
|
|
307
|
+
this.proc.stdin.write(encodeMessage(msg))
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
private onData(chunk: Buffer): void {
|
|
311
|
+
this.buffer = Buffer.concat([this.buffer, chunk])
|
|
312
|
+
this.processBuffer()
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
private processBuffer(): void {
|
|
316
|
+
while (true) {
|
|
317
|
+
// Look for Content-Length header
|
|
318
|
+
const headerEnd = this.buffer.indexOf("\r\n\r\n")
|
|
319
|
+
if (headerEnd === -1) break
|
|
320
|
+
|
|
321
|
+
const headerStr = this.buffer.subarray(0, headerEnd).toString("ascii")
|
|
322
|
+
const match = headerStr.match(/Content-Length:\s*(\d+)/i)
|
|
323
|
+
if (!match) {
|
|
324
|
+
// Corrupt header — skip past it
|
|
325
|
+
this.buffer = this.buffer.subarray(headerEnd + 4)
|
|
326
|
+
continue
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
const contentLength = parseInt(match[1], 10)
|
|
330
|
+
const bodyStart = headerEnd + 4
|
|
331
|
+
const bodyEnd = bodyStart + contentLength
|
|
332
|
+
|
|
333
|
+
if (this.buffer.length < bodyEnd) break // not enough data yet
|
|
334
|
+
|
|
335
|
+
const bodyStr = this.buffer.subarray(bodyStart, bodyEnd).toString("utf-8")
|
|
336
|
+
this.buffer = this.buffer.subarray(bodyEnd)
|
|
337
|
+
|
|
338
|
+
try {
|
|
339
|
+
const msg = JSON.parse(bodyStr)
|
|
340
|
+
this.handleMessage(msg)
|
|
341
|
+
} catch {
|
|
342
|
+
// malformed JSON — skip
|
|
343
|
+
}
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
private handleMessage(msg: any): void {
|
|
348
|
+
// Response to a request we sent
|
|
349
|
+
if (msg.id != null && this.pending.has(msg.id)) {
|
|
350
|
+
const p = this.pending.get(msg.id)!
|
|
351
|
+
this.pending.delete(msg.id)
|
|
352
|
+
if (msg.error) {
|
|
353
|
+
p.reject(new Error(`LSP error ${msg.error.code}: ${msg.error.message}`))
|
|
354
|
+
} else {
|
|
355
|
+
p.resolve(msg.result)
|
|
356
|
+
}
|
|
357
|
+
return
|
|
358
|
+
}
|
|
359
|
+
// Server notifications / requests — ignore for indexing purposes
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
private normalizeLocations(result: any): LSPLocation[] {
|
|
363
|
+
if (!result) return []
|
|
364
|
+
if (Array.isArray(result)) return result
|
|
365
|
+
// Single Location
|
|
366
|
+
if (result.uri) return [result]
|
|
367
|
+
return []
|
|
368
|
+
}
|
|
369
|
+
}
|
|
@@ -52,10 +52,13 @@ export class GraphBuilder {
|
|
|
52
52
|
if (lspAvailable) {
|
|
53
53
|
try {
|
|
54
54
|
relations = await this.lspAnalyzer.analyzeFile(filePath, chunks)
|
|
55
|
-
} catch
|
|
56
|
-
|
|
55
|
+
} catch {
|
|
56
|
+
// LSP threw — fall through to regex
|
|
57
57
|
}
|
|
58
|
-
}
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
// Fallback: if LSP unavailable, threw, or returned nothing → use regex
|
|
61
|
+
if (relations.length === 0) {
|
|
59
62
|
relations = this.regexAnalyzer.analyzeCode(filePath, content, chunks)
|
|
60
63
|
}
|
|
61
64
|
}
|
|
@@ -92,4 +95,104 @@ export class GraphBuilder {
|
|
|
92
95
|
|
|
93
96
|
return result
|
|
94
97
|
}
|
|
98
|
+
|
|
99
|
+
  // ---- FR-005: Semantic similarity edges ------------------------------------

  /**
   * Build "similar_to" edges between chunks whose cosine similarity exceeds
   * `threshold`. Edges are only created between chunks from different files,
   * and only where no graph edge already links the pair in either direction.
   * Pairs are compared once (inner loop starts at i + 1) and the reverse pair
   * is marked after insertion, so edges are effectively undirected.
   *
   * @param chunks — all indexed chunks as { chunk_id, vector, file }
   * @param threshold — minimum cosine similarity (default 0.8)
   * @param maxEdgesPerChunk — limit outgoing similarity edges per chunk (default 3)
   * @returns number of similarity edges created
   */
  async buildSemanticEdges(
    chunks: Array<{ chunk_id: string; vector: number[]; file: string }>,
    threshold: number = 0.8,
    maxEdgesPerChunk: number = 3,
  ): Promise<number> {
    if (chunks.length < 2) return 0

    // Pre-collect all existing edges so we can skip pairs that already have
    // explicit links. NOTE(review): one getOutgoing call per chunk — O(n) DB
    // round-trips up front.
    const existingPairs = new Set<string>()
    for (const chunk of chunks) {
      try {
        const outgoing = await this.graphDB.getOutgoing(chunk.chunk_id)
        for (const t of outgoing) {
          existingPairs.add(`${t.subject}|${t.object}`)
          existingPairs.add(`${t.object}|${t.subject}`) // bidirectional check
        }
      } catch {
        // skip — chunk may not have edges yet
      }
    }

    const newTriples: Triple[] = []

    // For each chunk, find top-N most similar chunks above threshold
    for (let i = 0; i < chunks.length; i++) {
      const a = chunks[i]
      if (!a.vector || a.vector.length === 0) continue // chunk never embedded

      const candidates: Array<{ idx: number; similarity: number }> = []

      for (let j = i + 1; j < chunks.length; j++) {
        const b = chunks[j]
        if (!b.vector || b.vector.length === 0) continue

        // Skip same-file chunks (intra-file similarity is less useful)
        if (a.file === b.file) continue

        // Skip if explicit edge already exists
        const pairKey = `${a.chunk_id}|${b.chunk_id}`
        if (existingPairs.has(pairKey)) continue

        const similarity = this.cosineSimilarity(a.vector, b.vector)
        if (similarity > threshold) {
          candidates.push({ idx: j, similarity })
        }
      }

      // Sort by similarity descending, take top N
      candidates.sort((x, y) => y.similarity - x.similarity)
      const top = candidates.slice(0, maxEdgesPerChunk)

      for (const c of top) {
        const b = chunks[c.idx]
        newTriples.push({
          subject: a.chunk_id,
          predicate: "similar_to",
          object: b.chunk_id,
          // Round to 4 decimal places so stored weights stay compact.
          weight: parseFloat(c.similarity.toFixed(4)),
          source: "semantic",
          file: a.file,
        })
        // Mark as existing so reverse pair isn't duplicated
        existingPairs.add(`${a.chunk_id}|${b.chunk_id}`)
        existingPairs.add(`${b.chunk_id}|${a.chunk_id}`)
      }
    }

    if (newTriples.length > 0) {
      // Batch insert in groups of 100 to avoid overwhelming LevelDB
      for (let i = 0; i < newTriples.length; i += 100) {
        const batch = newTriples.slice(i, i + 100)
        await this.graphDB.putEdges(batch)
      }
    }

    return newTriples.length
  }
|
|
187
|
+
|
|
188
|
+
private cosineSimilarity(vecA: number[], vecB: number[]): number {
|
|
189
|
+
let dot = 0, normA = 0, normB = 0
|
|
190
|
+
for (let i = 0; i < vecA.length; i++) {
|
|
191
|
+
dot += vecA[i] * vecB[i]
|
|
192
|
+
normA += vecA[i] * vecA[i]
|
|
193
|
+
normB += vecB[i] * vecB[i]
|
|
194
|
+
}
|
|
195
|
+
const denom = Math.sqrt(normA) * Math.sqrt(normB)
|
|
196
|
+
return denom === 0 ? 0 : dot / denom
|
|
197
|
+
}
|
|
95
198
|
}
|
package/vectorizer/graph-db.ts
CHANGED
|
@@ -94,4 +94,196 @@ export class GraphDB {
|
|
|
94
94
|
this.initialized = false
|
|
95
95
|
}
|
|
96
96
|
}
|
|
97
|
+
|
|
98
|
+
// ---- FR-054: File metadata triples for incremental updates -----------------
|
|
99
|
+
|
|
100
|
+
/**
|
|
101
|
+
* Store graph build metadata for a file as a special triple.
|
|
102
|
+
* Subject: `meta:<filePath>`, Predicate: `graph_built`, Object: `<hash>`.
|
|
103
|
+
* Weight encodes the timestamp (seconds since epoch).
|
|
104
|
+
*/
|
|
105
|
+
async setFileMeta(filePath: string, hash: string, timestamp: number): Promise<void> {
|
|
106
|
+
if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
|
|
107
|
+
|
|
108
|
+
// Remove old meta triple for this file first
|
|
109
|
+
await this.deleteFileMeta(filePath)
|
|
110
|
+
|
|
111
|
+
const triple: Triple = {
|
|
112
|
+
subject: `meta:${filePath}`,
|
|
113
|
+
predicate: "graph_built",
|
|
114
|
+
object: hash,
|
|
115
|
+
weight: Math.floor(timestamp / 1000), // seconds since epoch fits in weight
|
|
116
|
+
source: "meta",
|
|
117
|
+
file: filePath,
|
|
118
|
+
}
|
|
119
|
+
await this.putEdges([triple])
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
/**
|
|
123
|
+
* Get the stored graph build metadata for a file.
|
|
124
|
+
* Returns { hash, timestamp } or null if not found.
|
|
125
|
+
*/
|
|
126
|
+
async getFileMeta(filePath: string): Promise<{ hash: string; timestamp: number } | null> {
|
|
127
|
+
if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
|
|
128
|
+
|
|
129
|
+
const triples = await new Promise<Triple[]>((resolve, reject) => {
|
|
130
|
+
this.db.get(
|
|
131
|
+
{ subject: `meta:${filePath}`, predicate: "graph_built" },
|
|
132
|
+
(err: Error | undefined, result: Triple[]) => {
|
|
133
|
+
if (err) reject(err)
|
|
134
|
+
else resolve(result || [])
|
|
135
|
+
},
|
|
136
|
+
)
|
|
137
|
+
})
|
|
138
|
+
|
|
139
|
+
if (triples.length === 0) return null
|
|
140
|
+
return {
|
|
141
|
+
hash: triples[0].object,
|
|
142
|
+
timestamp: triples[0].weight * 1000, // back to ms
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
/**
|
|
147
|
+
* Delete file meta triple.
|
|
148
|
+
*/
|
|
149
|
+
async deleteFileMeta(filePath: string): Promise<void> {
|
|
150
|
+
if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
|
|
151
|
+
|
|
152
|
+
const triples = await new Promise<Triple[]>((resolve, reject) => {
|
|
153
|
+
this.db.get(
|
|
154
|
+
{ subject: `meta:${filePath}`, predicate: "graph_built" },
|
|
155
|
+
(err: Error | undefined, result: Triple[]) => {
|
|
156
|
+
if (err) reject(err)
|
|
157
|
+
else resolve(result || [])
|
|
158
|
+
},
|
|
159
|
+
)
|
|
160
|
+
})
|
|
161
|
+
|
|
162
|
+
for (const t of triples) {
|
|
163
|
+
await new Promise<void>((resolve, reject) => {
|
|
164
|
+
this.db.del(t, (err: Error | undefined) => {
|
|
165
|
+
if (err) reject(err)
|
|
166
|
+
else resolve()
|
|
167
|
+
})
|
|
168
|
+
})
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
/**
|
|
173
|
+
* Get all file metadata triples (for validation / stats).
|
|
174
|
+
*/
|
|
175
|
+
async getAllFileMeta(): Promise<Array<{ filePath: string; hash: string; timestamp: number }>> {
|
|
176
|
+
if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
|
|
177
|
+
|
|
178
|
+
const triples = await new Promise<Triple[]>((resolve, reject) => {
|
|
179
|
+
this.db.get({ predicate: "graph_built" }, (err: Error | undefined, result: Triple[]) => {
|
|
180
|
+
if (err) reject(err)
|
|
181
|
+
else resolve(result || [])
|
|
182
|
+
})
|
|
183
|
+
})
|
|
184
|
+
|
|
185
|
+
return triples.map((t) => ({
|
|
186
|
+
filePath: t.subject.replace(/^meta:/, ""),
|
|
187
|
+
hash: t.object,
|
|
188
|
+
timestamp: t.weight * 1000,
|
|
189
|
+
}))
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
/**
|
|
193
|
+
* Get all triples in the graph (for validation/stats).
|
|
194
|
+
* Excludes meta triples (predicate === "graph_built").
|
|
195
|
+
*/
|
|
196
|
+
async getAllTriples(): Promise<Triple[]> {
|
|
197
|
+
if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
|
|
198
|
+
|
|
199
|
+
const allTriples = await new Promise<Triple[]>((resolve, reject) => {
|
|
200
|
+
this.db.get({}, (err: Error | undefined, triples: Triple[]) => {
|
|
201
|
+
if (err) reject(err)
|
|
202
|
+
else resolve(triples || [])
|
|
203
|
+
})
|
|
204
|
+
})
|
|
205
|
+
|
|
206
|
+
return allTriples.filter(t => t.predicate !== "graph_built")
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
async getRelatedFiles(chunkId: string, maxDepth: number = 1): Promise<{path: string, relation: string, weight: number}[]> {
|
|
210
|
+
if (!this.initialized) {
|
|
211
|
+
throw new Error("GraphDB not initialized. Call init() first.")
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
const relatedFiles: Map<string, {relation: string, weight: number}> = new Map()
|
|
215
|
+
const visited = new Set<string>()
|
|
216
|
+
|
|
217
|
+
async function traverse(currentId: string, currentDepth: number, currentRelation: string) {
|
|
218
|
+
if (currentDepth > maxDepth || visited.has(currentId)) {
|
|
219
|
+
return
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
visited.add(currentId)
|
|
223
|
+
|
|
224
|
+
try {
|
|
225
|
+
const outgoing = await new Promise<Triple[]>((resolve, reject) => {
|
|
226
|
+
this.db.get({ subject: currentId }, (err: Error | undefined, triples: Triple[]) => {
|
|
227
|
+
if (err) reject(err)
|
|
228
|
+
else resolve(triples || [])
|
|
229
|
+
})
|
|
230
|
+
})
|
|
231
|
+
|
|
232
|
+
for (const triple of outgoing) {
|
|
233
|
+
const fileId = triple.object
|
|
234
|
+
|
|
235
|
+
// Aggregate relations and weights
|
|
236
|
+
const existing = relatedFiles.get(fileId)
|
|
237
|
+
if (existing) {
|
|
238
|
+
existing.weight = Math.max(existing.weight, triple.weight)
|
|
239
|
+
} else {
|
|
240
|
+
relatedFiles.set(fileId, {
|
|
241
|
+
relation: currentRelation || triple.predicate,
|
|
242
|
+
weight: triple.weight
|
|
243
|
+
})
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
// Recurse for imports/extends relations
|
|
247
|
+
if (triple.predicate === "imports" || triple.predicate === "extends") {
|
|
248
|
+
await traverse(fileId, currentDepth + 1, triple.predicate)
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
const incoming = await new Promise<Triple[]>((resolve, reject) => {
|
|
253
|
+
this.db.get({ object: currentId }, (err: Error | undefined, triples: Triple[]) => {
|
|
254
|
+
if (err) reject(err)
|
|
255
|
+
else resolve(triples || [])
|
|
256
|
+
})
|
|
257
|
+
})
|
|
258
|
+
|
|
259
|
+
for (const triple of incoming) {
|
|
260
|
+
const fileId = triple.subject
|
|
261
|
+
|
|
262
|
+
const existing = relatedFiles.get(fileId)
|
|
263
|
+
if (existing) {
|
|
264
|
+
existing.weight = Math.max(existing.weight, triple.weight)
|
|
265
|
+
} else {
|
|
266
|
+
relatedFiles.set(fileId, {
|
|
267
|
+
relation: `used_by`,
|
|
268
|
+
weight: triple.weight
|
|
269
|
+
})
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
} catch (error) {
|
|
273
|
+
console.error(`Error traversing graph for ${currentId}:`, error)
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
await traverse(chunkId, 0, "")
|
|
278
|
+
|
|
279
|
+
const result = Array.from(relatedFiles.entries())
|
|
280
|
+
.map(([path, data]) => ({
|
|
281
|
+
path,
|
|
282
|
+
relation: data.relation,
|
|
283
|
+
weight: data.weight
|
|
284
|
+
}))
|
|
285
|
+
.sort((a, b) => b.weight - a.weight)
|
|
286
|
+
|
|
287
|
+
return result
|
|
288
|
+
}
|
|
97
289
|
}
|