@comfanion/usethis_search 3.0.0-dev.17 → 3.0.0-dev.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/vectorizer/analyzers/lsp-analyzer.ts +7 -7
- package/vectorizer/analyzers/regex-analyzer.ts +173 -67
- package/vectorizer/graph-builder.ts +206 -15
- package/vectorizer/graph-db.ts +70 -47
- package/vectorizer/index.ts +109 -40
- package/vectorizer.yaml +16 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@comfanion/usethis_search",
|
|
3
|
-
"version": "3.0.0-dev.
|
|
3
|
+
"version": "3.0.0-dev.18",
|
|
4
4
|
"description": "OpenCode plugin: semantic search with graph-based context (v3: graph relations, 1-hop context, LSP + regex analyzers)",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./index.ts",
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
|
|
13
13
|
import path from "path"
|
|
14
14
|
import fs from "fs/promises"
|
|
15
|
-
import { ChunkWithId } from "../graph-builder"
|
|
15
|
+
import { ChunkWithId, buildDefaultChunkId } from "../graph-builder"
|
|
16
16
|
import { LSPClient, LSPSymbolInformation, SymbolKind } from "./lsp-client"
|
|
17
17
|
|
|
18
18
|
export interface Relation {
|
|
@@ -252,7 +252,9 @@ export class LSPAnalyzer {
|
|
|
252
252
|
return result
|
|
253
253
|
}
|
|
254
254
|
|
|
255
|
-
/** Convert LSP location URI + line → chunk_id.
|
|
255
|
+
/** Convert LSP location URI + line → chunk_id.
|
|
256
|
+
* For same-file refs, resolves to exact chunk by line.
|
|
257
|
+
* For cross-file refs, returns the default (first) chunk of the target file. */
|
|
256
258
|
private locationToChunkId(currentFile: string, uri: string, line: number, root: string): string | null {
|
|
257
259
|
// uri = file:///absolute/path/to/file.ts
|
|
258
260
|
const filePath = uri.startsWith("file://") ? uri.slice(7) : uri
|
|
@@ -261,11 +263,9 @@ export class LSPAnalyzer {
|
|
|
261
263
|
// Skip external files (node_modules, etc.)
|
|
262
264
|
if (relPath.startsWith("..") || relPath.includes("node_modules")) return null
|
|
263
265
|
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
// For same-file, we could be more precise but chunk 0 is sufficient for graph
|
|
268
|
-
return `chunk_${normalized}_0`
|
|
266
|
+
// Same file → use findChunkForPosition (called separately with chunks)
|
|
267
|
+
// Cross-file → default chunk
|
|
268
|
+
return buildDefaultChunkId(relPath)
|
|
269
269
|
}
|
|
270
270
|
|
|
271
271
|
private findChunkForPosition(chunks: ChunkWithId[], line: number): string | null {
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import path from "path"
|
|
2
|
-
import
|
|
2
|
+
import fs from "fs"
|
|
3
|
+
import { ChunkWithId, buildDefaultChunkId } from "../graph-builder"
|
|
3
4
|
|
|
4
5
|
export interface Relation {
|
|
5
6
|
from: string
|
|
@@ -10,14 +11,127 @@ export interface Relation {
|
|
|
10
11
|
line?: number
|
|
11
12
|
}
|
|
12
13
|
|
|
14
|
+
// ── Module resolution ─────────────────────────────────────────────────────────
|
|
15
|
+
|
|
16
|
+
/** Extensions to try when resolving JS/TS imports (in order). */
const JS_EXTENSIONS = [".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs"]

/** Extensions to try when resolving Python imports. */
const PY_EXTENSIONS = [".py"]

/**
 * Resolve a relative import (or markdown link) to a project-relative file path.
 *
 * Resolution order:
 *   1. The specifier has an extension and that exact file exists → use it.
 *      Markdown links that carry an extension are returned as-is even when the
 *      file is missing, so a "best effort" edge can still be built.
 *   2. Append each candidate extension: `target.ts`, `target.tsx`, ...
 *      (Python: `target.py`). This step also runs for dotted names such as
 *      `./user.service`, whose trailing `.service` is not a real extension.
 *   3. Directory index: `target/index.ts`, ... (Python: `target/__init__.py`).
 *   4. Nothing on disk:
 *        - specifier had an extension → return it unchanged (best effort);
 *        - otherwise reuse the source file's extension when it is one of the
 *          candidates (`app.ts` importing `./utils` → `utils.ts`);
 *        - Python → assume `.py`.
 *
 * @param projectRoot     Project root directory; made absolute with `path.resolve`.
 * @param sourceFile      Importing file, relative to `projectRoot` (or absolute).
 * @param importSpecifier Relative specifier or link target, e.g. `./utils`.
 * @param language        Selects the candidate extensions; anything other than
 *                        `"python"` probes the JS/TS extension list.
 * @returns The path relative to the project root (e.g. `src/utils.ts`), or
 *          `null` when the specifier is a URL, points outside the project
 *          root, or cannot be resolved.
 */
function resolveModulePath(
  projectRoot: string,
  sourceFile: string,
  importSpecifier: string,
  language: "js" | "python" | "markdown",
): string | null {
  // Scheme-prefixed targets (https:, mailto:, ...) are not files on disk.
  // At least two scheme characters are required so a Windows drive letter
  // (`C:`) is not mistaken for a scheme.
  if (/^[a-zA-Z][a-zA-Z0-9+.-]+:/.test(importSpecifier)) return null

  const root = path.resolve(projectRoot)
  const dir = path.dirname(path.resolve(root, sourceFile))
  const base = path.resolve(dir, importSpecifier)
  const toRelative = (absPath: string): string => path.relative(root, absPath)

  // Security: must stay inside the project root. A plain string-prefix test
  // would accept siblings such as `/repo-other` for root `/repo`, so compare
  // path segments through the relative path instead.
  const relBase = toRelative(base)
  const escapesRoot =
    relBase === ".." || relBase.startsWith(".." + path.sep) || path.isAbsolute(relBase)
  if (escapesRoot) return null

  const hasExtension = path.extname(base) !== ""

  // 1. Exact file, or a markdown link we keep even without a file on disk.
  if (hasExtension && (language === "markdown" || fileExists(base))) {
    return relBase
  }

  const exts = language === "python" ? PY_EXTENSIONS : JS_EXTENSIONS

  // 2. Try appending each extension.
  for (const ext of exts) {
    const candidate = base + ext
    if (fileExists(candidate)) return toRelative(candidate)
  }

  // 3. Try directory index files.
  const indexNames = language === "python" ? ["__init__.py"] : exts.map(e => "index" + e)
  for (const idx of indexNames) {
    const candidate = path.join(base, idx)
    if (fileExists(candidate)) return toRelative(candidate)
  }

  // 4a. Specifier carried its own extension: keep it as a best-effort target.
  if (hasExtension) return relBase

  // 4b. Infer the extension from the source file.
  const sourceExt = path.extname(sourceFile)
  if (sourceExt && exts.includes(sourceExt)) {
    return toRelative(base + sourceExt)
  }

  // 4c. Last resort for Python: assume .py
  if (language === "python") {
    return toRelative(base + ".py")
  }

  return null
}

/**
 * Synchronous check that `absPath` exists and is a regular file.
 * Any `statSync` failure (missing path, permission error, ...) yields `false`.
 */
function fileExists(absPath: string): boolean {
  try {
    return fs.statSync(absPath).isFile()
  } catch {
    return false
  }
}
|
|
100
|
+
|
|
101
|
+
/**
 * Turn a Python relative-import specifier into a filesystem-style relative path.
 *
 *   `.utils`   → `./utils`
 *   `..utils`  → `../utils`
 *   `...pkg`   → `../../pkg`
 *   `.pkg.sub` → `./pkg/sub`
 *
 * A specifier without leading dots is returned unchanged.
 */
function pythonRelativeToPath(spec: string): string {
  const parsed = /^(\.+)(.*)$/.exec(spec)
  if (parsed === null) return spec

  const [, leadingDots, remainder] = parsed

  // One dot means "current package"; each extra dot climbs one directory.
  const levelsUp = leadingDots.length - 1
  const prefix = levelsUp === 0 ? "./" : "../".repeat(levelsUp)

  // Dotted module path → slash-separated path ("pkg.sub" → "pkg/sub").
  return prefix + remainder.split(".").join("/")
}
|
|
118
|
+
|
|
119
|
+
// ── RegexAnalyzer ────────────────────────────────────────────────────────────
|
|
120
|
+
|
|
13
121
|
export class RegexAnalyzer {
|
|
122
|
+
private projectRoot: string
|
|
123
|
+
|
|
14
124
|
private readonly patterns = {
|
|
15
125
|
jsImports: /import\s+(?:\{[^}]+\}|\w+)\s+from\s+['"]([^'"]+)['"]/g,
|
|
16
126
|
pythonFromImport: /from\s+(\S+)\s+import/g,
|
|
17
127
|
pythonImport: /import\s+(\S+)/g,
|
|
18
128
|
extends: /class\s+\w+\s+extends\s+(\w+)/g,
|
|
19
129
|
implements: /class\s+\w+\s+implements\s+([^{]+)/g,
|
|
20
|
-
markdownLink: /\[([^\]]+)\]\(([^)]+)\)/g
|
|
130
|
+
markdownLink: /\[([^\]]+)\]\(([^)]+)\)/g,
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
constructor(projectRoot?: string) {
|
|
134
|
+
this.projectRoot = projectRoot || process.cwd()
|
|
21
135
|
}
|
|
22
136
|
|
|
23
137
|
analyzeCode(filePath: string, content: string, chunks: ChunkWithId[]): Relation[] {
|
|
@@ -25,9 +139,9 @@ export class RegexAnalyzer {
|
|
|
25
139
|
const ext = path.extname(filePath)
|
|
26
140
|
const lines = content.split("\n")
|
|
27
141
|
|
|
28
|
-
if ([".js", ".ts", ".jsx", ".tsx"].includes(ext)) {
|
|
142
|
+
if ([".js", ".ts", ".jsx", ".tsx", ".mjs", ".cjs"].includes(ext)) {
|
|
29
143
|
this.analyzeJSCode(content, lines, filePath, chunks, relations)
|
|
30
|
-
} else if (
|
|
144
|
+
} else if (ext === ".py") {
|
|
31
145
|
this.analyzePythonCode(content, lines, filePath, chunks, relations)
|
|
32
146
|
}
|
|
33
147
|
|
|
@@ -37,17 +151,14 @@ export class RegexAnalyzer {
|
|
|
37
151
|
analyzeMarkdown(filePath: string, content: string, chunks: ChunkWithId[]): Relation[] {
|
|
38
152
|
const relations: Relation[] = []
|
|
39
153
|
const lines = content.split("\n")
|
|
40
|
-
const dir = path.dirname(filePath)
|
|
41
154
|
|
|
42
155
|
let match
|
|
43
156
|
this.patterns.markdownLink.lastIndex = 0
|
|
44
157
|
while ((match = this.patterns.markdownLink.exec(content)) !== null) {
|
|
45
|
-
const linkText = match[1]
|
|
46
158
|
const linkTarget = match[2]
|
|
47
159
|
const lineIndex = content.substring(0, match.index).split("\n").length - 1
|
|
48
|
-
const line = lines[lineIndex]
|
|
49
160
|
|
|
50
|
-
const targetPath = this.
|
|
161
|
+
const targetPath = this.resolveMarkdownLink(filePath, linkTarget)
|
|
51
162
|
if (!targetPath) continue
|
|
52
163
|
|
|
53
164
|
const fromChunkId = this.findChunkForLine(chunks, lineIndex)
|
|
@@ -61,7 +172,7 @@ export class RegexAnalyzer {
|
|
|
61
172
|
predicate: "links_to",
|
|
62
173
|
weight: 1.0,
|
|
63
174
|
source: "markdown",
|
|
64
|
-
line: lineIndex
|
|
175
|
+
line: lineIndex,
|
|
65
176
|
})
|
|
66
177
|
}
|
|
67
178
|
}
|
|
@@ -69,33 +180,34 @@ export class RegexAnalyzer {
|
|
|
69
180
|
return relations
|
|
70
181
|
}
|
|
71
182
|
|
|
72
|
-
|
|
183
|
+
// ── JS / TS ───────────────────────────────────────────────────────────────
|
|
184
|
+
|
|
185
|
+
private analyzeJSCode(
|
|
186
|
+
content: string, lines: string[], filePath: string,
|
|
187
|
+
chunks: ChunkWithId[], relations: Relation[],
|
|
188
|
+
) {
|
|
73
189
|
let match
|
|
74
190
|
|
|
75
191
|
this.patterns.jsImports.lastIndex = 0
|
|
76
192
|
while ((match = this.patterns.jsImports.exec(content)) !== null) {
|
|
77
193
|
const importPath = match[1]
|
|
78
194
|
const lineIndex = content.substring(0, match.index).split("\n").length - 1
|
|
79
|
-
const line = lines[lineIndex]
|
|
80
195
|
|
|
81
196
|
if (importPath.startsWith(".")) {
|
|
82
|
-
const targetPath = this.
|
|
197
|
+
const targetPath = resolveModulePath(this.projectRoot, filePath, importPath, "js")
|
|
83
198
|
if (!targetPath) continue
|
|
84
199
|
|
|
85
200
|
const fromChunkId = this.findChunkForLine(chunks, lineIndex)
|
|
86
201
|
if (!fromChunkId) continue
|
|
87
202
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
line: lineIndex
|
|
97
|
-
})
|
|
98
|
-
}
|
|
203
|
+
relations.push({
|
|
204
|
+
from: fromChunkId,
|
|
205
|
+
to: buildDefaultChunkId(targetPath),
|
|
206
|
+
predicate: "imports",
|
|
207
|
+
weight: 0.8,
|
|
208
|
+
source: "regex",
|
|
209
|
+
line: lineIndex,
|
|
210
|
+
})
|
|
99
211
|
}
|
|
100
212
|
}
|
|
101
213
|
|
|
@@ -115,7 +227,7 @@ export class RegexAnalyzer {
|
|
|
115
227
|
predicate: "extends",
|
|
116
228
|
weight: 0.8,
|
|
117
229
|
source: "regex",
|
|
118
|
-
line: lineIndex
|
|
230
|
+
line: lineIndex,
|
|
119
231
|
})
|
|
120
232
|
}
|
|
121
233
|
}
|
|
@@ -137,14 +249,19 @@ export class RegexAnalyzer {
|
|
|
137
249
|
predicate: "implements",
|
|
138
250
|
weight: 0.8,
|
|
139
251
|
source: "regex",
|
|
140
|
-
line: lineIndex
|
|
252
|
+
line: lineIndex,
|
|
141
253
|
})
|
|
142
254
|
}
|
|
143
255
|
}
|
|
144
256
|
}
|
|
145
257
|
}
|
|
146
258
|
|
|
147
|
-
|
|
259
|
+
// ── Python ────────────────────────────────────────────────────────────────
|
|
260
|
+
|
|
261
|
+
private analyzePythonCode(
|
|
262
|
+
content: string, lines: string[], filePath: string,
|
|
263
|
+
chunks: ChunkWithId[], relations: Relation[],
|
|
264
|
+
) {
|
|
148
265
|
let match
|
|
149
266
|
|
|
150
267
|
this.patterns.pythonFromImport.lastIndex = 0
|
|
@@ -153,23 +270,21 @@ export class RegexAnalyzer {
|
|
|
153
270
|
const lineIndex = content.substring(0, match.index).split("\n").length - 1
|
|
154
271
|
|
|
155
272
|
if (importPath.startsWith(".")) {
|
|
156
|
-
const
|
|
273
|
+
const pyPath = pythonRelativeToPath(importPath)
|
|
274
|
+
const targetPath = resolveModulePath(this.projectRoot, filePath, pyPath, "python")
|
|
157
275
|
if (!targetPath) continue
|
|
158
276
|
|
|
159
277
|
const fromChunkId = this.findChunkForLine(chunks, lineIndex)
|
|
160
278
|
if (!fromChunkId) continue
|
|
161
279
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
line: lineIndex
|
|
171
|
-
})
|
|
172
|
-
}
|
|
280
|
+
relations.push({
|
|
281
|
+
from: fromChunkId,
|
|
282
|
+
to: buildDefaultChunkId(targetPath),
|
|
283
|
+
predicate: "imports",
|
|
284
|
+
weight: 0.8,
|
|
285
|
+
source: "regex",
|
|
286
|
+
line: lineIndex,
|
|
287
|
+
})
|
|
173
288
|
}
|
|
174
289
|
}
|
|
175
290
|
|
|
@@ -179,42 +294,38 @@ export class RegexAnalyzer {
|
|
|
179
294
|
const lineIndex = content.substring(0, match.index).split("\n").length - 1
|
|
180
295
|
|
|
181
296
|
if (importPath.startsWith(".")) {
|
|
182
|
-
const
|
|
297
|
+
const pyPath = pythonRelativeToPath(importPath)
|
|
298
|
+
const targetPath = resolveModulePath(this.projectRoot, filePath, pyPath, "python")
|
|
183
299
|
if (!targetPath) continue
|
|
184
300
|
|
|
185
301
|
const fromChunkId = this.findChunkForLine(chunks, lineIndex)
|
|
186
302
|
if (!fromChunkId) continue
|
|
187
303
|
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
line: lineIndex
|
|
197
|
-
})
|
|
198
|
-
}
|
|
304
|
+
relations.push({
|
|
305
|
+
from: fromChunkId,
|
|
306
|
+
to: buildDefaultChunkId(targetPath),
|
|
307
|
+
predicate: "imports",
|
|
308
|
+
weight: 0.8,
|
|
309
|
+
source: "regex",
|
|
310
|
+
line: lineIndex,
|
|
311
|
+
})
|
|
199
312
|
}
|
|
200
313
|
}
|
|
201
314
|
}
|
|
202
315
|
|
|
203
|
-
|
|
204
|
-
try {
|
|
205
|
-
const dir = path.dirname(filePath)
|
|
206
|
-
const absoluteTarget = path.resolve(dir, target)
|
|
316
|
+
// ── Markdown link resolution ──────────────────────────────────────────────
|
|
207
317
|
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
318
|
+
private resolveMarkdownLink(filePath: string, target: string): string | null {
|
|
319
|
+
// Strip anchor (#section)
|
|
320
|
+
const hashIdx = target.indexOf("#")
|
|
321
|
+
const cleanTarget = hashIdx >= 0 ? target.substring(0, hashIdx) : target
|
|
322
|
+
if (!cleanTarget) return null
|
|
211
323
|
|
|
212
|
-
|
|
213
|
-
} catch {
|
|
214
|
-
return null
|
|
215
|
-
}
|
|
324
|
+
return resolveModulePath(this.projectRoot, filePath, cleanTarget, "markdown")
|
|
216
325
|
}
|
|
217
326
|
|
|
327
|
+
// ── Chunk lookup helpers ──────────────────────────────────────────────────
|
|
328
|
+
|
|
218
329
|
private findChunkForLine(chunks: ChunkWithId[], lineIndex: number): string | null {
|
|
219
330
|
for (const chunk of chunks) {
|
|
220
331
|
if (chunk.start_line !== undefined && chunk.end_line !== undefined) {
|
|
@@ -226,11 +337,6 @@ export class RegexAnalyzer {
|
|
|
226
337
|
return null
|
|
227
338
|
}
|
|
228
339
|
|
|
229
|
-
private findFirstChunkInFile(targetPath: string): string | null {
|
|
230
|
-
const normalized = targetPath.replace(/[^a-zA-Z0-9]/g, "_")
|
|
231
|
-
return `chunk_${normalized}_0`
|
|
232
|
-
}
|
|
233
|
-
|
|
234
340
|
private findChunkContainingSymbol(chunks: ChunkWithId[], symbol: string): string | null {
|
|
235
341
|
for (const chunk of chunks) {
|
|
236
342
|
if (chunk.content.includes(symbol)) {
|
|
@@ -250,6 +356,6 @@ export class RegexAnalyzer {
|
|
|
250
356
|
}
|
|
251
357
|
}
|
|
252
358
|
}
|
|
253
|
-
return
|
|
359
|
+
return buildDefaultChunkId(targetPath)
|
|
254
360
|
}
|
|
255
361
|
}
|
|
@@ -9,29 +9,117 @@ export interface ChunkWithId {
|
|
|
9
9
|
start_line?: number
|
|
10
10
|
end_line?: number
|
|
11
11
|
heading_context?: string
|
|
12
|
+
function_name?: string
|
|
13
|
+
class_name?: string
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
// ── Chunk ID helpers ────────────────────────────────────────────────────────
|
|
17
|
+
|
|
18
|
+
/**
 * Build a symbol-aware chunk ID of the form `chunk:{filePath}::{symbol}`.
 *
 * The symbol is chosen in this order:
 *   - `Class.method` when both class and function names are set
 *   - the class name alone
 *   - the function name alone
 *   - the heading, lower-cased and slugified (runs of characters outside
 *     `a-z0-9` become `-`, one leading/trailing `-` is trimmed)
 *   - `_chunk_{index}` when none of the above yields a non-empty symbol
 *
 * Examples:
 *   chunk:src/user-service.ts::UserService
 *   chunk:src/user-service.ts::UserService.findById
 *   chunk:src/utils.ts::helper
 *   chunk:docs/api.md::authentication
 *   chunk:src/index.ts::_chunk_0
 */
export function buildChunkId(filePath: string, chunk: { class_name?: string; function_name?: string; heading_context?: string }, index: number): string {
  const positional = `_chunk_${index}`
  const { class_name: className, function_name: functionName, heading_context: heading } = chunk

  const pickSymbol = (): string => {
    if (className) {
      return functionName ? `${className}.${functionName}` : className
    }
    if (functionName) {
      return functionName
    }
    if (heading) {
      // Markdown: slugify the heading; an all-punctuation heading slugs to "".
      const slug = heading
        .toLowerCase()
        .replace(/[^a-z0-9]+/g, "-")
        .replace(/^-|-$/g, "")
      return slug || positional
    }
    return positional
  }

  return `chunk:${filePath}::${pickSymbol()}`
}
|
|
50
|
+
|
|
51
|
+
/**
 * Build the file-level node ID: the given path prefixed with `file:`.
 */
export function buildFileNodeId(filePath: string): string {
  return "file:" + filePath
}
|
|
55
|
+
|
|
56
|
+
/**
 * Extract the file path from a node ID.
 *
 *   `chunk:{path}::{symbol}` → `{path}` (null when the `::` separator is missing)
 *   `file:{path}`            → `{path}`
 *   `meta:{path}`            → `{path}`
 *
 * Any other ID yields null.
 */
export function filePathFromNodeId(nodeId: string): string | null {
  if (nodeId.startsWith("chunk:")) {
    const symbolSep = nodeId.indexOf("::")
    if (symbolSep === -1) return null
    return nodeId.slice("chunk:".length, symbolSep)
  }

  // `file:` and `meta:` IDs carry nothing but the path after the prefix.
  for (const prefix of ["file:", "meta:"]) {
    if (nodeId.startsWith(prefix)) return nodeId.slice(prefix.length)
  }

  return null
}
|
|
70
|
+
|
|
71
|
+
/**
 * Build the ID of a file's default chunk: `chunk:{filePath}::_chunk_0`.
 * Meant for cross-file references where the exact target chunk is unknown.
 */
export function buildDefaultChunkId(filePath: string): string {
  return ["chunk:", filePath, "::_chunk_0"].join("")
}
|
|
76
|
+
|
|
77
|
+
// ── Structural edge predicates ──────────────────────────────────────────────
|
|
78
|
+
|
|
79
|
+
/** Predicates that describe containment structure (file → symbol, class → method). */
const STRUCTURAL_PREDICATES = new Set<string>(
  ["contains_class", "contains_function", "contains_interface", "contains", "has_method"],
)

/**
 * Report whether `predicate` is one of the structural (containment) predicates.
 */
export function isStructuralPredicate(predicate: string): boolean {
  const structural = STRUCTURAL_PREDICATES.has(predicate)
  return structural
}
|
|
13
90
|
|
|
14
91
|
export class GraphBuilder {
|
|
15
92
|
private lspAnalyzer: LSPAnalyzer
|
|
16
93
|
private regexAnalyzer: RegexAnalyzer
|
|
94
|
+
private lspEnabled: boolean
|
|
17
95
|
|
|
18
96
|
constructor(
|
|
19
97
|
private graphDB: GraphDB,
|
|
20
|
-
private projectRoot: string
|
|
98
|
+
private projectRoot: string,
|
|
99
|
+
lspEnabled: boolean = true,
|
|
100
|
+
lspTimeoutMs: number = 5000,
|
|
21
101
|
) {
|
|
22
|
-
this.
|
|
23
|
-
this.
|
|
102
|
+
this.lspEnabled = lspEnabled
|
|
103
|
+
this.lspAnalyzer = new LSPAnalyzer(projectRoot, lspTimeoutMs)
|
|
104
|
+
this.regexAnalyzer = new RegexAnalyzer(projectRoot)
|
|
24
105
|
}
|
|
25
106
|
|
|
26
107
|
assignChunkIds(filePath: string, chunks: any[]): ChunkWithId[] {
|
|
27
|
-
const
|
|
28
|
-
const normalizedPath = withoutExt.replace(/[^a-zA-Z0-9]/g, "_")
|
|
108
|
+
const seen = new Map<string, number>()
|
|
29
109
|
|
|
30
110
|
return chunks.map((chunk, index) => {
|
|
31
|
-
|
|
111
|
+
let chunkId = buildChunkId(filePath, chunk, index)
|
|
112
|
+
|
|
113
|
+
// Handle duplicate symbols (e.g. two chunks for same class split by size)
|
|
114
|
+
const count = seen.get(chunkId) || 0
|
|
115
|
+
if (count > 0) {
|
|
116
|
+
chunkId = `${chunkId}#${count}`
|
|
117
|
+
}
|
|
118
|
+
seen.set(chunkId.replace(/#\d+$/, ""), count + 1)
|
|
119
|
+
|
|
32
120
|
return {
|
|
33
121
|
...chunk,
|
|
34
|
-
chunk_id: chunkId
|
|
122
|
+
chunk_id: chunkId,
|
|
35
123
|
} as ChunkWithId
|
|
36
124
|
})
|
|
37
125
|
}
|
|
@@ -47,7 +135,7 @@ export class GraphBuilder {
|
|
|
47
135
|
if (fileType === "docs") {
|
|
48
136
|
relations = this.regexAnalyzer.analyzeMarkdown(filePath, content, chunks)
|
|
49
137
|
} else if (fileType === "code") {
|
|
50
|
-
const lspAvailable = await this.lspAnalyzer.isAvailable(filePath)
|
|
138
|
+
const lspAvailable = this.lspEnabled && await this.lspAnalyzer.isAvailable(filePath)
|
|
51
139
|
|
|
52
140
|
if (lspAvailable) {
|
|
53
141
|
try {
|
|
@@ -63,7 +151,7 @@ export class GraphBuilder {
|
|
|
63
151
|
}
|
|
64
152
|
}
|
|
65
153
|
|
|
66
|
-
const
|
|
154
|
+
const relationTriples: Triple[] = relations.map(rel => ({
|
|
67
155
|
subject: rel.from,
|
|
68
156
|
predicate: rel.predicate,
|
|
69
157
|
object: rel.to,
|
|
@@ -73,14 +161,117 @@ export class GraphBuilder {
|
|
|
73
161
|
line: rel.line
|
|
74
162
|
}))
|
|
75
163
|
|
|
76
|
-
|
|
77
|
-
|
|
164
|
+
// ── Structural edges ────────────────────────────────────────────────────
|
|
165
|
+
const fileNode = buildFileNodeId(filePath)
|
|
166
|
+
const structuralTriples: Triple[] = []
|
|
167
|
+
|
|
168
|
+
// Anchor: every chunk belongs_to its file
|
|
169
|
+
for (const c of chunks) {
|
|
170
|
+
structuralTriples.push({
|
|
171
|
+
subject: c.chunk_id,
|
|
172
|
+
predicate: "belongs_to",
|
|
173
|
+
object: filePath,
|
|
174
|
+
weight: 0,
|
|
175
|
+
source: "anchor",
|
|
176
|
+
file: filePath,
|
|
177
|
+
})
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
// File node → symbol chunks
|
|
181
|
+
// Track class chunks for has_method edges
|
|
182
|
+
const classChunkMap = new Map<string, string>() // className → chunk_id
|
|
183
|
+
|
|
184
|
+
for (const c of chunks) {
|
|
185
|
+
if (c.class_name && !c.function_name) {
|
|
186
|
+
// Class/interface chunk (no method = class-level)
|
|
187
|
+
const predicate = c.content.match(/\binterface\s/) ? "contains_interface" : "contains_class"
|
|
188
|
+
structuralTriples.push({
|
|
189
|
+
subject: fileNode,
|
|
190
|
+
predicate,
|
|
191
|
+
object: c.chunk_id,
|
|
192
|
+
weight: 1.0,
|
|
193
|
+
source: "structure",
|
|
194
|
+
file: filePath,
|
|
195
|
+
})
|
|
196
|
+
classChunkMap.set(c.class_name, c.chunk_id)
|
|
197
|
+
} else if (c.function_name && !c.class_name) {
|
|
198
|
+
// Top-level function
|
|
199
|
+
structuralTriples.push({
|
|
200
|
+
subject: fileNode,
|
|
201
|
+
predicate: "contains_function",
|
|
202
|
+
object: c.chunk_id,
|
|
203
|
+
weight: 1.0,
|
|
204
|
+
source: "structure",
|
|
205
|
+
file: filePath,
|
|
206
|
+
})
|
|
207
|
+
} else if (c.function_name && c.class_name) {
|
|
208
|
+
// Method inside a class → has_method edge from class chunk
|
|
209
|
+
const parentChunkId = classChunkMap.get(c.class_name)
|
|
210
|
+
if (parentChunkId) {
|
|
211
|
+
structuralTriples.push({
|
|
212
|
+
subject: parentChunkId,
|
|
213
|
+
predicate: "has_method",
|
|
214
|
+
object: c.chunk_id,
|
|
215
|
+
weight: 1.0,
|
|
216
|
+
source: "structure",
|
|
217
|
+
file: filePath,
|
|
218
|
+
})
|
|
219
|
+
} else {
|
|
220
|
+
// No class chunk found yet (methods appeared before class preamble, or class was not split)
|
|
221
|
+
// Fall back to file → method
|
|
222
|
+
structuralTriples.push({
|
|
223
|
+
subject: fileNode,
|
|
224
|
+
predicate: "contains_function",
|
|
225
|
+
object: c.chunk_id,
|
|
226
|
+
weight: 1.0,
|
|
227
|
+
source: "structure",
|
|
228
|
+
file: filePath,
|
|
229
|
+
})
|
|
230
|
+
}
|
|
231
|
+
} else if (c.heading_context) {
|
|
232
|
+
// Markdown section
|
|
233
|
+
structuralTriples.push({
|
|
234
|
+
subject: fileNode,
|
|
235
|
+
predicate: "contains",
|
|
236
|
+
object: c.chunk_id,
|
|
237
|
+
weight: 0.5,
|
|
238
|
+
source: "structure",
|
|
239
|
+
file: filePath,
|
|
240
|
+
})
|
|
241
|
+
} else {
|
|
242
|
+
// Generic content chunk
|
|
243
|
+
structuralTriples.push({
|
|
244
|
+
subject: fileNode,
|
|
245
|
+
predicate: "contains",
|
|
246
|
+
object: c.chunk_id,
|
|
247
|
+
weight: 0.3,
|
|
248
|
+
source: "structure",
|
|
249
|
+
file: filePath,
|
|
250
|
+
})
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
await this.graphDB.putEdges([...structuralTriples, ...relationTriples])
|
|
255
|
+
return relationTriples.length
|
|
78
256
|
}
|
|
79
257
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
258
|
+
/** Resolve a file path + line to the best chunk ID.
|
|
259
|
+
* If chunks are provided, finds the one containing the line.
|
|
260
|
+
* Otherwise falls back to the default chunk. */
|
|
261
|
+
resolveChunkId(filePath: string, line: number, chunks?: ChunkWithId[]): string | null {
|
|
262
|
+
if (chunks && chunks.length > 0) {
|
|
263
|
+
for (const c of chunks) {
|
|
264
|
+
if (c.start_line !== undefined && c.end_line !== undefined) {
|
|
265
|
+
if (line >= c.start_line && line <= c.end_line) {
|
|
266
|
+
return c.chunk_id
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
// Line not in any chunk range — return first chunk
|
|
271
|
+
return chunks[0].chunk_id
|
|
272
|
+
}
|
|
273
|
+
// No chunks available — return default
|
|
274
|
+
return buildDefaultChunkId(filePath)
|
|
84
275
|
}
|
|
85
276
|
|
|
86
277
|
async getRelatedChunks(chunkId: string): Promise<Array<{ chunk_id: string; predicate: string; weight: number; direction: "outgoing" | "incoming" }>> {
|
package/vectorizer/graph-db.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import levelgraph from "levelgraph"
|
|
2
2
|
import { Level } from "level"
|
|
3
|
+
import { filePathFromNodeId, isStructuralPredicate } from "./graph-builder"
|
|
3
4
|
|
|
4
5
|
export interface Triple {
|
|
5
6
|
subject: string
|
|
@@ -149,23 +150,27 @@ export class GraphDB {
|
|
|
149
150
|
async deleteFileMeta(filePath: string): Promise<void> {
|
|
150
151
|
if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
|
|
151
152
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
153
|
+
try {
|
|
154
|
+
const triples = await new Promise<Triple[]>((resolve, reject) => {
|
|
155
|
+
this.db.get(
|
|
156
|
+
{ subject: `meta:${filePath}`, predicate: "graph_built" },
|
|
157
|
+
(err: Error | undefined, result: Triple[]) => {
|
|
158
|
+
if (err) reject(err)
|
|
159
|
+
else resolve(result || [])
|
|
160
|
+
},
|
|
161
|
+
)
|
|
162
|
+
})
|
|
161
163
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
164
|
+
for (const t of triples) {
|
|
165
|
+
await new Promise<void>((resolve, reject) => {
|
|
166
|
+
this.db.del(t, (err: Error | undefined) => {
|
|
167
|
+
if (err) reject(err)
|
|
168
|
+
else resolve()
|
|
169
|
+
})
|
|
167
170
|
})
|
|
168
|
-
}
|
|
171
|
+
}
|
|
172
|
+
} catch (err) {
|
|
173
|
+
// Silently ignore errors (e.g., no meta triple exists)
|
|
169
174
|
}
|
|
170
175
|
}
|
|
171
176
|
|
|
@@ -191,9 +196,10 @@ export class GraphDB {
|
|
|
191
196
|
|
|
192
197
|
/**
|
|
193
198
|
* Get all triples in the graph (for validation/stats).
|
|
194
|
-
* Excludes meta triples
|
|
199
|
+
* Excludes meta, anchor, and structural triples by default.
|
|
200
|
+
* Pass includeStructural=true to also get structural edges.
|
|
195
201
|
*/
|
|
196
|
-
async getAllTriples(): Promise<Triple[]> {
|
|
202
|
+
async getAllTriples(includeStructural: boolean = false): Promise<Triple[]> {
|
|
197
203
|
if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
|
|
198
204
|
|
|
199
205
|
const allTriples = await new Promise<Triple[]>((resolve, reject) => {
|
|
@@ -203,7 +209,11 @@ export class GraphDB {
|
|
|
203
209
|
})
|
|
204
210
|
})
|
|
205
211
|
|
|
206
|
-
return allTriples.filter(t =>
|
|
212
|
+
return allTriples.filter(t => {
|
|
213
|
+
if (t.predicate === "graph_built" || t.predicate === "belongs_to") return false
|
|
214
|
+
if (!includeStructural && isStructuralPredicate(t.predicate)) return false
|
|
215
|
+
return true
|
|
216
|
+
})
|
|
207
217
|
}
|
|
208
218
|
|
|
209
219
|
async getRelatedFiles(chunkId: string, maxDepth: number = 1): Promise<{path: string, relation: string, weight: number}[]> {
|
|
@@ -213,58 +223,70 @@ export class GraphDB {
|
|
|
213
223
|
|
|
214
224
|
const relatedFiles: Map<string, {relation: string, weight: number}> = new Map()
|
|
215
225
|
const visited = new Set<string>()
|
|
216
|
-
|
|
226
|
+
const self = this
|
|
227
|
+
|
|
228
|
+
// Resolve the caller's file directly from the node ID
|
|
229
|
+
const callerFile = filePathFromNodeId(chunkId)
|
|
230
|
+
|
|
217
231
|
async function traverse(currentId: string, currentDepth: number, currentRelation: string) {
|
|
218
|
-
if (currentDepth
|
|
232
|
+
if (currentDepth >= maxDepth || visited.has(currentId)) {
|
|
219
233
|
return
|
|
220
234
|
}
|
|
221
|
-
|
|
235
|
+
|
|
222
236
|
visited.add(currentId)
|
|
223
|
-
|
|
237
|
+
|
|
224
238
|
try {
|
|
225
239
|
const outgoing = await new Promise<Triple[]>((resolve, reject) => {
|
|
226
|
-
|
|
240
|
+
self.db.get({ subject: currentId }, (err: Error | undefined, triples: Triple[]) => {
|
|
227
241
|
if (err) reject(err)
|
|
228
242
|
else resolve(triples || [])
|
|
229
243
|
})
|
|
230
244
|
})
|
|
231
|
-
|
|
245
|
+
|
|
232
246
|
for (const triple of outgoing) {
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
247
|
+
// Skip meta, anchor, and structural-only edges
|
|
248
|
+
if (triple.predicate === "graph_built" || triple.predicate === "belongs_to") continue
|
|
249
|
+
if (isStructuralPredicate(triple.predicate)) continue
|
|
250
|
+
|
|
251
|
+
// Resolve file for the target node directly from its ID
|
|
252
|
+
const targetFile = filePathFromNodeId(triple.object)
|
|
253
|
+
if (!targetFile) continue
|
|
254
|
+
|
|
255
|
+
const existing = relatedFiles.get(targetFile)
|
|
237
256
|
if (existing) {
|
|
238
257
|
existing.weight = Math.max(existing.weight, triple.weight)
|
|
239
258
|
} else {
|
|
240
|
-
relatedFiles.set(
|
|
259
|
+
relatedFiles.set(targetFile, {
|
|
241
260
|
relation: currentRelation || triple.predicate,
|
|
242
261
|
weight: triple.weight
|
|
243
262
|
})
|
|
244
263
|
}
|
|
245
|
-
|
|
246
|
-
// Recurse for imports/extends relations
|
|
264
|
+
|
|
247
265
|
if (triple.predicate === "imports" || triple.predicate === "extends") {
|
|
248
|
-
await traverse(
|
|
266
|
+
await traverse(triple.object, currentDepth + 1, triple.predicate)
|
|
249
267
|
}
|
|
250
268
|
}
|
|
251
|
-
|
|
269
|
+
|
|
252
270
|
const incoming = await new Promise<Triple[]>((resolve, reject) => {
|
|
253
|
-
|
|
271
|
+
self.db.get({ object: currentId }, (err: Error | undefined, triples: Triple[]) => {
|
|
254
272
|
if (err) reject(err)
|
|
255
273
|
else resolve(triples || [])
|
|
256
274
|
})
|
|
257
275
|
})
|
|
258
|
-
|
|
276
|
+
|
|
259
277
|
for (const triple of incoming) {
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
278
|
+
if (triple.predicate === "graph_built" || triple.predicate === "belongs_to") continue
|
|
279
|
+
if (isStructuralPredicate(triple.predicate)) continue
|
|
280
|
+
|
|
281
|
+
const sourceFile = filePathFromNodeId(triple.subject)
|
|
282
|
+
if (!sourceFile) continue
|
|
283
|
+
|
|
284
|
+
const existing = relatedFiles.get(sourceFile)
|
|
263
285
|
if (existing) {
|
|
264
286
|
existing.weight = Math.max(existing.weight, triple.weight)
|
|
265
287
|
} else {
|
|
266
|
-
relatedFiles.set(
|
|
267
|
-
relation:
|
|
288
|
+
relatedFiles.set(sourceFile, {
|
|
289
|
+
relation: "used_by",
|
|
268
290
|
weight: triple.weight
|
|
269
291
|
})
|
|
270
292
|
}
|
|
@@ -273,17 +295,18 @@ export class GraphDB {
|
|
|
273
295
|
console.error(`Error traversing graph for ${currentId}:`, error)
|
|
274
296
|
}
|
|
275
297
|
}
|
|
276
|
-
|
|
298
|
+
|
|
277
299
|
await traverse(chunkId, 0, "")
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
300
|
+
|
|
301
|
+
// Remove the caller's own file from results
|
|
302
|
+
if (callerFile) relatedFiles.delete(callerFile)
|
|
303
|
+
|
|
304
|
+
return Array.from(relatedFiles.entries())
|
|
305
|
+
.map(([filePath, data]) => ({
|
|
306
|
+
path: filePath,
|
|
282
307
|
relation: data.relation,
|
|
283
308
|
weight: data.weight
|
|
284
309
|
}))
|
|
285
310
|
.sort((a, b) => b.weight - a.weight)
|
|
286
|
-
|
|
287
|
-
return result
|
|
288
311
|
}
|
|
289
312
|
}
|
package/vectorizer/index.ts
CHANGED
|
@@ -16,7 +16,7 @@ import { mergeResults, DEFAULT_HYBRID_CONFIG } from "./hybrid-search.ts";
|
|
|
16
16
|
import { QueryCache, DEFAULT_CACHE_CONFIG } from "./query-cache.ts";
|
|
17
17
|
import { SearchMetrics } from "./search-metrics.ts";
|
|
18
18
|
import { GraphDB } from "./graph-db.ts";
|
|
19
|
-
import { GraphBuilder } from "./graph-builder.ts";
|
|
19
|
+
import { GraphBuilder, isStructuralPredicate } from "./graph-builder.ts";
|
|
20
20
|
import { UsageTracker } from "./usage-tracker.ts";
|
|
21
21
|
|
|
22
22
|
// Suppress transformers.js logs unless DEBUG is set
|
|
@@ -85,6 +85,19 @@ let HYBRID_CONFIG = { ...DEFAULT_HYBRID_CONFIG };
|
|
|
85
85
|
let METRICS_ENABLED = false;
|
|
86
86
|
let CACHE_ENABLED = true;
|
|
87
87
|
|
|
88
|
+
// ── Graph config (v3) ───────────────────────────────────────────────────────
|
|
89
|
+
const DEFAULT_GRAPH_CONFIG = {
|
|
90
|
+
enabled: true,
|
|
91
|
+
max_related: 4,
|
|
92
|
+
min_relevance: 0.5,
|
|
93
|
+
lsp: {
|
|
94
|
+
enabled: true,
|
|
95
|
+
timeout_ms: 5000,
|
|
96
|
+
},
|
|
97
|
+
read_intercept: true,
|
|
98
|
+
};
|
|
99
|
+
let GRAPH_CONFIG = { ...DEFAULT_GRAPH_CONFIG, lsp: { ...DEFAULT_GRAPH_CONFIG.lsp } };
|
|
100
|
+
|
|
88
101
|
function defaultVectorizerYaml() {
|
|
89
102
|
return (
|
|
90
103
|
`vectorizer:\n` +
|
|
@@ -121,6 +134,16 @@ function defaultVectorizerYaml() {
|
|
|
121
134
|
` hybrid: true\n` +
|
|
122
135
|
` bm25_weight: 0.3\n` +
|
|
123
136
|
`\n` +
|
|
137
|
+
` # Graph-based context (v3)\n` +
|
|
138
|
+
` graph:\n` +
|
|
139
|
+
` enabled: true\n` +
|
|
140
|
+
` max_related: 4\n` +
|
|
141
|
+
` min_relevance: 0.5\n` +
|
|
142
|
+
` lsp:\n` +
|
|
143
|
+
` enabled: true\n` +
|
|
144
|
+
` timeout_ms: 5000\n` +
|
|
145
|
+
` read_intercept: true\n` +
|
|
146
|
+
`\n` +
|
|
124
147
|
` # Quality monitoring\n` +
|
|
125
148
|
` quality:\n` +
|
|
126
149
|
` enable_metrics: false\n` +
|
|
@@ -282,6 +305,26 @@ async function loadConfig(projectRoot) {
|
|
|
282
305
|
CACHE_ENABLED = parseBool(qs, "enable_cache", true);
|
|
283
306
|
}
|
|
284
307
|
|
|
308
|
+
// ── Parse graph config (v3) ──────────────────────────────────────────────
|
|
309
|
+
const graphMatch = section.match(/^\s{2}graph:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|\Z)/m);
|
|
310
|
+
if (graphMatch) {
|
|
311
|
+
const gs = graphMatch[1];
|
|
312
|
+
GRAPH_CONFIG.enabled = parseBool(gs, "enabled", DEFAULT_GRAPH_CONFIG.enabled);
|
|
313
|
+
GRAPH_CONFIG.max_related = parseNumber(gs, "max_related", DEFAULT_GRAPH_CONFIG.max_related);
|
|
314
|
+
GRAPH_CONFIG.min_relevance = parseNumber(gs, "min_relevance", DEFAULT_GRAPH_CONFIG.min_relevance);
|
|
315
|
+
GRAPH_CONFIG.read_intercept = parseBool(gs, "read_intercept", DEFAULT_GRAPH_CONFIG.read_intercept);
|
|
316
|
+
|
|
317
|
+
// Nested lsp: section
|
|
318
|
+
const lspMatch = gs.match(/^\s+lsp:\s*\n([\s\S]*?)(?=^\s{4}[a-zA-Z_\-]+:|\Z)/m);
|
|
319
|
+
if (lspMatch) {
|
|
320
|
+
const ls = lspMatch[1];
|
|
321
|
+
GRAPH_CONFIG.lsp.enabled = parseBool(ls, "enabled", DEFAULT_GRAPH_CONFIG.lsp.enabled);
|
|
322
|
+
GRAPH_CONFIG.lsp.timeout_ms = parseNumber(ls, "timeout_ms", DEFAULT_GRAPH_CONFIG.lsp.timeout_ms);
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
if (DEBUG) console.log("[vectorizer] Graph config:", GRAPH_CONFIG);
|
|
326
|
+
}
|
|
327
|
+
|
|
285
328
|
// Parse global exclude
|
|
286
329
|
const excludeMatch = section.match(/^\s{2}exclude:\s*\n((?:\s{4}-\s+.+\n?)*)/m);
|
|
287
330
|
if (excludeMatch) {
|
|
@@ -392,11 +435,19 @@ class CodebaseIndexer {
|
|
|
392
435
|
this.db = await lancedb.connect(path.join(this.cacheDir, "lancedb"));
|
|
393
436
|
await this.loadHashes();
|
|
394
437
|
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
438
|
+
// Graph DB — only if graph is enabled in config
|
|
439
|
+
if (GRAPH_CONFIG.enabled) {
|
|
440
|
+
const graphType = this.indexName === "docs" ? "doc_graph" : "code_graph";
|
|
441
|
+
const graphPath = path.join(this.root, ".opencode", "graph", graphType);
|
|
442
|
+
await fs.mkdir(path.dirname(graphPath), { recursive: true });
|
|
443
|
+
this.graphDB = await new GraphDB(graphPath).init();
|
|
444
|
+
this.graphBuilder = new GraphBuilder(
|
|
445
|
+
this.graphDB,
|
|
446
|
+
this.root,
|
|
447
|
+
GRAPH_CONFIG.lsp.enabled,
|
|
448
|
+
GRAPH_CONFIG.lsp.timeout_ms,
|
|
449
|
+
);
|
|
450
|
+
}
|
|
400
451
|
|
|
401
452
|
// Usage tracker — provenance & usage stats
|
|
402
453
|
this.usageTracker = new UsageTracker(this.cacheDir);
|
|
@@ -557,36 +608,41 @@ class CodebaseIndexer {
|
|
|
557
608
|
// Semantic chunking
|
|
558
609
|
const chunks = chunkContent(cleaned, fileMeta.file_type, fileMeta.language, CHUNKING_CONFIG);
|
|
559
610
|
|
|
560
|
-
// v3: Assign chunk IDs for graph tracking
|
|
561
|
-
const chunksWithIds = this.graphBuilder
|
|
611
|
+
// v3: Assign chunk IDs for graph tracking (works without graph — just adds IDs)
|
|
612
|
+
const chunksWithIds = this.graphBuilder
|
|
613
|
+
? this.graphBuilder.assignChunkIds(relPath, chunks)
|
|
614
|
+
: chunks.map((c, i) => ({ ...c, chunk_id: `chunk:${relPath}::_chunk_${i}` }));
|
|
562
615
|
|
|
563
616
|
// v3: Delete old edges for this file and build new ones
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
617
|
+
let graphEdgesBuilt = 0;
|
|
618
|
+
if (this.graphBuilder && this.graphDB) {
|
|
619
|
+
await this.graphDB.deleteByFile(relPath);
|
|
620
|
+
graphEdgesBuilt = await this.graphBuilder.buildEdges(relPath, content, chunksWithIds, fileMeta.file_type);
|
|
621
|
+
|
|
622
|
+
// Log graph creation to indexer.log
|
|
623
|
+
if (graphEdgesBuilt > 0 || DEBUG) {
|
|
624
|
+
const timestamp = new Date().toISOString().slice(11, 19);
|
|
625
|
+
const logMsg = `${timestamp} Graph built: ${relPath} (${chunksWithIds.length} chunks)`;
|
|
626
|
+
if (DEBUG) console.log(`[vectorizer] ${logMsg}`);
|
|
627
|
+
|
|
628
|
+
// Write to indexer.log in .opencode directory
|
|
629
|
+
try {
|
|
630
|
+
const logPath = path.join(this.root, ".opencode", "indexer.log");
|
|
631
|
+
const fsSync = await import("fs");
|
|
632
|
+
fsSync.appendFileSync(logPath, `${logMsg}\n`);
|
|
633
|
+
} catch {
|
|
634
|
+
// non-fatal — logging is advisory
|
|
635
|
+
}
|
|
636
|
+
}
|
|
637
|
+
|
|
638
|
+
// FR-054: Store graph build timestamp + file hash as metadata triple
|
|
574
639
|
try {
|
|
575
|
-
|
|
576
|
-
const fsSync = await import("fs");
|
|
577
|
-
fsSync.appendFileSync(logPath, `${logMsg}\n`);
|
|
640
|
+
await this.graphDB.setFileMeta(relPath, hash, Date.now());
|
|
578
641
|
} catch {
|
|
579
|
-
// non-fatal —
|
|
642
|
+
// non-fatal — metadata is advisory
|
|
580
643
|
}
|
|
581
644
|
}
|
|
582
645
|
|
|
583
|
-
// FR-054: Store graph build timestamp + file hash as metadata triple
|
|
584
|
-
try {
|
|
585
|
-
await this.graphDB.setFileMeta(relPath, hash, Date.now());
|
|
586
|
-
} catch {
|
|
587
|
-
// non-fatal — metadata is advisory
|
|
588
|
-
}
|
|
589
|
-
|
|
590
646
|
const data = [];
|
|
591
647
|
for (let i = 0; i < chunksWithIds.length; i++) {
|
|
592
648
|
const embedding = await this.embed(chunksWithIds[i].content);
|
|
@@ -606,9 +662,9 @@ class CodebaseIndexer {
|
|
|
606
662
|
function_name: chunksWithIds[i].function_name || "",
|
|
607
663
|
class_name: chunksWithIds[i].class_name || "",
|
|
608
664
|
tags: (fileMeta.tags || []).join(","),
|
|
609
|
-
// Line numbers for "from-to" extraction
|
|
610
|
-
start_line: chunksWithIds[i].start_line,
|
|
611
|
-
end_line: chunksWithIds[i].end_line,
|
|
665
|
+
// Line numbers for "from-to" extraction (default to -1 when unknown)
|
|
666
|
+
start_line: chunksWithIds[i].start_line ?? -1,
|
|
667
|
+
end_line: chunksWithIds[i].end_line ?? -1,
|
|
612
668
|
});
|
|
613
669
|
}
|
|
614
670
|
|
|
@@ -648,7 +704,7 @@ class CodebaseIndexer {
|
|
|
648
704
|
const table = await this.db.openTable(tableName);
|
|
649
705
|
let allRows;
|
|
650
706
|
try {
|
|
651
|
-
allRows = await table.filter("").limit(100000).execute();
|
|
707
|
+
allRows = await table.filter("true").limit(100000).execute();
|
|
652
708
|
} catch (e) {
|
|
653
709
|
if (DEBUG) console.log("[vectorizer] BM25 index build failed (corrupted table?):", e.message);
|
|
654
710
|
return null;
|
|
@@ -712,10 +768,15 @@ class CodebaseIndexer {
|
|
|
712
768
|
const bm25Results = bm25.search(query, fetchLimit);
|
|
713
769
|
|
|
714
770
|
// Build score maps
|
|
771
|
+
// LanceDB _distance is L2 (euclidean). For normalized vectors,
|
|
772
|
+
// L2 ∈ [0, 2]. Convert to similarity ∈ [0, 1]:
|
|
773
|
+
// similarity = 1 - (distance / 2)
|
|
774
|
+
const distanceToScore = (d: number | null | undefined) =>
|
|
775
|
+
d != null ? Math.max(0, 1 - d / 2) : 0.5;
|
|
776
|
+
|
|
715
777
|
const vectorScores = new Map();
|
|
716
778
|
for (let i = 0; i < results.length; i++) {
|
|
717
|
-
|
|
718
|
-
vectorScores.set(i, score);
|
|
779
|
+
vectorScores.set(i, distanceToScore(results[i]._distance));
|
|
719
780
|
}
|
|
720
781
|
|
|
721
782
|
const bm25Scores = new Map();
|
|
@@ -730,7 +791,7 @@ class CodebaseIndexer {
|
|
|
730
791
|
|
|
731
792
|
for (let i = 0; i < results.length; i++) {
|
|
732
793
|
const key = `${results[i].file}:${results[i].chunk_index}`;
|
|
733
|
-
const vs = results[i]._distance
|
|
794
|
+
const vs = distanceToScore(results[i]._distance);
|
|
734
795
|
resultMap.set(key, { row: results[i], vectorScore: vs, bm25Score: 0 });
|
|
735
796
|
}
|
|
736
797
|
|
|
@@ -831,7 +892,10 @@ class CodebaseIndexer {
|
|
|
831
892
|
|
|
832
893
|
const outgoing = await this.graphDB.getOutgoing(result.chunk_id);
|
|
833
894
|
const incoming = await this.graphDB.getIncoming(result.chunk_id);
|
|
834
|
-
|
|
895
|
+
// Filter out structural and meta edges — only relation edges are useful for context
|
|
896
|
+
const allEdges = [...outgoing, ...incoming].filter(
|
|
897
|
+
e => e.predicate !== "belongs_to" && e.predicate !== "graph_built" && !isStructuralPredicate(e.predicate)
|
|
898
|
+
);
|
|
835
899
|
|
|
836
900
|
const neighbors = [];
|
|
837
901
|
for (const edge of allEdges) {
|
|
@@ -852,8 +916,13 @@ class CodebaseIndexer {
|
|
|
852
916
|
});
|
|
853
917
|
}
|
|
854
918
|
|
|
919
|
+
// Apply min_relevance filter, then cap at max_related
|
|
855
920
|
neighbors.sort((a, b) => b.score - a.score);
|
|
856
|
-
|
|
921
|
+
const minRelevance = GRAPH_CONFIG.min_relevance ?? 0.5;
|
|
922
|
+
const maxRelated = GRAPH_CONFIG.max_related ?? 4;
|
|
923
|
+
result.relatedContext = neighbors
|
|
924
|
+
.filter(n => n.score >= minRelevance)
|
|
925
|
+
.slice(0, maxRelated);
|
|
857
926
|
|
|
858
927
|
// FR-060: Record provenance for each attached chunk
|
|
859
928
|
if (this.usageTracker) {
|
|
@@ -894,7 +963,7 @@ class CodebaseIndexer {
|
|
|
894
963
|
const table = await this.db.openTable(tableName);
|
|
895
964
|
let rows;
|
|
896
965
|
try {
|
|
897
|
-
rows = await table.filter("").limit(100000).execute();
|
|
966
|
+
rows = await table.filter("true").limit(100000).execute();
|
|
898
967
|
} catch (e) {
|
|
899
968
|
if (DEBUG) console.log("[vectorizer] Chunk cache build failed (corrupted table?):", e.message);
|
|
900
969
|
return null;
|
|
@@ -1032,7 +1101,7 @@ class CodebaseIndexer {
|
|
|
1032
1101
|
const tables = await this.db.tableNames();
|
|
1033
1102
|
if (tables.includes(tableName)) {
|
|
1034
1103
|
const table = await this.db.openTable(tableName);
|
|
1035
|
-
const allRows = await table.filter("").limit(100000).execute();
|
|
1104
|
+
const allRows = await table.filter("true").limit(100000).execute();
|
|
1036
1105
|
const chunkData = allRows
|
|
1037
1106
|
.filter(r => r.chunk_id && r.vector)
|
|
1038
1107
|
.map(r => ({ chunk_id: r.chunk_id, vector: Array.from(r.vector), file: r.file }));
|
package/vectorizer.yaml
CHANGED
|
@@ -61,6 +61,22 @@ vectorizer:
|
|
|
61
61
|
# Indexes to maintain - each has pattern (what to include) and ignore (what to skip)
|
|
62
62
|
indexes:
|
|
63
63
|
|
|
64
|
+
# Source code index - all common programming languages
|
|
65
|
+
code:
|
|
66
|
+
enabled: true
|
|
67
|
+
pattern: "**/*.{js,ts,jsx,tsx,mjs,cjs,py,go,rs,java,kt,swift,c,cpp,h,hpp,cs,rb,php,scala,clj}"
|
|
68
|
+
ignore:
|
|
69
|
+
- "**/node_modules/**"
|
|
70
|
+
- "**/.git/**"
|
|
71
|
+
- "**/dist/**"
|
|
72
|
+
- "**/build/**"
|
|
73
|
+
- "**/.opencode/**"
|
|
74
|
+
- "**/docs/**"
|
|
75
|
+
- "**/vendor/**"
|
|
76
|
+
- "**/__pycache__/**"
|
|
77
|
+
hybrid: true
|
|
78
|
+
bm25_weight: 0.3
|
|
79
|
+
|
|
64
80
|
# Documentation index - markdown, text files
|
|
65
81
|
docs:
|
|
66
82
|
enabled: true
|