@comfanion/usethis_search 0.2.0-dev.0 → 3.0.0-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.ts +2 -0
- package/package.json +10 -3
- package/tools/read-interceptor.ts +54 -0
- package/tools/search.ts +14 -1
- package/vectorizer/analyzers/lsp-analyzer.ts +162 -0
- package/vectorizer/analyzers/regex-analyzer.ts +255 -0
- package/vectorizer/graph-builder.ts +95 -0
- package/vectorizer/graph-db.ts +97 -0
- package/vectorizer/index.js +80 -6
- package/vectorizer.yaml +14 -0
package/index.ts
CHANGED

@@ -2,6 +2,7 @@ import type { Plugin } from "@opencode-ai/plugin"
 
 import search from "./tools/search"
 import codeindex from "./tools/codeindex"
+import readInterceptor from "./tools/read-interceptor"
 import FileIndexerPlugin from "./file-indexer"
 
 const UsethisSearchPlugin: Plugin = async (ctx) => {
@@ -12,6 +13,7 @@ const UsethisSearchPlugin: Plugin = async (ctx) => {
     tool: {
       search,
       codeindex,
+      read: readInterceptor,
     },
   }
 }
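The significant part of this change is the key name: registering the interceptor under `read` is what lets it stand in for the standard Read tool, so reads go through the graph-aware path. A condensed sketch of the resulting tool map (the surrounding plugin shape is abbreviated here):

// Sketch only - the plugin's full return value is abbreviated.
const tools = {
  search,                 // semantic / hybrid search
  codeindex,              // index management (reindex, status, ...)
  read: readInterceptor,  // graph-aware replacement for the Read tool
}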
package/package.json
CHANGED

@@ -1,7 +1,7 @@
 {
   "name": "@comfanion/usethis_search",
-  "version": "0.2.0-dev.0",
-  "description": "OpenCode plugin: semantic search
+  "version": "3.0.0-dev.0",
+  "description": "OpenCode plugin: semantic search with graph-based context (v3: graph relations, 1-hop context, LSP + regex analyzers)",
   "type": "module",
   "main": "./index.ts",
   "exports": {
@@ -15,6 +15,7 @@
     "file-indexer.ts",
     "tools/search.ts",
     "tools/codeindex.ts",
+    "tools/read-interceptor.ts",
     "vectorizer/index.js",
     "vectorizer/content-cleaner.ts",
     "vectorizer/metadata-extractor.ts",
@@ -22,6 +23,10 @@
     "vectorizer/hybrid-search.ts",
     "vectorizer/query-cache.ts",
     "vectorizer/search-metrics.ts",
+    "vectorizer/graph-db.ts",
+    "vectorizer/graph-builder.ts",
+    "vectorizer/analyzers/regex-analyzer.ts",
+    "vectorizer/analyzers/lsp-analyzer.ts",
     "vectorizer/chunkers/markdown-chunker.ts",
     "vectorizer/chunkers/code-chunker.ts",
     "vectorizer/chunkers/chunker-factory.ts",
@@ -30,9 +35,11 @@
     "LICENSE"
   ],
   "dependencies": {
-    "@opencode-ai/plugin": "1.1.
+    "@opencode-ai/plugin": ">=1.1.0",
     "@xenova/transformers": "^2.17.0",
     "glob": "^10.3.10",
+    "level": "^8.0.1",
+    "levelgraph": "^4.0.0",
     "vectordb": "^0.4.0"
   },
   "peerDependencies": {
package/tools/read-interceptor.ts
ADDED

@@ -0,0 +1,54 @@
+import { tool } from "@opencode-ai/plugin"
+import path from "path"
+
+import { CodebaseIndexer } from "../vectorizer/index.js"
+
+export default tool({
+  description: `Read file with graph-aware context attachment. When available, this tool searches the file in the index and returns content + related context from the graph (imports, links, etc.).
+
+Use this instead of the standard Read tool for better context awareness.`,
+
+  args: {
+    filePath: tool.schema.string().describe("Path to the file to read"),
+  },
+
+  async execute(args) {
+    const projectRoot = process.cwd()
+    const filePath = path.isAbsolute(args.filePath) ? args.filePath : path.join(projectRoot, args.filePath)
+
+    const relPath = path.relative(projectRoot, filePath)
+
+    const indexer = await new CodebaseIndexer(projectRoot, "code").init()
+    const results = await indexer.search(relPath, 20, false, {})
+    const fileChunks = results.filter(r => r.file === relPath)
+    await indexer.unloadModel()
+
+    if (fileChunks.length === 0) {
+      return `File "${relPath}" not indexed. Use original Read tool or run codeindex({ action: "reindex", index: "code" })`
+    }
+
+    let output = `## ${relPath}\n\n`
+
+    output += `### Content\n\n`
+    for (const chunk of fileChunks) {
+      output += chunk.content + "\n\n"
+    }
+
+    const allRelated = fileChunks
+      .flatMap(c => c.relatedContext || [])
+      .filter((r, i, arr) => arr.findIndex(x => x.chunk_id === r.chunk_id) === i)
+
+    if (allRelated.length > 0) {
+      output += `### Related Context\n\n`
+      for (const rel of allRelated) {
+        const snippet = rel.content.length > 300
+          ? rel.content.substring(0, 300) + "..."
+          : rel.content
+        output += `**${rel.file}** (${rel.relation})\n`
+        output += `\`\`\`\n${snippet}\n\`\`\`\n\n`
+      }
+    }
+
+    return output
+  },
+})
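A minimal usage sketch, assuming the object returned by `tool({...})` exposes its `execute` handler directly (the plugin host's actual calling convention is not shown in this diff):

import readInterceptor from "./tools/read-interceptor"

// Hypothetical direct invocation, for illustration only.
const report = await readInterceptor.execute({ filePath: "src/app.ts" })
// Indexed file   -> "## src/app.ts", a "### Content" section, and a
//                   "### Related Context" section with snippets capped at 300 chars.
// Unindexed file -> a hint to fall back to the original Read tool or run
//                   codeindex({ action: "reindex", index: "code" }).
console.log(report)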
package/tools/search.ts
CHANGED

@@ -138,7 +138,20 @@ Examples:
     output += "```\n"
     const content = r.content.length > 500 ? r.content.substring(0, 500) + "\n... (truncated)" : r.content
     output += content
-    output += "\n```\n
+    output += "\n```\n"
+
+    if (r.relatedContext && r.relatedContext.length > 0) {
+      output += "\n**Related Context:**\n"
+      for (const rel of r.relatedContext) {
+        const snippet = rel.content.length > 200
+          ? rel.content.substring(0, 200) + "..."
+          : rel.content
+        output += `- **${rel.file}** (${rel.relation}, via ${rel.via}, score: ${rel.score.toFixed(2)})\n`
+        output += `  \`\`\`\n  ${snippet}\n  \`\`\`\n`
+      }
+    }
+
+    output += "\n"
   }
 
   output += `---\n*Found ${allResults.length} results. Use Read tool to see full files.*`
package/vectorizer/analyzers/lsp-analyzer.ts
ADDED

@@ -0,0 +1,162 @@
+import { ChunkWithId } from "../graph-builder"
+
+export interface Relation {
+  from: string
+  to: string
+  predicate: string
+  weight: number
+  source: "lsp"
+  line?: number
+}
+
+export class LSPAnalyzer {
+  private readonly timeout = 5000
+
+  async isAvailable(filePath: string): Promise<boolean> {
+    try {
+      const ext = filePath.split(".").pop()
+      if (!ext) return false
+
+      const language = this.getLanguage(ext)
+      if (!language) return false
+
+      return this.checkLSPServer(language)
+    } catch {
+      return false
+    }
+  }
+
+  async analyzeFile(filePath: string, chunks: ChunkWithId[]): Promise<Relation[]> {
+    const relations: Relation[] = []
+
+    try {
+      const ext = filePath.split(".").pop()
+      if (!ext) return []
+
+      const language = this.getLanguage(ext)
+      if (!language) return []
+
+      const lines = await this.readFileLines(filePath)
+
+      const symbols = await this.getDocumentSymbols(filePath, language)
+      if (!symbols) return []
+
+      for (const symbol of symbols) {
+        const fromChunkId = this.findChunkForPosition(chunks, symbol.line)
+        if (!fromChunkId) continue
+
+        if (symbol.type === "class" || symbol.type === "interface") {
+          const implementations = await this.getImplementations(filePath, symbol.line, symbol.character, language)
+          for (const impl of implementations) {
+            const toChunkId = this.resolveTargetChunk(filePath, impl)
+            if (toChunkId) {
+              relations.push({
+                from: fromChunkId,
+                to: toChunkId,
+                predicate: "implements",
+                weight: 1.0,
+                source: "lsp"
+              })
+            }
+          }
+        }
+
+        const references = await this.getReferences(filePath, symbol.line, symbol.character, language)
+        for (const ref of references) {
+          const toChunkId = this.resolveTargetChunk(filePath, ref)
+          if (toChunkId && toChunkId !== fromChunkId) {
+            relations.push({
+              from: toChunkId,
+              to: fromChunkId,
+              predicate: "used_by",
+              weight: 1.0,
+              source: "lsp"
+            })
+          }
+        }
+
+        const definitions = await this.getDefinitions(filePath, symbol.line, symbol.character, language)
+        for (const def of definitions) {
+          const toChunkId = this.resolveTargetChunk(filePath, def)
+          if (toChunkId && toChunkId !== fromChunkId) {
+            relations.push({
+              from: fromChunkId,
+              to: toChunkId,
+              predicate: "references",
+              weight: 1.0,
+              source: "lsp"
+            })
+          }
+        }
+      }
+    } catch (error) {
+      return []
+    }
+
+    return relations
+  }
+
+  private getLanguage(ext: string): string | null {
+    const map: Record<string, string> = {
+      ts: "typescript",
+      js: "javascript",
+      tsx: "typescriptreact",
+      jsx: "javascriptreact",
+      py: "python",
+      go: "go",
+      rs: "rust",
+      java: "java",
+      cpp: "cpp",
+      c: "c",
+      cs: "csharp"
+    }
+    return map[ext] || null
+  }
+
+  private checkLSPServer(language: string): Promise<boolean> {
+    return Promise.resolve(false)
+  }
+
+  private async readFileLines(filePath: string): Promise<string[]> {
+    const fs = await import("fs/promises")
+    const content = await fs.readFile(filePath, "utf-8")
+    return content.split("\n")
+  }
+
+  private async getDocumentSymbols(filePath: string, language: string): Promise<Array<{ name: string; type: string; line: number; character: number }> | null> {
+    return null
+  }
+
+  private async getImplementations(filePath: string, line: number, character: number, language: string): Promise<Array<{ file: string; line: number; character: number }>> {
+    return []
+  }
+
+  private async getReferences(filePath: string, line: number, character: number, language: string): Promise<Array<{ file: string; line: number; character: number }>> {
+    return []
+  }
+
+  private async getDefinitions(filePath: string, line: number, character: number, language: string): Promise<Array<{ file: string; line: number; character: number }>> {
+    return []
+  }
+
+  private findChunkForPosition(chunks: ChunkWithId[], line: number): string | null {
+    for (const chunk of chunks) {
+      if (chunk.start_line !== undefined && chunk.end_line !== undefined) {
+        if (line >= chunk.start_line && line <= chunk.end_line) {
+          return chunk.chunk_id
+        }
+      }
+    }
+    return null
+  }
+
+  private resolveTargetChunk(currentFile: string, target: { file: string; line: number; character: number }): string | null {
+    if (target.file !== currentFile) {
+      const path = target.file.replace(/[^a-zA-Z0-9]/g, "_")
+      return `chunk_${path}_0`
+    }
+
+    const normalized = currentFile.replace(/[^a-zA-Z0-9]/g, "_")
+    return `chunk_${normalized}_0`
+  }
+}
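Note that the LSP plumbing in this version is stubbed: `checkLSPServer` always resolves `false` and `getDocumentSymbols` returns `null`, so `analyzeFile` cannot yet produce relations and the regex analyzer below carries the whole load. A sketch of the observable behavior until a real LSP client is wired into these stubs:

const lsp = new LSPAnalyzer()

await lsp.isAvailable("src/index.ts")      // false - checkLSPServer() is a stub
await lsp.analyzeFile("src/index.ts", [])  // []    - getDocumentSymbols() returns null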
package/vectorizer/analyzers/regex-analyzer.ts
ADDED

@@ -0,0 +1,255 @@
+import path from "path"
+import { ChunkWithId } from "../graph-builder"
+
+export interface Relation {
+  from: string
+  to: string
+  predicate: string
+  weight: number
+  source: "regex" | "markdown"
+  line?: number
+}
+
+export class RegexAnalyzer {
+  private readonly patterns = {
+    jsImports: /import\s+(?:\{[^}]+\}|\w+)\s+from\s+['"]([^'"]+)['"]/g,
+    pythonFromImport: /from\s+(\S+)\s+import/g,
+    pythonImport: /import\s+(\S+)/g,
+    extends: /class\s+\w+\s+extends\s+(\w+)/g,
+    implements: /class\s+\w+\s+implements\s+([^{]+)/g,
+    markdownLink: /\[([^\]]+)\]\(([^)]+)\)/g
+  }
+
+  analyzeCode(filePath: string, content: string, chunks: ChunkWithId[]): Relation[] {
+    const relations: Relation[] = []
+    const ext = path.extname(filePath)
+    const lines = content.split("\n")
+
+    if ([".js", ".ts", ".jsx", ".tsx"].includes(ext)) {
+      this.analyzeJSCode(content, lines, filePath, chunks, relations)
+    } else if ([".py"].includes(ext)) {
+      this.analyzePythonCode(content, lines, filePath, chunks, relations)
+    }
+
+    return relations
+  }
+
+  analyzeMarkdown(filePath: string, content: string, chunks: ChunkWithId[]): Relation[] {
+    const relations: Relation[] = []
+    const lines = content.split("\n")
+    const dir = path.dirname(filePath)
+
+    let match
+    this.patterns.markdownLink.lastIndex = 0
+    while ((match = this.patterns.markdownLink.exec(content)) !== null) {
+      const linkText = match[1]
+      const linkTarget = match[2]
+      const lineIndex = content.substring(0, match.index).split("\n").length - 1
+      const line = lines[lineIndex]
+
+      const targetPath = this.resolvePath(filePath, linkTarget)
+      if (!targetPath) continue
+
+      const fromChunkId = this.findChunkForLine(chunks, lineIndex)
+      if (!fromChunkId) continue
+
+      const toChunkId = this.findChunkForLinkTarget(targetPath, linkTarget, chunks)
+      if (toChunkId) {
+        relations.push({
+          from: fromChunkId,
+          to: toChunkId,
+          predicate: "links_to",
+          weight: 1.0,
+          source: "markdown",
+          line: lineIndex
+        })
+      }
+    }
+
+    return relations
+  }
+
+  private analyzeJSCode(content: string, lines: string[], filePath: string, chunks: ChunkWithId[], relations: Relation[]) {
+    let match
+
+    this.patterns.jsImports.lastIndex = 0
+    while ((match = this.patterns.jsImports.exec(content)) !== null) {
+      const importPath = match[1]
+      const lineIndex = content.substring(0, match.index).split("\n").length - 1
+      const line = lines[lineIndex]
+
+      if (importPath.startsWith(".")) {
+        const targetPath = this.resolvePath(filePath, importPath)
+        if (!targetPath) continue
+
+        const fromChunkId = this.findChunkForLine(chunks, lineIndex)
+        if (!fromChunkId) continue
+
+        const toChunkId = this.findFirstChunkInFile(targetPath)
+        if (toChunkId) {
+          relations.push({
+            from: fromChunkId,
+            to: toChunkId,
+            predicate: "imports",
+            weight: 0.8,
+            source: "regex",
+            line: lineIndex
+          })
+        }
+      }
+    }
+
+    this.patterns.extends.lastIndex = 0
+    while ((match = this.patterns.extends.exec(content)) !== null) {
+      const parentClass = match[1]
+      const lineIndex = content.substring(0, match.index).split("\n").length - 1
+
+      const fromChunkId = this.findChunkForLine(chunks, lineIndex)
+      if (!fromChunkId) continue
+
+      const toChunkId = this.findChunkContainingSymbol(chunks, parentClass)
+      if (toChunkId) {
+        relations.push({
+          from: fromChunkId,
+          to: toChunkId,
+          predicate: "extends",
+          weight: 0.8,
+          source: "regex",
+          line: lineIndex
+        })
+      }
+    }
+
+    this.patterns.implements.lastIndex = 0
+    while ((match = this.patterns.implements.exec(content)) !== null) {
+      const interfaces = match[1].split(",").map(s => s.trim())
+      const lineIndex = content.substring(0, match.index).split("\n").length - 1
+
+      const fromChunkId = this.findChunkForLine(chunks, lineIndex)
+      if (!fromChunkId) continue
+
+      for (const iface of interfaces) {
+        const toChunkId = this.findChunkContainingSymbol(chunks, iface)
+        if (toChunkId) {
+          relations.push({
+            from: fromChunkId,
+            to: toChunkId,
+            predicate: "implements",
+            weight: 0.8,
+            source: "regex",
+            line: lineIndex
+          })
+        }
+      }
+    }
+  }
+
+  private analyzePythonCode(content: string, lines: string[], filePath: string, chunks: ChunkWithId[], relations: Relation[]) {
+    let match
+
+    this.patterns.pythonFromImport.lastIndex = 0
+    while ((match = this.patterns.pythonFromImport.exec(content)) !== null) {
+      const importPath = match[1]
+      const lineIndex = content.substring(0, match.index).split("\n").length - 1
+
+      if (importPath.startsWith(".")) {
+        const targetPath = this.resolvePath(filePath, importPath)
+        if (!targetPath) continue
+
+        const fromChunkId = this.findChunkForLine(chunks, lineIndex)
+        if (!fromChunkId) continue
+
+        const toChunkId = this.findFirstChunkInFile(targetPath)
+        if (toChunkId) {
+          relations.push({
+            from: fromChunkId,
+            to: toChunkId,
+            predicate: "imports",
+            weight: 0.8,
+            source: "regex",
+            line: lineIndex
+          })
+        }
+      }
+    }
+
+    this.patterns.pythonImport.lastIndex = 0
+    while ((match = this.patterns.pythonImport.exec(content)) !== null) {
+      const importPath = match[1]
+      const lineIndex = content.substring(0, match.index).split("\n").length - 1
+
+      if (importPath.startsWith(".")) {
+        const targetPath = this.resolvePath(filePath, importPath)
+        if (!targetPath) continue
+
+        const fromChunkId = this.findChunkForLine(chunks, lineIndex)
+        if (!fromChunkId) continue
+
+        const toChunkId = this.findFirstChunkInFile(targetPath)
+        if (toChunkId) {
+          relations.push({
+            from: fromChunkId,
+            to: toChunkId,
+            predicate: "imports",
+            weight: 0.8,
+            source: "regex",
+            line: lineIndex
+          })
+        }
+      }
+    }
+  }
+
+  private resolvePath(filePath: string, target: string): string | null {
+    try {
+      const dir = path.dirname(filePath)
+      const absoluteTarget = path.resolve(dir, target)
+
+      if (!absoluteTarget.startsWith(process.cwd())) {
+        return null
+      }
+
+      return path.relative(process.cwd(), absoluteTarget)
+    } catch {
+      return null
+    }
+  }
+
+  private findChunkForLine(chunks: ChunkWithId[], lineIndex: number): string | null {
+    for (const chunk of chunks) {
+      if (chunk.start_line !== undefined && chunk.end_line !== undefined) {
+        if (lineIndex >= chunk.start_line && lineIndex <= chunk.end_line) {
+          return chunk.chunk_id
+        }
+      }
+    }
+    return null
+  }
+
+  private findFirstChunkInFile(targetPath: string): string | null {
+    const normalized = targetPath.replace(/[^a-zA-Z0-9]/g, "_")
+    return `chunk_${normalized}_0`
+  }
+
+  private findChunkContainingSymbol(chunks: ChunkWithId[], symbol: string): string | null {
+    for (const chunk of chunks) {
+      if (chunk.content.includes(symbol)) {
+        return chunk.chunk_id
+      }
+    }
+    return null
+  }
+
+  private findChunkForLinkTarget(targetPath: string, linkTarget: string, chunks: ChunkWithId[]): string | null {
+    const hashIndex = linkTarget.indexOf("#")
+    if (hashIndex !== -1) {
+      const heading = linkTarget.substring(hashIndex + 1).toLowerCase()
+      for (const chunk of chunks) {
+        if (chunk.heading_context && chunk.heading_context.toLowerCase().includes(heading)) {
+          return chunk.chunk_id
+        }
+      }
+    }
+    return this.findFirstChunkInFile(targetPath)
+  }
+}
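A worked example of the regex path, using the chunk shape defined in graph-builder; the file names are illustrative and the process cwd is assumed to be the project root:

const analyzer = new RegexAnalyzer()

const content = 'import { helper } from "./util"\nexport const x = helper()\n'
const chunks = [{ chunk_id: "chunk_src_app_0", content, start_line: 0, end_line: 1 }]

const rels = analyzer.analyzeCode("src/app.ts", content, chunks)
// => [{ from: "chunk_src_app_0", to: "chunk_src_util_0",
//       predicate: "imports", weight: 0.8, source: "regex", line: 0 }]
// "./util" resolves against dirname("src/app.ts"), is kept because it stays
// under process.cwd(), and maps to the synthetic first chunk of "src/util".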
package/vectorizer/graph-builder.ts
ADDED

@@ -0,0 +1,95 @@
+import path from "path"
+import { GraphDB, Triple } from "./graph-db"
+import { RegexAnalyzer, Relation as RegexRelation } from "./analyzers/regex-analyzer"
+import { LSPAnalyzer, Relation as LSPRelation } from "./analyzers/lsp-analyzer"
+
+export interface ChunkWithId {
+  chunk_id: string
+  content: string
+  start_line?: number
+  end_line?: number
+  heading_context?: string
+}
+
+export class GraphBuilder {
+  private lspAnalyzer: LSPAnalyzer
+  private regexAnalyzer: RegexAnalyzer
+
+  constructor(
+    private graphDB: GraphDB,
+    private projectRoot: string
+  ) {
+    this.lspAnalyzer = new LSPAnalyzer()
+    this.regexAnalyzer = new RegexAnalyzer()
+  }
+
+  assignChunkIds(filePath: string, chunks: any[]): ChunkWithId[] {
+    const withoutExt = filePath.replace(/\.[^/.]+$/, "")
+    const normalizedPath = withoutExt.replace(/[^a-zA-Z0-9]/g, "_")
+
+    return chunks.map((chunk, index) => {
+      const chunkId = `chunk_${normalizedPath}_${index}`
+      return {
+        ...chunk,
+        chunk_id: chunkId
+      } as ChunkWithId
+    })
+  }
+
+  async buildEdges(
+    filePath: string,
+    content: string,
+    chunks: ChunkWithId[],
+    fileType: "code" | "docs"
+  ): Promise<void> {
+    let relations: Array<RegexRelation | LSPRelation> = []
+
+    if (fileType === "docs") {
+      relations = this.regexAnalyzer.analyzeMarkdown(filePath, content, chunks)
+    } else if (fileType === "code") {
+      const lspAvailable = await this.lspAnalyzer.isAvailable(filePath)
+
+      if (lspAvailable) {
+        try {
+          relations = await this.lspAnalyzer.analyzeFile(filePath, chunks)
+        } catch (error) {
+          relations = this.regexAnalyzer.analyzeCode(filePath, content, chunks)
+        }
+      } else {
+        relations = this.regexAnalyzer.analyzeCode(filePath, content, chunks)
+      }
+    }
+
+    const triples: Triple[] = relations.map(rel => ({
+      subject: rel.from,
+      predicate: rel.predicate,
+      object: rel.to,
+      weight: rel.weight,
+      source: rel.source,
+      file: filePath,
+      line: rel.line
+    }))
+
+    await this.graphDB.putEdges(triples)
+  }
+
+  resolveChunkId(filePath: string, line: number): string | null {
+    const withoutExt = filePath.replace(/\.[^/.]+$/, "")
+    const normalizedPath = withoutExt.replace(/[^a-zA-Z0-9]/g, "_")
+    return `chunk_${normalizedPath}_0`
+  }
+
+  async getRelatedChunks(chunkId: string): Promise<Array<{ chunk_id: string; predicate: string; weight: number; direction: "outgoing" | "incoming" }>> {
+    const [outgoing, incoming] = await Promise.all([
+      this.graphDB.getOutgoing(chunkId),
+      this.graphDB.getIncoming(chunkId)
+    ])
+
+    const result = [
+      ...outgoing.map(t => ({ chunk_id: t.object, predicate: t.predicate, weight: t.weight, direction: "outgoing" as const })),
+      ...incoming.map(t => ({ chunk_id: t.subject, predicate: t.predicate, weight: t.weight, direction: "incoming" as const }))
+    ]
+
+    return result
+  }
+}
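One subtlety in the chunk-ID convention: `assignChunkIds` strips the file extension before normalizing, while the analyzers' `findFirstChunkInFile` normalizes the target string as-is. Extensionless JS imports therefore line up, but a markdown link that includes its `.md` extension appears to produce an ID that will not match the indexed chunks. A worked example:

// assignChunkIds: extension stripped, then non-alphanumerics -> "_"
"src/util.ts".replace(/\.[^/.]+$/, "").replace(/[^a-zA-Z0-9]/g, "_")
// => "src_util"     -> ids chunk_src_util_0, chunk_src_util_1, ...

// findFirstChunkInFile: no extension stripping
"src/util".replace(/[^a-zA-Z0-9]/g, "_")     // => "src_util"    (matches)
"docs/guide.md".replace(/[^a-zA-Z0-9]/g, "_") // => "docs_guide_md" (does not match chunk_docs_guide_0)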
package/vectorizer/graph-db.ts
ADDED

@@ -0,0 +1,97 @@
+import levelgraph from "levelgraph"
+import { Level } from "level"
+
+export interface Triple {
+  subject: string
+  predicate: string
+  object: string
+  weight: number
+  source: string
+  file: string
+  line?: number
+}
+
+export class GraphDB {
+  private db: any
+  private initialized: boolean = false
+
+  constructor(private dbPath: string) {}
+
+  async init(): Promise<this> {
+    const levelDb = new Level(this.dbPath)
+    this.db = levelgraph(levelDb)
+    this.initialized = true
+    return this
+  }
+
+  async putEdges(triples: Triple[]): Promise<void> {
+    if (!this.initialized) {
+      throw new Error("GraphDB not initialized. Call init() first.")
+    }
+    await new Promise<void>((resolve, reject) => {
+      this.db.put(triples, (err: Error | undefined) => {
+        if (err) reject(err)
+        else resolve()
+      })
+    })
+  }
+
+  async getOutgoing(chunkId: string): Promise<Triple[]> {
+    if (!this.initialized) {
+      throw new Error("GraphDB not initialized. Call init() first.")
+    }
+    return new Promise<Triple[]>((resolve, reject) => {
+      this.db.get({ subject: chunkId }, (err: Error | undefined, triples: Triple[]) => {
+        if (err) reject(err)
+        else resolve(triples || [])
+      })
+    })
+  }
+
+  async getIncoming(chunkId: string): Promise<Triple[]> {
+    if (!this.initialized) {
+      throw new Error("GraphDB not initialized. Call init() first.")
+    }
+    return new Promise<Triple[]>((resolve, reject) => {
+      this.db.get({ object: chunkId }, (err: Error | undefined, triples: Triple[]) => {
+        if (err) reject(err)
+        else resolve(triples || [])
+      })
+    })
+  }
+
+  async deleteByFile(filePath: string): Promise<void> {
+    if (!this.initialized) {
+      throw new Error("GraphDB not initialized. Call init() first.")
+    }
+    const allTriples = await new Promise<Triple[]>((resolve, reject) => {
+      this.db.get({}, (err: Error | undefined, triples: Triple[]) => {
+        if (err) reject(err)
+        else resolve(triples || [])
+      })
+    })
+
+    const toDelete = allTriples.filter(t => t.file === filePath)
+
+    for (const t of toDelete) {
+      await new Promise<void>((resolve, reject) => {
+        this.db.del(t, (err: Error | undefined) => {
+          if (err) reject(err)
+          else resolve()
+        })
+      })
+    }
+  }
+
+  async close(): Promise<void> {
+    if (this.initialized && this.db) {
+      await new Promise<void>((resolve, reject) => {
+        this.db.close((err: Error | undefined) => {
+          if (err) reject(err)
+          else resolve()
+        })
+      })
+      this.initialized = false
+    }
+  }
+}
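A minimal end-to-end sketch of the GraphDB API as defined above (the path and triple values are illustrative):

const db = await new GraphDB(".opencode/graph/code_graph").init()

await db.putEdges([{
  subject: "chunk_src_app_0",
  predicate: "imports",
  object: "chunk_src_util_0",
  weight: 0.8,
  source: "regex",
  file: "src/app.ts",
  line: 0,
}])

await db.getOutgoing("chunk_src_app_0")  // triples whose subject is chunk_src_app_0
await db.getIncoming("chunk_src_util_0") // triples whose object is chunk_src_util_0

await db.deleteByFile("src/app.ts")      // fetches all triples, deletes matches one by one
await db.close()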
package/vectorizer/index.js
CHANGED

@@ -15,6 +15,8 @@ import { BM25Index } from "./bm25-index.ts";
 import { mergeResults, DEFAULT_HYBRID_CONFIG } from "./hybrid-search.ts";
 import { QueryCache, DEFAULT_CACHE_CONFIG } from "./query-cache.ts";
 import { SearchMetrics } from "./search-metrics.ts";
+import { GraphDB } from "./graph-db.ts";
+import { GraphBuilder } from "./graph-builder.ts";
 
 // Suppress transformers.js logs unless DEBUG is set
 const DEBUG = process.env.DEBUG?.includes("vectorizer") || process.env.DEBUG === "*";
@@ -350,6 +352,8 @@ class CodebaseIndexer {
     this.configLoaded = false;
     this.bm25 = null; // lazy-built BM25 index
     this.metrics = null; // lazy-loaded SearchMetrics
+    this.graphDB = null; // Graph DB for relationships
+    this.graphBuilder = null; // Graph builder orchestrator
   }
 
   async init() {
@@ -360,6 +364,13 @@ class CodebaseIndexer {
     await fs.mkdir(this.cacheDir, { recursive: true });
     this.db = await lancedb.connect(path.join(this.cacheDir, "lancedb"));
     await this.loadHashes();
+
+    const graphType = this.indexName === "docs" ? "doc_graph" : "code_graph";
+    const graphPath = path.join(this.root, ".opencode", "graph", graphType);
+    await fs.mkdir(path.dirname(graphPath), { recursive: true });
+    this.graphDB = await new GraphDB(graphPath).init();
+    this.graphBuilder = new GraphBuilder(this.graphDB, this.root);
+
     return this;
   }
 
@@ -503,13 +514,21 @@ class CodebaseIndexer {
     // Semantic chunking
     const chunks = chunkContent(cleaned, fileMeta.file_type, fileMeta.language, CHUNKING_CONFIG);
 
+    // v3: Assign chunk IDs for graph tracking
+    const chunksWithIds = this.graphBuilder.assignChunkIds(relPath, chunks);
+
+    // v3: Delete old edges for this file and build new ones
+    await this.graphDB.deleteByFile(relPath);
+    await this.graphBuilder.buildEdges(relPath, content, chunksWithIds, fileMeta.file_type);
+
     const data = [];
-    for (let i = 0; i <
-      const embedding = await this.embed(
+    for (let i = 0; i < chunksWithIds.length; i++) {
+      const embedding = await this.embed(chunksWithIds[i].content);
       data.push({
+        chunk_id: chunksWithIds[i].chunk_id,
         file: relPath,
         chunk_index: i,
-        content:
+        content: chunksWithIds[i].content,
         vector: embedding,
         archived: archived,
         // v2 metadata
@@ -517,9 +536,9 @@ class CodebaseIndexer {
         language: fileMeta.language,
         last_modified: fileMeta.last_modified,
         file_size: fileMeta.file_size,
-        heading_context:
-        function_name:
-        class_name:
+        heading_context: chunksWithIds[i].heading_context || "",
+        function_name: chunksWithIds[i].function_name || "",
+        class_name: chunksWithIds[i].class_name || "",
         tags: (fileMeta.tags || []).join(","),
       });
     }
@@ -720,9 +739,64 @@ class CodebaseIndexer {
     }
   }
 
+    // ── Graph context expansion (v3) ───────────────────────────────────────
+    if (this.graphDB) {
+      for (const result of finalResults) {
+        if (!result.chunk_id) continue;
+
+        const outgoing = await this.graphDB.getOutgoing(result.chunk_id);
+        const incoming = await this.graphDB.getIncoming(result.chunk_id);
+        const allEdges = [...outgoing, ...incoming];
+
+        const neighbors = [];
+        for (const edge of allEdges) {
+          const neighborId = edge.subject === result.chunk_id ? edge.object : edge.subject;
+          const neighborChunk = await this.findChunkById(neighborId);
+          if (!neighborChunk) continue;
+
+          const similarity = this.cosineSimilarity(neighborChunk.vector, queryEmbedding);
+          const score = edge.weight * similarity;
+
+          neighbors.push({
+            chunk_id: neighborId,
+            file: neighborChunk.file,
+            content: neighborChunk.content,
+            relation: edge.predicate,
+            score,
+            via: edge.source
+          });
+        }
+
+        neighbors.sort((a, b) => b.score - a.score);
+        result.relatedContext = neighbors.slice(0, 3);
+      }
+    }
+
     return finalResults;
   }
 
+  async findChunkById(chunkId) {
+    const tableName = "chunks";
+    const tables = await this.db.tableNames();
+    if (!tables.includes(tableName)) return null;
+
+    const table = await this.db.openTable(tableName);
+    const rows = await table.search([0]).limit(100000).execute();
+    return rows.find(r => r.chunk_id === chunkId) || null;
+  }
+
+  cosineSimilarity(vecA, vecB) {
+    let dotProduct = 0;
+    let normA = 0;
+    let normB = 0;
+    for (let i = 0; i < vecA.length; i++) {
+      dotProduct += vecA[i] * vecB[i];
+      normA += vecA[i] * vecA[i];
+      normB += vecB[i] * vecB[i];
+    }
+    return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
+  }
+
   async checkHealth(extraIgnore = []) {
     const { glob } = await import("glob");
     const preset = INDEX_PRESETS[this.indexName] || DEFAULT_PRESETS.code;
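Two behaviors of this expansion path are worth noting. The neighbor score is simply edge weight times query-to-neighbor cosine similarity, and `findChunkById` falls back to a dummy vector search over up to 100,000 rows filtered in JS, i.e. a full-table scan per neighbor. The `min_relevance` threshold documented in vectorizer.yaml below does not appear to be applied in this diff; the top three neighbors are attached unconditionally. A worked example of the scoring, with illustrative values:

// score = edge.weight * cosineSimilarity(neighborChunk.vector, queryEmbedding)
const regexNeighbor = 0.8 * 0.75  // = 0.60 (regex import edges carry weight 0.8)
const lspNeighbor   = 1.0 * 0.50  // = 0.50 (LSP and markdown edges carry weight 1.0)
// neighbors.sort((a, b) => b.score - a.score) -> [0.60, 0.50, ...]
// result.relatedContext = neighbors.slice(0, 3) -> top 3 attached to the hit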
package/vectorizer.yaml
CHANGED

@@ -39,6 +39,20 @@ vectorizer:
   hybrid: false            # Enable hybrid search (vector + BM25)
   bm25_weight: 0.3         # BM25 weight in hybrid mode (0.0-1.0)
 
+  # Graph-based context (v3)
+  graph:
+    enabled: true
+    max_related: 3         # How many related chunks to attach
+    min_relevance: 0.5     # Minimum score threshold for related context
+
+  # LSP for code analysis
+  lsp:
+    enabled: true
+    timeout_ms: 5000       # Timeout per file
+
+  # Read() intercept
+  read_intercept: true
+
   # Quality monitoring (v2)
   quality:
     enable_metrics: false  # Track search quality metrics