@comfanion/usethis_search 3.0.0-dev.16 → 3.0.0-dev.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/vectorizer/analyzers/lsp-analyzer.ts +7 -7
- package/vectorizer/analyzers/regex-analyzer.ts +173 -67
- package/vectorizer/chunkers/code-chunker.ts +74 -24
- package/vectorizer/chunkers/markdown-chunker.ts +69 -7
- package/vectorizer/graph-builder.ts +207 -15
- package/vectorizer/graph-db.ts +70 -47
- package/vectorizer/index.ts +111 -23
- package/vectorizer.yaml +16 -0
|
@@ -22,14 +22,18 @@ export const DEFAULT_MD_CONFIG: MarkdownChunkConfig = {
|
|
|
22
22
|
/** A piece of a markdown document as emitted by the markdown chunker. */
export interface MarkdownChunk {
  /** Text of the chunk. */
  content: string
  /** Heading breadcrumb of the chunk, e.g. "H1 > H2 > H3". */
  heading_context: string
  /** Line index where the chunk starts, when it is known. */
  start_line?: number
  /** Line index where the chunk ends, when it is known. */
  end_line?: number
}
|
|
26
28
|
|
|
27
29
|
// ── Internal types ──────────────────────────────────────────────────────────
|
|
28
30
|
|
|
29
31
|
/** One heading-delimited section of a markdown document (internal to the chunker). */
interface Section {
  /** 1-6 for headings, 0 for preamble. */
  level: number
  /** Heading text; empty for the preamble section. */
  heading: string
  /** Text that follows the heading line. */
  body: string
  /** Index of the section's first line. */
  start_line: number
  /** Index of the section's last line. */
  end_line: number
}
|
|
34
38
|
|
|
35
39
|
// ── Parsing ─────────────────────────────────────────────────────────────────
|
|
@@ -38,19 +42,23 @@ interface Section {
|
|
|
38
42
|
function parseSections(content: string): Section[] {
|
|
39
43
|
const lines = content.split("\n")
|
|
40
44
|
const sections: Section[] = []
|
|
41
|
-
let currentSection: Section = { level: 0, heading: "", body: "" }
|
|
45
|
+
let currentSection: Section = { level: 0, heading: "", body: "", start_line: 0, end_line: 0 }
|
|
42
46
|
|
|
43
|
-
for (
|
|
47
|
+
for (let i = 0; i < lines.length; i++) {
|
|
48
|
+
const line = lines[i]
|
|
44
49
|
const headingMatch = line.match(/^(#{1,6})\s+(.+)$/)
|
|
45
50
|
if (headingMatch) {
|
|
46
51
|
// Push previous section
|
|
47
52
|
if (currentSection.body.trim() || currentSection.heading) {
|
|
53
|
+
currentSection.end_line = i - 1
|
|
48
54
|
sections.push(currentSection)
|
|
49
55
|
}
|
|
50
56
|
currentSection = {
|
|
51
57
|
level: headingMatch[1].length,
|
|
52
58
|
heading: headingMatch[2].trim(),
|
|
53
59
|
body: "",
|
|
60
|
+
start_line: i,
|
|
61
|
+
end_line: 0,
|
|
54
62
|
}
|
|
55
63
|
} else {
|
|
56
64
|
currentSection.body += line + "\n"
|
|
@@ -59,6 +67,7 @@ function parseSections(content: string): Section[] {
|
|
|
59
67
|
|
|
60
68
|
// Push last section
|
|
61
69
|
if (currentSection.body.trim() || currentSection.heading) {
|
|
70
|
+
currentSection.end_line = lines.length - 1
|
|
62
71
|
sections.push(currentSection)
|
|
63
72
|
}
|
|
64
73
|
|
|
@@ -97,6 +106,45 @@ function splitLargeText(text: string, maxSize: number): string[] {
|
|
|
97
106
|
return chunks
|
|
98
107
|
}
|
|
99
108
|
|
|
109
|
+
/**
 * Split `text` on line boundaries into pieces of at most `maxSize` characters,
 * recording the line range each piece covers.
 *
 * Line numbers are offsets from `startLine`: the first line of `text` is
 * `startLine`, the second `startLine + 1`, and so on. Both `start_line` and
 * `end_line` are inclusive.
 *
 * A single line longer than `maxSize` is never broken up; it is emitted as its
 * own oversized piece.
 *
 * @param text      Text to split.
 * @param maxSize   Maximum length (in characters) of a piece's `content`.
 * @param startLine Line number assigned to the first line of `text`.
 * @returns The pieces in document order; always at least one element.
 */
function splitLargeTextWithLines(text: string, maxSize: number, startLine: number): Array<{ content: string; start_line: number; end_line: number }> {
  const lines = text.split("\n")

  // Fast path: the whole text already fits.
  if (text.length <= maxSize) {
    return [{ content: text, start_line: startLine, end_line: startLine + lines.length - 1 }]
  }

  const chunks: Array<{ content: string; start_line: number; end_line: number }> = []
  let current: string[] = []
  // Exact length of current.join("\n"): the characters of every line plus one
  // separator *between* lines (no trailing separator), so a piece that fits
  // maxSize exactly is kept whole, consistent with the fast path above.
  let currentLen = 0
  let chunkStartLine = startLine

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i]
    const projectedLen = current.length === 0 ? line.length : currentLen + 1 + line.length

    if (projectedLen > maxSize && current.length > 0) {
      // Adding this line would overflow: close the running piece just before it.
      chunks.push({
        content: current.join("\n"),
        start_line: chunkStartLine,
        end_line: startLine + i - 1,
      })
      current = [line]
      currentLen = line.length
      chunkStartLine = startLine + i
    } else {
      current.push(line)
      currentLen = projectedLen
    }
  }

  // Flush whatever is left after the last line.
  if (current.length > 0) {
    chunks.push({
      content: current.join("\n"),
      start_line: chunkStartLine,
      end_line: startLine + lines.length - 1,
    })
  }

  return chunks
}
|
|
147
|
+
|
|
100
148
|
// ── Public API ──────────────────────────────────────────────────────────────
|
|
101
149
|
|
|
102
150
|
/**
|
|
@@ -138,7 +186,12 @@ export function chunkMarkdown(
|
|
|
138
186
|
? `${"#".repeat(section.level)} ${section.heading}\n${section.body}`
|
|
139
187
|
: section.body
|
|
140
188
|
|
|
141
|
-
rawChunks.push({
|
|
189
|
+
rawChunks.push({
|
|
190
|
+
content: sectionText.trim(),
|
|
191
|
+
heading_context: headingContext,
|
|
192
|
+
start_line: section.start_line,
|
|
193
|
+
end_line: section.end_line,
|
|
194
|
+
})
|
|
142
195
|
}
|
|
143
196
|
|
|
144
197
|
// Merge small sections with previous
|
|
@@ -150,7 +203,11 @@ export function chunkMarkdown(
|
|
|
150
203
|
) {
|
|
151
204
|
const prev = merged[merged.length - 1]
|
|
152
205
|
prev.content += "\n\n" + chunk.content
|
|
153
|
-
//
|
|
206
|
+
// Merge end_line
|
|
207
|
+
if (chunk.end_line !== undefined) {
|
|
208
|
+
prev.end_line = chunk.end_line
|
|
209
|
+
}
|
|
210
|
+
// Keep deepest heading context
|
|
154
211
|
if (chunk.heading_context) {
|
|
155
212
|
prev.heading_context = chunk.heading_context
|
|
156
213
|
}
|
|
@@ -163,9 +220,14 @@ export function chunkMarkdown(
|
|
|
163
220
|
const result: MarkdownChunk[] = []
|
|
164
221
|
for (const chunk of merged) {
|
|
165
222
|
if (chunk.content.length > config.max_chunk_size) {
|
|
166
|
-
const parts =
|
|
223
|
+
const parts = splitLargeTextWithLines(chunk.content, config.max_chunk_size, chunk.start_line || 0)
|
|
167
224
|
for (const part of parts) {
|
|
168
|
-
result.push({
|
|
225
|
+
result.push({
|
|
226
|
+
content: part.content,
|
|
227
|
+
heading_context: chunk.heading_context,
|
|
228
|
+
start_line: part.start_line,
|
|
229
|
+
end_line: part.end_line,
|
|
230
|
+
})
|
|
169
231
|
}
|
|
170
232
|
} else {
|
|
171
233
|
result.push(chunk)
|
|
@@ -9,29 +9,117 @@ export interface ChunkWithId {
|
|
|
9
9
|
start_line?: number
|
|
10
10
|
end_line?: number
|
|
11
11
|
heading_context?: string
|
|
12
|
+
function_name?: string
|
|
13
|
+
class_name?: string
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
// ── Chunk ID helpers ────────────────────────────────────────────────────────
|
|
17
|
+
|
|
18
|
+
/**
 * Compose a chunk ID of the form `chunk:{filePath}::{symbol}`.
 *
 * The symbol is the first of the following that is available:
 *   1. `Class.method`  — both class_name and function_name are set
 *   2. `Class`         — only class_name is set
 *   3. `function`      — only function_name is set
 *   4. heading slug    — heading_context lower-cased, with every run of
 *                        characters outside [a-z0-9] collapsed to "-" and a
 *                        leading/trailing "-" removed
 *   5. `_chunk_{index}` — nothing above produced a non-empty symbol
 *
 * Examples:
 *   chunk:src/user-service.ts::UserService
 *   chunk:src/user-service.ts::UserService.findById
 *   chunk:src/utils.ts::helper
 *   chunk:docs/api.md::authentication
 *   chunk:src/index.ts::_chunk_0
 */
export function buildChunkId(filePath: string, chunk: { class_name?: string; function_name?: string; heading_context?: string }, index: number): string {
  const { class_name: className, function_name: functionName, heading_context: headingContext } = chunk
  const positional = `_chunk_${index}`

  const pickSymbol = (): string => {
    if (className) {
      return functionName ? `${className}.${functionName}` : className
    }
    if (functionName) {
      return functionName
    }
    if (headingContext) {
      // Markdown: slugify the heading breadcrumb
      const slug = headingContext
        .toLowerCase()
        .replace(/[^a-z0-9]+/g, "-")
        .replace(/^-|-$/g, "")
      return slug || positional
    }
    return positional
  }

  return `chunk:${filePath}::${pickSymbol()}`
}
|
|
50
|
+
|
|
51
|
+
/**
 * Compose the ID of the graph node that stands for a whole file.
 *
 * @returns `file:` followed by `filePath`, unchanged.
 */
export function buildFileNodeId(filePath: string): string {
  return "file:" + filePath
}
|
|
55
|
+
|
|
56
|
+
/**
 * Recover the file path embedded in a node ID.
 *
 * - `chunk:{path}::{symbol}` → `{path}` (text between the prefix and the first
 *   "::"); `null` when the ID contains no "::".
 * - `file:{path}` / `meta:{path}` → `{path}`.
 * - Anything else → `null`.
 */
export function filePathFromNodeId(nodeId: string): string | null {
  if (nodeId.startsWith("chunk:")) {
    const symbolSep = nodeId.indexOf("::")
    if (symbolSep === -1) {
      return null
    }
    return nodeId.slice("chunk:".length, symbolSep)
  }

  // file: and meta: IDs carry the path verbatim after the prefix.
  for (const prefix of ["file:", "meta:"]) {
    if (nodeId.startsWith(prefix)) {
      return nodeId.slice(prefix.length)
    }
  }

  return null
}
|
|
70
|
+
|
|
71
|
+
/**
 * Compose the ID of a file's default chunk, `chunk:{filePath}::_chunk_0`.
 *
 * Meant for cross-file references (regex/LSP analyzers) where the exact target
 * chunk inside the other file is not known.
 */
export function buildDefaultChunkId(filePath: string): string {
  return "chunk:" + filePath + "::_chunk_0"
}
|
|
76
|
+
|
|
77
|
+
// ── Structural edge predicates ──────────────────────────────────────────────
|
|
78
|
+
|
|
79
|
+
/** Predicates of the structural edges (file → symbol, class → method). */
const STRUCTURAL_PREDICATES: Set<string> = new Set<string>()
for (const name of ["contains_class", "contains_function", "contains_interface", "contains", "has_method"]) {
  STRUCTURAL_PREDICATES.add(name)
}

/** Tell whether `predicate` is one of the structural edge predicates. */
export function isStructuralPredicate(predicate: string): boolean {
  const structural = STRUCTURAL_PREDICATES.has(predicate)
  return structural
}
|
|
13
90
|
|
|
14
91
|
export class GraphBuilder {
|
|
15
92
|
private lspAnalyzer: LSPAnalyzer
|
|
16
93
|
private regexAnalyzer: RegexAnalyzer
|
|
94
|
+
private lspEnabled: boolean
|
|
17
95
|
|
|
18
96
|
/**
 * @param graphDB      Graph store kept on the instance.
 * @param projectRoot  Project root; also handed to both analyzers.
 * @param lspEnabled   Stored on the instance as the LSP on/off switch (default true).
 * @param lspTimeoutMs Forwarded to LSPAnalyzer (default 5000) — presumably a
 *                     timeout in milliseconds; confirm against LSPAnalyzer.
 */
constructor(
  private graphDB: GraphDB,
  private projectRoot: string,
  lspEnabled = true,
  lspTimeoutMs = 5000,
) {
  this.lspEnabled = lspEnabled
  this.lspAnalyzer = new LSPAnalyzer(projectRoot, lspTimeoutMs)
  this.regexAnalyzer = new RegexAnalyzer(projectRoot)
}
|
|
25
106
|
|
|
26
107
|
/**
 * Attach a `chunk_id` to every chunk of a file.
 *
 * IDs come from `buildChunkId`. When several chunks of the same file resolve
 * to the same ID (e.g. two chunks for the same class split by size), the first
 * keeps the plain ID and later ones get `#1`, `#2`, … appended.
 *
 * @param filePath Path used as the file part of every ID.
 * @param chunks   Chunks in document order; each is shallow-copied, not mutated.
 * @returns New chunk objects carrying `chunk_id`.
 */
assignChunkIds(filePath: string, chunks: any[]): ChunkWithId[] {
  // Occurrences seen so far, keyed by the *base* ID. Keying by the base ID
  // directly (instead of stripping a trailing "#<n>" from the final ID) keeps
  // the counter correct even when a base ID itself ends in "#<digits>".
  const seen = new Map<string, number>()

  return chunks.map((chunk, index) => {
    const baseId = buildChunkId(filePath, chunk, index)
    const count = seen.get(baseId) ?? 0
    seen.set(baseId, count + 1)

    return {
      ...chunk,
      chunk_id: count > 0 ? `${baseId}#${count}` : baseId,
    } as ChunkWithId
  })
}
|
|
@@ -41,13 +129,13 @@ export class GraphBuilder {
|
|
|
41
129
|
content: string,
|
|
42
130
|
chunks: ChunkWithId[],
|
|
43
131
|
fileType: "code" | "docs"
|
|
44
|
-
): Promise<
|
|
132
|
+
): Promise<number> {
|
|
45
133
|
let relations: Array<RegexRelation | LSPRelation> = []
|
|
46
134
|
|
|
47
135
|
if (fileType === "docs") {
|
|
48
136
|
relations = this.regexAnalyzer.analyzeMarkdown(filePath, content, chunks)
|
|
49
137
|
} else if (fileType === "code") {
|
|
50
|
-
const lspAvailable = await this.lspAnalyzer.isAvailable(filePath)
|
|
138
|
+
const lspAvailable = this.lspEnabled && await this.lspAnalyzer.isAvailable(filePath)
|
|
51
139
|
|
|
52
140
|
if (lspAvailable) {
|
|
53
141
|
try {
|
|
@@ -63,7 +151,7 @@ export class GraphBuilder {
|
|
|
63
151
|
}
|
|
64
152
|
}
|
|
65
153
|
|
|
66
|
-
const
|
|
154
|
+
const relationTriples: Triple[] = relations.map(rel => ({
|
|
67
155
|
subject: rel.from,
|
|
68
156
|
predicate: rel.predicate,
|
|
69
157
|
object: rel.to,
|
|
@@ -73,13 +161,117 @@ export class GraphBuilder {
|
|
|
73
161
|
line: rel.line
|
|
74
162
|
}))
|
|
75
163
|
|
|
76
|
-
|
|
164
|
+
// ── Structural edges ────────────────────────────────────────────────────
|
|
165
|
+
const fileNode = buildFileNodeId(filePath)
|
|
166
|
+
const structuralTriples: Triple[] = []
|
|
167
|
+
|
|
168
|
+
// Anchor: every chunk belongs_to its file
|
|
169
|
+
for (const c of chunks) {
|
|
170
|
+
structuralTriples.push({
|
|
171
|
+
subject: c.chunk_id,
|
|
172
|
+
predicate: "belongs_to",
|
|
173
|
+
object: filePath,
|
|
174
|
+
weight: 0,
|
|
175
|
+
source: "anchor",
|
|
176
|
+
file: filePath,
|
|
177
|
+
})
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
// File node → symbol chunks
|
|
181
|
+
// Track class chunks for has_method edges
|
|
182
|
+
const classChunkMap = new Map<string, string>() // className → chunk_id
|
|
183
|
+
|
|
184
|
+
for (const c of chunks) {
|
|
185
|
+
if (c.class_name && !c.function_name) {
|
|
186
|
+
// Class/interface chunk (no method = class-level)
|
|
187
|
+
const predicate = c.content.match(/\binterface\s/) ? "contains_interface" : "contains_class"
|
|
188
|
+
structuralTriples.push({
|
|
189
|
+
subject: fileNode,
|
|
190
|
+
predicate,
|
|
191
|
+
object: c.chunk_id,
|
|
192
|
+
weight: 1.0,
|
|
193
|
+
source: "structure",
|
|
194
|
+
file: filePath,
|
|
195
|
+
})
|
|
196
|
+
classChunkMap.set(c.class_name, c.chunk_id)
|
|
197
|
+
} else if (c.function_name && !c.class_name) {
|
|
198
|
+
// Top-level function
|
|
199
|
+
structuralTriples.push({
|
|
200
|
+
subject: fileNode,
|
|
201
|
+
predicate: "contains_function",
|
|
202
|
+
object: c.chunk_id,
|
|
203
|
+
weight: 1.0,
|
|
204
|
+
source: "structure",
|
|
205
|
+
file: filePath,
|
|
206
|
+
})
|
|
207
|
+
} else if (c.function_name && c.class_name) {
|
|
208
|
+
// Method inside a class → has_method edge from class chunk
|
|
209
|
+
const parentChunkId = classChunkMap.get(c.class_name)
|
|
210
|
+
if (parentChunkId) {
|
|
211
|
+
structuralTriples.push({
|
|
212
|
+
subject: parentChunkId,
|
|
213
|
+
predicate: "has_method",
|
|
214
|
+
object: c.chunk_id,
|
|
215
|
+
weight: 1.0,
|
|
216
|
+
source: "structure",
|
|
217
|
+
file: filePath,
|
|
218
|
+
})
|
|
219
|
+
} else {
|
|
220
|
+
// No class chunk found yet (methods appeared before class preamble, or class was not split)
|
|
221
|
+
// Fall back to file → method
|
|
222
|
+
structuralTriples.push({
|
|
223
|
+
subject: fileNode,
|
|
224
|
+
predicate: "contains_function",
|
|
225
|
+
object: c.chunk_id,
|
|
226
|
+
weight: 1.0,
|
|
227
|
+
source: "structure",
|
|
228
|
+
file: filePath,
|
|
229
|
+
})
|
|
230
|
+
}
|
|
231
|
+
} else if (c.heading_context) {
|
|
232
|
+
// Markdown section
|
|
233
|
+
structuralTriples.push({
|
|
234
|
+
subject: fileNode,
|
|
235
|
+
predicate: "contains",
|
|
236
|
+
object: c.chunk_id,
|
|
237
|
+
weight: 0.5,
|
|
238
|
+
source: "structure",
|
|
239
|
+
file: filePath,
|
|
240
|
+
})
|
|
241
|
+
} else {
|
|
242
|
+
// Generic content chunk
|
|
243
|
+
structuralTriples.push({
|
|
244
|
+
subject: fileNode,
|
|
245
|
+
predicate: "contains",
|
|
246
|
+
object: c.chunk_id,
|
|
247
|
+
weight: 0.3,
|
|
248
|
+
source: "structure",
|
|
249
|
+
file: filePath,
|
|
250
|
+
})
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
await this.graphDB.putEdges([...structuralTriples, ...relationTriples])
|
|
255
|
+
return relationTriples.length
|
|
77
256
|
}
|
|
78
257
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
258
|
+
/**
 * Pick the chunk ID that best represents `line` of `filePath`.
 *
 * - With a non-empty `chunks` list: the first chunk that has both `start_line`
 *   and `end_line` and whose inclusive range contains `line`; when no chunk
 *   matches, the first chunk of the list.
 * - With no chunks: the file's default chunk ID.
 */
resolveChunkId(filePath: string, line: number, chunks?: ChunkWithId[]): string | null {
  // No chunks available — return default
  if (!chunks || chunks.length === 0) {
    return buildDefaultChunkId(filePath)
  }

  const containing = chunks.find(
    c =>
      c.start_line !== undefined &&
      c.end_line !== undefined &&
      line >= c.start_line &&
      line <= c.end_line,
  )

  // Line not in any chunk range — fall back to the first chunk
  return (containing ?? chunks[0]).chunk_id
}
|
|
84
276
|
|
|
85
277
|
async getRelatedChunks(chunkId: string): Promise<Array<{ chunk_id: string; predicate: string; weight: number; direction: "outgoing" | "incoming" }>> {
|
package/vectorizer/graph-db.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import levelgraph from "levelgraph"
|
|
2
2
|
import { Level } from "level"
|
|
3
|
+
import { filePathFromNodeId, isStructuralPredicate } from "./graph-builder"
|
|
3
4
|
|
|
4
5
|
export interface Triple {
|
|
5
6
|
subject: string
|
|
@@ -149,23 +150,27 @@ export class GraphDB {
|
|
|
149
150
|
/**
 * Delete the `graph_built` meta triple(s) stored under `meta:{filePath}`.
 *
 * Best-effort: a failure while reading or deleting is logged and otherwise
 * ignored, so the returned promise only rejects when the DB is not initialized.
 *
 * @param filePath File whose meta triples should be removed.
 * @throws Error when called before `init()`.
 */
async deleteFileMeta(filePath: string): Promise<void> {
  if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")

  try {
    // Look up every graph_built triple recorded for this file.
    const triples = await new Promise<Triple[]>((resolve, reject) => {
      this.db.get(
        { subject: `meta:${filePath}`, predicate: "graph_built" },
        (err: Error | undefined, result: Triple[]) => {
          if (err) reject(err)
          else resolve(result || [])
        },
      )
    })

    // Delete one at a time; an empty lookup makes this a no-op.
    for (const t of triples) {
      await new Promise<void>((resolve, reject) => {
        this.db.del(t, (err: Error | undefined) => {
          if (err) reject(err)
          else resolve()
        })
      })
    }
  } catch (error) {
    // Still best-effort (never fail the caller), but surface the failure
    // instead of hiding it so a stale meta triple can be diagnosed.
    console.error(`Error deleting graph meta for ${filePath}:`, error)
  }
}
|
|
171
176
|
|
|
@@ -191,9 +196,10 @@ export class GraphDB {
|
|
|
191
196
|
|
|
192
197
|
/**
|
|
193
198
|
* Get all triples in the graph (for validation/stats).
|
|
194
|
-
* Excludes meta triples
|
|
199
|
+
* Excludes meta, anchor, and structural triples by default.
|
|
200
|
+
* Pass includeStructural=true to also get structural edges.
|
|
195
201
|
*/
|
|
196
|
-
async getAllTriples(): Promise<Triple[]> {
|
|
202
|
+
async getAllTriples(includeStructural: boolean = false): Promise<Triple[]> {
|
|
197
203
|
if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
|
|
198
204
|
|
|
199
205
|
const allTriples = await new Promise<Triple[]>((resolve, reject) => {
|
|
@@ -203,7 +209,11 @@ export class GraphDB {
|
|
|
203
209
|
})
|
|
204
210
|
})
|
|
205
211
|
|
|
206
|
-
return allTriples.filter(t =>
|
|
212
|
+
return allTriples.filter(t => {
|
|
213
|
+
if (t.predicate === "graph_built" || t.predicate === "belongs_to") return false
|
|
214
|
+
if (!includeStructural && isStructuralPredicate(t.predicate)) return false
|
|
215
|
+
return true
|
|
216
|
+
})
|
|
207
217
|
}
|
|
208
218
|
|
|
209
219
|
async getRelatedFiles(chunkId: string, maxDepth: number = 1): Promise<{path: string, relation: string, weight: number}[]> {
|
|
@@ -213,58 +223,70 @@ export class GraphDB {
|
|
|
213
223
|
|
|
214
224
|
const relatedFiles: Map<string, {relation: string, weight: number}> = new Map()
|
|
215
225
|
const visited = new Set<string>()
|
|
216
|
-
|
|
226
|
+
const self = this
|
|
227
|
+
|
|
228
|
+
// Resolve the caller's file directly from the node ID
|
|
229
|
+
const callerFile = filePathFromNodeId(chunkId)
|
|
230
|
+
|
|
217
231
|
async function traverse(currentId: string, currentDepth: number, currentRelation: string) {
|
|
218
|
-
if (currentDepth
|
|
232
|
+
if (currentDepth >= maxDepth || visited.has(currentId)) {
|
|
219
233
|
return
|
|
220
234
|
}
|
|
221
|
-
|
|
235
|
+
|
|
222
236
|
visited.add(currentId)
|
|
223
|
-
|
|
237
|
+
|
|
224
238
|
try {
|
|
225
239
|
const outgoing = await new Promise<Triple[]>((resolve, reject) => {
|
|
226
|
-
|
|
240
|
+
self.db.get({ subject: currentId }, (err: Error | undefined, triples: Triple[]) => {
|
|
227
241
|
if (err) reject(err)
|
|
228
242
|
else resolve(triples || [])
|
|
229
243
|
})
|
|
230
244
|
})
|
|
231
|
-
|
|
245
|
+
|
|
232
246
|
for (const triple of outgoing) {
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
247
|
+
// Skip meta, anchor, and structural-only edges
|
|
248
|
+
if (triple.predicate === "graph_built" || triple.predicate === "belongs_to") continue
|
|
249
|
+
if (isStructuralPredicate(triple.predicate)) continue
|
|
250
|
+
|
|
251
|
+
// Resolve file for the target node directly from its ID
|
|
252
|
+
const targetFile = filePathFromNodeId(triple.object)
|
|
253
|
+
if (!targetFile) continue
|
|
254
|
+
|
|
255
|
+
const existing = relatedFiles.get(targetFile)
|
|
237
256
|
if (existing) {
|
|
238
257
|
existing.weight = Math.max(existing.weight, triple.weight)
|
|
239
258
|
} else {
|
|
240
|
-
relatedFiles.set(
|
|
259
|
+
relatedFiles.set(targetFile, {
|
|
241
260
|
relation: currentRelation || triple.predicate,
|
|
242
261
|
weight: triple.weight
|
|
243
262
|
})
|
|
244
263
|
}
|
|
245
|
-
|
|
246
|
-
// Recurse for imports/extends relations
|
|
264
|
+
|
|
247
265
|
if (triple.predicate === "imports" || triple.predicate === "extends") {
|
|
248
|
-
await traverse(
|
|
266
|
+
await traverse(triple.object, currentDepth + 1, triple.predicate)
|
|
249
267
|
}
|
|
250
268
|
}
|
|
251
|
-
|
|
269
|
+
|
|
252
270
|
const incoming = await new Promise<Triple[]>((resolve, reject) => {
|
|
253
|
-
|
|
271
|
+
self.db.get({ object: currentId }, (err: Error | undefined, triples: Triple[]) => {
|
|
254
272
|
if (err) reject(err)
|
|
255
273
|
else resolve(triples || [])
|
|
256
274
|
})
|
|
257
275
|
})
|
|
258
|
-
|
|
276
|
+
|
|
259
277
|
for (const triple of incoming) {
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
278
|
+
if (triple.predicate === "graph_built" || triple.predicate === "belongs_to") continue
|
|
279
|
+
if (isStructuralPredicate(triple.predicate)) continue
|
|
280
|
+
|
|
281
|
+
const sourceFile = filePathFromNodeId(triple.subject)
|
|
282
|
+
if (!sourceFile) continue
|
|
283
|
+
|
|
284
|
+
const existing = relatedFiles.get(sourceFile)
|
|
263
285
|
if (existing) {
|
|
264
286
|
existing.weight = Math.max(existing.weight, triple.weight)
|
|
265
287
|
} else {
|
|
266
|
-
relatedFiles.set(
|
|
267
|
-
relation:
|
|
288
|
+
relatedFiles.set(sourceFile, {
|
|
289
|
+
relation: "used_by",
|
|
268
290
|
weight: triple.weight
|
|
269
291
|
})
|
|
270
292
|
}
|
|
@@ -273,17 +295,18 @@ export class GraphDB {
|
|
|
273
295
|
console.error(`Error traversing graph for ${currentId}:`, error)
|
|
274
296
|
}
|
|
275
297
|
}
|
|
276
|
-
|
|
298
|
+
|
|
277
299
|
await traverse(chunkId, 0, "")
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
300
|
+
|
|
301
|
+
// Remove the caller's own file from results
|
|
302
|
+
if (callerFile) relatedFiles.delete(callerFile)
|
|
303
|
+
|
|
304
|
+
return Array.from(relatedFiles.entries())
|
|
305
|
+
.map(([filePath, data]) => ({
|
|
306
|
+
path: filePath,
|
|
282
307
|
relation: data.relation,
|
|
283
308
|
weight: data.weight
|
|
284
309
|
}))
|
|
285
310
|
.sort((a, b) => b.weight - a.weight)
|
|
286
|
-
|
|
287
|
-
return result
|
|
288
311
|
}
|
|
289
312
|
}
|