@comfanion/usethis_search 0.2.0-dev.0 → 3.0.0-dev.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- /dev/null
+++ analyzers/regex-analyzer.ts
@@ -0,0 +1,255 @@
+ import path from "path"
+ import { ChunkWithId } from "../graph-builder"
+
+ export interface Relation {
+   from: string
+   to: string
+   predicate: string
+   weight: number
+   source: "regex" | "markdown"
+   line?: number
+ }
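+
+ // Illustrative shape (hypothetical ids): a relative import on line 3 of
+ // src/app.ts that targets src/db.ts would be recorded as
+ //   { from: "chunk_src_app_0", to: "chunk_src_db_0",
+ //     predicate: "imports", weight: 0.8, source: "regex", line: 3 }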
+
+ export class RegexAnalyzer {
+   private readonly patterns = {
+     jsImports: /import\s+(?:\{[^}]+\}|\w+)\s+from\s+['"]([^'"]+)['"]/g,
+     pythonFromImport: /from\s+(\S+)\s+import/g,
+     pythonImport: /import\s+(\S+)/g,
+     extends: /class\s+\w+\s+extends\s+(\w+)/g,
+     // Allow an optional `extends` clause so `class A extends B implements C` matches
+     implements: /class\s+\w+(?:\s+extends\s+\w+)?\s+implements\s+([^{]+)/g,
+     markdownLink: /\[([^\]]+)\]\(([^)]+)\)/g
+   }
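+
+   // Heuristic coverage note: jsImports matches named and default imports
+   // such as `import { a } from "./x"` and `import x from "./x"`, but not
+   // namespace (`import * as ns from "./x"`) or side-effect (`import "./x"`)
+   // forms; unmatched imports simply produce no edge.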
+
+   analyzeCode(filePath: string, content: string, chunks: ChunkWithId[]): Relation[] {
+     const relations: Relation[] = []
+     const ext = path.extname(filePath)
+
+     if ([".js", ".ts", ".jsx", ".tsx"].includes(ext)) {
+       this.analyzeJSCode(content, filePath, chunks, relations)
+     } else if (ext === ".py") {
+       this.analyzePythonCode(content, filePath, chunks, relations)
+     }
+
+     return relations
+   }
+
+   analyzeMarkdown(filePath: string, content: string, chunks: ChunkWithId[]): Relation[] {
+     const relations: Relation[] = []
+
+     let match
+     this.patterns.markdownLink.lastIndex = 0
+     while ((match = this.patterns.markdownLink.exec(content)) !== null) {
+       const linkTarget = match[2]
+       // 0-based line of the match, derived from the offset into the content
+       const lineIndex = content.substring(0, match.index).split("\n").length - 1
+
+       const targetPath = this.resolvePath(filePath, linkTarget)
+       if (!targetPath) continue
+
+       const fromChunkId = this.findChunkForLine(chunks, lineIndex)
+       if (!fromChunkId) continue
+
+       const toChunkId = this.findChunkForLinkTarget(targetPath, linkTarget, chunks)
+       if (toChunkId) {
+         relations.push({
+           from: fromChunkId,
+           to: toChunkId,
+           predicate: "links_to",
+           weight: 1.0,
+           source: "markdown",
+           line: lineIndex
+         })
+       }
+     }
+
+     return relations
+   }
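+
+   // Example (hypothetical files): a link `[API](./api.md#usage)` inside
+   // docs/guide.md resolves to docs/api.md; if some chunk's heading_context
+   // contains "usage", that chunk becomes the edge target, otherwise the
+   // file's first chunk does.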
+
+   private analyzeJSCode(content: string, filePath: string, chunks: ChunkWithId[], relations: Relation[]) {
+     let match
+
+     // Relative imports: `import x from "./mod"` becomes an "imports" edge
+     this.patterns.jsImports.lastIndex = 0
+     while ((match = this.patterns.jsImports.exec(content)) !== null) {
+       const importPath = match[1]
+       const lineIndex = content.substring(0, match.index).split("\n").length - 1
+
+       if (importPath.startsWith(".")) {
+         const targetPath = this.resolvePath(filePath, importPath)
+         if (!targetPath) continue
+
+         const fromChunkId = this.findChunkForLine(chunks, lineIndex)
+         if (!fromChunkId) continue
+
+         const toChunkId = this.findFirstChunkInFile(targetPath)
+         if (toChunkId) {
+           relations.push({
+             from: fromChunkId,
+             to: toChunkId,
+             predicate: "imports",
+             weight: 0.8,
+             source: "regex",
+             line: lineIndex
+           })
+         }
+       }
+     }
+
+     // `class A extends B`: link the subclass chunk to the chunk mentioning B
+     this.patterns.extends.lastIndex = 0
+     while ((match = this.patterns.extends.exec(content)) !== null) {
+       const parentClass = match[1]
+       const lineIndex = content.substring(0, match.index).split("\n").length - 1
+
+       const fromChunkId = this.findChunkForLine(chunks, lineIndex)
+       if (!fromChunkId) continue
+
+       const toChunkId = this.findChunkContainingSymbol(chunks, parentClass)
+       if (toChunkId) {
+         relations.push({
+           from: fromChunkId,
+           to: toChunkId,
+           predicate: "extends",
+           weight: 0.8,
+           source: "regex",
+           line: lineIndex
+         })
+       }
+     }
+
+     // `class A implements X, Y`: one edge per implemented interface
+     this.patterns.implements.lastIndex = 0
+     while ((match = this.patterns.implements.exec(content)) !== null) {
+       const interfaces = match[1].split(",").map(s => s.trim())
+       const lineIndex = content.substring(0, match.index).split("\n").length - 1
+
+       const fromChunkId = this.findChunkForLine(chunks, lineIndex)
+       if (!fromChunkId) continue
+
+       for (const iface of interfaces) {
+         const toChunkId = this.findChunkContainingSymbol(chunks, iface)
+         if (toChunkId) {
+           relations.push({
+             from: fromChunkId,
+             to: toChunkId,
+             predicate: "implements",
+             weight: 0.8,
+             source: "regex",
+             line: lineIndex
+           })
+         }
+       }
+     }
+   }
+
+   private analyzePythonCode(content: string, filePath: string, chunks: ChunkWithId[], relations: Relation[]) {
+     let match
+
+     // `from .mod import x`: dotted relative modules are mapped to paths first
+     this.patterns.pythonFromImport.lastIndex = 0
+     while ((match = this.patterns.pythonFromImport.exec(content)) !== null) {
+       const importPath = match[1]
+       const lineIndex = content.substring(0, match.index).split("\n").length - 1
+
+       if (importPath.startsWith(".")) {
+         const targetPath = this.resolvePath(filePath, this.pythonModuleToPath(importPath))
+         if (!targetPath) continue
+
+         const fromChunkId = this.findChunkForLine(chunks, lineIndex)
+         if (!fromChunkId) continue
+
+         const toChunkId = this.findFirstChunkInFile(targetPath)
+         if (toChunkId) {
+           relations.push({
+             from: fromChunkId,
+             to: toChunkId,
+             predicate: "imports",
+             weight: 0.8,
+             source: "regex",
+             line: lineIndex
+           })
+         }
+       }
+     }
+
+     // Plain `import mod` statements. Note that Python forbids relative syntax
+     // here (`import .mod` is a SyntaxError), so the startsWith(".") guard
+     // below only fires on malformed input; absolute imports are intentionally
+     // skipped because they cannot be resolved to project files.
+     this.patterns.pythonImport.lastIndex = 0
+     while ((match = this.patterns.pythonImport.exec(content)) !== null) {
+       const importPath = match[1]
+       const lineIndex = content.substring(0, match.index).split("\n").length - 1
+
+       if (importPath.startsWith(".")) {
+         const targetPath = this.resolvePath(filePath, this.pythonModuleToPath(importPath))
+         if (!targetPath) continue
+
+         const fromChunkId = this.findChunkForLine(chunks, lineIndex)
+         if (!fromChunkId) continue
+
+         const toChunkId = this.findFirstChunkInFile(targetPath)
+         if (toChunkId) {
+           relations.push({
+             from: fromChunkId,
+             to: toChunkId,
+             predicate: "imports",
+             weight: 0.8,
+             source: "regex",
+             line: lineIndex
+           })
+         }
+       }
+     }
+   }
+
+   // Map a Python relative module name to a filesystem-style relative path:
+   // ".utils" -> "./utils", "..pkg.mod" -> "../pkg/mod", "." -> "./."
+   private pythonModuleToPath(module: string): string {
+     const dots = (module.match(/^\.+/) ?? ["."])[0].length
+     const rest = module.slice(dots).replace(/\./g, "/")
+     const prefix = dots === 1 ? "./" : "../".repeat(dots - 1)
+     return prefix + (rest || ".")
+   }
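+
+   // Example (hypothetical layout): inside pkg/sub/mod.py, `from ..utils import io`
+   // yields pythonModuleToPath("..utils") === "../utils", which resolvePath
+   // turns into pkg/utils, so the edge targets chunk_pkg_utils_0.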
+
+   private resolvePath(filePath: string, target: string): string | null {
+     try {
+       const dir = path.dirname(filePath)
+       const absoluteTarget = path.resolve(dir, target)
+
+       // Refuse targets that escape the project root (cwd)
+       if (!absoluteTarget.startsWith(process.cwd())) {
+         return null
+       }
+
+       return path.relative(process.cwd(), absoluteTarget)
+     } catch {
+       return null
+     }
+   }
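+
+   // Example (hypothetical paths, cwd as project root): resolving "./api.md"
+   // against docs/guide.md gives "docs/api.md", while "../../etc/passwd"
+   // escapes the root and returns null.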
+
+   private findChunkForLine(chunks: ChunkWithId[], lineIndex: number): string | null {
+     // lineIndex is 0-based; chunks are assumed to carry the same convention
+     for (const chunk of chunks) {
+       if (chunk.start_line !== undefined && chunk.end_line !== undefined) {
+         if (lineIndex >= chunk.start_line && lineIndex <= chunk.end_line) {
+           return chunk.chunk_id
+         }
+       }
+     }
+     return null
+   }
+
+   private findFirstChunkInFile(targetPath: string): string | null {
+     // Synthesize the id without checking that the file was indexed. Strip the
+     // extension first so the result matches the ids produced by
+     // GraphBuilder.assignChunkIds (which also drops the extension).
+     const withoutExt = targetPath.replace(/\.[^/.]+$/, "")
+     const normalized = withoutExt.replace(/[^a-zA-Z0-9]/g, "_")
+     return `chunk_${normalized}_0`
+   }
+
+   private findChunkContainingSymbol(chunks: ChunkWithId[], symbol: string): string | null {
+     // Plain substring search, so short symbol names can produce false positives
+     for (const chunk of chunks) {
+       if (chunk.content.includes(symbol)) {
+         return chunk.chunk_id
+       }
+     }
+     return null
+   }
+
+   private findChunkForLinkTarget(targetPath: string, linkTarget: string, chunks: ChunkWithId[]): string | null {
+     // Prefer an anchor match: `file.md#heading` targets the chunk whose
+     // heading context mentions the fragment
+     const hashIndex = linkTarget.indexOf("#")
+     if (hashIndex !== -1) {
+       const heading = linkTarget.substring(hashIndex + 1).toLowerCase()
+       for (const chunk of chunks) {
+         if (chunk.heading_context && chunk.heading_context.toLowerCase().includes(heading)) {
+           return chunk.chunk_id
+         }
+       }
+     }
+     return this.findFirstChunkInFile(targetPath)
+   }
+ }
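+
+ // Usage sketch (hypothetical inputs):
+ //   const analyzer = new RegexAnalyzer()
+ //   const relations = analyzer.analyzeCode("src/app.ts", source, chunks)
+ //   // relations: imports / extends / implements edges found by regex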
--- /dev/null
+++ graph-builder.ts
@@ -0,0 +1,198 @@
+ import { GraphDB, Triple } from "./graph-db"
+ import { RegexAnalyzer, Relation as RegexRelation } from "./analyzers/regex-analyzer"
+ import { LSPAnalyzer, Relation as LSPRelation } from "./analyzers/lsp-analyzer"
+
+ export interface ChunkWithId {
+   chunk_id: string
+   content: string
+   start_line?: number
+   end_line?: number
+   heading_context?: string
+ }
+
+ export class GraphBuilder {
+   private lspAnalyzer: LSPAnalyzer
+   private regexAnalyzer: RegexAnalyzer
+
+   constructor(
+     private graphDB: GraphDB,
+     private projectRoot: string
+   ) {
+     this.lspAnalyzer = new LSPAnalyzer()
+     this.regexAnalyzer = new RegexAnalyzer()
+   }
+
+   // Derive stable chunk ids from the file path (extension dropped, each
+   // non-alphanumeric character replaced with "_") plus the chunk index
+   assignChunkIds(filePath: string, chunks: Array<Omit<ChunkWithId, "chunk_id">>): ChunkWithId[] {
+     const withoutExt = filePath.replace(/\.[^/.]+$/, "")
+     const normalizedPath = withoutExt.replace(/[^a-zA-Z0-9]/g, "_")
+
+     return chunks.map((chunk, index) => {
+       const chunkId = `chunk_${normalizedPath}_${index}`
+       return {
+         ...chunk,
+         chunk_id: chunkId
+       } as ChunkWithId
+     })
+   }
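+
+   // Example (hypothetical file): assignChunkIds("src/graph-db.ts", [c0, c1])
+   // returns chunks with ids "chunk_src_graph_db_0" and "chunk_src_graph_db_1".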
+
+   async buildEdges(
+     filePath: string,
+     content: string,
+     chunks: ChunkWithId[],
+     fileType: "code" | "docs"
+   ): Promise<void> {
+     let relations: Array<RegexRelation | LSPRelation> = []
+
+     if (fileType === "docs") {
+       relations = this.regexAnalyzer.analyzeMarkdown(filePath, content, chunks)
+     } else if (fileType === "code") {
+       const lspAvailable = await this.lspAnalyzer.isAvailable(filePath)
+
+       if (lspAvailable) {
+         try {
+           relations = await this.lspAnalyzer.analyzeFile(filePath, chunks)
+         } catch {
+           // LSP threw: fall through to regex
+         }
+       }
+
+       // Fallback: if LSP unavailable, threw, or returned nothing, use regex
+       if (relations.length === 0) {
+         relations = this.regexAnalyzer.analyzeCode(filePath, content, chunks)
+       }
+     }
+
+     const triples: Triple[] = relations.map(rel => ({
+       subject: rel.from,
+       predicate: rel.predicate,
+       object: rel.to,
+       weight: rel.weight,
+       source: rel.source,
+       file: filePath,
+       line: rel.line
+     }))
+
+     await this.graphDB.putEdges(triples)
+   }
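+
+   // Usage sketch (hypothetical wiring):
+   //   const chunks = builder.assignChunkIds(filePath, rawChunks)
+   //   await builder.buildEdges(filePath, content, chunks, "code")
+   //   // edges now persisted in graphDB as subject-predicate-object triples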
+
+   // Coarse resolution: always maps to the file's first chunk; the line
+   // argument is accepted for finer-grained lookup later but is not used yet
+   resolveChunkId(filePath: string, line: number): string | null {
+     const withoutExt = filePath.replace(/\.[^/.]+$/, "")
+     const normalizedPath = withoutExt.replace(/[^a-zA-Z0-9]/g, "_")
+     return `chunk_${normalizedPath}_0`
+   }
+
+   async getRelatedChunks(chunkId: string): Promise<Array<{ chunk_id: string; predicate: string; weight: number; direction: "outgoing" | "incoming" }>> {
+     const [outgoing, incoming] = await Promise.all([
+       this.graphDB.getOutgoing(chunkId),
+       this.graphDB.getIncoming(chunkId)
+     ])
+
+     return [
+       ...outgoing.map(t => ({ chunk_id: t.object, predicate: t.predicate, weight: t.weight, direction: "outgoing" as const })),
+       ...incoming.map(t => ({ chunk_id: t.subject, predicate: t.predicate, weight: t.weight, direction: "incoming" as const }))
+     ]
+   }
+
+   // ---- FR-005: Semantic similarity edges ------------------------------------
+
+   /**
+    * Build "similar_to" edges between chunks whose cosine similarity exceeds
+    * the threshold. Only creates edges between chunks in different files, and
+    * only where no explicit link already exists.
+    *
+    * @param chunks array of { chunk_id, vector, file } for all indexed chunks
+    * @param threshold minimum cosine similarity (default 0.8)
+    * @param maxEdgesPerChunk limit on outgoing similarity edges per chunk (default 3)
+    * @returns number of similarity edges created
+    */
+   async buildSemanticEdges(
+     chunks: Array<{ chunk_id: string; vector: number[]; file: string }>,
+     threshold: number = 0.8,
+     maxEdgesPerChunk: number = 3,
+   ): Promise<number> {
+     if (chunks.length < 2) return 0
+
+     // Pre-collect all existing edges so we can skip pairs that already have links
+     const existingPairs = new Set<string>()
+     for (const chunk of chunks) {
+       try {
+         const outgoing = await this.graphDB.getOutgoing(chunk.chunk_id)
+         for (const t of outgoing) {
+           existingPairs.add(`${t.subject}|${t.object}`)
+           existingPairs.add(`${t.object}|${t.subject}`) // bidirectional check
+         }
+       } catch {
+         // skip: chunk may not have edges yet
+       }
+     }
+
+     const newTriples: Triple[] = []
+
+     // For each chunk, find the top-N most similar chunks above the threshold.
+     // The inner loop only scans forward (j > i), so each pair is scored once
+     // and the resulting edge is attributed to the earlier chunk.
+     for (let i = 0; i < chunks.length; i++) {
+       const a = chunks[i]
+       if (!a.vector || a.vector.length === 0) continue
+
+       const candidates: Array<{ idx: number; similarity: number }> = []
+
+       for (let j = i + 1; j < chunks.length; j++) {
+         const b = chunks[j]
+         if (!b.vector || b.vector.length === 0) continue
+
+         // Skip same-file chunks (intra-file similarity is less useful)
+         if (a.file === b.file) continue
+
+         // Skip if an explicit edge already exists
+         const pairKey = `${a.chunk_id}|${b.chunk_id}`
+         if (existingPairs.has(pairKey)) continue
+
+         const similarity = this.cosineSimilarity(a.vector, b.vector)
+         if (similarity > threshold) {
+           candidates.push({ idx: j, similarity })
+         }
+       }
+
+       // Sort by similarity descending, take top N
+       candidates.sort((x, y) => y.similarity - x.similarity)
+       const top = candidates.slice(0, maxEdgesPerChunk)
+
+       for (const c of top) {
+         const b = chunks[c.idx]
+         newTriples.push({
+           subject: a.chunk_id,
+           predicate: "similar_to",
+           object: b.chunk_id,
+           weight: parseFloat(c.similarity.toFixed(4)),
+           source: "semantic",
+           file: a.file,
+         })
+         // Mark as existing so the reverse pair isn't duplicated
+         existingPairs.add(`${a.chunk_id}|${b.chunk_id}`)
+         existingPairs.add(`${b.chunk_id}|${a.chunk_id}`)
+       }
+     }
+
+     if (newTriples.length > 0) {
+       // Batch insert in groups of 100 to avoid overwhelming LevelDB
+       for (let i = 0; i < newTriples.length; i += 100) {
+         const batch = newTriples.slice(i, i + 100)
+         await this.graphDB.putEdges(batch)
+       }
+     }
+
+     return newTriples.length
+   }
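+
+   // Sketch (hypothetical call): link each indexed chunk to at most two
+   // cross-file neighbours with similarity above 0.85:
+   //   const created = await builder.buildSemanticEdges(indexedChunks, 0.85, 2)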
+
+   // Standard cosine similarity; assumes both vectors share the same
+   // dimensionality (embeddings from the same model)
+   private cosineSimilarity(vecA: number[], vecB: number[]): number {
+     let dot = 0, normA = 0, normB = 0
+     for (let i = 0; i < vecA.length; i++) {
+       dot += vecA[i] * vecB[i]
+       normA += vecA[i] * vecA[i]
+       normB += vecB[i] * vecB[i]
+     }
+     const denom = Math.sqrt(normA) * Math.sqrt(normB)
+     return denom === 0 ? 0 : dot / denom
+   }
+ }
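+
+ // Worked example (hypothetical vectors): for a = [1, 0] and b = [1, 1],
+ // dot = 1, |a| = 1, |b| = sqrt(2), so cosineSimilarity(a, b) = 1 / sqrt(2) ≈ 0.7071,
+ // below the default 0.8 threshold, so no "similar_to" edge would be created.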