@comfanion/usethis_search 3.0.0-dev.16 → 3.0.0-dev.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,14 +22,18 @@ export const DEFAULT_MD_CONFIG: MarkdownChunkConfig = {
22
22
  export interface MarkdownChunk {
23
23
  content: string
24
24
  heading_context: string // "H1 > H2 > H3"
25
+ start_line?: number
26
+ end_line?: number
25
27
  }
26
28
 
27
29
  // ── Internal types ──────────────────────────────────────────────────────────
28
30
 
29
31
  interface Section {
30
- level: number // 1-6 for headings, 0 for preamble
32
+ level: number //1-6 for headings, 0 for preamble
31
33
  heading: string
32
34
  body: string
35
+ start_line: number
36
+ end_line: number
33
37
  }
34
38
 
35
39
  // ── Parsing ─────────────────────────────────────────────────────────────────
@@ -38,19 +42,23 @@ interface Section {
38
42
  function parseSections(content: string): Section[] {
39
43
  const lines = content.split("\n")
40
44
  const sections: Section[] = []
41
- let currentSection: Section = { level: 0, heading: "", body: "" }
45
+ let currentSection: Section = { level: 0, heading: "", body: "", start_line: 0, end_line: 0 }
42
46
 
43
- for (const line of lines) {
47
+ for (let i = 0; i < lines.length; i++) {
48
+ const line = lines[i]
44
49
  const headingMatch = line.match(/^(#{1,6})\s+(.+)$/)
45
50
  if (headingMatch) {
46
51
  // Push previous section
47
52
  if (currentSection.body.trim() || currentSection.heading) {
53
+ currentSection.end_line = i - 1
48
54
  sections.push(currentSection)
49
55
  }
50
56
  currentSection = {
51
57
  level: headingMatch[1].length,
52
58
  heading: headingMatch[2].trim(),
53
59
  body: "",
60
+ start_line: i,
61
+ end_line: 0,
54
62
  }
55
63
  } else {
56
64
  currentSection.body += line + "\n"
@@ -59,6 +67,7 @@ function parseSections(content: string): Section[] {
59
67
 
60
68
  // Push last section
61
69
  if (currentSection.body.trim() || currentSection.heading) {
70
+ currentSection.end_line = lines.length - 1
62
71
  sections.push(currentSection)
63
72
  }
64
73
 
@@ -97,6 +106,45 @@ function splitLargeText(text: string, maxSize: number): string[] {
97
106
  return chunks
98
107
  }
99
108
 
109
/**
 * Split `text` on line boundaries into pieces of at most `maxSize` characters,
 * recording the line span each piece covers.
 *
 * Line numbers are offsets from `startLine`: line `i` of `text` is reported as
 * `startLine + i`. A single line longer than `maxSize` is never broken up and is
 * emitted as its own oversized piece.
 *
 * @param text      Text to split.
 * @param maxSize   Size budget per piece, in characters (each line is charged one extra for its newline).
 * @param startLine Line number assigned to the first line of `text`.
 * @returns Pieces in document order, each with its content and inclusive line span.
 */
function splitLargeTextWithLines(text: string, maxSize: number, startLine: number): Array<{ content: string; start_line: number; end_line: number }> {
  const allLines = text.split("\n")
  const finalLine = startLine + allLines.length - 1

  // Small enough already: hand it back untouched as a single piece.
  if (text.length <= maxSize) {
    return [{ content: text, start_line: startLine, end_line: finalLine }]
  }

  const pieces: Array<{ content: string; start_line: number; end_line: number }> = []
  let buffer: string[] = []
  let bufferedChars = 0
  let bufferFirstLine = startLine

  allLines.forEach((lineText, offset) => {
    const cost = lineText.length + 1 // +1 accounts for the newline
    const overflows = bufferedChars + cost > maxSize

    // Flush before adding the line that would overflow; never emit an empty piece.
    if (overflows && buffer.length > 0) {
      pieces.push({
        content: buffer.join("\n"),
        start_line: bufferFirstLine,
        end_line: startLine + offset - 1,
      })
      buffer = []
      bufferedChars = 0
      bufferFirstLine = startLine + offset
    }

    buffer.push(lineText)
    bufferedChars += cost
  })

  if (buffer.length > 0) {
    pieces.push({
      content: buffer.join("\n"),
      start_line: bufferFirstLine,
      end_line: finalLine,
    })
  }

  return pieces
}
147
+
100
148
  // ── Public API ──────────────────────────────────────────────────────────────
101
149
 
102
150
  /**
@@ -138,7 +186,12 @@ export function chunkMarkdown(
138
186
  ? `${"#".repeat(section.level)} ${section.heading}\n${section.body}`
139
187
  : section.body
140
188
 
141
- rawChunks.push({ content: sectionText.trim(), heading_context: headingContext })
189
+ rawChunks.push({
190
+ content: sectionText.trim(),
191
+ heading_context: headingContext,
192
+ start_line: section.start_line,
193
+ end_line: section.end_line,
194
+ })
142
195
  }
143
196
 
144
197
  // Merge small sections with previous
@@ -150,7 +203,11 @@ export function chunkMarkdown(
150
203
  ) {
151
204
  const prev = merged[merged.length - 1]
152
205
  prev.content += "\n\n" + chunk.content
153
- // Keep the deepest heading context
206
+ // Merge end_line
207
+ if (chunk.end_line !== undefined) {
208
+ prev.end_line = chunk.end_line
209
+ }
210
+ // Keep deepest heading context
154
211
  if (chunk.heading_context) {
155
212
  prev.heading_context = chunk.heading_context
156
213
  }
@@ -163,9 +220,14 @@ export function chunkMarkdown(
163
220
  const result: MarkdownChunk[] = []
164
221
  for (const chunk of merged) {
165
222
  if (chunk.content.length > config.max_chunk_size) {
166
- const parts = splitLargeText(chunk.content, config.max_chunk_size)
223
+ const parts = splitLargeTextWithLines(chunk.content, config.max_chunk_size, chunk.start_line || 0)
167
224
  for (const part of parts) {
168
- result.push({ content: part, heading_context: chunk.heading_context })
225
+ result.push({
226
+ content: part.content,
227
+ heading_context: chunk.heading_context,
228
+ start_line: part.start_line,
229
+ end_line: part.end_line,
230
+ })
169
231
  }
170
232
  } else {
171
233
  result.push(chunk)
@@ -9,29 +9,117 @@ export interface ChunkWithId {
9
9
  start_line?: number
10
10
  end_line?: number
11
11
  heading_context?: string
12
+ function_name?: string
13
+ class_name?: string
14
+ }
15
+
16
+ // ── Chunk ID helpers ────────────────────────────────────────────────────────
17
+
18
/**
 * Build a symbol-aware chunk ID of the form `chunk:{filePath}::{symbol}`.
 *
 * The symbol is chosen in this order:
 *   1. `Class.method` when both class and function names are present
 *   2. the class name alone
 *   3. the function name alone
 *   4. a lowercase, dash-separated slug of the heading context (markdown)
 *   5. `_chunk_{index}` when nothing else is available or the slug is empty
 *
 * Examples:
 *   chunk:src/user-service.ts::UserService
 *   chunk:src/user-service.ts::UserService.findById
 *   chunk:src/utils.ts::helper
 *   chunk:docs/api.md::authentication
 *   chunk:src/index.ts::_chunk_0
 *
 * @param filePath Path embedded verbatim in the ID.
 * @param chunk    Symbol metadata of the chunk.
 * @param index    Position of the chunk in its file; used only for the fallback symbol.
 */
export function buildChunkId(filePath: string, chunk: { class_name?: string; function_name?: string; heading_context?: string }, index: number): string {
  const { class_name: className, function_name: functionName, heading_context: heading } = chunk
  const positional = `_chunk_${index}`

  // Collapse every non-alphanumeric run to a single dash, then trim edge dashes.
  const slugify = (raw: string): string =>
    raw
      .toLowerCase()
      .replace(/[^a-z0-9]+/g, "-")
      .replace(/^-|-$/g, "")

  let symbol: string
  if (className) {
    symbol = functionName ? `${className}.${functionName}` : className
  } else if (functionName) {
    symbol = functionName
  } else if (heading) {
    // A heading made only of punctuation slugs to "", so fall back to the index.
    symbol = slugify(heading) || positional
  } else {
    symbol = positional
  }

  return `chunk:${filePath}::${symbol}`
}
50
+
51
/**
 * Build the ID of the file-level graph node.
 *
 * @param filePath Path embedded verbatim after the `file:` prefix.
 * @returns `file:{filePath}`
 */
export function buildFileNodeId(filePath: string): string {
  const prefix = "file:"
  return prefix + filePath
}
55
+
56
/**
 * Extract the file path embedded in a node ID.
 *
 * Recognised shapes:
 *   - `chunk:{path}::{symbol}` → `{path}` (null when the `::` separator is missing)
 *   - `file:{path}`            → `{path}`
 *   - `meta:{path}`            → `{path}`
 *
 * @returns The path, or null when the ID has none of the known prefixes.
 */
export function filePathFromNodeId(nodeId: string): string | null {
  const chunkPrefix = "chunk:"
  if (nodeId.startsWith(chunkPrefix)) {
    // The path runs from the prefix up to the first "::".
    const separatorAt = nodeId.indexOf("::")
    if (separatorAt === -1) return null
    return nodeId.slice(chunkPrefix.length, separatorAt)
  }

  // For the remaining node kinds the path is everything after the prefix.
  for (const prefix of ["file:", "meta:"]) {
    if (nodeId.startsWith(prefix)) {
      return nodeId.slice(prefix.length)
    }
  }

  return null
}
70
+
71
/**
 * Build the ID of a file's default chunk, `chunk:{filePath}::_chunk_0`.
 *
 * Intended for cross-file references (regex/LSP analyzers) where the exact
 * target chunk inside the other file is not known.
 */
export function buildDefaultChunkId(filePath: string): string {
  const defaultSymbol = "_chunk_0"
  return `chunk:${filePath}::${defaultSymbol}`
}
76
+
77
+ // ── Structural edge predicates ──────────────────────────────────────────────
78
+
79
/** Predicates of containment edges (file → chunk, class chunk → method chunk). */
const STRUCTURAL_PREDICATES = new Set<string>(
  ["contains_class", "contains_function", "contains_interface", "contains", "has_method"],
)

/**
 * Tell whether a predicate names a structural (containment) edge rather than
 * a relation discovered by analysis.
 *
 * @param predicate Edge predicate to classify.
 * @returns true for `contains_class`, `contains_function`, `contains_interface`,
 *          `contains` and `has_method`; false otherwise.
 */
export function isStructuralPredicate(predicate: string): boolean {
  const structural = STRUCTURAL_PREDICATES.has(predicate)
  return structural
}
13
90
 
14
91
  export class GraphBuilder {
15
92
  private lspAnalyzer: LSPAnalyzer
16
93
  private regexAnalyzer: RegexAnalyzer
94
+ private lspEnabled: boolean
17
95
 
18
96
  constructor(
19
97
  private graphDB: GraphDB,
20
- private projectRoot: string
98
+ private projectRoot: string,
99
+ lspEnabled: boolean = true,
100
+ lspTimeoutMs: number = 5000,
21
101
  ) {
22
- this.lspAnalyzer = new LSPAnalyzer()
23
- this.regexAnalyzer = new RegexAnalyzer()
102
+ this.lspEnabled = lspEnabled
103
+ this.lspAnalyzer = new LSPAnalyzer(projectRoot, lspTimeoutMs)
104
+ this.regexAnalyzer = new RegexAnalyzer(projectRoot)
24
105
  }
25
106
 
26
107
  assignChunkIds(filePath: string, chunks: any[]): ChunkWithId[] {
27
- const withoutExt = filePath.replace(/\.[^/.]+$/, "")
28
- const normalizedPath = withoutExt.replace(/[^a-zA-Z0-9]/g, "_")
108
+ const seen = new Map<string, number>()
29
109
 
30
110
  return chunks.map((chunk, index) => {
31
- const chunkId = `chunk_${normalizedPath}_${index}`
111
+ let chunkId = buildChunkId(filePath, chunk, index)
112
+
113
+ // Handle duplicate symbols (e.g. two chunks for same class split by size)
114
+ const count = seen.get(chunkId) || 0
115
+ if (count > 0) {
116
+ chunkId = `${chunkId}#${count}`
117
+ }
118
+ seen.set(chunkId.replace(/#\d+$/, ""), count + 1)
119
+
32
120
  return {
33
121
  ...chunk,
34
- chunk_id: chunkId
122
+ chunk_id: chunkId,
35
123
  } as ChunkWithId
36
124
  })
37
125
  }
@@ -41,13 +129,13 @@ export class GraphBuilder {
41
129
  content: string,
42
130
  chunks: ChunkWithId[],
43
131
  fileType: "code" | "docs"
44
- ): Promise<void> {
132
+ ): Promise<number> {
45
133
  let relations: Array<RegexRelation | LSPRelation> = []
46
134
 
47
135
  if (fileType === "docs") {
48
136
  relations = this.regexAnalyzer.analyzeMarkdown(filePath, content, chunks)
49
137
  } else if (fileType === "code") {
50
- const lspAvailable = await this.lspAnalyzer.isAvailable(filePath)
138
+ const lspAvailable = this.lspEnabled && await this.lspAnalyzer.isAvailable(filePath)
51
139
 
52
140
  if (lspAvailable) {
53
141
  try {
@@ -63,7 +151,7 @@ export class GraphBuilder {
63
151
  }
64
152
  }
65
153
 
66
- const triples: Triple[] = relations.map(rel => ({
154
+ const relationTriples: Triple[] = relations.map(rel => ({
67
155
  subject: rel.from,
68
156
  predicate: rel.predicate,
69
157
  object: rel.to,
@@ -73,13 +161,117 @@ export class GraphBuilder {
73
161
  line: rel.line
74
162
  }))
75
163
 
76
- await this.graphDB.putEdges(triples)
164
+ // ── Structural edges ────────────────────────────────────────────────────
165
+ const fileNode = buildFileNodeId(filePath)
166
+ const structuralTriples: Triple[] = []
167
+
168
+ // Anchor: every chunk belongs_to its file
169
+ for (const c of chunks) {
170
+ structuralTriples.push({
171
+ subject: c.chunk_id,
172
+ predicate: "belongs_to",
173
+ object: filePath,
174
+ weight: 0,
175
+ source: "anchor",
176
+ file: filePath,
177
+ })
178
+ }
179
+
180
+ // File node → symbol chunks
181
+ // Track class chunks for has_method edges
182
+ const classChunkMap = new Map<string, string>() // className → chunk_id
183
+
184
+ for (const c of chunks) {
185
+ if (c.class_name && !c.function_name) {
186
+ // Class/interface chunk (no method = class-level)
187
+ const predicate = c.content.match(/\binterface\s/) ? "contains_interface" : "contains_class"
188
+ structuralTriples.push({
189
+ subject: fileNode,
190
+ predicate,
191
+ object: c.chunk_id,
192
+ weight: 1.0,
193
+ source: "structure",
194
+ file: filePath,
195
+ })
196
+ classChunkMap.set(c.class_name, c.chunk_id)
197
+ } else if (c.function_name && !c.class_name) {
198
+ // Top-level function
199
+ structuralTriples.push({
200
+ subject: fileNode,
201
+ predicate: "contains_function",
202
+ object: c.chunk_id,
203
+ weight: 1.0,
204
+ source: "structure",
205
+ file: filePath,
206
+ })
207
+ } else if (c.function_name && c.class_name) {
208
+ // Method inside a class → has_method edge from class chunk
209
+ const parentChunkId = classChunkMap.get(c.class_name)
210
+ if (parentChunkId) {
211
+ structuralTriples.push({
212
+ subject: parentChunkId,
213
+ predicate: "has_method",
214
+ object: c.chunk_id,
215
+ weight: 1.0,
216
+ source: "structure",
217
+ file: filePath,
218
+ })
219
+ } else {
220
+ // No class chunk found yet (methods appeared before class preamble, or class was not split)
221
+ // Fall back to file → method
222
+ structuralTriples.push({
223
+ subject: fileNode,
224
+ predicate: "contains_function",
225
+ object: c.chunk_id,
226
+ weight: 1.0,
227
+ source: "structure",
228
+ file: filePath,
229
+ })
230
+ }
231
+ } else if (c.heading_context) {
232
+ // Markdown section
233
+ structuralTriples.push({
234
+ subject: fileNode,
235
+ predicate: "contains",
236
+ object: c.chunk_id,
237
+ weight: 0.5,
238
+ source: "structure",
239
+ file: filePath,
240
+ })
241
+ } else {
242
+ // Generic content chunk
243
+ structuralTriples.push({
244
+ subject: fileNode,
245
+ predicate: "contains",
246
+ object: c.chunk_id,
247
+ weight: 0.3,
248
+ source: "structure",
249
+ file: filePath,
250
+ })
251
+ }
252
+ }
253
+
254
+ await this.graphDB.putEdges([...structuralTriples, ...relationTriples])
255
+ return relationTriples.length
77
256
  }
78
257
 
79
- resolveChunkId(filePath: string, line: number): string | null {
80
- const withoutExt = filePath.replace(/\.[^/.]+$/, "")
81
- const normalizedPath = withoutExt.replace(/[^a-zA-Z0-9]/g, "_")
82
- return `chunk_${normalizedPath}_0`
258
/**
 * Resolve a file path and line number to the most suitable chunk ID.
 *
 * - With chunks: the first chunk whose inclusive `[start_line, end_line]`
 *   range contains `line`; if none matches (or no chunk has a range), the
 *   first chunk of the list.
 * - Without chunks: the file's default chunk ID.
 *
 * @param filePath Path of the target file.
 * @param line     Line number to locate.
 * @param chunks   Chunks of the target file, when known.
 */
resolveChunkId(filePath: string, line: number, chunks?: ChunkWithId[]): string | null {
  // Nothing to search: point at the file's default chunk.
  if (!chunks || chunks.length === 0) {
    return buildDefaultChunkId(filePath)
  }

  const owner = chunks.find(
    candidate =>
      candidate.start_line !== undefined &&
      candidate.end_line !== undefined &&
      candidate.start_line <= line &&
      line <= candidate.end_line,
  )

  // Line outside every known range: fall back to the first chunk.
  return owner ? owner.chunk_id : chunks[0].chunk_id
}
84
276
 
85
277
  async getRelatedChunks(chunkId: string): Promise<Array<{ chunk_id: string; predicate: string; weight: number; direction: "outgoing" | "incoming" }>> {
@@ -1,5 +1,6 @@
1
1
  import levelgraph from "levelgraph"
2
2
  import { Level } from "level"
3
+ import { filePathFromNodeId, isStructuralPredicate } from "./graph-builder"
3
4
 
4
5
  export interface Triple {
5
6
  subject: string
@@ -149,23 +150,27 @@ export class GraphDB {
149
150
  async deleteFileMeta(filePath: string): Promise<void> {
150
151
  if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
151
152
 
152
- const triples = await new Promise<Triple[]>((resolve, reject) => {
153
- this.db.get(
154
- { subject: `meta:${filePath}`, predicate: "graph_built" },
155
- (err: Error | undefined, result: Triple[]) => {
156
- if (err) reject(err)
157
- else resolve(result || [])
158
- },
159
- )
160
- })
153
+ try {
154
+ const triples = await new Promise<Triple[]>((resolve, reject) => {
155
+ this.db.get(
156
+ { subject: `meta:${filePath}`, predicate: "graph_built" },
157
+ (err: Error | undefined, result: Triple[]) => {
158
+ if (err) reject(err)
159
+ else resolve(result || [])
160
+ },
161
+ )
162
+ })
161
163
 
162
- for (const t of triples) {
163
- await new Promise<void>((resolve, reject) => {
164
- this.db.del(t, (err: Error | undefined) => {
165
- if (err) reject(err)
166
- else resolve()
164
+ for (const t of triples) {
165
+ await new Promise<void>((resolve, reject) => {
166
+ this.db.del(t, (err: Error | undefined) => {
167
+ if (err) reject(err)
168
+ else resolve()
169
+ })
167
170
  })
168
- })
171
+ }
172
+ } catch (err) {
173
+ // Silently ignore errors (e.g., no meta triple exists)
169
174
  }
170
175
  }
171
176
 
@@ -191,9 +196,10 @@ export class GraphDB {
191
196
 
192
197
  /**
193
198
  * Get all triples in the graph (for validation/stats).
194
- * Excludes meta triples (predicate === "graph_built").
199
+ * Excludes meta, anchor, and structural triples by default.
200
+ * Pass includeStructural=true to also get structural edges.
195
201
  */
196
- async getAllTriples(): Promise<Triple[]> {
202
+ async getAllTriples(includeStructural: boolean = false): Promise<Triple[]> {
197
203
  if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
198
204
 
199
205
  const allTriples = await new Promise<Triple[]>((resolve, reject) => {
@@ -203,7 +209,11 @@ export class GraphDB {
203
209
  })
204
210
  })
205
211
 
206
- return allTriples.filter(t => t.predicate !== "graph_built")
212
+ return allTriples.filter(t => {
213
+ if (t.predicate === "graph_built" || t.predicate === "belongs_to") return false
214
+ if (!includeStructural && isStructuralPredicate(t.predicate)) return false
215
+ return true
216
+ })
207
217
  }
208
218
 
209
219
  async getRelatedFiles(chunkId: string, maxDepth: number = 1): Promise<{path: string, relation: string, weight: number}[]> {
@@ -213,58 +223,70 @@ export class GraphDB {
213
223
 
214
224
  const relatedFiles: Map<string, {relation: string, weight: number}> = new Map()
215
225
  const visited = new Set<string>()
216
-
226
+ const self = this
227
+
228
+ // Resolve the caller's file directly from the node ID
229
+ const callerFile = filePathFromNodeId(chunkId)
230
+
217
231
  async function traverse(currentId: string, currentDepth: number, currentRelation: string) {
218
- if (currentDepth > maxDepth || visited.has(currentId)) {
232
+ if (currentDepth >= maxDepth || visited.has(currentId)) {
219
233
  return
220
234
  }
221
-
235
+
222
236
  visited.add(currentId)
223
-
237
+
224
238
  try {
225
239
  const outgoing = await new Promise<Triple[]>((resolve, reject) => {
226
- this.db.get({ subject: currentId }, (err: Error | undefined, triples: Triple[]) => {
240
+ self.db.get({ subject: currentId }, (err: Error | undefined, triples: Triple[]) => {
227
241
  if (err) reject(err)
228
242
  else resolve(triples || [])
229
243
  })
230
244
  })
231
-
245
+
232
246
  for (const triple of outgoing) {
233
- const fileId = triple.object
234
-
235
- // Aggregate relations and weights
236
- const existing = relatedFiles.get(fileId)
247
+ // Skip meta, anchor, and structural-only edges
248
+ if (triple.predicate === "graph_built" || triple.predicate === "belongs_to") continue
249
+ if (isStructuralPredicate(triple.predicate)) continue
250
+
251
+ // Resolve file for the target node directly from its ID
252
+ const targetFile = filePathFromNodeId(triple.object)
253
+ if (!targetFile) continue
254
+
255
+ const existing = relatedFiles.get(targetFile)
237
256
  if (existing) {
238
257
  existing.weight = Math.max(existing.weight, triple.weight)
239
258
  } else {
240
- relatedFiles.set(fileId, {
259
+ relatedFiles.set(targetFile, {
241
260
  relation: currentRelation || triple.predicate,
242
261
  weight: triple.weight
243
262
  })
244
263
  }
245
-
246
- // Recurse for imports/extends relations
264
+
247
265
  if (triple.predicate === "imports" || triple.predicate === "extends") {
248
- await traverse(fileId, currentDepth + 1, triple.predicate)
266
+ await traverse(triple.object, currentDepth + 1, triple.predicate)
249
267
  }
250
268
  }
251
-
269
+
252
270
  const incoming = await new Promise<Triple[]>((resolve, reject) => {
253
- this.db.get({ object: currentId }, (err: Error | undefined, triples: Triple[]) => {
271
+ self.db.get({ object: currentId }, (err: Error | undefined, triples: Triple[]) => {
254
272
  if (err) reject(err)
255
273
  else resolve(triples || [])
256
274
  })
257
275
  })
258
-
276
+
259
277
  for (const triple of incoming) {
260
- const fileId = triple.subject
261
-
262
- const existing = relatedFiles.get(fileId)
278
+ if (triple.predicate === "graph_built" || triple.predicate === "belongs_to") continue
279
+ if (isStructuralPredicate(triple.predicate)) continue
280
+
281
+ const sourceFile = filePathFromNodeId(triple.subject)
282
+ if (!sourceFile) continue
283
+
284
+ const existing = relatedFiles.get(sourceFile)
263
285
  if (existing) {
264
286
  existing.weight = Math.max(existing.weight, triple.weight)
265
287
  } else {
266
- relatedFiles.set(fileId, {
267
- relation: `used_by`,
288
+ relatedFiles.set(sourceFile, {
289
+ relation: "used_by",
268
290
  weight: triple.weight
269
291
  })
270
292
  }
@@ -273,17 +295,18 @@ export class GraphDB {
273
295
  console.error(`Error traversing graph for ${currentId}:`, error)
274
296
  }
275
297
  }
276
-
298
+
277
299
  await traverse(chunkId, 0, "")
278
-
279
- const result = Array.from(relatedFiles.entries())
280
- .map(([path, data]) => ({
281
- path,
300
+
301
+ // Remove the caller's own file from results
302
+ if (callerFile) relatedFiles.delete(callerFile)
303
+
304
+ return Array.from(relatedFiles.entries())
305
+ .map(([filePath, data]) => ({
306
+ path: filePath,
282
307
  relation: data.relation,
283
308
  weight: data.weight
284
309
  }))
285
310
  .sort((a, b) => b.weight - a.weight)
286
-
287
- return result
288
311
  }
289
312
  }