npm - @comfanion/usethis_search - Versions diffs - 3.0.0-dev.16 → 3.0.0-dev.18 - Mend

@comfanion/usethis_search 3.0.0-dev.16 → 3.0.0-dev.18

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (9)

package/package.json +1 -1
package/vectorizer/analyzers/lsp-analyzer.ts +7 -7
package/vectorizer/analyzers/regex-analyzer.ts +173 -67
package/vectorizer/chunkers/code-chunker.ts +74 -24
package/vectorizer/chunkers/markdown-chunker.ts +69 -7
package/vectorizer/graph-builder.ts +207 -15
package/vectorizer/graph-db.ts +70 -47
package/vectorizer/index.ts +111 -23
package/vectorizer.yaml +16 -0

package/vectorizer/chunkers/markdown-chunker.ts CHANGED

@@ -22,14 +22,18 @@ export const DEFAULT_MD_CONFIG: MarkdownChunkConfig = {
 export interface MarkdownChunk {
   content: string
   heading_context: string   // "H1 > H2 > H3"
+  start_line?: number
+  end_line?: number
 }
 // ── Internal types ──────────────────────────────────────────────────────────
 interface Section {
-  level: number       // 1-6 for headings, 0 for preamble
+  level: number       //1-6 for headings, 0 for preamble
   heading: string
   body: string
+  start_line: number
+  end_line: number
 }
 // ── Parsing ─────────────────────────────────────────────────────────────────
@@ -38,19 +42,23 @@ interface Section {
 function parseSections(content: string): Section[] {
   const lines = content.split("\n")
   const sections: Section[] = []
-  let currentSection: Section = { level: 0, heading: "", body: "" }
+  let currentSection: Section = { level: 0, heading: "", body: "", start_line: 0, end_line: 0 }
-  for (const line of lines) {
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i]
     const headingMatch = line.match(/^(#{1,6})\s+(.+)$/)
     if (headingMatch) {
       // Push previous section
       if (currentSection.body.trim() || currentSection.heading) {
+        currentSection.end_line = i - 1
         sections.push(currentSection)
       }
       currentSection = {
         level: headingMatch[1].length,
         heading: headingMatch[2].trim(),
         body: "",
+        start_line: i,
+        end_line: 0,
       }
     } else {
       currentSection.body += line + "\n"
@@ -59,6 +67,7 @@ function parseSections(content: string): Section[] {
   // Push last section
   if (currentSection.body.trim() || currentSection.heading) {
+    currentSection.end_line = lines.length - 1
     sections.push(currentSection)
   }
@@ -97,6 +106,45 @@ function splitLargeText(text: string, maxSize: number): string[] {
   return chunks
 }
+function splitLargeTextWithLines(text: string, maxSize: number, startLine: number): Array<{ content: string; start_line: number; end_line: number }> {
+  if (text.length <= maxSize) {
+    const lines = text.split("\n")
+    return [{ content: text, start_line: startLine, end_line: startLine + lines.length - 1 }]
+  }
+  const chunks: Array<{ content: string; start_line: number; end_line: number }> = []
+  const lines = text.split("\n")
+  let current: string[] = []
+  let currentLen = 0
+  let chunkStartLine = startLine
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i]
+    if (currentLen + line.length + 1 > maxSize && current.length > 0) {
+      chunks.push({
+        content: current.join("\n"),
+        start_line: chunkStartLine,
+        end_line: startLine + i - 1,
+      })
+      current = []
+      currentLen = 0
+      chunkStartLine = startLine + i
+    }
+    current.push(line)
+    currentLen += line.length + 1
+  }
+  if (current.length > 0) {
+    chunks.push({
+      content: current.join("\n"),
+      start_line: chunkStartLine,
+      end_line: startLine + lines.length - 1,
+    })
+  }
+  return chunks
+}
 // ── Public API ──────────────────────────────────────────────────────────────
 /**
@@ -138,7 +186,12 @@ export function chunkMarkdown(
       ? `${"#".repeat(section.level)} ${section.heading}\n${section.body}`
       : section.body
-    rawChunks.push({ content: sectionText.trim(), heading_context: headingContext })
+    rawChunks.push({
+      content: sectionText.trim(),
+      heading_context: headingContext,
+      start_line: section.start_line,
+      end_line: section.end_line,
+    })
   }
   // Merge small sections with previous
@@ -150,7 +203,11 @@ export function chunkMarkdown(
     ) {
       const prev = merged[merged.length - 1]
       prev.content += "\n\n" + chunk.content
-      // Keep the deepest heading context
+      // Merge end_line
+      if (chunk.end_line !== undefined) {
+        prev.end_line = chunk.end_line
+      }
+      // Keep deepest heading context
       if (chunk.heading_context) {
         prev.heading_context = chunk.heading_context
       }
@@ -163,9 +220,14 @@ export function chunkMarkdown(
   const result: MarkdownChunk[] = []
   for (const chunk of merged) {
     if (chunk.content.length > config.max_chunk_size) {
-      const parts = splitLargeText(chunk.content, config.max_chunk_size)
+      const parts = splitLargeTextWithLines(chunk.content, config.max_chunk_size, chunk.start_line || 0)
       for (const part of parts) {
-        result.push({ content: part, heading_context: chunk.heading_context })
+        result.push({
+          content: part.content,
+          heading_context: chunk.heading_context,
+          start_line: part.start_line,
+          end_line: part.end_line,
+        })
       }
     } else {
       result.push(chunk)

package/vectorizer/graph-builder.ts CHANGED

@@ -9,29 +9,117 @@ export interface ChunkWithId {
   start_line?: number
   end_line?: number
   heading_context?: string
+  function_name?: string
+  class_name?: string
+}
+// ── Chunk ID helpers ────────────────────────────────────────────────────────
+/** Build a symbol-aware chunk ID.
+ *
+ *  Format: `chunk:{relPath}::{symbol}`
+ *  Examples:
+ *    chunk:src/user-service.ts::UserService
+ *    chunk:src/user-service.ts::UserService.findById
+ *    chunk:src/utils.ts::helper
+ *    chunk:docs/api.md::authentication
+ *    chunk:src/index.ts::_chunk_0
+ */
+export function buildChunkId(filePath: string, chunk: { class_name?: string; function_name?: string; heading_context?: string }, index: number): string {
+  let symbol: string
+  if (chunk.class_name && chunk.function_name) {
+    symbol = `${chunk.class_name}.${chunk.function_name}`
+  } else if (chunk.class_name) {
+    symbol = chunk.class_name
+  } else if (chunk.function_name) {
+    symbol = chunk.function_name
+  } else if (chunk.heading_context) {
+    // Markdown: slugify heading
+    symbol = chunk.heading_context
+      .toLowerCase()
+      .replace(/[^a-z0-9]+/g, "-")
+      .replace(/^-|-$/g, "")
+    if (!symbol) symbol = `_chunk_${index}`
+  } else {
+    symbol = `_chunk_${index}`
+  }
+  return `chunk:${filePath}::${symbol}`
+}
+/** Build the file-level node ID. */
+export function buildFileNodeId(filePath: string): string {
+  return `file:${filePath}`
+}
+/** Extract the file path from any node ID (chunk: or file:). */
+export function filePathFromNodeId(nodeId: string): string | null {
+  if (nodeId.startsWith("chunk:")) {
+    const sep = nodeId.indexOf("::")
+    return sep === -1 ? null : nodeId.slice(6, sep)
+  }
+  if (nodeId.startsWith("file:")) {
+    return nodeId.slice(5)
+  }
+  if (nodeId.startsWith("meta:")) {
+    return nodeId.slice(5)
+  }
+  return null
+}
+/** Build a cross-file chunk ID that points to the default (first) chunk of the target file.
+ *  Used by regex/LSP analyzers when we don't know the exact target chunk. */
+export function buildDefaultChunkId(filePath: string): string {
+  return `chunk:${filePath}::_chunk_0`
+}
+// ── Structural edge predicates ──────────────────────────────────────────────
+const STRUCTURAL_PREDICATES = new Set([
+  "contains_class",
+  "contains_function",
+  "contains_interface",
+  "contains",
+  "has_method",
+])
+export function isStructuralPredicate(predicate: string): boolean {
+  return STRUCTURAL_PREDICATES.has(predicate)
 }
 export class GraphBuilder {
   private lspAnalyzer: LSPAnalyzer
   private regexAnalyzer: RegexAnalyzer
+  private lspEnabled: boolean
   constructor(
     private graphDB: GraphDB,
-    private projectRoot: string
+    private projectRoot: string,
+    lspEnabled: boolean = true,
+    lspTimeoutMs: number = 5000,
   ) {
-    this.lspAnalyzer = new LSPAnalyzer()
-    this.regexAnalyzer = new RegexAnalyzer()
+    this.lspEnabled = lspEnabled
+    this.lspAnalyzer = new LSPAnalyzer(projectRoot, lspTimeoutMs)
+    this.regexAnalyzer = new RegexAnalyzer(projectRoot)
   }
   assignChunkIds(filePath: string, chunks: any[]): ChunkWithId[] {
-    const withoutExt = filePath.replace(/\.[^/.]+$/, "")
-    const normalizedPath = withoutExt.replace(/[^a-zA-Z0-9]/g, "_")
+    const seen = new Map<string, number>()
     return chunks.map((chunk, index) => {
-      const chunkId = `chunk_${normalizedPath}_${index}`
+      let chunkId = buildChunkId(filePath, chunk, index)
+      // Handle duplicate symbols (e.g. two chunks for same class split by size)
+      const count = seen.get(chunkId) || 0
+      if (count > 0) {
+        chunkId = `${chunkId}#${count}`
+      }
+      seen.set(chunkId.replace(/#\d+$/, ""), count + 1)
       return {
         ...chunk,
-        chunk_id: chunkId
+        chunk_id: chunkId,
       } as ChunkWithId
     })
   }
@@ -41,13 +129,13 @@ export class GraphBuilder {
     content: string,
     chunks: ChunkWithId[],
     fileType: "code" | "docs"
-  ): Promise<void> {
+  ): Promise<number> {
     let relations: Array<RegexRelation | LSPRelation> = []
     if (fileType === "docs") {
       relations = this.regexAnalyzer.analyzeMarkdown(filePath, content, chunks)
     } else if (fileType === "code") {
-      const lspAvailable = await this.lspAnalyzer.isAvailable(filePath)
+      const lspAvailable = this.lspEnabled && await this.lspAnalyzer.isAvailable(filePath)
       if (lspAvailable) {
         try {
@@ -63,7 +151,7 @@ export class GraphBuilder {
       }
     }
-    const triples: Triple[] = relations.map(rel => ({
+    const relationTriples: Triple[] = relations.map(rel => ({
       subject: rel.from,
       predicate: rel.predicate,
       object: rel.to,
@@ -73,13 +161,117 @@ export class GraphBuilder {
       line: rel.line
     }))
-    await this.graphDB.putEdges(triples)
+    // ── Structural edges ────────────────────────────────────────────────────
+    const fileNode = buildFileNodeId(filePath)
+    const structuralTriples: Triple[] = []
+    // Anchor: every chunk belongs_to its file
+    for (const c of chunks) {
+      structuralTriples.push({
+        subject: c.chunk_id,
+        predicate: "belongs_to",
+        object: filePath,
+        weight: 0,
+        source: "anchor",
+        file: filePath,
+      })
+    }
+    // File node → symbol chunks
+    // Track class chunks for has_method edges
+    const classChunkMap = new Map<string, string>() // className → chunk_id
+    for (const c of chunks) {
+      if (c.class_name && !c.function_name) {
+        // Class/interface chunk (no method = class-level)
+        const predicate = c.content.match(/\binterface\s/) ? "contains_interface" : "contains_class"
+        structuralTriples.push({
+          subject: fileNode,
+          predicate,
+          object: c.chunk_id,
+          weight: 1.0,
+          source: "structure",
+          file: filePath,
+        })
+        classChunkMap.set(c.class_name, c.chunk_id)
+      } else if (c.function_name && !c.class_name) {
+        // Top-level function
+        structuralTriples.push({
+          subject: fileNode,
+          predicate: "contains_function",
+          object: c.chunk_id,
+          weight: 1.0,
+          source: "structure",
+          file: filePath,
+        })
+      } else if (c.function_name && c.class_name) {
+        // Method inside a class → has_method edge from class chunk
+        const parentChunkId = classChunkMap.get(c.class_name)
+        if (parentChunkId) {
+          structuralTriples.push({
+            subject: parentChunkId,
+            predicate: "has_method",
+            object: c.chunk_id,
+            weight: 1.0,
+            source: "structure",
+            file: filePath,
+          })
+        } else {
+          // No class chunk found yet (methods appeared before class preamble, or class was not split)
+          // Fall back to file → method
+          structuralTriples.push({
+            subject: fileNode,
+            predicate: "contains_function",
+            object: c.chunk_id,
+            weight: 1.0,
+            source: "structure",
+            file: filePath,
+          })
+        }
+      } else if (c.heading_context) {
+        // Markdown section
+        structuralTriples.push({
+          subject: fileNode,
+          predicate: "contains",
+          object: c.chunk_id,
+          weight: 0.5,
+          source: "structure",
+          file: filePath,
+        })
+      } else {
+        // Generic content chunk
+        structuralTriples.push({
+          subject: fileNode,
+          predicate: "contains",
+          object: c.chunk_id,
+          weight: 0.3,
+          source: "structure",
+          file: filePath,
+        })
+      }
+    }
+    await this.graphDB.putEdges([...structuralTriples, ...relationTriples])
+    return relationTriples.length
   }
-  resolveChunkId(filePath: string, line: number): string | null {
-    const withoutExt = filePath.replace(/\.[^/.]+$/, "")
-    const normalizedPath = withoutExt.replace(/[^a-zA-Z0-9]/g, "_")
-    return `chunk_${normalizedPath}_0`
+  /** Resolve a file path + line to the best chunk ID.
+   *  If chunks are provided, finds the one containing the line.
+   *  Otherwise falls back to the default chunk. */
+  resolveChunkId(filePath: string, line: number, chunks?: ChunkWithId[]): string | null {
+    if (chunks && chunks.length > 0) {
+      for (const c of chunks) {
+        if (c.start_line !== undefined && c.end_line !== undefined) {
+          if (line >= c.start_line && line <= c.end_line) {
+            return c.chunk_id
+          }
+        }
+      }
+      // Line not in any chunk range — return first chunk
+      return chunks[0].chunk_id
+    }
+    // No chunks available — return default
+    return buildDefaultChunkId(filePath)
   }
   async getRelatedChunks(chunkId: string): Promise<Array<{ chunk_id: string; predicate: string; weight: number; direction: "outgoing" | "incoming" }>> {

package/vectorizer/graph-db.ts CHANGED

@@ -1,5 +1,6 @@
 import levelgraph from "levelgraph"
 import { Level } from "level"
+import { filePathFromNodeId, isStructuralPredicate } from "./graph-builder"
 export interface Triple {
   subject: string
@@ -149,23 +150,27 @@ export class GraphDB {
   async deleteFileMeta(filePath: string): Promise<void> {
     if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
-    const triples = await new Promise<Triple[]>((resolve, reject) => {
-      this.db.get(
-        { subject: `meta:${filePath}`, predicate: "graph_built" },
-        (err: Error | undefined, result: Triple[]) => {
-          if (err) reject(err)
-          else resolve(result || [])
-        },
-      )
-    })
+    try {
+      const triples = await new Promise<Triple[]>((resolve, reject) => {
+        this.db.get(
+          { subject: `meta:${filePath}`, predicate: "graph_built" },
+          (err: Error | undefined, result: Triple[]) => {
+            if (err) reject(err)
+            else resolve(result || [])
+          },
+        )
+      })
-    for (const t of triples) {
-      await new Promise<void>((resolve, reject) => {
-        this.db.del(t, (err: Error | undefined) => {
-          if (err) reject(err)
-          else resolve()
+      for (const t of triples) {
+        await new Promise<void>((resolve, reject) => {
+          this.db.del(t, (err: Error | undefined) => {
+            if (err) reject(err)
+            else resolve()
+          })
         })
-      })
+      }
+    } catch (err) {
+      // Silently ignore errors (e.g., no meta triple exists)
     }
   }
@@ -191,9 +196,10 @@ export class GraphDB {
   /**
    * Get all triples in the graph (for validation/stats).
-   * Excludes meta triples (predicate === "graph_built").
+   * Excludes meta, anchor, and structural triples by default.
+   * Pass includeStructural=true to also get structural edges.
    */
-  async getAllTriples(): Promise<Triple[]> {
+  async getAllTriples(includeStructural: boolean = false): Promise<Triple[]> {
     if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
     const allTriples = await new Promise<Triple[]>((resolve, reject) => {
@@ -203,7 +209,11 @@ export class GraphDB {
       })
     })
-    return allTriples.filter(t => t.predicate !== "graph_built")
+    return allTriples.filter(t => {
+      if (t.predicate === "graph_built" || t.predicate === "belongs_to") return false
+      if (!includeStructural && isStructuralPredicate(t.predicate)) return false
+      return true
+    })
   }
   async getRelatedFiles(chunkId: string, maxDepth: number = 1): Promise<{path: string, relation: string, weight: number}[]> {
@@ -213,58 +223,70 @@ export class GraphDB {
     const relatedFiles: Map<string, {relation: string, weight: number}> = new Map()
     const visited = new Set<string>()
+    const self = this
+    // Resolve the caller's file directly from the node ID
+    const callerFile = filePathFromNodeId(chunkId)
     async function traverse(currentId: string, currentDepth: number, currentRelation: string) {
-      if (currentDepth > maxDepth || visited.has(currentId)) {
+      if (currentDepth >= maxDepth || visited.has(currentId)) {
         return
       }
       visited.add(currentId)
       try {
         const outgoing = await new Promise<Triple[]>((resolve, reject) => {
-          this.db.get({ subject: currentId }, (err: Error | undefined, triples: Triple[]) => {
+          self.db.get({ subject: currentId }, (err: Error | undefined, triples: Triple[]) => {
             if (err) reject(err)
             else resolve(triples || [])
           })
         })
         for (const triple of outgoing) {
-          const fileId = triple.object
-          // Aggregate relations and weights
-          const existing = relatedFiles.get(fileId)
+          // Skip meta, anchor, and structural-only edges
+          if (triple.predicate === "graph_built" || triple.predicate === "belongs_to") continue
+          if (isStructuralPredicate(triple.predicate)) continue
+          // Resolve file for the target node directly from its ID
+          const targetFile = filePathFromNodeId(triple.object)
+          if (!targetFile) continue
+          const existing = relatedFiles.get(targetFile)
           if (existing) {
             existing.weight = Math.max(existing.weight, triple.weight)
           } else {
-            relatedFiles.set(fileId, {
+            relatedFiles.set(targetFile, {
               relation: currentRelation || triple.predicate,
               weight: triple.weight
             })
           }
-          // Recurse for imports/extends relations
           if (triple.predicate === "imports" || triple.predicate === "extends") {
-            await traverse(fileId, currentDepth + 1, triple.predicate)
+            await traverse(triple.object, currentDepth + 1, triple.predicate)
           }
         }
         const incoming = await new Promise<Triple[]>((resolve, reject) => {
-          this.db.get({ object: currentId }, (err: Error | undefined, triples: Triple[]) => {
+          self.db.get({ object: currentId }, (err: Error | undefined, triples: Triple[]) => {
             if (err) reject(err)
             else resolve(triples || [])
           })
         })
         for (const triple of incoming) {
-          const fileId = triple.subject
-          const existing = relatedFiles.get(fileId)
+          if (triple.predicate === "graph_built" || triple.predicate === "belongs_to") continue
+          if (isStructuralPredicate(triple.predicate)) continue
+          const sourceFile = filePathFromNodeId(triple.subject)
+          if (!sourceFile) continue
+          const existing = relatedFiles.get(sourceFile)
           if (existing) {
             existing.weight = Math.max(existing.weight, triple.weight)
           } else {
-            relatedFiles.set(fileId, {
-              relation: `used_by`,
+            relatedFiles.set(sourceFile, {
+              relation: "used_by",
               weight: triple.weight
             })
           }
@@ -273,17 +295,18 @@ export class GraphDB {
         console.error(`Error traversing graph for ${currentId}:`, error)
       }
     }
     await traverse(chunkId, 0, "")
-    const result = Array.from(relatedFiles.entries())
-      .map(([path, data]) => ({
-        path,
+    // Remove the caller's own file from results
+    if (callerFile) relatedFiles.delete(callerFile)
+    return Array.from(relatedFiles.entries())
+      .map(([filePath, data]) => ({
+        path: filePath,
         relation: data.relation,
         weight: data.weight
       }))
       .sort((a, b) => b.weight - a.weight)
-    return result
   }
 }