@comfanion/usethis_search 3.0.0-dev.16 → 3.0.0-dev.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/vectorizer/analyzers/lsp-analyzer.ts +7 -7
- package/vectorizer/analyzers/regex-analyzer.ts +173 -67
- package/vectorizer/chunkers/code-chunker.ts +74 -24
- package/vectorizer/chunkers/markdown-chunker.ts +69 -7
- package/vectorizer/graph-builder.ts +207 -15
- package/vectorizer/graph-db.ts +70 -47
- package/vectorizer/index.ts +111 -23
- package/vectorizer.yaml +16 -0
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@comfanion/usethis_search",
-  "version": "3.0.0-dev.16",
+  "version": "3.0.0-dev.18",
   "description": "OpenCode plugin: semantic search with graph-based context (v3: graph relations, 1-hop context, LSP + regex analyzers)",
   "type": "module",
   "main": "./index.ts",

package/vectorizer/analyzers/lsp-analyzer.ts
CHANGED

@@ -12,7 +12,7 @@

 import path from "path"
 import fs from "fs/promises"
-import { ChunkWithId } from "../graph-builder"
+import { ChunkWithId, buildDefaultChunkId } from "../graph-builder"
 import { LSPClient, LSPSymbolInformation, SymbolKind } from "./lsp-client"

 export interface Relation {

@@ -252,7 +252,9 @@ export class LSPAnalyzer {
     return result
   }

-  /** Convert LSP location URI + line → chunk_id.
+  /** Convert LSP location URI + line → chunk_id.
+   * For same-file refs, resolves to exact chunk by line.
+   * For cross-file refs, returns the default (first) chunk of the target file. */
   private locationToChunkId(currentFile: string, uri: string, line: number, root: string): string | null {
     // uri = file:///absolute/path/to/file.ts
     const filePath = uri.startsWith("file://") ? uri.slice(7) : uri

@@ -261,11 +263,9 @@ export class LSPAnalyzer {
     // Skip external files (node_modules, etc.)
     if (relPath.startsWith("..") || relPath.includes("node_modules")) return null

-
-
-
-    // For same-file, we could be more precise but chunk 0 is sufficient for graph
-    return `chunk_${normalized}_0`
+    // Same file → use findChunkForPosition (called separately with chunks)
+    // Cross-file → default chunk
+    return buildDefaultChunkId(relPath)
   }

   private findChunkForPosition(chunks: ChunkWithId[], line: number): string | null {
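
The new buildDefaultChunkId helper is imported from ../graph-builder, whose diff is not part of this excerpt. Judging from the inline logic it replaces here (and from the removed findFirstChunkInFile further down in regex-analyzer.ts), it most likely just centralises the chunk-0 id construction; a minimal sketch under that assumption:

// Hypothetical reconstruction of buildDefaultChunkId (graph-builder.ts is outside this excerpt);
// it mirrors the removed inline logic: sanitise the project-relative path and point at chunk 0.
export function buildDefaultChunkId(relPath: string): string {
  const normalized = relPath.replace(/[^a-zA-Z0-9]/g, "_")
  return `chunk_${normalized}_0`
}

// buildDefaultChunkId("src/utils.ts") → "chunk_src_utils_ts_0"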

package/vectorizer/analyzers/regex-analyzer.ts
CHANGED

@@ -1,5 +1,6 @@
 import path from "path"
-import { ChunkWithId } from "../graph-builder"
+import fs from "fs"
+import { ChunkWithId, buildDefaultChunkId } from "../graph-builder"

 export interface Relation {
   from: string

@@ -10,14 +11,127 @@ export interface Relation {
   line?: number
 }

+// ── Module resolution ─────────────────────────────────────────────────────────
+
+/** Extensions to try when resolving JS/TS imports (in order). */
+const JS_EXTENSIONS = [".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs"]
+
+/** Extensions to try when resolving Python imports. */
+const PY_EXTENSIONS = [".py"]
+
+/**
+ * Resolve a relative import to an actual file on disk.
+ *
+ * Follows Node.js / TypeScript module resolution:
+ * 1. Exact path (has extension) → check exists
+ * 2. Try each extension: `target.ts`, `target.tsx`, ...
+ * 3. Try directory index: `target/index.ts`, `target/index.tsx`, ...
+ *
+ * For Python:
+ * 1. `target.py`
+ * 2. `target/__init__.py`
+ *
+ * Fallback: if nothing exists on disk, infer extension from the source file
+ * (so offline / unit-test scenarios still produce useful edges).
+ *
+ * Returns a project-relative path (e.g. `src/utils.ts`) or null.
+ */
+function resolveModulePath(
+  projectRoot: string,
+  sourceFile: string,
+  importSpecifier: string,
+  language: "js" | "python" | "markdown",
+): string | null {
+  const dir = path.dirname(path.resolve(projectRoot, sourceFile))
+  const base = path.resolve(dir, importSpecifier)
+
+  // Security: must stay inside project root
+  if (!base.startsWith(projectRoot)) return null
+
+  const hasExtension = !!path.extname(base)
+
+  // 1. If specifier already has an extension, check it directly
+  if (hasExtension) {
+    if (fileExists(base)) return path.relative(projectRoot, base)
+    // Even if the exact file doesn't exist, return the relative path so we
+    // can still build a "best effort" edge (e.g. markdown link to ./api.md
+    // in a test without real files).
+    return path.relative(projectRoot, base)
+  }
+
+  const exts = language === "python" ? PY_EXTENSIONS : JS_EXTENSIONS
+
+  // 2. Try appending each extension
+  for (const ext of exts) {
+    const candidate = base + ext
+    if (fileExists(candidate)) return path.relative(projectRoot, candidate)
+  }
+
+  // 3. Try directory index files
+  const indexNames = language === "python" ? ["__init__.py"] : exts.map(e => "index" + e)
+  for (const idx of indexNames) {
+    const candidate = path.join(base, idx)
+    if (fileExists(candidate)) return path.relative(projectRoot, candidate)
+  }
+
+  // 4. Fallback: infer extension from source file
+  // `app.ts` imports `./utils` → assume `utils.ts`
+  const sourceExt = path.extname(sourceFile)
+  if (sourceExt && exts.includes(sourceExt)) {
+    return path.relative(projectRoot, base + sourceExt)
+  }
+
+  // Last resort for Python: assume .py
+  if (language === "python") {
+    return path.relative(projectRoot, base + ".py")
+  }
+
+  return null
+}
+
+/** Synchronous file-exists check (cheap for module resolution). */
+function fileExists(absPath: string): boolean {
+  try {
+    return fs.statSync(absPath).isFile()
+  } catch {
+    return false
+  }
+}
+
+/**
+ * Convert Python relative import specifier to a path.
+ * `.utils` → `./utils`
+ * `..utils` → `../utils`
+ * `...pkg` → `../../pkg`
+ */
+function pythonRelativeToPath(spec: string): string {
+  const match = spec.match(/^(\.+)(.*)$/)
+  if (!match) return spec
+  const dots = match[1].length   // number of leading dots
+  const module = match[2]        // remainder, e.g. "utils"
+  // 1 dot = current dir "./", 2 dots = "../", 3 = "../../", ...
+  const prefix = dots === 1 ? "./" : "../".repeat(dots - 1)
+  // Module part: dots→slashes (e.g. "pkg.sub" → "pkg/sub")
+  const modulePath = module.replace(/\./g, "/")
+  return prefix + modulePath
+}
+
+// ── RegexAnalyzer ────────────────────────────────────────────────────────────
+
 export class RegexAnalyzer {
+  private projectRoot: string
+
   private readonly patterns = {
     jsImports: /import\s+(?:\{[^}]+\}|\w+)\s+from\s+['"]([^'"]+)['"]/g,
     pythonFromImport: /from\s+(\S+)\s+import/g,
     pythonImport: /import\s+(\S+)/g,
     extends: /class\s+\w+\s+extends\s+(\w+)/g,
     implements: /class\s+\w+\s+implements\s+([^{]+)/g,
-    markdownLink: /\[([^\]]+)\]\(([^)]+)\)/g
+    markdownLink: /\[([^\]]+)\]\(([^)]+)\)/g,
+  }
+
+  constructor(projectRoot?: string) {
+    this.projectRoot = projectRoot || process.cwd()
   }

   analyzeCode(filePath: string, content: string, chunks: ChunkWithId[]): Relation[] {
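
The resolution order documented above can be traced on a made-up layout; the paths below are illustrative only and not part of the package:

// Hypothetical project layout, used only to trace resolveModulePath's fallback order:
//   /repo/src/app.ts
//   /repo/src/utils/index.ts
resolveModulePath("/repo", "src/app.ts", "./utils", "js")
//   → "src/utils/index.ts"   (step 3: no src/utils.<ext> exists, so the directory index wins)
resolveModulePath("/repo", "src/app.ts", "./missing", "js")
//   → "src/missing.ts"       (step 4: nothing on disk, extension inferred from app.ts)
pythonRelativeToPath("..pkg.utils")
//   → "../pkg/utils"         (two leading dots → one "../", remaining dots become "/")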

@@ -25,9 +139,9 @@
     const ext = path.extname(filePath)
     const lines = content.split("\n")

-    if ([".js", ".ts", ".jsx", ".tsx"].includes(ext)) {
+    if ([".js", ".ts", ".jsx", ".tsx", ".mjs", ".cjs"].includes(ext)) {
       this.analyzeJSCode(content, lines, filePath, chunks, relations)
-    } else if (
+    } else if (ext === ".py") {
       this.analyzePythonCode(content, lines, filePath, chunks, relations)
     }

@@ -37,17 +151,14 @@
   analyzeMarkdown(filePath: string, content: string, chunks: ChunkWithId[]): Relation[] {
     const relations: Relation[] = []
     const lines = content.split("\n")
-    const dir = path.dirname(filePath)

     let match
     this.patterns.markdownLink.lastIndex = 0
     while ((match = this.patterns.markdownLink.exec(content)) !== null) {
-      const linkText = match[1]
       const linkTarget = match[2]
       const lineIndex = content.substring(0, match.index).split("\n").length - 1
-      const line = lines[lineIndex]

-      const targetPath = this.
+      const targetPath = this.resolveMarkdownLink(filePath, linkTarget)
       if (!targetPath) continue

       const fromChunkId = this.findChunkForLine(chunks, lineIndex)

@@ -61,7 +172,7 @@
         predicate: "links_to",
         weight: 1.0,
         source: "markdown",
-        line: lineIndex
+        line: lineIndex,
       })
     }
   }

@@ -69,33 +180,34 @@ export class RegexAnalyzer {
     return relations
   }

-
+  // ── JS / TS ───────────────────────────────────────────────────────────────
+
+  private analyzeJSCode(
+    content: string, lines: string[], filePath: string,
+    chunks: ChunkWithId[], relations: Relation[],
+  ) {
     let match

     this.patterns.jsImports.lastIndex = 0
     while ((match = this.patterns.jsImports.exec(content)) !== null) {
       const importPath = match[1]
       const lineIndex = content.substring(0, match.index).split("\n").length - 1
-      const line = lines[lineIndex]

       if (importPath.startsWith(".")) {
-        const targetPath = this.
+        const targetPath = resolveModulePath(this.projectRoot, filePath, importPath, "js")
         if (!targetPath) continue

         const fromChunkId = this.findChunkForLine(chunks, lineIndex)
         if (!fromChunkId) continue

-
-
-
-
-
-
-
-
-          line: lineIndex
-        })
-      }
+        relations.push({
+          from: fromChunkId,
+          to: buildDefaultChunkId(targetPath),
+          predicate: "imports",
+          weight: 0.8,
+          source: "regex",
+          line: lineIndex,
+        })
       }
     }

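
With the loop above, every relative JS/TS import is routed through resolveModulePath and buildDefaultChunkId. On the hypothetical layout from the earlier sketch, a single import line produces roughly this edge (the exact chunk-id format comes from graph-builder.ts, outside this excerpt):

// Illustrative edge only; chunk ids are assumptions based on the removed chunk_<sanitized>_0 scheme.
const edge: Relation = {
  from: "chunk_src_app_ts_0",                     // chunk containing the import statement
  to: buildDefaultChunkId("src/utils/index.ts"),  // e.g. "chunk_src_utils_index_ts_0"
  predicate: "imports",
  weight: 0.8,
  source: "regex",
  line: 0,
}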

@@ -115,7 +227,7 @@
         predicate: "extends",
         weight: 0.8,
         source: "regex",
-        line: lineIndex
+        line: lineIndex,
       })
     }
   }

@@ -137,14 +249,19 @@
             predicate: "implements",
             weight: 0.8,
             source: "regex",
-            line: lineIndex
+            line: lineIndex,
           })
         }
       }
     }
   }

-
+  // ── Python ────────────────────────────────────────────────────────────────
+
+  private analyzePythonCode(
+    content: string, lines: string[], filePath: string,
+    chunks: ChunkWithId[], relations: Relation[],
+  ) {
     let match

     this.patterns.pythonFromImport.lastIndex = 0

@@ -153,23 +270,21 @@
       const lineIndex = content.substring(0, match.index).split("\n").length - 1

       if (importPath.startsWith(".")) {
-        const
+        const pyPath = pythonRelativeToPath(importPath)
+        const targetPath = resolveModulePath(this.projectRoot, filePath, pyPath, "python")
         if (!targetPath) continue

         const fromChunkId = this.findChunkForLine(chunks, lineIndex)
         if (!fromChunkId) continue

-
-
-
-
-
-
-
-
-          line: lineIndex
-        })
-      }
+        relations.push({
+          from: fromChunkId,
+          to: buildDefaultChunkId(targetPath),
+          predicate: "imports",
+          weight: 0.8,
+          source: "regex",
+          line: lineIndex,
+        })
       }
     }
   }

@@ -179,42 +294,38 @@
       const lineIndex = content.substring(0, match.index).split("\n").length - 1

       if (importPath.startsWith(".")) {
-        const
+        const pyPath = pythonRelativeToPath(importPath)
+        const targetPath = resolveModulePath(this.projectRoot, filePath, pyPath, "python")
         if (!targetPath) continue

         const fromChunkId = this.findChunkForLine(chunks, lineIndex)
         if (!fromChunkId) continue

-
-
-
-
-
-
-
-
-          line: lineIndex
-        })
-      }
+        relations.push({
+          from: fromChunkId,
+          to: buildDefaultChunkId(targetPath),
+          predicate: "imports",
+          weight: 0.8,
+          source: "regex",
+          line: lineIndex,
+        })
       }
     }
   }

-
-    try {
-      const dir = path.dirname(filePath)
-      const absoluteTarget = path.resolve(dir, target)
+  // ── Markdown link resolution ──────────────────────────────────────────────

-
-
-
+  private resolveMarkdownLink(filePath: string, target: string): string | null {
+    // Strip anchor (#section)
+    const hashIdx = target.indexOf("#")
+    const cleanTarget = hashIdx >= 0 ? target.substring(0, hashIdx) : target
+    if (!cleanTarget) return null

-
-    } catch {
-      return null
-    }
+    return resolveModulePath(this.projectRoot, filePath, cleanTarget, "markdown")
   }

+  // ── Chunk lookup helpers ──────────────────────────────────────────────────
+
   private findChunkForLine(chunks: ChunkWithId[], lineIndex: number): string | null {
     for (const chunk of chunks) {
       if (chunk.start_line !== undefined && chunk.end_line !== undefined) {
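
resolveMarkdownLink strips any anchor and then defers to resolveModulePath, so markdown links resolve the same way imports do. A short trace on made-up docs files (illustrative only):

// Hypothetical files:
//   /repo/docs/guide.md   (contains the link)
//   /repo/docs/api.md     (link target)
// While analyzing docs/guide.md, the link "[API](./api.md#endpoints)" resolves as:
//   resolveMarkdownLink("docs/guide.md", "./api.md#endpoints")
//   → "docs/api.md"   (anchor stripped; the .md extension hits step 1 of resolveModulePath,
//                      which returns the relative path even if the file is absent on disk)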

@@ -226,11 +337,6 @@ export class RegexAnalyzer {
     return null
   }

-  private findFirstChunkInFile(targetPath: string): string | null {
-    const normalized = targetPath.replace(/[^a-zA-Z0-9]/g, "_")
-    return `chunk_${normalized}_0`
-  }
-
   private findChunkContainingSymbol(chunks: ChunkWithId[], symbol: string): string | null {
     for (const chunk of chunks) {
       if (chunk.content.includes(symbol)) {

@@ -250,6 +356,6 @@ export class RegexAnalyzer {
        }
      }
    }
-    return
+    return buildDefaultChunkId(targetPath)
   }
 }
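
Taken together, the analyzer now owns a project root and does its own path resolution. A rough usage sketch (illustrative only; ChunkWithId's full field set lives in graph-builder.ts, so the chunk object below is simplified and its id field name is an assumption):

import { RegexAnalyzer } from "./vectorizer/analyzers/regex-analyzer"

const analyzer = new RegexAnalyzer("/repo")
const source = 'import { greet } from "./utils"\n\ngreet("hi")\n'
const chunks = [
  { id: "chunk_src_app_ts_0", content: source, start_line: 0, end_line: 2 },
]

const edges = analyzer.analyzeCode("src/app.ts", source, chunks as any)
// If /repo/src/utils.ts exists (or via the extension-inference fallback),
// edges holds one "imports" relation from the chunk above to the default chunk of src/utils.ts.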

package/vectorizer/chunkers/code-chunker.ts
CHANGED

@@ -23,6 +23,8 @@ export interface CodeChunk {
   content: string
   function_name?: string
   class_name?: string
+  start_line?: number
+  end_line?: number
 }

 // ── Block detection ─────────────────────────────────────────────────────────

@@ -172,31 +174,74 @@ function findPythonBlockEnd(lines: string[], startLine: number): number {
   return lines.length - 1
 }

-// ── Fallback: line-based splitting ──────────────────────────────────────────
+// ── Fallback: line-based splitting ──────────────────────────────────────────
+
+function splitByLines(lines: string[], maxChars: number): CodeChunk[] {
+  const chunks: CodeChunk[] = []
+  let current: string[] = []
+  let currentLen = 0
+  let startLine = 0
+
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i]
+    if (currentLen + line.length + 1 > maxChars && current.length > 0) {
+      chunks.push({ content: current.join("\n"), start_line: startLine, end_line: i - 1 })
+      current = []
+      currentLen = 0
+      startLine = i
+    }
+    current.push(line)
+    currentLen += line.length + 1
+  }

-
-
-  let current: string[] = []
-  let currentLen = 0
-
-  for (const line of lines) {
-    if (currentLen + line.length + 1 > maxChars && current.length > 0) {
-      chunks.push({ content: current.join("\n") })
-      current = []
-      currentLen = 0
+  if (current.length > 0) {
+    chunks.push({ content: current.join("\n"), start_line: startLine, end_line: lines.length - 1 })
   }
-    current.push(line)
-    currentLen += line.length + 1
-  }

-
-  chunks.push({ content: current.join("\n") })
+  return chunks
 }

-
-
+// ── Split large chunks preserving line numbers ────────────────────────────
+
+function splitChunkByLines(chunk: CodeChunk, maxChars: number): CodeChunk[] {
+  const lines = chunk.content.split("\n")
+  const baseLine = chunk.start_line || 0
+
+  const parts: CodeChunk[] = []
+  let current: string[] = []
+  let currentLen = 0
+  let startLine = baseLine
+
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i]
+    if (currentLen + line.length + 1 > maxChars && current.length > 0) {
+      parts.push({
+        ...chunk,
+        content: current.join("\n"),
+        start_line: startLine,
+        end_line: baseLine + i - 1,
+      })
+      current = []
+      currentLen = 0
+      startLine = baseLine + i
+    }
+    current.push(line)
+    currentLen += line.length + 1
+  }
+
+  if (current.length > 0) {
+    parts.push({
+      ...chunk,
+      content: current.join("\n"),
+      start_line: startLine,
+      end_line: baseLine + lines.length - 1,
+    })
+  }
+
+  return parts
+}

-// ── Public API ──────────────────────────────────────────────────────────────
+// ── Public API ──────────────────────────────────────────────────────────────

 /**
  * Chunk source code by functions/classes.
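
splitChunkByLines keeps absolute line numbers stable when an oversized chunk is cut: each part's start_line/end_line are offsets from the original chunk's start_line, and the ...chunk spread carries function_name/class_name into every part. A small worked trace (hypothetical chunk, not from the package):

// A 5-line chunk starting at file line 100, every line exactly 15 characters, maxChars = 40
// (each line costs line.length + 1 for the newline):
const demo: CodeChunk = {
  content: Array.from({ length: 5 }, (_, i) => `// line ${i}`.padEnd(15)).join("\n"),
  start_line: 100,
  end_line: 104,
}
splitChunkByLines(demo, 40)
// → [ { start_line: 100, end_line: 101, ... },   // lines 100-101 (16 + 16 chars; a third would exceed 40)
//     { start_line: 102, end_line: 103, ... },   // lines 102-103
//     { start_line: 104, end_line: 104, ... } ]  // remainder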

@@ -231,12 +276,13 @@ export function chunkCode(
     if (block.startLine > lastEnd + 1) {
       const gapContent = lines.slice(lastEnd + 1, block.startLine).join("\n").trim()
       if (gapContent.length >= config.min_chunk_size) {
-        chunks.push({ content: gapContent })
+        chunks.push({ content: gapContent, start_line: lastEnd + 1, end_line: block.startLine - 1 })
       } else if (gapContent.length > 0 && chunks.length > 0) {
         // Merge small gap with previous chunk
         chunks[chunks.length - 1].content += "\n\n" + gapContent
+        chunks[chunks.length - 1].end_line = block.startLine - 1
       } else if (gapContent.length > 0) {
-        chunks.push({ content: gapContent })
+        chunks.push({ content: gapContent, start_line: lastEnd + 1, end_line: block.startLine - 1 })
       }
     }

@@ -259,6 +305,8 @@ export function chunkCode(
          chunks.push({
            content: gap,
            class_name: block.name,
+           start_line: classLastEnd + 1,
+           end_line: method.startLine - 1,
          })
        }
      }

@@ -267,6 +315,8 @@ export function chunkCode(
          content: lines.slice(method.startLine, method.endLine + 1).join("\n"),
          function_name: method.name,
          class_name: block.name,
+         start_line: method.startLine,
+         end_line: method.endLine,
        })
        classLastEnd = method.endLine
      }

@@ -275,7 +325,7 @@ export function chunkCode(
      if (classLastEnd < block.endLine) {
        const tail = lines.slice(classLastEnd + 1, block.endLine + 1).join("\n").trim()
        if (tail) {
-         chunks.push({ content: tail, class_name: block.name })
+         chunks.push({ content: tail, class_name: block.name, start_line: classLastEnd + 1, end_line: block.endLine })
        }
      }
    } else {

@@ -312,9 +362,9 @@ export function chunkCode(
   const result: CodeChunk[] = []
   for (const chunk of chunks) {
     if (chunk.content.length > config.max_chunk_size) {
-      const parts =
+      const parts = splitChunkByLines(chunk, config.max_chunk_size)
       for (const p of parts) {
-        result.push(
+        result.push(p)
       }
     } else {
       result.push(chunk)