npm - @comfanion/usethis_search - Versions diffs - 3.0.0-dev.17 → 3.0.0-dev.18 - Mend

@comfanion/usethis_search 3.0.0-dev.17 → 3.0.0-dev.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/package.json +1 -1
package/vectorizer/analyzers/lsp-analyzer.ts +7 -7
package/vectorizer/analyzers/regex-analyzer.ts +173 -67
package/vectorizer/graph-builder.ts +206 -15
package/vectorizer/graph-db.ts +70 -47
package/vectorizer/index.ts +109 -40
package/vectorizer.yaml +16 -0

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@comfanion/usethis_search",
-  "version": "3.0.0-dev.17",
+  "version": "3.0.0-dev.18",
   "description": "OpenCode plugin: semantic search with graph-based context (v3: graph relations, 1-hop context, LSP + regex analyzers)",
   "type": "module",
   "main": "./index.ts",

package/vectorizer/analyzers/lsp-analyzer.ts CHANGED

@@ -12,7 +12,7 @@
 import path from "path"
 import fs from "fs/promises"
-import { ChunkWithId } from "../graph-builder"
+import { ChunkWithId, buildDefaultChunkId } from "../graph-builder"
 import { LSPClient, LSPSymbolInformation, SymbolKind } from "./lsp-client"
 export interface Relation {
@@ -252,7 +252,9 @@ export class LSPAnalyzer {
     return result
   }
-  /** Convert LSP location URI + line → chunk_id. */
+  /** Convert LSP location URI + line → chunk_id.
+   *  For same-file refs, resolves to exact chunk by line.
+   *  For cross-file refs, returns the default (first) chunk of the target file. */
   private locationToChunkId(currentFile: string, uri: string, line: number, root: string): string | null {
     // uri = file:///absolute/path/to/file.ts
     const filePath = uri.startsWith("file://") ? uri.slice(7) : uri
@@ -261,11 +263,9 @@ export class LSPAnalyzer {
     // Skip external files (node_modules, etc.)
     if (relPath.startsWith("..") || relPath.includes("node_modules")) return null
-    const withoutExt = relPath.replace(/\.[^/.]+$/, "")
-    const normalized = withoutExt.replace(/[^a-zA-Z0-9]/g, "_")
-    // For cross-file references, point to chunk 0 (first chunk of target file)
-    // For same-file, we could be more precise but chunk 0 is sufficient for graph
-    return `chunk_${normalized}_0`
+    // Same file → use findChunkForPosition (called separately with chunks)
+    // Cross-file → default chunk
+    return buildDefaultChunkId(relPath)
   }
   private findChunkForPosition(chunks: ChunkWithId[], line: number): string | null {

package/vectorizer/analyzers/regex-analyzer.ts CHANGED

@@ -1,5 +1,6 @@
 import path from "path"
-import { ChunkWithId } from "../graph-builder"
+import fs from "fs"
+import { ChunkWithId, buildDefaultChunkId } from "../graph-builder"
 export interface Relation {
   from: string
@@ -10,14 +11,127 @@ export interface Relation {
   line?: number
 }
+// ── Module resolution ─────────────────────────────────────────────────────────
+/** Extensions to try when resolving JS/TS imports (in order). */
+const JS_EXTENSIONS = [".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs"]
+/** Extensions to try when resolving Python imports. */
+const PY_EXTENSIONS = [".py"]
+/**
+ * Resolve a relative import to an actual file on disk.
+ *
+ * Follows Node.js / TypeScript module resolution:
+ *   1. Exact path (has extension) → check exists
+ *   2. Try each extension: `target.ts`, `target.tsx`, ...
+ *   3. Try directory index: `target/index.ts`, `target/index.tsx`, ...
+ *
+ * For Python:
+ *   1. `target.py`
+ *   2. `target/__init__.py`
+ *
+ * Fallback: if nothing exists on disk, infer extension from the source file
+ * (so offline / unit-test scenarios still produce useful edges).
+ *
+ * Returns a project-relative path (e.g. `src/utils.ts`) or null.
+ */
+function resolveModulePath(
+  projectRoot: string,
+  sourceFile: string,
+  importSpecifier: string,
+  language: "js" | "python" | "markdown",
+): string | null {
+  const dir = path.dirname(path.resolve(projectRoot, sourceFile))
+  const base = path.resolve(dir, importSpecifier)
+  // Security: must stay inside project root
+  if (!base.startsWith(projectRoot)) return null
+  const hasExtension = !!path.extname(base)
+  // 1. If specifier already has an extension, check it directly
+  if (hasExtension) {
+    if (fileExists(base)) return path.relative(projectRoot, base)
+    // Even if the exact file doesn't exist, return the relative path so we
+    // can still build a "best effort" edge (e.g. markdown link to ./api.md
+    // in a test without real files).
+    return path.relative(projectRoot, base)
+  }
+  const exts = language === "python" ? PY_EXTENSIONS : JS_EXTENSIONS
+  // 2. Try appending each extension
+  for (const ext of exts) {
+    const candidate = base + ext
+    if (fileExists(candidate)) return path.relative(projectRoot, candidate)
+  }
+  // 3. Try directory index files
+  const indexNames = language === "python" ? ["__init__.py"] : exts.map(e => "index" + e)
+  for (const idx of indexNames) {
+    const candidate = path.join(base, idx)
+    if (fileExists(candidate)) return path.relative(projectRoot, candidate)
+  }
+  // 4. Fallback: infer extension from source file
+  //    `app.ts` imports `./utils` → assume `utils.ts`
+  const sourceExt = path.extname(sourceFile)
+  if (sourceExt && exts.includes(sourceExt)) {
+    return path.relative(projectRoot, base + sourceExt)
+  }
+  // Last resort for Python: assume .py
+  if (language === "python") {
+    return path.relative(projectRoot, base + ".py")
+  }
+  return null
+}
+/** Synchronous file-exists check (cheap for module resolution). */
+function fileExists(absPath: string): boolean {
+  try {
+    return fs.statSync(absPath).isFile()
+  } catch {
+    return false
+  }
+}
+/**
+ * Convert Python relative import specifier to a path.
+ *   `.utils`   → `./utils`
+ *   `..utils`  → `../utils`
+ *   `...pkg`   → `../../pkg`
+ */
+function pythonRelativeToPath(spec: string): string {
+  const match = spec.match(/^(\.+)(.*)$/)
+  if (!match) return spec
+  const dots = match[1].length  // number of leading dots
+  const module = match[2]       // remainder, e.g. "utils"
+  // 1 dot = current dir "./", 2 dots = "../", 3 = "../../", ...
+  const prefix = dots === 1 ? "./" : "../".repeat(dots - 1)
+  // Module part: dots→slashes (e.g. "pkg.sub" → "pkg/sub")
+  const modulePath = module.replace(/\./g, "/")
+  return prefix + modulePath
+}
+// ── RegexAnalyzer ────────────────────────────────────────────────────────────
 export class RegexAnalyzer {
+  private projectRoot: string
   private readonly patterns = {
     jsImports: /import\s+(?:\{[^}]+\}|\w+)\s+from\s+['"]([^'"]+)['"]/g,
     pythonFromImport: /from\s+(\S+)\s+import/g,
     pythonImport: /import\s+(\S+)/g,
     extends: /class\s+\w+\s+extends\s+(\w+)/g,
     implements: /class\s+\w+\s+implements\s+([^{]+)/g,
-    markdownLink: /\[([^\]]+)\]\(([^)]+)\)/g
+    markdownLink: /\[([^\]]+)\]\(([^)]+)\)/g,
+  }
+  constructor(projectRoot?: string) {
+    this.projectRoot = projectRoot || process.cwd()
   }
   analyzeCode(filePath: string, content: string, chunks: ChunkWithId[]): Relation[] {
@@ -25,9 +139,9 @@ export class RegexAnalyzer {
     const ext = path.extname(filePath)
     const lines = content.split("\n")
-    if ([".js", ".ts", ".jsx", ".tsx"].includes(ext)) {
+    if ([".js", ".ts", ".jsx", ".tsx", ".mjs", ".cjs"].includes(ext)) {
       this.analyzeJSCode(content, lines, filePath, chunks, relations)
-    } else if ([".py"].includes(ext)) {
+    } else if (ext === ".py") {
       this.analyzePythonCode(content, lines, filePath, chunks, relations)
     }
@@ -37,17 +151,14 @@ export class RegexAnalyzer {
   analyzeMarkdown(filePath: string, content: string, chunks: ChunkWithId[]): Relation[] {
     const relations: Relation[] = []
     const lines = content.split("\n")
-    const dir = path.dirname(filePath)
     let match
     this.patterns.markdownLink.lastIndex = 0
     while ((match = this.patterns.markdownLink.exec(content)) !== null) {
-      const linkText = match[1]
       const linkTarget = match[2]
       const lineIndex = content.substring(0, match.index).split("\n").length - 1
-      const line = lines[lineIndex]
-      const targetPath = this.resolvePath(filePath, linkTarget)
+      const targetPath = this.resolveMarkdownLink(filePath, linkTarget)
       if (!targetPath) continue
       const fromChunkId = this.findChunkForLine(chunks, lineIndex)
@@ -61,7 +172,7 @@ export class RegexAnalyzer {
           predicate: "links_to",
           weight: 1.0,
           source: "markdown",
-          line: lineIndex
+          line: lineIndex,
         })
       }
     }
@@ -69,33 +180,34 @@ export class RegexAnalyzer {
     return relations
   }
-  private analyzeJSCode(content: string, lines: string[], filePath: string, chunks: ChunkWithId[], relations: Relation[]) {
+  // ── JS / TS ───────────────────────────────────────────────────────────────
+  private analyzeJSCode(
+    content: string, lines: string[], filePath: string,
+    chunks: ChunkWithId[], relations: Relation[],
+  ) {
     let match
     this.patterns.jsImports.lastIndex = 0
     while ((match = this.patterns.jsImports.exec(content)) !== null) {
       const importPath = match[1]
       const lineIndex = content.substring(0, match.index).split("\n").length - 1
-      const line = lines[lineIndex]
       if (importPath.startsWith(".")) {
-        const targetPath = this.resolvePath(filePath, importPath)
+        const targetPath = resolveModulePath(this.projectRoot, filePath, importPath, "js")
         if (!targetPath) continue
         const fromChunkId = this.findChunkForLine(chunks, lineIndex)
         if (!fromChunkId) continue
-        const toChunkId = this.findFirstChunkInFile(targetPath)
-        if (toChunkId) {
-          relations.push({
-            from: fromChunkId,
-            to: toChunkId,
-            predicate: "imports",
-            weight: 0.8,
-            source: "regex",
-            line: lineIndex
-          })
-        }
+        relations.push({
+          from: fromChunkId,
+          to: buildDefaultChunkId(targetPath),
+          predicate: "imports",
+          weight: 0.8,
+          source: "regex",
+          line: lineIndex,
+        })
       }
     }
@@ -115,7 +227,7 @@ export class RegexAnalyzer {
           predicate: "extends",
           weight: 0.8,
           source: "regex",
-          line: lineIndex
+          line: lineIndex,
         })
       }
     }
@@ -137,14 +249,19 @@ export class RegexAnalyzer {
             predicate: "implements",
             weight: 0.8,
             source: "regex",
-            line: lineIndex
+            line: lineIndex,
           })
         }
       }
     }
   }
-  private analyzePythonCode(content: string, lines: string[], filePath: string, chunks: ChunkWithId[], relations: Relation[]) {
+  // ── Python ────────────────────────────────────────────────────────────────
+  private analyzePythonCode(
+    content: string, lines: string[], filePath: string,
+    chunks: ChunkWithId[], relations: Relation[],
+  ) {
     let match
     this.patterns.pythonFromImport.lastIndex = 0
@@ -153,23 +270,21 @@ export class RegexAnalyzer {
       const lineIndex = content.substring(0, match.index).split("\n").length - 1
       if (importPath.startsWith(".")) {
-        const targetPath = this.resolvePath(filePath, importPath)
+        const pyPath = pythonRelativeToPath(importPath)
+        const targetPath = resolveModulePath(this.projectRoot, filePath, pyPath, "python")
         if (!targetPath) continue
         const fromChunkId = this.findChunkForLine(chunks, lineIndex)
         if (!fromChunkId) continue
-        const toChunkId = this.findFirstChunkInFile(targetPath)
-        if (toChunkId) {
-          relations.push({
-            from: fromChunkId,
-            to: toChunkId,
-            predicate: "imports",
-            weight: 0.8,
-            source: "regex",
-            line: lineIndex
-          })
-        }
+        relations.push({
+          from: fromChunkId,
+          to: buildDefaultChunkId(targetPath),
+          predicate: "imports",
+          weight: 0.8,
+          source: "regex",
+          line: lineIndex,
+        })
       }
     }
@@ -179,42 +294,38 @@ export class RegexAnalyzer {
       const lineIndex = content.substring(0, match.index).split("\n").length - 1
       if (importPath.startsWith(".")) {
-        const targetPath = this.resolvePath(filePath, importPath)
+        const pyPath = pythonRelativeToPath(importPath)
+        const targetPath = resolveModulePath(this.projectRoot, filePath, pyPath, "python")
         if (!targetPath) continue
         const fromChunkId = this.findChunkForLine(chunks, lineIndex)
         if (!fromChunkId) continue
-        const toChunkId = this.findFirstChunkInFile(targetPath)
-        if (toChunkId) {
-          relations.push({
-            from: fromChunkId,
-            to: toChunkId,
-            predicate: "imports",
-            weight: 0.8,
-            source: "regex",
-            line: lineIndex
-          })
-        }
+        relations.push({
+          from: fromChunkId,
+          to: buildDefaultChunkId(targetPath),
+          predicate: "imports",
+          weight: 0.8,
+          source: "regex",
+          line: lineIndex,
+        })
       }
     }
   }
-  private resolvePath(filePath: string, target: string): string | null {
-    try {
-      const dir = path.dirname(filePath)
-      const absoluteTarget = path.resolve(dir, target)
+  // ── Markdown link resolution ──────────────────────────────────────────────
-      if (!absoluteTarget.startsWith(process.cwd())) {
-        return null
-      }
+  private resolveMarkdownLink(filePath: string, target: string): string | null {
+    // Strip anchor (#section)
+    const hashIdx = target.indexOf("#")
+    const cleanTarget = hashIdx >= 0 ? target.substring(0, hashIdx) : target
+    if (!cleanTarget) return null
-      return path.relative(process.cwd(), absoluteTarget)
-    } catch {
-      return null
-    }
+    return resolveModulePath(this.projectRoot, filePath, cleanTarget, "markdown")
   }
+  // ── Chunk lookup helpers ──────────────────────────────────────────────────
   private findChunkForLine(chunks: ChunkWithId[], lineIndex: number): string | null {
     for (const chunk of chunks) {
       if (chunk.start_line !== undefined && chunk.end_line !== undefined) {
@@ -226,11 +337,6 @@ export class RegexAnalyzer {
     return null
   }
-  private findFirstChunkInFile(targetPath: string): string | null {
-    const normalized = targetPath.replace(/[^a-zA-Z0-9]/g, "_")
-    return `chunk_${normalized}_0`
-  }
   private findChunkContainingSymbol(chunks: ChunkWithId[], symbol: string): string | null {
     for (const chunk of chunks) {
       if (chunk.content.includes(symbol)) {
@@ -250,6 +356,6 @@ export class RegexAnalyzer {
         }
       }
     }
-    return this.findFirstChunkInFile(targetPath)
+    return buildDefaultChunkId(targetPath)
   }
 }

package/vectorizer/graph-builder.ts CHANGED

@@ -9,29 +9,117 @@ export interface ChunkWithId {
   start_line?: number
   end_line?: number
   heading_context?: string
+  function_name?: string
+  class_name?: string
+}
+// ── Chunk ID helpers ────────────────────────────────────────────────────────
+/** Build a symbol-aware chunk ID.
+ *
+ *  Format: `chunk:{relPath}::{symbol}`
+ *  Examples:
+ *    chunk:src/user-service.ts::UserService
+ *    chunk:src/user-service.ts::UserService.findById
+ *    chunk:src/utils.ts::helper
+ *    chunk:docs/api.md::authentication
+ *    chunk:src/index.ts::_chunk_0
+ */
+export function buildChunkId(filePath: string, chunk: { class_name?: string; function_name?: string; heading_context?: string }, index: number): string {
+  let symbol: string
+  if (chunk.class_name && chunk.function_name) {
+    symbol = `${chunk.class_name}.${chunk.function_name}`
+  } else if (chunk.class_name) {
+    symbol = chunk.class_name
+  } else if (chunk.function_name) {
+    symbol = chunk.function_name
+  } else if (chunk.heading_context) {
+    // Markdown: slugify heading
+    symbol = chunk.heading_context
+      .toLowerCase()
+      .replace(/[^a-z0-9]+/g, "-")
+      .replace(/^-|-$/g, "")
+    if (!symbol) symbol = `_chunk_${index}`
+  } else {
+    symbol = `_chunk_${index}`
+  }
+  return `chunk:${filePath}::${symbol}`
+}
+/** Build the file-level node ID. */
+export function buildFileNodeId(filePath: string): string {
+  return `file:${filePath}`
+}
+/** Extract the file path from any node ID (chunk: or file:). */
+export function filePathFromNodeId(nodeId: string): string | null {
+  if (nodeId.startsWith("chunk:")) {
+    const sep = nodeId.indexOf("::")
+    return sep === -1 ? null : nodeId.slice(6, sep)
+  }
+  if (nodeId.startsWith("file:")) {
+    return nodeId.slice(5)
+  }
+  if (nodeId.startsWith("meta:")) {
+    return nodeId.slice(5)
+  }
+  return null
+}
+/** Build a cross-file chunk ID that points to the default (first) chunk of the target file.
+ *  Used by regex/LSP analyzers when we don't know the exact target chunk. */
+export function buildDefaultChunkId(filePath: string): string {
+  return `chunk:${filePath}::_chunk_0`
+}
+// ── Structural edge predicates ──────────────────────────────────────────────
+const STRUCTURAL_PREDICATES = new Set([
+  "contains_class",
+  "contains_function",
+  "contains_interface",
+  "contains",
+  "has_method",
+])
+export function isStructuralPredicate(predicate: string): boolean {
+  return STRUCTURAL_PREDICATES.has(predicate)
 }
 export class GraphBuilder {
   private lspAnalyzer: LSPAnalyzer
   private regexAnalyzer: RegexAnalyzer
+  private lspEnabled: boolean
   constructor(
     private graphDB: GraphDB,
-    private projectRoot: string
+    private projectRoot: string,
+    lspEnabled: boolean = true,
+    lspTimeoutMs: number = 5000,
   ) {
-    this.lspAnalyzer = new LSPAnalyzer()
-    this.regexAnalyzer = new RegexAnalyzer()
+    this.lspEnabled = lspEnabled
+    this.lspAnalyzer = new LSPAnalyzer(projectRoot, lspTimeoutMs)
+    this.regexAnalyzer = new RegexAnalyzer(projectRoot)
   }
   assignChunkIds(filePath: string, chunks: any[]): ChunkWithId[] {
-    const withoutExt = filePath.replace(/\.[^/.]+$/, "")
-    const normalizedPath = withoutExt.replace(/[^a-zA-Z0-9]/g, "_")
+    const seen = new Map<string, number>()
     return chunks.map((chunk, index) => {
-      const chunkId = `chunk_${normalizedPath}_${index}`
+      let chunkId = buildChunkId(filePath, chunk, index)
+      // Handle duplicate symbols (e.g. two chunks for same class split by size)
+      const count = seen.get(chunkId) || 0
+      if (count > 0) {
+        chunkId = `${chunkId}#${count}`
+      }
+      seen.set(chunkId.replace(/#\d+$/, ""), count + 1)
       return {
         ...chunk,
-        chunk_id: chunkId
+        chunk_id: chunkId,
       } as ChunkWithId
     })
   }
@@ -47,7 +135,7 @@ export class GraphBuilder {
     if (fileType === "docs") {
       relations = this.regexAnalyzer.analyzeMarkdown(filePath, content, chunks)
     } else if (fileType === "code") {
-      const lspAvailable = await this.lspAnalyzer.isAvailable(filePath)
+      const lspAvailable = this.lspEnabled && await this.lspAnalyzer.isAvailable(filePath)
       if (lspAvailable) {
         try {
@@ -63,7 +151,7 @@ export class GraphBuilder {
       }
     }
-    const triples: Triple[] = relations.map(rel => ({
+    const relationTriples: Triple[] = relations.map(rel => ({
       subject: rel.from,
       predicate: rel.predicate,
       object: rel.to,
@@ -73,14 +161,117 @@ export class GraphBuilder {
       line: rel.line
     }))
-    await this.graphDB.putEdges(triples)
-    return triples.length
+    // ── Structural edges ────────────────────────────────────────────────────
+    const fileNode = buildFileNodeId(filePath)
+    const structuralTriples: Triple[] = []
+    // Anchor: every chunk belongs_to its file
+    for (const c of chunks) {
+      structuralTriples.push({
+        subject: c.chunk_id,
+        predicate: "belongs_to",
+        object: filePath,
+        weight: 0,
+        source: "anchor",
+        file: filePath,
+      })
+    }
+    // File node → symbol chunks
+    // Track class chunks for has_method edges
+    const classChunkMap = new Map<string, string>() // className → chunk_id
+    for (const c of chunks) {
+      if (c.class_name && !c.function_name) {
+        // Class/interface chunk (no method = class-level)
+        const predicate = c.content.match(/\binterface\s/) ? "contains_interface" : "contains_class"
+        structuralTriples.push({
+          subject: fileNode,
+          predicate,
+          object: c.chunk_id,
+          weight: 1.0,
+          source: "structure",
+          file: filePath,
+        })
+        classChunkMap.set(c.class_name, c.chunk_id)
+      } else if (c.function_name && !c.class_name) {
+        // Top-level function
+        structuralTriples.push({
+          subject: fileNode,
+          predicate: "contains_function",
+          object: c.chunk_id,
+          weight: 1.0,
+          source: "structure",
+          file: filePath,
+        })
+      } else if (c.function_name && c.class_name) {
+        // Method inside a class → has_method edge from class chunk
+        const parentChunkId = classChunkMap.get(c.class_name)
+        if (parentChunkId) {
+          structuralTriples.push({
+            subject: parentChunkId,
+            predicate: "has_method",
+            object: c.chunk_id,
+            weight: 1.0,
+            source: "structure",
+            file: filePath,
+          })
+        } else {
+          // No class chunk found yet (methods appeared before class preamble, or class was not split)
+          // Fall back to file → method
+          structuralTriples.push({
+            subject: fileNode,
+            predicate: "contains_function",
+            object: c.chunk_id,
+            weight: 1.0,
+            source: "structure",
+            file: filePath,
+          })
+        }
+      } else if (c.heading_context) {
+        // Markdown section
+        structuralTriples.push({
+          subject: fileNode,
+          predicate: "contains",
+          object: c.chunk_id,
+          weight: 0.5,
+          source: "structure",
+          file: filePath,
+        })
+      } else {
+        // Generic content chunk
+        structuralTriples.push({
+          subject: fileNode,
+          predicate: "contains",
+          object: c.chunk_id,
+          weight: 0.3,
+          source: "structure",
+          file: filePath,
+        })
+      }
+    }
+    await this.graphDB.putEdges([...structuralTriples, ...relationTriples])
+    return relationTriples.length
   }
-  resolveChunkId(filePath: string, line: number): string | null {
-    const withoutExt = filePath.replace(/\.[^/.]+$/, "")
-    const normalizedPath = withoutExt.replace(/[^a-zA-Z0-9]/g, "_")
-    return `chunk_${normalizedPath}_0`
+  /** Resolve a file path + line to the best chunk ID.
+   *  If chunks are provided, finds the one containing the line.
+   *  Otherwise falls back to the default chunk. */
+  resolveChunkId(filePath: string, line: number, chunks?: ChunkWithId[]): string | null {
+    if (chunks && chunks.length > 0) {
+      for (const c of chunks) {
+        if (c.start_line !== undefined && c.end_line !== undefined) {
+          if (line >= c.start_line && line <= c.end_line) {
+            return c.chunk_id
+          }
+        }
+      }
+      // Line not in any chunk range — return first chunk
+      return chunks[0].chunk_id
+    }
+    // No chunks available — return default
+    return buildDefaultChunkId(filePath)
   }
   async getRelatedChunks(chunkId: string): Promise<Array<{ chunk_id: string; predicate: string; weight: number; direction: "outgoing" | "incoming" }>> {

package/vectorizer/graph-db.ts CHANGED

@@ -1,5 +1,6 @@
 import levelgraph from "levelgraph"
 import { Level } from "level"
+import { filePathFromNodeId, isStructuralPredicate } from "./graph-builder"
 export interface Triple {
   subject: string
@@ -149,23 +150,27 @@ export class GraphDB {
   async deleteFileMeta(filePath: string): Promise<void> {
     if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
-    const triples = await new Promise<Triple[]>((resolve, reject) => {
-      this.db.get(
-        { subject: `meta:${filePath}`, predicate: "graph_built" },
-        (err: Error | undefined, result: Triple[]) => {
-          if (err) reject(err)
-          else resolve(result || [])
-        },
-      )
-    })
+    try {
+      const triples = await new Promise<Triple[]>((resolve, reject) => {
+        this.db.get(
+          { subject: `meta:${filePath}`, predicate: "graph_built" },
+          (err: Error | undefined, result: Triple[]) => {
+            if (err) reject(err)
+            else resolve(result || [])
+          },
+        )
+      })
-    for (const t of triples) {
-      await new Promise<void>((resolve, reject) => {
-        this.db.del(t, (err: Error | undefined) => {
-          if (err) reject(err)
-          else resolve()
+      for (const t of triples) {
+        await new Promise<void>((resolve, reject) => {
+          this.db.del(t, (err: Error | undefined) => {
+            if (err) reject(err)
+            else resolve()
+          })
         })
-      })
+      }
+    } catch (err) {
+      // Silently ignore errors (e.g., no meta triple exists)
     }
   }
@@ -191,9 +196,10 @@ export class GraphDB {
   /**
    * Get all triples in the graph (for validation/stats).
-   * Excludes meta triples (predicate === "graph_built").
+   * Excludes meta, anchor, and structural triples by default.
+   * Pass includeStructural=true to also get structural edges.
    */
-  async getAllTriples(): Promise<Triple[]> {
+  async getAllTriples(includeStructural: boolean = false): Promise<Triple[]> {
     if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
     const allTriples = await new Promise<Triple[]>((resolve, reject) => {
@@ -203,7 +209,11 @@ export class GraphDB {
       })
     })
-    return allTriples.filter(t => t.predicate !== "graph_built")
+    return allTriples.filter(t => {
+      if (t.predicate === "graph_built" || t.predicate === "belongs_to") return false
+      if (!includeStructural && isStructuralPredicate(t.predicate)) return false
+      return true
+    })
   }
   async getRelatedFiles(chunkId: string, maxDepth: number = 1): Promise<{path: string, relation: string, weight: number}[]> {
@@ -213,58 +223,70 @@ export class GraphDB {
     const relatedFiles: Map<string, {relation: string, weight: number}> = new Map()
     const visited = new Set<string>()
+    const self = this
+    // Resolve the caller's file directly from the node ID
+    const callerFile = filePathFromNodeId(chunkId)
     async function traverse(currentId: string, currentDepth: number, currentRelation: string) {
-      if (currentDepth > maxDepth || visited.has(currentId)) {
+      if (currentDepth >= maxDepth || visited.has(currentId)) {
         return
       }
       visited.add(currentId)
       try {
         const outgoing = await new Promise<Triple[]>((resolve, reject) => {
-          this.db.get({ subject: currentId }, (err: Error | undefined, triples: Triple[]) => {
+          self.db.get({ subject: currentId }, (err: Error | undefined, triples: Triple[]) => {
             if (err) reject(err)
             else resolve(triples || [])
           })
         })
         for (const triple of outgoing) {
-          const fileId = triple.object
-          // Aggregate relations and weights
-          const existing = relatedFiles.get(fileId)
+          // Skip meta, anchor, and structural-only edges
+          if (triple.predicate === "graph_built" || triple.predicate === "belongs_to") continue
+          if (isStructuralPredicate(triple.predicate)) continue
+          // Resolve file for the target node directly from its ID
+          const targetFile = filePathFromNodeId(triple.object)
+          if (!targetFile) continue
+          const existing = relatedFiles.get(targetFile)
           if (existing) {
             existing.weight = Math.max(existing.weight, triple.weight)
           } else {
-            relatedFiles.set(fileId, {
+            relatedFiles.set(targetFile, {
               relation: currentRelation || triple.predicate,
               weight: triple.weight
             })
           }
-          // Recurse for imports/extends relations
           if (triple.predicate === "imports" || triple.predicate === "extends") {
-            await traverse(fileId, currentDepth + 1, triple.predicate)
+            await traverse(triple.object, currentDepth + 1, triple.predicate)
           }
         }
         const incoming = await new Promise<Triple[]>((resolve, reject) => {
-          this.db.get({ object: currentId }, (err: Error | undefined, triples: Triple[]) => {
+          self.db.get({ object: currentId }, (err: Error | undefined, triples: Triple[]) => {
             if (err) reject(err)
             else resolve(triples || [])
           })
         })
         for (const triple of incoming) {
-          const fileId = triple.subject
-          const existing = relatedFiles.get(fileId)
+          if (triple.predicate === "graph_built" || triple.predicate === "belongs_to") continue
+          if (isStructuralPredicate(triple.predicate)) continue
+          const sourceFile = filePathFromNodeId(triple.subject)
+          if (!sourceFile) continue
+          const existing = relatedFiles.get(sourceFile)
           if (existing) {
             existing.weight = Math.max(existing.weight, triple.weight)
           } else {
-            relatedFiles.set(fileId, {
-              relation: `used_by`,
+            relatedFiles.set(sourceFile, {
+              relation: "used_by",
               weight: triple.weight
             })
           }
@@ -273,17 +295,18 @@ export class GraphDB {
         console.error(`Error traversing graph for ${currentId}:`, error)
       }
     }
     await traverse(chunkId, 0, "")
-    const result = Array.from(relatedFiles.entries())
-      .map(([path, data]) => ({
-        path,
+    // Remove the caller's own file from results
+    if (callerFile) relatedFiles.delete(callerFile)
+    return Array.from(relatedFiles.entries())
+      .map(([filePath, data]) => ({
+        path: filePath,
         relation: data.relation,
         weight: data.weight
       }))
       .sort((a, b) => b.weight - a.weight)
-    return result
   }
 }

package/vectorizer/index.ts CHANGED

@@ -16,7 +16,7 @@ import { mergeResults, DEFAULT_HYBRID_CONFIG } from "./hybrid-search.ts";
 import { QueryCache, DEFAULT_CACHE_CONFIG } from "./query-cache.ts";
 import { SearchMetrics } from "./search-metrics.ts";
 import { GraphDB } from "./graph-db.ts";
-import { GraphBuilder } from "./graph-builder.ts";
+import { GraphBuilder, isStructuralPredicate } from "./graph-builder.ts";
 import { UsageTracker } from "./usage-tracker.ts";
 // Suppress transformers.js logs unless DEBUG is set
@@ -85,6 +85,19 @@ let HYBRID_CONFIG = { ...DEFAULT_HYBRID_CONFIG };
 let METRICS_ENABLED = false;
 let CACHE_ENABLED = true;
+// ── Graph config (v3) ───────────────────────────────────────────────────────
+const DEFAULT_GRAPH_CONFIG = {
+  enabled: true,
+  max_related: 4,
+  min_relevance: 0.5,
+  lsp: {
+    enabled: true,
+    timeout_ms: 5000,
+  },
+  read_intercept: true,
+};
+let GRAPH_CONFIG = { ...DEFAULT_GRAPH_CONFIG, lsp: { ...DEFAULT_GRAPH_CONFIG.lsp } };
 function defaultVectorizerYaml() {
   return (
     `vectorizer:\n` +
@@ -121,6 +134,16 @@ function defaultVectorizerYaml() {
     `    hybrid: true\n` +
     `    bm25_weight: 0.3\n` +
     `\n` +
+    `  # Graph-based context (v3)\n` +
+    `  graph:\n` +
+    `    enabled: true\n` +
+    `    max_related: 4\n` +
+    `    min_relevance: 0.5\n` +
+    `    lsp:\n` +
+    `      enabled: true\n` +
+    `      timeout_ms: 5000\n` +
+    `    read_intercept: true\n` +
+    `\n` +
     `  # Quality monitoring\n` +
     `  quality:\n` +
     `    enable_metrics: false\n` +
@@ -282,6 +305,26 @@ async function loadConfig(projectRoot) {
       CACHE_ENABLED = parseBool(qs, "enable_cache", true);
     }
+    // ── Parse graph config (v3) ──────────────────────────────────────────────
+    // The section body ends at the next 2-space-indented key or at end of input. JavaScript has no `\Z` anchor (it matches a literal "Z"), and `$` is per-line under the `m` flag, so end of input is written as `(?![\s\S])`.
+    const graphMatch = section.match(/^\s{2}graph:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|(?![\s\S]))/m);
+    if (graphMatch) {
+      const gs = graphMatch[1];
+      GRAPH_CONFIG.enabled = parseBool(gs, "enabled", DEFAULT_GRAPH_CONFIG.enabled);
+      GRAPH_CONFIG.max_related = parseNumber(gs, "max_related", DEFAULT_GRAPH_CONFIG.max_related);
+      GRAPH_CONFIG.min_relevance = parseNumber(gs, "min_relevance", DEFAULT_GRAPH_CONFIG.min_relevance);
+      GRAPH_CONFIG.read_intercept = parseBool(gs, "read_intercept", DEFAULT_GRAPH_CONFIG.read_intercept);
+      // Nested lsp: section — ends at the next 4-space-indented sibling key, or at end of the graph body when lsp: is the last entry
+      const lspMatch = gs.match(/^\s+lsp:\s*\n([\s\S]*?)(?=^\s{4}[a-zA-Z_\-]+:|(?![\s\S]))/m);
+      if (lspMatch) {
+        const ls = lspMatch[1];
+        GRAPH_CONFIG.lsp.enabled = parseBool(ls, "enabled", DEFAULT_GRAPH_CONFIG.lsp.enabled);
+        GRAPH_CONFIG.lsp.timeout_ms = parseNumber(ls, "timeout_ms", DEFAULT_GRAPH_CONFIG.lsp.timeout_ms);
+      }
+      if (DEBUG) console.log("[vectorizer] Graph config:", GRAPH_CONFIG);
+    }
     // Parse global exclude
     const excludeMatch = section.match(/^\s{2}exclude:\s*\n((?:\s{4}-\s+.+\n?)*)/m);
     if (excludeMatch) {
@@ -392,11 +435,19 @@ class CodebaseIndexer {
     this.db = await lancedb.connect(path.join(this.cacheDir, "lancedb"));
     await this.loadHashes();
-    const graphType = this.indexName === "docs" ? "doc_graph" : "code_graph";
-    const graphPath = path.join(this.root, ".opencode", "graph", graphType);
-    await fs.mkdir(path.dirname(graphPath), { recursive: true });
-    this.graphDB = await new GraphDB(graphPath).init();
-    this.graphBuilder = new GraphBuilder(this.graphDB, this.root);
+    // Graph DB — only if graph is enabled in config
+    if (GRAPH_CONFIG.enabled) {
+      const graphType = this.indexName === "docs" ? "doc_graph" : "code_graph";
+      const graphPath = path.join(this.root, ".opencode", "graph", graphType);
+      await fs.mkdir(path.dirname(graphPath), { recursive: true });
+      this.graphDB = await new GraphDB(graphPath).init();
+      this.graphBuilder = new GraphBuilder(
+        this.graphDB,
+        this.root,
+        GRAPH_CONFIG.lsp.enabled,
+        GRAPH_CONFIG.lsp.timeout_ms,
+      );
+    }
     // Usage tracker — provenance & usage stats
     this.usageTracker = new UsageTracker(this.cacheDir);
@@ -557,36 +608,41 @@ class CodebaseIndexer {
     // Semantic chunking
     const chunks = chunkContent(cleaned, fileMeta.file_type, fileMeta.language, CHUNKING_CONFIG);
-    // v3: Assign chunk IDs for graph tracking
-    const chunksWithIds = this.graphBuilder.assignChunkIds(relPath, chunks);
+    // v3: Assign chunk IDs for graph tracking (works without graph — just adds IDs)
+    const chunksWithIds = this.graphBuilder
+      ? this.graphBuilder.assignChunkIds(relPath, chunks)
+      : chunks.map((c, i) => ({ ...c, chunk_id: `chunk:${relPath}::_chunk_${i}` }));
     // v3: Delete old edges for this file and build new ones
-    await this.graphDB.deleteByFile(relPath);
-    const graphEdgesBuilt = await this.graphBuilder.buildEdges(relPath, content, chunksWithIds, fileMeta.file_type);
-    // Log graph creation to indexer.log
-    if (graphEdgesBuilt > 0 || DEBUG) {
-      const timestamp = new Date().toISOString().slice(11, 19);
-      const logMsg = `${timestamp} Graph built: ${relPath} (${chunksWithIds.length} chunks)`;
-      if (DEBUG) console.log(`[vectorizer] ${logMsg}`);
-      // Write to indexer.log in .opencode directory
+    let graphEdgesBuilt = 0;
+    if (this.graphBuilder && this.graphDB) {
+      await this.graphDB.deleteByFile(relPath);
+      graphEdgesBuilt = await this.graphBuilder.buildEdges(relPath, content, chunksWithIds, fileMeta.file_type);
+      // Log graph creation to indexer.log
+      if (graphEdgesBuilt > 0 || DEBUG) {
+        const timestamp = new Date().toISOString().slice(11, 19);
+        const logMsg = `${timestamp} Graph built: ${relPath} (${chunksWithIds.length} chunks)`;
+        if (DEBUG) console.log(`[vectorizer] ${logMsg}`);
+        // Write to indexer.log in .opencode directory
+        try {
+          const logPath = path.join(this.root, ".opencode", "indexer.log");
+          const fsSync = await import("fs");
+          fsSync.appendFileSync(logPath, `${logMsg}\n`);
+        } catch {
+          // non-fatal — logging is advisory
+        }
+      }
+      // FR-054: Store graph build timestamp + file hash as metadata triple
       try {
-        const logPath = path.join(this.root, ".opencode", "indexer.log");
-        const fsSync = await import("fs");
-        fsSync.appendFileSync(logPath, `${logMsg}\n`);
+        await this.graphDB.setFileMeta(relPath, hash, Date.now());
       } catch {
-        // non-fatal — logging is advisory
+        // non-fatal — metadata is advisory
       }
     }
-    // FR-054: Store graph build timestamp + file hash as metadata triple
-    try {
-      await this.graphDB.setFileMeta(relPath, hash, Date.now());
-    } catch {
-      // non-fatal — metadata is advisory
-    }
     const data = [];
     for (let i = 0; i < chunksWithIds.length; i++) {
       const embedding = await this.embed(chunksWithIds[i].content);
@@ -606,9 +662,9 @@ class CodebaseIndexer {
         function_name: chunksWithIds[i].function_name || "",
         class_name: chunksWithIds[i].class_name || "",
         tags: (fileMeta.tags || []).join(","),
-        // Line numbers for "from-to" extraction
-        start_line: chunksWithIds[i].start_line,
-        end_line: chunksWithIds[i].end_line,
+        // Line numbers for "from-to" extraction (default to -1 when unknown)
+        start_line: chunksWithIds[i].start_line ?? -1,
+        end_line: chunksWithIds[i].end_line ?? -1,
       });
     }
@@ -648,7 +704,7 @@ class CodebaseIndexer {
     const table = await this.db.openTable(tableName);
     let allRows;
     try {
-      allRows = await table.filter("").limit(100000).execute();
+      allRows = await table.filter("true").limit(100000).execute();
     } catch (e) {
       if (DEBUG) console.log("[vectorizer] BM25 index build failed (corrupted table?):", e.message);
       return null;
@@ -712,10 +768,15 @@ class CodebaseIndexer {
           const bm25Results = bm25.search(query, fetchLimit);
           // Build score maps
+          // Converts a LanceDB _distance into a similarity score. Treating it as L2 (euclidean)
+          // on normalized vectors, L2 ∈ [0, 2], so similarity = 1 - (distance / 2), clamped at 0; a missing distance maps to 0.5.
+          // NOTE(review): if _distance is really squared L2 (range [0, 4]), this is cosine similarity clamped at 0 — confirm the metric.
+          const distanceToScore = (d: number | null | undefined) =>
+            d != null ? Math.max(0, 1 - d / 2) : 0.5;
           const vectorScores = new Map();
           for (let i = 0; i < results.length; i++) {
-            const score = results[i]._distance != null ? 1 - results[i]._distance : 0.5;
-            vectorScores.set(i, score);
+            vectorScores.set(i, distanceToScore(results[i]._distance));
           }
           const bm25Scores = new Map();
@@ -730,7 +791,7 @@ class CodebaseIndexer {
           for (let i = 0; i < results.length; i++) {
             const key = `${results[i].file}:${results[i].chunk_index}`;
-            const vs = results[i]._distance != null ? 1 - results[i]._distance : 0.5;
+            const vs = distanceToScore(results[i]._distance);
             resultMap.set(key, { row: results[i], vectorScore: vs, bm25Score: 0 });
           }
@@ -831,7 +892,10 @@ class CodebaseIndexer {
         const outgoing = await this.graphDB.getOutgoing(result.chunk_id);
         const incoming = await this.graphDB.getIncoming(result.chunk_id);
-        const allEdges = [...outgoing, ...incoming];
+        // Filter out structural and meta edges — only relation edges are useful for context
+        const allEdges = [...outgoing, ...incoming].filter(
+          e => e.predicate !== "belongs_to" && e.predicate !== "graph_built" && !isStructuralPredicate(e.predicate)
+        );
         const neighbors = [];
         for (const edge of allEdges) {
@@ -852,8 +916,13 @@ class CodebaseIndexer {
           });
         }
+        // Apply min_relevance filter, then cap at max_related
         neighbors.sort((a, b) => b.score - a.score);
-        result.relatedContext = neighbors.slice(0, 3);
+        const minRelevance = GRAPH_CONFIG.min_relevance ?? 0.5;
+        const maxRelated = GRAPH_CONFIG.max_related ?? 4;
+        result.relatedContext = neighbors
+          .filter(n => n.score >= minRelevance)
+          .slice(0, maxRelated);
         // FR-060: Record provenance for each attached chunk
         if (this.usageTracker) {
@@ -894,7 +963,7 @@ class CodebaseIndexer {
       const table = await this.db.openTable(tableName);
       let rows;
       try {
-        rows = await table.filter("").limit(100000).execute();
+        rows = await table.filter("true").limit(100000).execute();
       } catch (e) {
         if (DEBUG) console.log("[vectorizer] Chunk cache build failed (corrupted table?):", e.message);
         return null;
@@ -1032,7 +1101,7 @@ class CodebaseIndexer {
         const tables = await this.db.tableNames();
         if (tables.includes(tableName)) {
           const table = await this.db.openTable(tableName);
-          const allRows = await table.filter("").limit(100000).execute();
+          const allRows = await table.filter("true").limit(100000).execute();
           const chunkData = allRows
             .filter(r => r.chunk_id && r.vector)
             .map(r => ({ chunk_id: r.chunk_id, vector: Array.from(r.vector), file: r.file }));

package/vectorizer.yaml CHANGED Viewed

@@ -61,6 +61,22 @@ vectorizer:
   # Indexes to maintain - each has pattern (what to include) and ignore (what to skip)
   indexes:
+    # Source code index - all common programming languages
+    code:
+      enabled: true
+      pattern: "**/*.{js,ts,jsx,tsx,mjs,cjs,py,go,rs,java,kt,swift,c,cpp,h,hpp,cs,rb,php,scala,clj}"
+      ignore:
+        - "**/node_modules/**"
+        - "**/.git/**"
+        - "**/dist/**"
+        - "**/build/**"
+        - "**/.opencode/**"
+        - "**/docs/**"
+        - "**/vendor/**"
+        - "**/__pycache__/**"
+      hybrid: true
+      bm25_weight: 0.3
     # Documentation index - markdown, text files
     docs:
       enabled: true