@comfanion/usethis_search 0.1.5 → 3.0.0-dev.0
This diff shows the changes between publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
- package/README.md +98 -7
- package/file-indexer.ts +21 -1
- package/index.ts +2 -0
- package/package.json +20 -3
- package/tools/codeindex.ts +135 -16
- package/tools/read-interceptor.ts +54 -0
- package/tools/search.ts +60 -12
- package/vectorizer/analyzers/lsp-analyzer.ts +162 -0
- package/vectorizer/analyzers/regex-analyzer.ts +255 -0
- package/vectorizer/bm25-index.ts +155 -0
- package/vectorizer/chunkers/chunker-factory.ts +98 -0
- package/vectorizer/chunkers/code-chunker.ts +325 -0
- package/vectorizer/chunkers/markdown-chunker.ts +177 -0
- package/vectorizer/content-cleaner.ts +136 -0
- package/vectorizer/graph-builder.ts +95 -0
- package/vectorizer/graph-db.ts +97 -0
- package/vectorizer/hybrid-search.ts +97 -0
- package/vectorizer/index.js +470 -17
- package/vectorizer/metadata-extractor.ts +125 -0
- package/vectorizer/query-cache.ts +126 -0
- package/vectorizer/search-metrics.ts +155 -0
- package/vectorizer.yaml +95 -0
package/tools/search.ts
CHANGED
@@ -1,7 +1,8 @@
 /**
- * Semantic Code Search Tool
+ * Semantic Code Search Tool (v2)
  *
  * Uses local embeddings + LanceDB vector store via bundled vectorizer.
+ * v2: hybrid search, metadata filtering, rich result metadata.
  * Index data is stored in `.opencode/vectors/<index>/`.
  */
 
@@ -33,6 +34,13 @@ Examples:
     searchAll: tool.schema.boolean().optional().default(false).describe("Search all indexes instead of just one"),
     freshen: tool.schema.boolean().optional().default(true).describe("Auto-update stale files before searching (default: true)"),
     includeArchived: tool.schema.boolean().optional().default(false).describe("Include archived files in results (default: false). Files are archived if in /archive/ folder or have 'archived: true' in frontmatter."),
+    // v2 params
+    hybrid: tool.schema.boolean().optional().describe("Enable hybrid search (vector + BM25 keyword matching). Improves exact keyword recall."),
+    fileType: tool.schema.string().optional().describe("Filter by file type: 'code', 'docs', or 'config'"),
+    language: tool.schema.string().optional().describe("Filter by language: 'typescript', 'python', 'markdown', etc."),
+    modifiedAfter: tool.schema.string().optional().describe("Filter: only files modified after this ISO date (e.g. '2024-01-01')"),
+    modifiedBefore: tool.schema.string().optional().describe("Filter: only files modified before this ISO date"),
+    tags: tool.schema.string().optional().describe("Filter by frontmatter tags (comma-separated, e.g. 'auth,security')"),
   },
 
   async execute(args) {
@@ -43,6 +51,15 @@ Examples:
       const limit = args.limit || 10
       const indexName = args.index || "code"
 
+      // Build search options from v2 params
+      const searchOptions: Record<string, any> = {}
+      if (args.hybrid != null) searchOptions.hybrid = args.hybrid
+      if (args.fileType) searchOptions.fileType = args.fileType
+      if (args.language) searchOptions.language = args.language
+      if (args.modifiedAfter) searchOptions.modifiedAfter = args.modifiedAfter
+      if (args.modifiedBefore) searchOptions.modifiedBefore = args.modifiedBefore
+      if (args.tags) searchOptions.tags = args.tags.split(",").map((t: string) => t.trim()).filter(Boolean)
+
       // Auto-freshen stale files before searching
       if (args.freshen !== false) {
        const tempIndexer = await new CodebaseIndexer(projectRoot, indexName).init()
@@ -56,7 +73,7 @@ Examples:
        await tempIndexer.unloadModel()
 
        if (indexes.length === 0) {
-         return
+         return `No indexes found. Create one with: codeindex({ action: "reindex", index: "code" })`
        }
 
        for (const idx of indexes) {
@@ -64,52 +81,83 @@ Examples:
          if (args.freshen !== false) {
            await indexer.freshen()
          }
-         const results = await indexer.search(args.query, limit, args.includeArchived)
+         const results = await indexer.search(args.query, limit, args.includeArchived, searchOptions)
          allResults.push(...results.map((r: any) => ({ ...r, _index: idx })))
          await indexer.unloadModel()
        }
 
-       allResults.sort((a, b) =>
+       allResults.sort((a, b) => {
+         // Prefer combinedScore (hybrid), fall back to distance
+         const scoreA = a._combinedScore ?? (a._distance != null ? 1 - a._distance : 0)
+         const scoreB = b._combinedScore ?? (b._distance != null ? 1 - b._distance : 0)
+         return scoreB - scoreA
+       })
        allResults = allResults.slice(0, limit)
      } else {
        const hashesFile = path.join(projectRoot, ".opencode", "vectors", indexName, "hashes.json")
        try {
          await fs.access(hashesFile)
        } catch {
-         return
+         return `Index "${indexName}" not found. Create it with: codeindex({ action: "reindex", index: "${indexName}" })`
        }
 
        const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
-       const results = await indexer.search(args.query, limit, args.includeArchived)
+       const results = await indexer.search(args.query, limit, args.includeArchived, searchOptions)
        allResults = results.map((r: any) => ({ ...r, _index: indexName }))
        await indexer.unloadModel()
      }
 
      if (allResults.length === 0) {
        const scope = args.searchAll ? "any index" : `index "${indexName}"`
-       return `No results found in ${scope} for: "${args.query}"\n\nTry:\n- Different keywords\n- Re-index with: codeindex({ action: "reindex", index: "${indexName}" })`
+       return `No results found in ${scope} for: "${args.query}"\n\nTry:\n- Different keywords\n- Enable hybrid search: search({ query: "...", hybrid: true })\n- Re-index with: codeindex({ action: "reindex", index: "${indexName}" })`
      }
 
      const scope = args.searchAll ? "all indexes" : `index "${indexName}"`
-
+     const hybridLabel = args.hybrid ? " [hybrid]" : ""
+     let output = `## Search Results for: "${args.query}" (${scope}${hybridLabel})\n\n`
 
      for (let i = 0; i < allResults.length; i++) {
        const r = allResults[i]
-       const score = r.
+       const score = r._combinedScore != null
+         ? r._combinedScore.toFixed(3)
+         : r._distance != null
+           ? (1 - r._distance).toFixed(3)
+           : "N/A"
        const indexLabel = args.searchAll ? ` [${r._index}]` : ""
 
+       // v2: show rich metadata when available
+       const metaParts: string[] = []
+       if (r.language && r.language !== "unknown") metaParts.push(r.language)
+       if (r.heading_context) metaParts.push(`"${r.heading_context}"`)
+       if (r.function_name) metaParts.push(`fn: ${r.function_name}`)
+       if (r.class_name) metaParts.push(`class: ${r.class_name}`)
+       const metaLine = metaParts.length > 0 ? ` (${metaParts.join(", ")})` : ""
+
        output += `### ${i + 1}. ${r.file}${indexLabel}\n`
-       output += `**Relevance:** ${score}\n\n`
+       output += `**Relevance:** ${score}${metaLine}\n\n`
        output += "```\n"
        const content = r.content.length > 500 ? r.content.substring(0, 500) + "\n... (truncated)" : r.content
        output += content
-       output += "\n```\n
+       output += "\n```\n"
+
+       if (r.relatedContext && r.relatedContext.length > 0) {
+         output += "\n**Related Context:**\n"
+         for (const rel of r.relatedContext) {
+           const snippet = rel.content.length > 200
+             ? rel.content.substring(0, 200) + "..."
+             : rel.content
+           output += `- **${rel.file}** (${rel.relation}, via ${rel.via}, score: ${rel.score.toFixed(2)})\n`
+           output += `  \`\`\`\n  ${snippet}\n  \`\`\`\n`
+         }
+       }
+
+       output += "\n"
      }
 
      output += `---\n*Found ${allResults.length} results. Use Read tool to see full files.*`
      return output
    } catch (error: any) {
-     return
+     return `Search failed: ${error.message || String(error)}`
    }
  },
})
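The v2 parameters are additive and optional, so existing callers keep working. Below is a hypothetical invocation sketch; the parameter names and the `searchOptions` splitting behavior come from the schema and option-building code above, but the exact tool-call syntax depends on how the opencode host invokes tools and is an assumption here.

```ts
// Illustrative only: names from the schema above; call shape assumed.
search({
  query: "token refresh flow",
  index: "code",
  hybrid: true,                 // vector + BM25 keyword matching
  fileType: "code",             // 'code' | 'docs' | 'config'
  language: "typescript",
  modifiedAfter: "2024-01-01",  // ISO date filter
  tags: "auth,security",        // split on "," and trimmed into searchOptions.tags
})
```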
package/vectorizer/analyzers/lsp-analyzer.ts
ADDED
@@ -0,0 +1,162 @@
+import { ChunkWithId } from "../graph-builder"
+
+export interface Relation {
+  from: string
+  to: string
+  predicate: string
+  weight: number
+  source: "lsp"
+  line?: number
+}
+
+export class LSPAnalyzer {
+  private readonly timeout = 5000
+
+  async isAvailable(filePath: string): Promise<boolean> {
+    try {
+      const ext = filePath.split(".").pop()
+      if (!ext) return false
+
+      const language = this.getLanguage(ext)
+      if (!language) return false
+
+      return this.checkLSPServer(language)
+    } catch {
+      return false
+    }
+  }
+
+  async analyzeFile(filePath: string, chunks: ChunkWithId[]): Promise<Relation[]> {
+    const relations: Relation[] = []
+
+    try {
+      const ext = filePath.split(".").pop()
+      if (!ext) return []
+
+      const language = this.getLanguage(ext)
+      if (!language) return []
+
+      const lines = await this.readFileLines(filePath)
+
+      const symbols = await this.getDocumentSymbols(filePath, language)
+      if (!symbols) return []
+
+      for (const symbol of symbols) {
+        const fromChunkId = this.findChunkForPosition(chunks, symbol.line)
+        if (!fromChunkId) continue
+
+        if (symbol.type === "class" || symbol.type === "interface") {
+          const implementations = await this.getImplementations(filePath, symbol.line, symbol.character, language)
+          for (const impl of implementations) {
+            const toChunkId = this.resolveTargetChunk(filePath, impl)
+            if (toChunkId) {
+              relations.push({
+                from: fromChunkId,
+                to: toChunkId,
+                predicate: "implements",
+                weight: 1.0,
+                source: "lsp"
+              })
+            }
+          }
+        }
+
+        const references = await this.getReferences(filePath, symbol.line, symbol.character, language)
+        for (const ref of references) {
+          const toChunkId = this.resolveTargetChunk(filePath, ref)
+          if (toChunkId && toChunkId !== fromChunkId) {
+            relations.push({
+              from: toChunkId,
+              to: fromChunkId,
+              predicate: "used_by",
+              weight: 1.0,
+              source: "lsp"
+            })
+          }
+        }
+
+        const definitions = await this.getDefinitions(filePath, symbol.line, symbol.character, language)
+        for (const def of definitions) {
+          const toChunkId = this.resolveTargetChunk(filePath, def)
+          if (toChunkId && toChunkId !== fromChunkId) {
+            relations.push({
+              from: fromChunkId,
+              to: toChunkId,
+              predicate: "references",
+              weight: 1.0,
+              source: "lsp"
+            })
+          }
+        }
+      }
+    } catch (error) {
+      return []
+    }
+
+    return relations
+  }
+
+  private getLanguage(ext: string): string | null {
+    const map: Record<string, string> = {
+      ts: "typescript",
+      js: "javascript",
+      tsx: "typescriptreact",
+      jsx: "javascriptreact",
+      py: "python",
+      go: "go",
+      rs: "rust",
+      java: "java",
+      cpp: "cpp",
+      c: "c",
+      cs: "csharp"
+    }
+    return map[ext] || null
+  }
+
+  private checkLSPServer(language: string): Promise<boolean> {
+    return Promise.resolve(false)
+  }
+
+  private async readFileLines(filePath: string): Promise<string[]> {
+    const fs = await import("fs/promises")
+    const content = await fs.readFile(filePath, "utf-8")
+    return content.split("\n")
+  }
+
+  private async getDocumentSymbols(filePath: string, language: string): Promise<Array<{ name: string; type: string; line: number; character: number }> | null> {
+    return null
+  }
+
+  private async getImplementations(filePath: string, line: number, character: number, language: string): Promise<Array<{ file: string; line: number; character: number }>> {
+    return []
+  }
+
+  private async getReferences(filePath: string, line: number, character: number, language: string): Promise<Array<{ file: string; line: number; character: number }>> {
+    return []
+  }
+
+  private async getDefinitions(filePath: string, line: number, character: number, language: string): Promise<Array<{ file: string; line: number; character: number }>> {
+    return []
+  }
+
+  private findChunkForPosition(chunks: ChunkWithId[], line: number): string | null {
+    for (const chunk of chunks) {
+      if (chunk.start_line !== undefined && chunk.end_line !== undefined) {
+        if (line >= chunk.start_line && line <= chunk.end_line) {
+          return chunk.chunk_id
+        }
+      }
+    }
+    return null
+  }
+
+  private resolveTargetChunk(currentFile: string, target: { file: string; line: number; character: number }): string | null {
+    if (target.file !== currentFile) {
+      const path = target.file.replace(/[^a-zA-Z0-9]/g, "_")
+      return `chunk_${path}_0`
+    }
+
+    const normalized = currentFile.replace(/[^a-zA-Z0-9]/g, "_")
+    return `chunk_${normalized}_0`
+  }
+}
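Worth noting: the LSP transport in this file is stubbed. `checkLSPServer()` resolves `false` and `getDocumentSymbols()` returns `null`, so both public methods currently short-circuit to empty results. A minimal usage sketch, assuming the `ChunkWithId` shape inferred from how the analyzer reads it (`chunk_id`, `content`, optional `start_line`/`end_line`); the import path is illustrative:

```ts
import { LSPAnalyzer } from "./vectorizer/analyzers/lsp-analyzer"

const analyzer = new LSPAnalyzer()
// Hypothetical chunk following the package's chunk_<normalized path>_0 id convention.
const chunks = [{ chunk_id: "chunk_src_index_ts_0", content: "...", start_line: 0, end_line: 50 }]

if (await analyzer.isAvailable("src/index.ts")) {
  const relations = await analyzer.analyzeFile("src/index.ts", chunks)
  // relations: [{ from, to, predicate: "implements" | "used_by" | "references", weight, source: "lsp" }]
}
// As shipped, both branches short-circuit, so no LSP relations are produced yet.
```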
package/vectorizer/analyzers/regex-analyzer.ts
ADDED
@@ -0,0 +1,255 @@
+import path from "path"
+import { ChunkWithId } from "../graph-builder"
+
+export interface Relation {
+  from: string
+  to: string
+  predicate: string
+  weight: number
+  source: "regex" | "markdown"
+  line?: number
+}
+
+export class RegexAnalyzer {
+  private readonly patterns = {
+    jsImports: /import\s+(?:\{[^}]+\}|\w+)\s+from\s+['"]([^'"]+)['"]/g,
+    pythonFromImport: /from\s+(\S+)\s+import/g,
+    pythonImport: /import\s+(\S+)/g,
+    extends: /class\s+\w+\s+extends\s+(\w+)/g,
+    implements: /class\s+\w+\s+implements\s+([^{]+)/g,
+    markdownLink: /\[([^\]]+)\]\(([^)]+)\)/g
+  }
+
+  analyzeCode(filePath: string, content: string, chunks: ChunkWithId[]): Relation[] {
+    const relations: Relation[] = []
+    const ext = path.extname(filePath)
+    const lines = content.split("\n")
+
+    if ([".js", ".ts", ".jsx", ".tsx"].includes(ext)) {
+      this.analyzeJSCode(content, lines, filePath, chunks, relations)
+    } else if ([".py"].includes(ext)) {
+      this.analyzePythonCode(content, lines, filePath, chunks, relations)
+    }
+
+    return relations
+  }
+
+  analyzeMarkdown(filePath: string, content: string, chunks: ChunkWithId[]): Relation[] {
+    const relations: Relation[] = []
+    const lines = content.split("\n")
+    const dir = path.dirname(filePath)
+
+    let match
+    this.patterns.markdownLink.lastIndex = 0
+    while ((match = this.patterns.markdownLink.exec(content)) !== null) {
+      const linkText = match[1]
+      const linkTarget = match[2]
+      const lineIndex = content.substring(0, match.index).split("\n").length - 1
+      const line = lines[lineIndex]
+
+      const targetPath = this.resolvePath(filePath, linkTarget)
+      if (!targetPath) continue
+
+      const fromChunkId = this.findChunkForLine(chunks, lineIndex)
+      if (!fromChunkId) continue
+
+      const toChunkId = this.findChunkForLinkTarget(targetPath, linkTarget, chunks)
+      if (toChunkId) {
+        relations.push({
+          from: fromChunkId,
+          to: toChunkId,
+          predicate: "links_to",
+          weight: 1.0,
+          source: "markdown",
+          line: lineIndex
+        })
+      }
+    }
+
+    return relations
+  }
+
+  private analyzeJSCode(content: string, lines: string[], filePath: string, chunks: ChunkWithId[], relations: Relation[]) {
+    let match
+
+    this.patterns.jsImports.lastIndex = 0
+    while ((match = this.patterns.jsImports.exec(content)) !== null) {
+      const importPath = match[1]
+      const lineIndex = content.substring(0, match.index).split("\n").length - 1
+      const line = lines[lineIndex]
+
+      if (importPath.startsWith(".")) {
+        const targetPath = this.resolvePath(filePath, importPath)
+        if (!targetPath) continue
+
+        const fromChunkId = this.findChunkForLine(chunks, lineIndex)
+        if (!fromChunkId) continue
+
+        const toChunkId = this.findFirstChunkInFile(targetPath)
+        if (toChunkId) {
+          relations.push({
+            from: fromChunkId,
+            to: toChunkId,
+            predicate: "imports",
+            weight: 0.8,
+            source: "regex",
+            line: lineIndex
+          })
+        }
+      }
+    }
+
+    this.patterns.extends.lastIndex = 0
+    while ((match = this.patterns.extends.exec(content)) !== null) {
+      const parentClass = match[1]
+      const lineIndex = content.substring(0, match.index).split("\n").length - 1
+
+      const fromChunkId = this.findChunkForLine(chunks, lineIndex)
+      if (!fromChunkId) continue
+
+      const toChunkId = this.findChunkContainingSymbol(chunks, parentClass)
+      if (toChunkId) {
+        relations.push({
+          from: fromChunkId,
+          to: toChunkId,
+          predicate: "extends",
+          weight: 0.8,
+          source: "regex",
+          line: lineIndex
+        })
+      }
+    }
+
+    this.patterns.implements.lastIndex = 0
+    while ((match = this.patterns.implements.exec(content)) !== null) {
+      const interfaces = match[1].split(",").map(s => s.trim())
+      const lineIndex = content.substring(0, match.index).split("\n").length - 1
+
+      const fromChunkId = this.findChunkForLine(chunks, lineIndex)
+      if (!fromChunkId) continue
+
+      for (const iface of interfaces) {
+        const toChunkId = this.findChunkContainingSymbol(chunks, iface)
+        if (toChunkId) {
+          relations.push({
+            from: fromChunkId,
+            to: toChunkId,
+            predicate: "implements",
+            weight: 0.8,
+            source: "regex",
+            line: lineIndex
+          })
+        }
+      }
+    }
+  }
+
+  private analyzePythonCode(content: string, lines: string[], filePath: string, chunks: ChunkWithId[], relations: Relation[]) {
+    let match
+
+    this.patterns.pythonFromImport.lastIndex = 0
+    while ((match = this.patterns.pythonFromImport.exec(content)) !== null) {
+      const importPath = match[1]
+      const lineIndex = content.substring(0, match.index).split("\n").length - 1
+
+      if (importPath.startsWith(".")) {
+        const targetPath = this.resolvePath(filePath, importPath)
+        if (!targetPath) continue
+
+        const fromChunkId = this.findChunkForLine(chunks, lineIndex)
+        if (!fromChunkId) continue
+
+        const toChunkId = this.findFirstChunkInFile(targetPath)
+        if (toChunkId) {
+          relations.push({
+            from: fromChunkId,
+            to: toChunkId,
+            predicate: "imports",
+            weight: 0.8,
+            source: "regex",
+            line: lineIndex
+          })
+        }
+      }
+    }
+
+    this.patterns.pythonImport.lastIndex = 0
+    while ((match = this.patterns.pythonImport.exec(content)) !== null) {
+      const importPath = match[1]
+      const lineIndex = content.substring(0, match.index).split("\n").length - 1
+
+      if (importPath.startsWith(".")) {
+        const targetPath = this.resolvePath(filePath, importPath)
+        if (!targetPath) continue
+
+        const fromChunkId = this.findChunkForLine(chunks, lineIndex)
+        if (!fromChunkId) continue
+
+        const toChunkId = this.findFirstChunkInFile(targetPath)
+        if (toChunkId) {
+          relations.push({
+            from: fromChunkId,
+            to: toChunkId,
+            predicate: "imports",
+            weight: 0.8,
+            source: "regex",
+            line: lineIndex
+          })
+        }
+      }
+    }
+  }
+
+  private resolvePath(filePath: string, target: string): string | null {
+    try {
+      const dir = path.dirname(filePath)
+      const absoluteTarget = path.resolve(dir, target)
+
+      if (!absoluteTarget.startsWith(process.cwd())) {
+        return null
+      }
+
+      return path.relative(process.cwd(), absoluteTarget)
+    } catch {
+      return null
+    }
+  }
+
+  private findChunkForLine(chunks: ChunkWithId[], lineIndex: number): string | null {
+    for (const chunk of chunks) {
+      if (chunk.start_line !== undefined && chunk.end_line !== undefined) {
+        if (lineIndex >= chunk.start_line && lineIndex <= chunk.end_line) {
+          return chunk.chunk_id
+        }
+      }
+    }
+    return null
+  }
+
+  private findFirstChunkInFile(targetPath: string): string | null {
+    const normalized = targetPath.replace(/[^a-zA-Z0-9]/g, "_")
+    return `chunk_${normalized}_0`
+  }
+
+  private findChunkContainingSymbol(chunks: ChunkWithId[], symbol: string): string | null {
+    for (const chunk of chunks) {
+      if (chunk.content.includes(symbol)) {
+        return chunk.chunk_id
+      }
+    }
+    return null
+  }
+
+  private findChunkForLinkTarget(targetPath: string, linkTarget: string, chunks: ChunkWithId[]): string | null {
+    const hashIndex = linkTarget.indexOf("#")
+    if (hashIndex !== -1) {
+      const heading = linkTarget.substring(hashIndex + 1).toLowerCase()
+      for (const chunk of chunks) {
+        if (chunk.heading_context && chunk.heading_context.toLowerCase().includes(heading)) {
+          return chunk.chunk_id
+        }
+      }
+    }
+    return this.findFirstChunkInFile(targetPath)
+  }
+}
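Unlike the LSP analyzer, the regex analyzer is fully functional as shipped. A small usage sketch, assuming the same inferred `ChunkWithId` shape; the file paths and chunk literal are hypothetical, and chunk ids follow the file's own `chunk_<normalized path>_0` convention:

```ts
import { RegexAnalyzer, Relation } from "./vectorizer/analyzers/regex-analyzer"

const analyzer = new RegexAnalyzer()
const source = `import { db } from "./db"\nexport class App extends Base {}`
// One chunk covering both lines of the sample file.
const chunks = [{ chunk_id: "chunk_src_app_ts_0", content: source, start_line: 0, end_line: 1 }]

const rels: Relation[] = analyzer.analyzeCode("src/app.ts", source, chunks)
// Produces an "imports" relation pointing at chunk_src_db_0 (weight 0.8), and,
// because findChunkContainingSymbol finds "Base" in the same chunk's content,
// an "extends" relation whose target is chunk_src_app_ts_0 itself.
```

Note the self-referential "extends" edge in the sketch: `findChunkContainingSymbol` matches on raw substring inclusion, so the declaring chunk can satisfy its own lookup. This is a heuristic trade-off of the regex approach that the LSP analyzer is presumably meant to refine.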