@comfanion/usethis_search 0.2.0-dev.0 → 3.0.0-dev.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- /dev/null
+++ analyzers/regex-analyzer.ts
@@ -0,0 +1,255 @@
+ import path from "path"
+ import { ChunkWithId } from "../graph-builder"
+
+ export interface Relation {
+   from: string
+   to: string
+   predicate: string
+   weight: number
+   source: "regex" | "markdown"
+   line?: number
+ }
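+
+ // Illustrative shape (hypothetical ids): a relative import on line 3 of
+ // src/app.ts that targets src/db.ts would be recorded as
+ //   { from: "chunk_src_app_0", to: "chunk_src_db_0",
+ //     predicate: "imports", weight: 0.8, source: "regex", line: 3 }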
+
+ export class RegexAnalyzer {
+   private readonly patterns = {
+     jsImports: /import\s+(?:\{[^}]+\}|\w+)\s+from\s+['"]([^'"]+)['"]/g,
+     pythonFromImport: /from\s+(\S+)\s+import/g,
+     pythonImport: /import\s+(\S+)/g,
+     extends: /class\s+\w+\s+extends\s+(\w+)/g,
+     // Allow an optional `extends` clause so `class A extends B implements C` matches
+     implements: /class\s+\w+(?:\s+extends\s+\w+)?\s+implements\s+([^{]+)/g,
+     markdownLink: /\[([^\]]+)\]\(([^)]+)\)/g
+   }
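+
+   // Heuristic coverage note: jsImports matches named and default imports
+   // such as `import { a } from "./x"` and `import x from "./x"`, but not
+   // namespace (`import * as ns from "./x"`) or side-effect (`import "./x"`)
+   // forms; unmatched imports simply produce no edge.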
+
+   analyzeCode(filePath: string, content: string, chunks: ChunkWithId[]): Relation[] {
+     const relations: Relation[] = []
+     const ext = path.extname(filePath)
+
+     if ([".js", ".ts", ".jsx", ".tsx"].includes(ext)) {
+       this.analyzeJSCode(content, filePath, chunks, relations)
+     } else if (ext === ".py") {
+       this.analyzePythonCode(content, filePath, chunks, relations)
+     }
+
+     return relations
+   }
+
+   analyzeMarkdown(filePath: string, content: string, chunks: ChunkWithId[]): Relation[] {
+     const relations: Relation[] = []
+
+     let match
+     this.patterns.markdownLink.lastIndex = 0
+     while ((match = this.patterns.markdownLink.exec(content)) !== null) {
+       const linkTarget = match[2]
+       // 0-based line of the match, derived from the offset into the content
+       const lineIndex = content.substring(0, match.index).split("\n").length - 1
+
+       const targetPath = this.resolvePath(filePath, linkTarget)
+       if (!targetPath) continue
+
+       const fromChunkId = this.findChunkForLine(chunks, lineIndex)
+       if (!fromChunkId) continue
+
+       const toChunkId = this.findChunkForLinkTarget(targetPath, linkTarget, chunks)
+       if (toChunkId) {
+         relations.push({
+           from: fromChunkId,
+           to: toChunkId,
+           predicate: "links_to",
+           weight: 1.0,
+           source: "markdown",
+           line: lineIndex
+         })
+       }
+     }
+
+     return relations
+   }
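+
+   // Example (hypothetical files): a link `[API](./api.md#usage)` inside
+   // docs/guide.md resolves to docs/api.md; if some chunk's heading_context
+   // contains "usage", that chunk becomes the edge target, otherwise the
+   // file's first chunk does.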
+
+   private analyzeJSCode(content: string, filePath: string, chunks: ChunkWithId[], relations: Relation[]) {
+     let match
+
+     // Relative imports: `import x from "./mod"` becomes an "imports" edge
+     this.patterns.jsImports.lastIndex = 0
+     while ((match = this.patterns.jsImports.exec(content)) !== null) {
+       const importPath = match[1]
+       const lineIndex = content.substring(0, match.index).split("\n").length - 1
+
+       if (importPath.startsWith(".")) {
+         const targetPath = this.resolvePath(filePath, importPath)
+         if (!targetPath) continue
+
+         const fromChunkId = this.findChunkForLine(chunks, lineIndex)
+         if (!fromChunkId) continue
+
+         const toChunkId = this.findFirstChunkInFile(targetPath)
+         if (toChunkId) {
+           relations.push({
+             from: fromChunkId,
+             to: toChunkId,
+             predicate: "imports",
+             weight: 0.8,
+             source: "regex",
+             line: lineIndex
+           })
+         }
+       }
+     }
+
+     // `class A extends B`: link the subclass chunk to the chunk mentioning B
+     this.patterns.extends.lastIndex = 0
+     while ((match = this.patterns.extends.exec(content)) !== null) {
+       const parentClass = match[1]
+       const lineIndex = content.substring(0, match.index).split("\n").length - 1
+
+       const fromChunkId = this.findChunkForLine(chunks, lineIndex)
+       if (!fromChunkId) continue
+
+       const toChunkId = this.findChunkContainingSymbol(chunks, parentClass)
+       if (toChunkId) {
+         relations.push({
+           from: fromChunkId,
+           to: toChunkId,
+           predicate: "extends",
+           weight: 0.8,
+           source: "regex",
+           line: lineIndex
+         })
+       }
+     }
+
+     // `class A implements X, Y`: one edge per implemented interface
+     this.patterns.implements.lastIndex = 0
+     while ((match = this.patterns.implements.exec(content)) !== null) {
+       const interfaces = match[1].split(",").map(s => s.trim())
+       const lineIndex = content.substring(0, match.index).split("\n").length - 1
+
+       const fromChunkId = this.findChunkForLine(chunks, lineIndex)
+       if (!fromChunkId) continue
+
+       for (const iface of interfaces) {
+         const toChunkId = this.findChunkContainingSymbol(chunks, iface)
+         if (toChunkId) {
+           relations.push({
+             from: fromChunkId,
+             to: toChunkId,
+             predicate: "implements",
+             weight: 0.8,
+             source: "regex",
+             line: lineIndex
+           })
+         }
+       }
+     }
+   }
+
+   private analyzePythonCode(content: string, filePath: string, chunks: ChunkWithId[], relations: Relation[]) {
+     let match
+
+     // `from .mod import x`: dotted relative modules are mapped to paths first
+     this.patterns.pythonFromImport.lastIndex = 0
+     while ((match = this.patterns.pythonFromImport.exec(content)) !== null) {
+       const importPath = match[1]
+       const lineIndex = content.substring(0, match.index).split("\n").length - 1
+
+       if (importPath.startsWith(".")) {
+         const targetPath = this.resolvePath(filePath, this.pythonModuleToPath(importPath))
+         if (!targetPath) continue
+
+         const fromChunkId = this.findChunkForLine(chunks, lineIndex)
+         if (!fromChunkId) continue
+
+         const toChunkId = this.findFirstChunkInFile(targetPath)
+         if (toChunkId) {
+           relations.push({
+             from: fromChunkId,
+             to: toChunkId,
+             predicate: "imports",
+             weight: 0.8,
+             source: "regex",
+             line: lineIndex
+           })
+         }
+       }
+     }
+
+     // Plain `import mod` statements. Note that Python forbids relative syntax
+     // here (`import .mod` is a SyntaxError), so the startsWith(".") guard
+     // below only fires on malformed input; absolute imports are intentionally
+     // skipped because they cannot be resolved to project files.
+     this.patterns.pythonImport.lastIndex = 0
+     while ((match = this.patterns.pythonImport.exec(content)) !== null) {
+       const importPath = match[1]
+       const lineIndex = content.substring(0, match.index).split("\n").length - 1
+
+       if (importPath.startsWith(".")) {
+         const targetPath = this.resolvePath(filePath, this.pythonModuleToPath(importPath))
+         if (!targetPath) continue
+
+         const fromChunkId = this.findChunkForLine(chunks, lineIndex)
+         if (!fromChunkId) continue
+
+         const toChunkId = this.findFirstChunkInFile(targetPath)
+         if (toChunkId) {
+           relations.push({
+             from: fromChunkId,
+             to: toChunkId,
+             predicate: "imports",
+             weight: 0.8,
+             source: "regex",
+             line: lineIndex
+           })
+         }
+       }
+     }
+   }
+
+   // Map a Python relative module name to a filesystem-style relative path:
+   // ".utils" -> "./utils", "..pkg.mod" -> "../pkg/mod", "." -> "./."
+   private pythonModuleToPath(module: string): string {
+     const dots = (module.match(/^\.+/) ?? ["."])[0].length
+     const rest = module.slice(dots).replace(/\./g, "/")
+     const prefix = dots === 1 ? "./" : "../".repeat(dots - 1)
+     return prefix + (rest || ".")
+   }
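+
+   // Example (hypothetical layout): inside pkg/sub/mod.py, `from ..utils import io`
+   // yields pythonModuleToPath("..utils") === "../utils", which resolvePath
+   // turns into pkg/utils, so the edge targets chunk_pkg_utils_0.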
+
+   private resolvePath(filePath: string, target: string): string | null {
+     try {
+       const dir = path.dirname(filePath)
+       const absoluteTarget = path.resolve(dir, target)
+
+       // Refuse targets that escape the project root (cwd)
+       if (!absoluteTarget.startsWith(process.cwd())) {
+         return null
+       }
+
+       return path.relative(process.cwd(), absoluteTarget)
+     } catch {
+       return null
+     }
+   }
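+
+   // Example (hypothetical paths, cwd as project root): resolving "./api.md"
+   // against docs/guide.md gives "docs/api.md", while "../../etc/passwd"
+   // escapes the root and returns null.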
+
+   private findChunkForLine(chunks: ChunkWithId[], lineIndex: number): string | null {
+     // lineIndex is 0-based; chunks are assumed to carry the same convention
+     for (const chunk of chunks) {
+       if (chunk.start_line !== undefined && chunk.end_line !== undefined) {
+         if (lineIndex >= chunk.start_line && lineIndex <= chunk.end_line) {
+           return chunk.chunk_id
+         }
+       }
+     }
+     return null
+   }
+
+   private findFirstChunkInFile(targetPath: string): string | null {
+     // Synthesize the id without checking that the file was indexed. Strip the
+     // extension first so the result matches the ids produced by
+     // GraphBuilder.assignChunkIds (which also drops the extension).
+     const withoutExt = targetPath.replace(/\.[^/.]+$/, "")
+     const normalized = withoutExt.replace(/[^a-zA-Z0-9]/g, "_")
+     return `chunk_${normalized}_0`
+   }
+
+   private findChunkContainingSymbol(chunks: ChunkWithId[], symbol: string): string | null {
+     // Plain substring search, so short symbol names can produce false positives
+     for (const chunk of chunks) {
+       if (chunk.content.includes(symbol)) {
+         return chunk.chunk_id
+       }
+     }
+     return null
+   }
+
+   private findChunkForLinkTarget(targetPath: string, linkTarget: string, chunks: ChunkWithId[]): string | null {
+     // Prefer an anchor match: `file.md#heading` targets the chunk whose
+     // heading context mentions the fragment
+     const hashIndex = linkTarget.indexOf("#")
+     if (hashIndex !== -1) {
+       const heading = linkTarget.substring(hashIndex + 1).toLowerCase()
+       for (const chunk of chunks) {
+         if (chunk.heading_context && chunk.heading_context.toLowerCase().includes(heading)) {
+           return chunk.chunk_id
+         }
+       }
+     }
+     return this.findFirstChunkInFile(targetPath)
+   }
+ }
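+
+ // Usage sketch (hypothetical inputs):
+ //   const analyzer = new RegexAnalyzer()
+ //   const relations = analyzer.analyzeCode("src/app.ts", source, chunks)
+ //   // relations: imports / extends / implements edges found by regex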
--- /dev/null
+++ graph-builder.ts
@@ -0,0 +1,198 @@
+ import { GraphDB, Triple } from "./graph-db"
+ import { RegexAnalyzer, Relation as RegexRelation } from "./analyzers/regex-analyzer"
+ import { LSPAnalyzer, Relation as LSPRelation } from "./analyzers/lsp-analyzer"
+
+ export interface ChunkWithId {
+   chunk_id: string
+   content: string
+   start_line?: number
+   end_line?: number
+   heading_context?: string
+ }
+
+ export class GraphBuilder {
+   private lspAnalyzer: LSPAnalyzer
+   private regexAnalyzer: RegexAnalyzer
+
+   constructor(
+     private graphDB: GraphDB,
+     private projectRoot: string
+   ) {
+     this.lspAnalyzer = new LSPAnalyzer()
+     this.regexAnalyzer = new RegexAnalyzer()
+   }
+
+   // Derive stable chunk ids from the file path (extension dropped, each
+   // non-alphanumeric character replaced with "_") plus the chunk index
+   assignChunkIds(filePath: string, chunks: Array<Omit<ChunkWithId, "chunk_id">>): ChunkWithId[] {
+     const withoutExt = filePath.replace(/\.[^/.]+$/, "")
+     const normalizedPath = withoutExt.replace(/[^a-zA-Z0-9]/g, "_")
+
+     return chunks.map((chunk, index) => {
+       const chunkId = `chunk_${normalizedPath}_${index}`
+       return {
+         ...chunk,
+         chunk_id: chunkId
+       } as ChunkWithId
+     })
+   }
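+
+   // Example (hypothetical file): assignChunkIds("src/graph-db.ts", [c0, c1])
+   // returns chunks with ids "chunk_src_graph_db_0" and "chunk_src_graph_db_1".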
+
+   async buildEdges(
+     filePath: string,
+     content: string,
+     chunks: ChunkWithId[],
+     fileType: "code" | "docs"
+   ): Promise<void> {
+     let relations: Array<RegexRelation | LSPRelation> = []
+
+     if (fileType === "docs") {
+       relations = this.regexAnalyzer.analyzeMarkdown(filePath, content, chunks)
+     } else if (fileType === "code") {
+       const lspAvailable = await this.lspAnalyzer.isAvailable(filePath)
+
+       if (lspAvailable) {
+         try {
+           relations = await this.lspAnalyzer.analyzeFile(filePath, chunks)
+         } catch {
+           // LSP threw: fall through to regex
+         }
+       }
+
+       // Fallback: if LSP unavailable, threw, or returned nothing, use regex
+       if (relations.length === 0) {
+         relations = this.regexAnalyzer.analyzeCode(filePath, content, chunks)
+       }
+     }
+
+     const triples: Triple[] = relations.map(rel => ({
+       subject: rel.from,
+       predicate: rel.predicate,
+       object: rel.to,
+       weight: rel.weight,
+       source: rel.source,
+       file: filePath,
+       line: rel.line
+     }))
+
+     await this.graphDB.putEdges(triples)
+   }
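+
+   // Usage sketch (hypothetical wiring):
+   //   const chunks = builder.assignChunkIds(filePath, rawChunks)
+   //   await builder.buildEdges(filePath, content, chunks, "code")
+   //   // edges now persisted in graphDB as subject-predicate-object triples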
+
+   // Coarse resolution: always maps to the file's first chunk; the line
+   // argument is accepted for finer-grained lookup later but is not used yet
+   resolveChunkId(filePath: string, line: number): string | null {
+     const withoutExt = filePath.replace(/\.[^/.]+$/, "")
+     const normalizedPath = withoutExt.replace(/[^a-zA-Z0-9]/g, "_")
+     return `chunk_${normalizedPath}_0`
+   }
+
+   async getRelatedChunks(chunkId: string): Promise<Array<{ chunk_id: string; predicate: string; weight: number; direction: "outgoing" | "incoming" }>> {
+     const [outgoing, incoming] = await Promise.all([
+       this.graphDB.getOutgoing(chunkId),
+       this.graphDB.getIncoming(chunkId)
+     ])
+
+     return [
+       ...outgoing.map(t => ({ chunk_id: t.object, predicate: t.predicate, weight: t.weight, direction: "outgoing" as const })),
+       ...incoming.map(t => ({ chunk_id: t.subject, predicate: t.predicate, weight: t.weight, direction: "incoming" as const }))
+     ]
+   }
+
+   // ---- FR-005: Semantic similarity edges ------------------------------------
+
+   /**
+    * Build "similar_to" edges between chunks whose cosine similarity exceeds
+    * the threshold. Only creates edges between chunks in different files, and
+    * only where no explicit link already exists.
+    *
+    * @param chunks array of { chunk_id, vector, file } for all indexed chunks
+    * @param threshold minimum cosine similarity (default 0.8)
+    * @param maxEdgesPerChunk limit on outgoing similarity edges per chunk (default 3)
+    * @returns number of similarity edges created
+    */
+   async buildSemanticEdges(
+     chunks: Array<{ chunk_id: string; vector: number[]; file: string }>,
+     threshold: number = 0.8,
+     maxEdgesPerChunk: number = 3,
+   ): Promise<number> {
+     if (chunks.length < 2) return 0
+
+     // Pre-collect all existing edges so we can skip pairs that already have links
+     const existingPairs = new Set<string>()
+     for (const chunk of chunks) {
+       try {
+         const outgoing = await this.graphDB.getOutgoing(chunk.chunk_id)
+         for (const t of outgoing) {
+           existingPairs.add(`${t.subject}|${t.object}`)
+           existingPairs.add(`${t.object}|${t.subject}`) // bidirectional check
+         }
+       } catch {
+         // skip: chunk may not have edges yet
+       }
+     }
+
+     const newTriples: Triple[] = []
+
+     // For each chunk, find the top-N most similar chunks above the threshold.
+     // The inner loop only scans forward (j > i), so each pair is scored once
+     // and the resulting edge is attributed to the earlier chunk.
+     for (let i = 0; i < chunks.length; i++) {
+       const a = chunks[i]
+       if (!a.vector || a.vector.length === 0) continue
+
+       const candidates: Array<{ idx: number; similarity: number }> = []
+
+       for (let j = i + 1; j < chunks.length; j++) {
+         const b = chunks[j]
+         if (!b.vector || b.vector.length === 0) continue
+
+         // Skip same-file chunks (intra-file similarity is less useful)
+         if (a.file === b.file) continue
+
+         // Skip if an explicit edge already exists
+         const pairKey = `${a.chunk_id}|${b.chunk_id}`
+         if (existingPairs.has(pairKey)) continue
+
+         const similarity = this.cosineSimilarity(a.vector, b.vector)
+         if (similarity > threshold) {
+           candidates.push({ idx: j, similarity })
+         }
+       }
+
+       // Sort by similarity descending, take top N
+       candidates.sort((x, y) => y.similarity - x.similarity)
+       const top = candidates.slice(0, maxEdgesPerChunk)
+
+       for (const c of top) {
+         const b = chunks[c.idx]
+         newTriples.push({
+           subject: a.chunk_id,
+           predicate: "similar_to",
+           object: b.chunk_id,
+           weight: parseFloat(c.similarity.toFixed(4)),
+           source: "semantic",
+           file: a.file,
+         })
+         // Mark as existing so the reverse pair isn't duplicated
+         existingPairs.add(`${a.chunk_id}|${b.chunk_id}`)
+         existingPairs.add(`${b.chunk_id}|${a.chunk_id}`)
+       }
+     }
+
+     if (newTriples.length > 0) {
+       // Batch insert in groups of 100 to avoid overwhelming LevelDB
+       for (let i = 0; i < newTriples.length; i += 100) {
+         const batch = newTriples.slice(i, i + 100)
+         await this.graphDB.putEdges(batch)
+       }
+     }
+
+     return newTriples.length
+   }
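+
+   // Sketch (hypothetical call): link each indexed chunk to at most two
+   // cross-file neighbours with similarity above 0.85:
+   //   const created = await builder.buildSemanticEdges(indexedChunks, 0.85, 2)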
+
+   // Standard cosine similarity; assumes both vectors share the same
+   // dimensionality (embeddings from the same model)
+   private cosineSimilarity(vecA: number[], vecB: number[]): number {
+     let dot = 0, normA = 0, normB = 0
+     for (let i = 0; i < vecA.length; i++) {
+       dot += vecA[i] * vecB[i]
+       normA += vecA[i] * vecA[i]
+       normB += vecB[i] * vecB[i]
+     }
+     const denom = Math.sqrt(normA) * Math.sqrt(normB)
+     return denom === 0 ? 0 : dot / denom
+   }
+ }
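+
+ // Worked example (hypothetical vectors): for a = [1, 0] and b = [1, 1],
+ // dot = 1, |a| = 1, |b| = sqrt(2), so cosineSimilarity(a, b) = 1 / sqrt(2) ≈ 0.7071,
+ // below the default 0.8 threshold, so no "similar_to" edge would be created.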