@comfanion/usethis_search 4.4.0 → 4.5.1

@@ -9,7 +9,7 @@ import { tool } from "@opencode-ai/plugin"
 import path from "path"
 import fs from "fs/promises"
 
-import { CodebaseIndexer } from "../vectorizer/index.ts"
+import { CodebaseIndexer, getIndexer, releaseIndexer, destroyIndexer } from "../vectorizer/index.ts"
 
 const INDEX_EXTENSIONS: Record<string, string[]> = {
   code: [".js", ".ts", ".jsx", ".tsx", ".go", ".py", ".rs", ".java", ".kt", ".swift", ".c", ".cpp", ".h", ".cs", ".rb", ".php"],
@@ -98,14 +98,17 @@ Available indexes:
     output += `### Active Indexes\n\n`
     for (const idx of indexes) {
       try {
-        const indexer = await new CodebaseIndexer(projectRoot, idx).init()
-        const stats = await indexer.getStats()
-        await indexer.unloadModel()
-        const desc = INDEX_DESCRIPTIONS[idx] || "Custom index"
-        const features = stats.features
-          ? ` | chunking: ${stats.features.chunking}, hybrid: ${stats.features.hybrid ? "on" : "off"}`
-          : ""
-        output += `- **${idx}** - ${desc} (files: ${stats.fileCount}, chunks: ${stats.chunkCount}${features})\n`
+        const indexer = await getIndexer(projectRoot, idx)
+        try {
+          const stats = await indexer.getStats()
+          const desc = INDEX_DESCRIPTIONS[idx] || "Custom index"
+          const features = stats.features
+            ? ` | chunking: ${stats.features.chunking}, hybrid: ${stats.features.hybrid ? "on" : "off"}`
+            : ""
+          output += `- **${idx}** - ${desc} (files: ${stats.fileCount}, chunks: ${stats.chunkCount}${features})\n`
+        } finally {
+          releaseIndexer(projectRoot, idx)
+        }
       } catch {
         output += `- ${idx}\n`
       }
@@ -119,41 +122,44 @@ Available indexes:
     if (args.action === "status") {
       const hashesFile = path.join(vectorsDir, indexName, "hashes.json")
       try {
-        const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
-        const stats = await indexer.getStats()
-        await indexer.unloadModel()
-
-        const sampleFiles = Object.keys(JSON.parse(await fs.readFile(hashesFile, "utf8"))).slice(0, 5)
-        const desc = INDEX_DESCRIPTIONS[indexName] || "Custom index"
-
-        let output = `## Index Status: "${indexName}"\n\n`
-        output += `**Description:** ${desc}\n`
-        output += `**Files indexed:** ${stats.fileCount}\n`
-        output += `**Total chunks:** ${stats.chunkCount}\n`
-        output += `**Model:** ${stats.model}\n`
-
-        if (stats.features) {
-          output += `\n**Features:**\n`
-          output += `- Chunking strategy: ${stats.features.chunking}\n`
-          output += `- Hybrid search: ${stats.features.hybrid ? "enabled" : "disabled"}\n`
-          output += `- Metrics: ${stats.features.metrics ? "enabled" : "disabled"}\n`
-          output += `- Query cache: ${stats.features.cache ? "enabled" : "disabled"}\n`
-        }
-
-        // Show metrics summary if available
+        const indexer = await getIndexer(projectRoot, indexName)
         try {
-          const metrics = await indexer.getMetrics()
-          if (metrics.total_queries > 0) {
-            output += `\n**Search Metrics:**\n`
-            output += `- Total queries: ${metrics.total_queries}\n`
-            output += `- Avg results/query: ${metrics.avg_results_per_query.toFixed(1)}\n`
-            output += `- Zero results rate: ${(metrics.zero_results_rate * 100).toFixed(1)}%\n`
-            output += `- Avg relevance: ${metrics.avg_relevance.toFixed(3)}\n`
+          const stats = await indexer.getStats()
+
+          const sampleFiles = Object.keys(JSON.parse(await fs.readFile(hashesFile, "utf8"))).slice(0, 5)
+          const desc = INDEX_DESCRIPTIONS[indexName] || "Custom index"
+
+          let output = `## Index Status: "${indexName}"\n\n`
+          output += `**Description:** ${desc}\n`
+          output += `**Files indexed:** ${stats.fileCount}\n`
+          output += `**Total chunks:** ${stats.chunkCount}\n`
+          output += `**Model:** ${stats.model}\n`
+
+          if (stats.features) {
+            output += `\n**Features:**\n`
+            output += `- Chunking strategy: ${stats.features.chunking}\n`
+            output += `- Hybrid search: ${stats.features.hybrid ? "enabled" : "disabled"}\n`
+            output += `- Metrics: ${stats.features.metrics ? "enabled" : "disabled"}\n`
+            output += `- Query cache: ${stats.features.cache ? "enabled" : "disabled"}\n`
           }
-        } catch {}
 
-        output += `\n**Sample indexed files:**\n${sampleFiles.map((f) => `- ${f}`).join("\n")}${stats.fileCount > 5 ? `\n- ... and ${stats.fileCount - 5} more` : ""}`
-        return output
+          // Show metrics summary if available
+          try {
+            const metrics = await indexer.getMetrics()
+            if (metrics.total_queries > 0) {
+              output += `\n**Search Metrics:**\n`
+              output += `- Total queries: ${metrics.total_queries}\n`
+              output += `- Avg results/query: ${metrics.avg_results_per_query.toFixed(1)}\n`
+              output += `- Zero results rate: ${(metrics.zero_results_rate * 100).toFixed(1)}%\n`
+              output += `- Avg relevance: ${metrics.avg_relevance.toFixed(3)}\n`
+            }
+          } catch {}
+
+          output += `\n**Sample indexed files:**\n${sampleFiles.map((f) => `- ${f}`).join("\n")}${stats.fileCount > 5 ? `\n- ... and ${stats.fileCount - 5} more` : ""}`
+          return output
+        } finally {
+          releaseIndexer(projectRoot, indexName)
+        }
       } catch {
         return `## Index Status: "${indexName}"\n\nIndex "${indexName}" not created yet. Create it with: codeindex({ action: "reindex", index: "${indexName}" })`
       }
@@ -161,6 +167,8 @@ Available indexes:
 
     if (args.action === "reindex") {
      try {
+        // Destroy any pooled instance to get fresh state for reindex
+        await destroyIndexer(projectRoot, indexName)
         const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
 
         const baseDir = args.dir ? path.resolve(projectRoot, args.dir) : projectRoot
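
Note that the reindex handler deliberately bypasses the pool: it awaits `destroyIndexer` to evict any cached instance, then builds a fresh `CodebaseIndexer` directly, so a stale pooled instance cannot keep serving pre-reindex state. Every pooled call site in this diff follows the same borrow/use/return discipline, distilled here for reference (this mirrors the status handler above; it is a pattern summary, not additional changed code):

// The acquire/use/release pattern the 4.5.1 call sites converge on
const indexer = await getIndexer(projectRoot, indexName)
try {
  const stats = await indexer.getStats()
  // ... use the shared instance. Call sites no longer invoke
  // unloadModel() here: other borrowers may still hold the instance.
} finally {
  releaseIndexer(projectRoot, indexName) // always return it, even on error
}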
@@ -195,8 +203,8 @@ Available indexes:
         } catch {}
       }
 
-      await indexer.unloadModel()
       const stats = await indexer.getStats()
+      await indexer.unloadModel()
 
       let output = `## Re-indexing Complete\n\n`
       output += `**Index:** ${indexName}\n`
@@ -258,41 +266,43 @@ Available indexes:
         return `## Gold Dataset Test\n\nNo test queries found in gold dataset.`
       }
 
-      const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
-      let passed = 0
-      let failed = 0
-      let output = `## Gold Dataset Test Results\n\n`
-
-      for (const t of tests) {
-        const results = await indexer.search(t.query, 10, false)
-        const foundFiles = results.map((r: any) => r.file)
-        const foundExpected = t.expected_files.filter(f => foundFiles.includes(f))
-        const topScore = results.length > 0 && results[0]._distance != null
-          ? 1 - results[0]._distance
-          : 0
-
-        const pass = foundExpected.length >= Math.ceil(t.expected_files.length * 0.5) && topScore >= t.min_relevance
-
-        if (pass) {
-          passed++
-          output += `**PASS** Query: "${t.query}"\n`
-        } else {
-          failed++
-          output += `**FAIL** Query: "${t.query}"\n`
-        }
+      const indexer = await getIndexer(projectRoot, indexName)
+      try {
+        let passed = 0
+        let failed = 0
+        let output = `## Gold Dataset Test Results\n\n`
+
+        for (const t of tests) {
+          const results = await indexer.search(t.query, 10, false)
+          const foundFiles = results.map((r: any) => r.file)
+          const foundExpected = t.expected_files.filter(f => foundFiles.includes(f))
+          const topScore = results.length > 0 && results[0]._distance != null
+            ? 1 - results[0]._distance
+            : 0
+
+          const pass = foundExpected.length >= Math.ceil(t.expected_files.length * 0.5) && topScore >= t.min_relevance
+
+          if (pass) {
+            passed++
+            output += `**PASS** Query: "${t.query}"\n`
+          } else {
+            failed++
+            output += `**FAIL** Query: "${t.query}"\n`
+          }
 
-        output += ` Found: ${foundFiles.slice(0, 3).map((f: string) => `${f} (${(1 - (results.find((r: any) => r.file === f)?._distance ?? 1)).toFixed(2)})`).join(", ")}\n`
-        if (foundExpected.length < t.expected_files.length) {
-          const missing = t.expected_files.filter(f => !foundFiles.includes(f))
-          output += ` Missing: ${missing.join(", ")}\n`
+          output += ` Found: ${foundFiles.slice(0, 3).map((f: string) => `${f} (${(1 - (results.find((r: any) => r.file === f)?._distance ?? 1)).toFixed(2)})`).join(", ")}\n`
+          if (foundExpected.length < t.expected_files.length) {
+            const missing = t.expected_files.filter(f => !foundFiles.includes(f))
+            output += ` Missing: ${missing.join(", ")}\n`
+          }
+          output += `\n`
         }
-        output += `\n`
-      }
 
-      await indexer.unloadModel()
-
-      output += `---\n**Summary:** ${passed}/${tests.length} tests passed (${Math.round(passed / tests.length * 100)}%)\n`
-      return output
+        output += `---\n**Summary:** ${passed}/${tests.length} tests passed (${Math.round(passed / tests.length * 100)}%)\n`
+        return output
+      } finally {
+        releaseIndexer(projectRoot, indexName)
+      }
     } catch (error: any) {
       return `Gold dataset test failed: ${error.message || String(error)}`
     }
@@ -301,139 +311,137 @@ Available indexes:
     // NFR-031: Graph validation
     if (args.action === "validate-graph") {
       try {
-        const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
-
-        // Access internal graphDB and db
-        const graphDB = (indexer as any).graphDB
-        const db = (indexer as any).db
-
-        if (!graphDB) {
-          await indexer.unloadModel()
-          return `## Graph Validation: "${indexName}"\n\nNo graph database found. Run reindex first.`
-        }
-
-        // 1. Get all triples from graph
-        let allTriples: any[] = []
+        const indexer = await getIndexer(projectRoot, indexName)
         try {
-          allTriples = await graphDB.getAllTriples()
-        } catch (e: any) {
-          await indexer.unloadModel()
-          return `## Graph Validation: "${indexName}"\n\n**Error:** Failed to read graph database: ${e.message || String(e)}\n\nThe graph database may be corrupted. Run: codeindex({ action: "reindex", index: "${indexName}" })`
-        }
+          // Access internal graphDB and db
+          const graphDB = (indexer as any).graphDB
+          const db = (indexer as any).db
 
-        // 2. Get all chunk IDs from vector DB
-        const knownChunkIds = new Set<string>()
-        const tables = await db.tableNames()
-        if (tables.includes("chunks")) {
-          const table = await db.openTable("chunks")
+          if (!graphDB) {
+            return `## Graph Validation: "${indexName}"\n\nNo graph database found. Run reindex first.`
+          }
+
+          // 1. Get all triples from graph
+          let allTriples: any[] = []
           try {
-            const rows = await table.filter("").limit(100000).execute()
-            for (const row of rows) {
-              if (row.chunk_id) knownChunkIds.add(row.chunk_id)
-            }
+            allTriples = await graphDB.getAllTriples()
           } catch (e: any) {
-            await indexer.unloadModel()
-            return `## Graph Validation: "${indexName}"\n\n**Error:** Failed to read vector database: ${e.message || String(e)}\n\nThe vector database may be corrupted. Run: codeindex({ action: "reindex", index: "${indexName}" })`
+            return `## Graph Validation: "${indexName}"\n\n**Error:** Failed to read graph database: ${e.message || String(e)}\n\nThe graph database may be corrupted. Run: codeindex({ action: "reindex", index: "${indexName}" })`
           }
-        }
 
-        // 3. Validate: find orphaned triples (subject or object points to non-existent chunk)
-        const orphanedSubjects: Array<{ triple: string; missingId: string }> = []
-        const orphanedObjects: Array<{ triple: string; missingId: string }> = []
-        const predicateCounts: Record<string, number> = {}
-        const sourceCounts: Record<string, number> = {}
-        const fileCounts: Record<string, number> = {}
-
-        for (const t of allTriples) {
-          // Count predicates/sources
-          predicateCounts[t.predicate] = (predicateCounts[t.predicate] || 0) + 1
-          sourceCounts[t.source] = (sourceCounts[t.source] || 0) + 1
-          fileCounts[t.file] = (fileCounts[t.file] || 0) + 1
-
-          // Check subject (skip meta: prefixed subjects)
-          if (!t.subject.startsWith("meta:") && t.subject.startsWith("chunk_") && !knownChunkIds.has(t.subject)) {
-            orphanedSubjects.push({
-              triple: `${t.subject} --[${t.predicate}]--> ${t.object}`,
-              missingId: t.subject,
-            })
+          // 2. Get all chunk IDs from vector DB
+          const knownChunkIds = new Set<string>()
+          const tables = await db.tableNames()
+          if (tables.includes("chunks")) {
+            const table = await db.openTable("chunks")
+            try {
+              const rows = await table.filter("").limit(100000).execute()
+              for (const row of rows) {
+                if (row.chunk_id) knownChunkIds.add(row.chunk_id)
+              }
+            } catch (e: any) {
+              return `## Graph Validation: "${indexName}"\n\n**Error:** Failed to read vector database: ${e.message || String(e)}\n\nThe vector database may be corrupted. Run: codeindex({ action: "reindex", index: "${indexName}" })`
+            }
          }
 
-          // Check object (skip non-chunk objects like file paths, hashes)
-          if (t.object.startsWith("chunk_") && !knownChunkIds.has(t.object)) {
-            orphanedObjects.push({
-              triple: `${t.subject} --[${t.predicate}]--> ${t.object}`,
-              missingId: t.object,
-            })
-          }
-        }
+          // 3. Validate: find orphaned triples (subject or object points to non-existent chunk)
+          const orphanedSubjects: Array<{ triple: string; missingId: string }> = []
+          const orphanedObjects: Array<{ triple: string; missingId: string }> = []
+          const predicateCounts: Record<string, number> = {}
+          const sourceCounts: Record<string, number> = {}
+          const fileCounts: Record<string, number> = {}
+
+          for (const t of allTriples) {
+            // Count predicates/sources
+            predicateCounts[t.predicate] = (predicateCounts[t.predicate] || 0) + 1
+            sourceCounts[t.source] = (sourceCounts[t.source] || 0) + 1
+            fileCounts[t.file] = (fileCounts[t.file] || 0) + 1
+
+            // Check subject (skip meta: prefixed subjects)
+            if (!t.subject.startsWith("meta:") && t.subject.startsWith("chunk_") && !knownChunkIds.has(t.subject)) {
+              orphanedSubjects.push({
+                triple: `${t.subject} --[${t.predicate}]--> ${t.object}`,
+                missingId: t.subject,
+              })
+            }
 
-        // 4. Get file metadata stats
-        let fileMeta: Array<{ filePath: string; hash: string; timestamp: number }> = []
-        try {
-          fileMeta = await graphDB.getAllFileMeta()
-        } catch (e: any) {
-          // Non-fatal - continue validation without metadata
-          console.warn(`Warning: Failed to get file metadata: ${e.message || String(e)}`)
-        }
+            // Check object (skip non-chunk objects like file paths, hashes)
+            if (t.object.startsWith("chunk_") && !knownChunkIds.has(t.object)) {
+              orphanedObjects.push({
+                triple: `${t.subject} --[${t.predicate}]--> ${t.object}`,
+                missingId: t.object,
+              })
+            }
+          }
 
-        await indexer.unloadModel()
+          // 4. Get file metadata stats
+          let fileMeta: Array<{ filePath: string; hash: string; timestamp: number }> = []
+          try {
+            fileMeta = await graphDB.getAllFileMeta()
+          } catch (e: any) {
+            // Non-fatal - continue validation without metadata
+            console.warn(`Warning: Failed to get file metadata: ${e.message || String(e)}`)
+          }
 
-        // 5. Build report
-        const totalOrphaned = orphanedSubjects.length + orphanedObjects.length
-        const isHealthy = totalOrphaned === 0
+          // 5. Build report
+          const totalOrphaned = orphanedSubjects.length + orphanedObjects.length
+          const isHealthy = totalOrphaned === 0
 
-        let output = `## Graph Validation: "${indexName}"\n\n`
-        output += `**Status:** ${isHealthy ? "HEALTHY" : "ISSUES FOUND"}\n\n`
+          let output = `## Graph Validation: "${indexName}"\n\n`
+          output += `**Status:** ${isHealthy ? "HEALTHY" : "ISSUES FOUND"}\n\n`
 
-        output += `### Statistics\n`
-        output += `- **Total triples:** ${allTriples.length}\n`
-        output += `- **Known chunk IDs:** ${knownChunkIds.size}\n`
-        output += `- **Files with graph metadata:** ${fileMeta.length}\n`
-        output += `- **Unique files in graph:** ${Object.keys(fileCounts).length}\n\n`
+          output += `### Statistics\n`
+          output += `- **Total triples:** ${allTriples.length}\n`
+          output += `- **Known chunk IDs:** ${knownChunkIds.size}\n`
+          output += `- **Files with graph metadata:** ${fileMeta.length}\n`
+          output += `- **Unique files in graph:** ${Object.keys(fileCounts).length}\n\n`
 
-        output += `### Edge Types\n`
-        for (const [pred, count] of Object.entries(predicateCounts).sort((a, b) => b[1] - a[1])) {
-          output += `- **${pred}:** ${count}\n`
-        }
-        output += `\n`
+          output += `### Edge Types\n`
+          for (const [pred, count] of Object.entries(predicateCounts).sort((a, b) => b[1] - a[1])) {
+            output += `- **${pred}:** ${count}\n`
+          }
+          output += `\n`
 
-        output += `### Edge Sources\n`
-        for (const [source, count] of Object.entries(sourceCounts).sort((a, b) => b[1] - a[1])) {
-          output += `- **${source}:** ${count}\n`
-        }
-        output += `\n`
+          output += `### Edge Sources\n`
+          for (const [source, count] of Object.entries(sourceCounts).sort((a, b) => b[1] - a[1])) {
+            output += `- **${source}:** ${count}\n`
+          }
+          output += `\n`
 
-        if (totalOrphaned > 0) {
-          output += `### Orphaned References (${totalOrphaned})\n\n`
+          if (totalOrphaned > 0) {
+            output += `### Orphaned References (${totalOrphaned})\n\n`
 
-          if (orphanedSubjects.length > 0) {
-            output += `**Broken subjects** (${orphanedSubjects.length}):\n`
-            for (const o of orphanedSubjects.slice(0, 10)) {
-              output += `- \`${o.missingId}\` in: ${o.triple}\n`
-            }
-            if (orphanedSubjects.length > 10) {
-              output += `- ... and ${orphanedSubjects.length - 10} more\n`
+            if (orphanedSubjects.length > 0) {
+              output += `**Broken subjects** (${orphanedSubjects.length}):\n`
+              for (const o of orphanedSubjects.slice(0, 10)) {
+                output += `- \`${o.missingId}\` in: ${o.triple}\n`
+              }
+              if (orphanedSubjects.length > 10) {
+                output += `- ... and ${orphanedSubjects.length - 10} more\n`
+              }
+              output += `\n`
             }
-            output += `\n`
-          }
 
-          if (orphanedObjects.length > 0) {
-            output += `**Broken objects** (${orphanedObjects.length}):\n`
-            for (const o of orphanedObjects.slice(0, 10)) {
-              output += `- \`${o.missingId}\` in: ${o.triple}\n`
-            }
-            if (orphanedObjects.length > 10) {
-              output += `- ... and ${orphanedObjects.length - 10} more\n`
+            if (orphanedObjects.length > 0) {
+              output += `**Broken objects** (${orphanedObjects.length}):\n`
+              for (const o of orphanedObjects.slice(0, 10)) {
+                output += `- \`${o.missingId}\` in: ${o.triple}\n`
+              }
+              if (orphanedObjects.length > 10) {
+                output += `- ... and ${orphanedObjects.length - 10} more\n`
+              }
+              output += `\n`
             }
           }
-          output += `\n`
+
+            output += `**Recommendation:** Run \`codeindex({ action: "reindex", index: "${indexName}" })\` to rebuild the graph.\n`
+          } else {
+            output += `### Integrity\nAll chunk references are valid. No orphaned triples found.\n`
         }
 
-        output += `**Recommendation:** Run \`codeindex({ action: "reindex", index: "${indexName}" })\` to rebuild the graph.\n`
-      } else {
-        output += `### Integrity\nAll chunk references are valid. No orphaned triples found.\n`
+          return output
+        } finally {
+          releaseIndexer(projectRoot, indexName)
       }
-
-      return output
     } catch (error: any) {
       return `Graph validation failed: ${error.message || String(error)}`
     }