@comfanion/usethis_search 3.0.0-dev.0 → 3.0.0-dev.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/file-indexer.ts +13 -0
- package/index.ts +5 -1
- package/package.json +3 -1
- package/tools/codeindex.ts +155 -6
- package/tools/read-interceptor.ts +78 -5
- package/vectorizer/analyzers/lsp-analyzer.ts +225 -94
- package/vectorizer/analyzers/lsp-client.ts +369 -0
- package/vectorizer/graph-builder.ts +106 -3
- package/vectorizer/graph-db.ts +192 -0
- package/vectorizer/index.js +93 -9
- package/vectorizer/usage-tracker.ts +204 -0
package/file-indexer.ts
CHANGED
```diff
@@ -4,6 +4,7 @@ import fs from "fs/promises"
 import fsSync from "fs"
 
 import { CodebaseIndexer } from "./vectorizer/index.js"
+import { initGraphAPI } from "./api.js"
 
 /**
  * File Indexer Plugin
@@ -252,7 +253,19 @@ async function ensureIndexOnSessionStart(
   for (const [indexName, indexConfig] of Object.entries(config.indexes)) {
     if (!indexConfig.enabled) continue
     const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
+
     try {
+      // Initialize graph API for Mind plugin integration
+      try {
+        const graphDB = (indexer as any).graphDB
+        if (graphDB) {
+          initGraphAPI(graphDB)
+          log("Graph API initialized for Mind plugin")
+        }
+      } catch (error) {
+        debug("Failed to initialize graph API:", error)
+      }
+
       const indexExists = await hasIndex(projectRoot, indexName)
       const health = await indexer.checkHealth(config.exclude)
 
```
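`api.ts` itself is not part of this diff, so its shape can only be inferred from the call sites: `initGraphAPI(graphDB)` here, and `getRelatedFiles`, `getGraphEntries`, `isGraphAPIAvailable` in `index.ts` below. The following is a minimal sketch of the module-level singleton those call sites imply; only `getAllTriples()` is confirmed by this diff (it appears in `codeindex.ts`), everything else is an assumption:

```ts
// Hypothetical sketch of api.ts, inferred from the call sites in this diff.

interface Triple {
  subject: string
  predicate: string
  object: string
  file: string
  source: string
}

interface GraphDB {
  getAllTriples(): Promise<Triple[]>
}

let graphDB: GraphDB | null = null

/** Called by file-indexer.ts once the indexer exposes a graph database. */
export function initGraphAPI(db: GraphDB): void {
  graphDB = db
}

export function isGraphAPIAvailable(): boolean {
  return graphDB !== null
}

/** All graph edges touching a file (sketch; assumed filtering). */
export async function getGraphEntries(file: string): Promise<Triple[]> {
  if (!graphDB) return []
  const all = await graphDB.getAllTriples()
  return all.filter(t => t.file === file)
}

/** Targets connected to `file` via any edge (sketch; assumed object semantics). */
export async function getRelatedFiles(file: string): Promise<string[]> {
  const entries = await getGraphEntries(file)
  return [...new Set(entries.map(t => t.object))].filter(o => o !== file)
}
```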
package/index.ts
CHANGED
```diff
@@ -4,6 +4,7 @@ import search from "./tools/search"
 import codeindex from "./tools/codeindex"
 import readInterceptor from "./tools/read-interceptor"
 import FileIndexerPlugin from "./file-indexer"
+import { getRelatedFiles, getGraphEntries, isGraphAPIAvailable } from "./api.js"
 
 const UsethisSearchPlugin: Plugin = async (ctx) => {
   const fileIndexerHooks = await FileIndexerPlugin(ctx as any)
@@ -18,4 +19,7 @@ const UsethisSearchPlugin: Plugin = async (ctx) => {
   }
 }
 
-export default UsethisSearchPlugin
+export default UsethisSearchPlugin
+
+// Export graph API for other plugins (e.g., Mind)
+export { getRelatedFiles, getGraphEntries, isGraphAPIAvailable }
```
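The re-export makes the graph API importable by other OpenCode plugins. A hedged consumer-side sketch follows; the Mind plugin is only mentioned by name in this diff, so the import specifier and call signatures are assumptions:

```ts
// Hypothetical consumer (e.g., the Mind plugin named in the comments above).
// Guarding with isGraphAPIAvailable() matches file-indexer.ts, which only
// calls initGraphAPI() once an indexer with a graphDB has been initialized.
import { getRelatedFiles, isGraphAPIAvailable } from "@comfanion/usethis_search"

async function contextFor(file: string): Promise<string[]> {
  if (!isGraphAPIAvailable()) return [] // index not built yet this session
  return getRelatedFiles(file)
}
```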
package/package.json
CHANGED
```diff
@@ -1,6 +1,6 @@
 {
   "name": "@comfanion/usethis_search",
-  "version": "3.0.0-dev.0",
+  "version": "3.0.0-dev.1",
   "description": "OpenCode plugin: semantic search with graph-based context (v3: graph relations, 1-hop context, LSP + regex analyzers)",
   "type": "module",
   "main": "./index.ts",
@@ -24,9 +24,11 @@
     "vectorizer/query-cache.ts",
     "vectorizer/search-metrics.ts",
     "vectorizer/graph-db.ts",
+    "vectorizer/usage-tracker.ts",
     "vectorizer/graph-builder.ts",
     "vectorizer/analyzers/regex-analyzer.ts",
     "vectorizer/analyzers/lsp-analyzer.ts",
+    "vectorizer/analyzers/lsp-client.ts",
     "vectorizer/chunkers/markdown-chunker.ts",
     "vectorizer/chunkers/code-chunker.ts",
     "vectorizer/chunkers/chunker-factory.ts",
```
package/tools/codeindex.ts
CHANGED
```diff
@@ -61,6 +61,7 @@ Actions:
 - "list" → List all available indexes with stats
 - "reindex" → Re-index files using local vectorizer
 - "test" → Run gold dataset quality tests (if configured)
+- "validate-graph" → Validate graph consistency (orphaned triples, broken chunk refs)
 
 Available indexes:
 - "code" - Source code files
@@ -68,7 +69,7 @@ Available indexes:
 - "config" - Configuration files`,
 
   args: {
-    action: tool.schema.enum(["status", "list", "reindex", "test"]).describe("Action to perform"),
+    action: tool.schema.enum(["status", "list", "reindex", "test", "validate-graph"]).describe("Action to perform"),
     index: tool.schema.string().optional().default("code").describe("Index name: code, docs, config"),
     dir: tool.schema.string().optional().describe("Directory to index (default: project root)"),
   },
@@ -170,11 +171,27 @@ Available indexes:
 
         let indexed = 0
         let skipped = 0
-        for (const filePath of files) {
+        const total = files.length
+
+        // FR-053: Progress reporting during indexing + graph building
+        const progressLines: string[] = []
+        for (let i = 0; i < files.length; i++) {
+          const filePath = files[i]
          try {
            const wasIndexed = await indexer.indexFile(filePath)
-            if (wasIndexed) indexed++
-            else skipped++
+            if (wasIndexed) {
+              indexed++
+              // Log progress at 10%, 25%, 50%, 75%, 100% milestones
+              const pct = Math.round(((i + 1) / total) * 100)
+              if (pct === 10 || pct === 25 || pct === 50 || pct === 75 || pct === 100) {
+                const msg = `Building index + graph: ${i + 1}/${total} files (${pct}%)`
+                if (progressLines.length === 0 || progressLines[progressLines.length - 1] !== msg) {
+                  progressLines.push(msg)
+                }
+              }
+            } else {
+              skipped++
+            }
          } catch {}
        }
 
@@ -184,13 +201,21 @@ Available indexes:
         let output = `## Re-indexing Complete\n\n`
         output += `**Index:** ${indexName}\n`
         output += `**Directory:** ${args.dir || "(project root)"}\n`
-        output += `**Files found:** ${files.length}\n`
+        output += `**Files found:** ${total}\n`
         output += `**Files indexed:** ${indexed}\n`
         output += `**Files unchanged:** ${skipped}\n`
         output += `**Total chunks:** ${stats.chunkCount}\n`
         if (stats.features) {
           output += `**Chunking:** ${stats.features.chunking}\n`
         }
+
+        if (progressLines.length > 0) {
+          output += `\n**Build Progress:**\n`
+          for (const line of progressLines) {
+            output += `- ${line}\n`
+          }
+        }
+
         return output
       } catch (error: any) {
         return `Re-indexing failed: ${error.message || String(error)}`
@@ -273,6 +298,130 @@ Available indexes:
       }
     }
 
-
+    // NFR-031: Graph validation
+    if (args.action === "validate-graph") {
+      try {
+        const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
+
+        // Access internal graphDB and db
+        const graphDB = (indexer as any).graphDB
+        const db = (indexer as any).db
+
+        if (!graphDB) {
+          await indexer.unloadModel()
+          return `## Graph Validation: "${indexName}"\n\nNo graph database found. Run reindex first.`
+        }
+
+        // 1. Get all triples from graph
+        const allTriples = await graphDB.getAllTriples()
+
+        // 2. Get all chunk IDs from vector DB
+        const knownChunkIds = new Set<string>()
+        const tables = await db.tableNames()
+        if (tables.includes("chunks")) {
+          const table = await db.openTable("chunks")
+          const rows = await table.search([0]).limit(100000).execute()
+          for (const row of rows) {
+            if (row.chunk_id) knownChunkIds.add(row.chunk_id)
+          }
+        }
+
+        // 3. Validate: find orphaned triples (subject or object points to non-existent chunk)
+        const orphanedSubjects: Array<{ triple: string; missingId: string }> = []
+        const orphanedObjects: Array<{ triple: string; missingId: string }> = []
+        const predicateCounts: Record<string, number> = {}
+        const sourceCounts: Record<string, number> = {}
+        const fileCounts: Record<string, number> = {}
+
+        for (const t of allTriples) {
+          // Count predicates/sources
+          predicateCounts[t.predicate] = (predicateCounts[t.predicate] || 0) + 1
+          sourceCounts[t.source] = (sourceCounts[t.source] || 0) + 1
+          fileCounts[t.file] = (fileCounts[t.file] || 0) + 1
+
+          // Check subject (skip meta: prefixed subjects)
+          if (!t.subject.startsWith("meta:") && t.subject.startsWith("chunk_") && !knownChunkIds.has(t.subject)) {
+            orphanedSubjects.push({
+              triple: `${t.subject} --[${t.predicate}]--> ${t.object}`,
+              missingId: t.subject,
+            })
+          }
+
+          // Check object (skip non-chunk objects like file paths, hashes)
+          if (t.object.startsWith("chunk_") && !knownChunkIds.has(t.object)) {
+            orphanedObjects.push({
+              triple: `${t.subject} --[${t.predicate}]--> ${t.object}`,
+              missingId: t.object,
+            })
+          }
+        }
+
+        // 4. Get file metadata stats
+        const fileMeta = await graphDB.getAllFileMeta()
+
+        await indexer.unloadModel()
+
+        // 5. Build report
+        const totalOrphaned = orphanedSubjects.length + orphanedObjects.length
+        const isHealthy = totalOrphaned === 0
+
+        let output = `## Graph Validation: "${indexName}"\n\n`
+        output += `**Status:** ${isHealthy ? "HEALTHY" : "ISSUES FOUND"}\n\n`
+
+        output += `### Statistics\n`
+        output += `- **Total triples:** ${allTriples.length}\n`
+        output += `- **Known chunk IDs:** ${knownChunkIds.size}\n`
+        output += `- **Files with graph metadata:** ${fileMeta.length}\n`
+        output += `- **Unique files in graph:** ${Object.keys(fileCounts).length}\n\n`
+
+        output += `### Edge Types\n`
+        for (const [pred, count] of Object.entries(predicateCounts).sort((a, b) => b[1] - a[1])) {
+          output += `- **${pred}:** ${count}\n`
+        }
+        output += `\n`
+
+        output += `### Edge Sources\n`
+        for (const [source, count] of Object.entries(sourceCounts).sort((a, b) => b[1] - a[1])) {
+          output += `- **${source}:** ${count}\n`
+        }
+        output += `\n`
+
+        if (totalOrphaned > 0) {
+          output += `### Orphaned References (${totalOrphaned})\n\n`
+
+          if (orphanedSubjects.length > 0) {
+            output += `**Broken subjects** (${orphanedSubjects.length}):\n`
+            for (const o of orphanedSubjects.slice(0, 10)) {
+              output += `- \`${o.missingId}\` in: ${o.triple}\n`
+            }
+            if (orphanedSubjects.length > 10) {
+              output += `- ... and ${orphanedSubjects.length - 10} more\n`
+            }
+            output += `\n`
+          }
+
+          if (orphanedObjects.length > 0) {
+            output += `**Broken objects** (${orphanedObjects.length}):\n`
+            for (const o of orphanedObjects.slice(0, 10)) {
+              output += `- \`${o.missingId}\` in: ${o.triple}\n`
+            }
+            if (orphanedObjects.length > 10) {
+              output += `- ... and ${orphanedObjects.length - 10} more\n`
+            }
+            output += `\n`
+          }
+
+          output += `**Recommendation:** Run \`codeindex({ action: "reindex", index: "${indexName}" })\` to rebuild the graph.\n`
+        } else {
+          output += `### Integrity\nAll chunk references are valid. No orphaned triples found.\n`
+        }
+
+        return output
+      } catch (error: any) {
+        return `Graph validation failed: ${error.message || String(error)}`
+      }
+    }
+
+    return `Unknown action: ${args.action}. Use: status, list, reindex, test, or validate-graph`
   },
 })
```
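The integrity check at the heart of `validate-graph` is independent of LanceDB and the tool plumbing: it is a set-membership scan over the triples. A self-contained restatement of that technique, with assumed minimal types and the same `chunk_`/`meta:` prefix conventions as above:

```ts
interface Triple { subject: string; predicate: string; object: string }

/** Return triples whose subject or object names a chunk that no longer exists. */
function findOrphans(triples: Triple[], knownChunkIds: Set<string>): Triple[] {
  const refersToMissingChunk = (id: string) =>
    id.startsWith("chunk_") && !knownChunkIds.has(id)
  return triples.filter(
    t =>
      (!t.subject.startsWith("meta:") && refersToMissingChunk(t.subject)) ||
      refersToMissingChunk(t.object)
  )
}

// Example: one valid edge, one dangling object reference.
const known = new Set(["chunk_a", "chunk_b"])
const triples: Triple[] = [
  { subject: "chunk_a", predicate: "imports", object: "chunk_b" },
  { subject: "chunk_a", predicate: "imports", object: "chunk_gone" },
]
console.log(findOrphans(triples, known).length) // 1
```

A healthy report shows `**Status:** HEALTHY` plus the triple/chunk statistics and per-predicate and per-source edge counts; when orphans are found, the report lists up to ten broken subjects and objects and recommends `codeindex({ action: "reindex", index: "<name>" })`.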
package/tools/read-interceptor.ts
CHANGED
```diff
@@ -1,8 +1,53 @@
 import { tool } from "@opencode-ai/plugin"
 import path from "path"
+import fs from "fs/promises"
 
 import { CodebaseIndexer } from "../vectorizer/index.js"
 
+// FR-043: Logging for intercepted Read() calls
+const DEBUG = process.env.DEBUG?.includes("vectorizer") || process.env.DEBUG === "*"
+
+interface ReadLogEntry {
+  timestamp: number
+  filePath: string
+  relPath: string
+  chunksFound: number
+  relatedContextCount: number
+  durationMs: number
+  fallback: boolean
+}
+
+const LOG_MAX_ENTRIES = 500
+
+/**
+ * Append a log entry to the Read() interception log file.
+ * Non-blocking, non-fatal — errors are silently ignored.
+ */
+async function logReadInterception(projectRoot: string, entry: ReadLogEntry): Promise<void> {
+  try {
+    const logPath = path.join(projectRoot, ".opencode", "vectors", "read-intercept.log.json")
+    await fs.mkdir(path.dirname(logPath), { recursive: true })
+
+    let entries: ReadLogEntry[] = []
+    try {
+      const raw = await fs.readFile(logPath, "utf-8")
+      entries = JSON.parse(raw)
+    } catch {
+      // file doesn't exist or is invalid — start fresh
+    }
+
+    entries.push(entry)
+    // Cap log size to avoid unbounded growth
+    if (entries.length > LOG_MAX_ENTRIES) {
+      entries = entries.slice(-LOG_MAX_ENTRIES)
+    }
+
+    await fs.writeFile(logPath, JSON.stringify(entries, null, 2), "utf-8")
+  } catch {
+    // non-fatal — logging must never break Read
+  }
+}
+
 export default tool({
   description: `Read file with graph-aware context attachment. When available, this tool searches the file in the index and returns content + related context from the graph (imports, links, etc.).
 
@@ -13,17 +58,46 @@ Use this instead of the standard Read tool for better context awareness.`,
   },
 
   async execute(args) {
+    const startTime = Date.now()
     const projectRoot = process.cwd()
     const filePath = path.isAbsolute(args.filePath) ? args.filePath : path.join(projectRoot, args.filePath)
 
     const relPath = path.relative(projectRoot, filePath)
 
+    if (DEBUG) {
+      console.log(`[read-interceptor] Intercepted Read("${relPath}")`)
+    }
+
     const indexer = await new CodebaseIndexer(projectRoot, "code").init()
     const results = await indexer.search(relPath, 20, false, {})
    const fileChunks = results.filter(r => r.file === relPath)
     await indexer.unloadModel()
 
-    if (fileChunks.length === 0) {
+    const allRelated = fileChunks
+      .flatMap(c => c.relatedContext || [])
+      .filter((r, i, arr) => arr.findIndex(x => x.chunk_id === r.chunk_id) === i)
+
+    const durationMs = Date.now() - startTime
+    const fallback = fileChunks.length === 0
+
+    // FR-043: Log the interception asynchronously (non-blocking)
+    logReadInterception(projectRoot, {
+      timestamp: startTime,
+      filePath: args.filePath,
+      relPath,
+      chunksFound: fileChunks.length,
+      relatedContextCount: allRelated.length,
+      durationMs,
+      fallback,
+    }).catch(() => {})
+
+    if (DEBUG) {
+      console.log(
+        `[read-interceptor] ${relPath}: ${fileChunks.length} chunks, ${allRelated.length} related, ${durationMs}ms${fallback ? " (fallback)" : ""}`
+      )
+    }
+
+    if (fallback) {
       return `File "${relPath}" not indexed. Use original Read tool or run codeindex({ action: "reindex", index: "code" })`
     }
 
@@ -34,10 +108,6 @@ Use this instead of the standard Read tool for better context awareness.`,
       output += chunk.content + "\n\n"
     }
 
-    const allRelated = fileChunks
-      .flatMap(c => c.relatedContext || [])
-      .filter((r, i, arr) => arr.findIndex(x => x.chunk_id === r.chunk_id) === i)
-
     if (allRelated.length > 0) {
       output += `### Related Context\n\n`
       for (const rel of allRelated) {
@@ -52,3 +122,6 @@ Use this instead of the standard Read tool for better context awareness.`,
     return output
   },
 })
+
+// Export for testing
+export { logReadInterception, ReadLogEntry }
```
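Because the log is a plain JSON array capped at 500 entries, it can be inspected without any plugin machinery. A short sketch using the path hard-coded in `logReadInterception` and the `ReadLogEntry` fields defined above (the helper name is hypothetical):

```ts
// Sketch: summarize .opencode/vectors/read-intercept.log.json
import fs from "fs/promises"
import path from "path"

async function summarizeReadLog(projectRoot: string): Promise<void> {
  const logPath = path.join(projectRoot, ".opencode", "vectors", "read-intercept.log.json")
  const entries: Array<{ durationMs: number; fallback: boolean }> = JSON.parse(
    await fs.readFile(logPath, "utf-8")
  )
  const fallbacks = entries.filter(e => e.fallback).length
  const avgMs = entries.reduce((s, e) => s + e.durationMs, 0) / Math.max(entries.length, 1)
  console.log(`${entries.length} reads, ${fallbacks} fallbacks, avg ${avgMs.toFixed(1)}ms`)
}
```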