@comfanion/usethis_search 3.0.0-dev.0 → 3.0.0-dev.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/file-indexer.ts +13 -0
- package/index.ts +5 -1
- package/package.json +3 -1
- package/tools/codeindex.ts +155 -6
- package/tools/read-interceptor.ts +78 -5
- package/vectorizer/analyzers/lsp-analyzer.ts +225 -94
- package/vectorizer/analyzers/lsp-client.ts +369 -0
- package/vectorizer/graph-builder.ts +106 -3
- package/vectorizer/graph-db.ts +192 -0
- package/vectorizer/index.js +93 -9
- package/vectorizer/usage-tracker.ts +204 -0
package/file-indexer.ts
CHANGED
```diff
@@ -4,6 +4,7 @@ import fs from "fs/promises"
 import fsSync from "fs"
 
 import { CodebaseIndexer } from "./vectorizer/index.js"
+import { initGraphAPI } from "./api.js"
 
 /**
  * File Indexer Plugin
@@ -252,7 +253,19 @@ async function ensureIndexOnSessionStart(
   for (const [indexName, indexConfig] of Object.entries(config.indexes)) {
     if (!indexConfig.enabled) continue
     const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
+
     try {
+      // Initialize graph API for Mind plugin integration
+      try {
+        const graphDB = (indexer as any).graphDB
+        if (graphDB) {
+          initGraphAPI(graphDB)
+          log("Graph API initialized for Mind plugin")
+        }
+      } catch (error) {
+        debug("Failed to initialize graph API:", error)
+      }
+
       const indexExists = await hasIndex(projectRoot, indexName)
       const health = await indexer.checkHealth(config.exclude)
 
```
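`api.ts` itself is not part of this diff, so its shape can only be inferred from the call sites: `initGraphAPI(graphDB)` here, and `getRelatedFiles`, `getGraphEntries`, `isGraphAPIAvailable` in `index.ts` below. The following is a minimal sketch of the module-level singleton those call sites imply; only `getAllTriples()` is confirmed by this diff (it appears in `codeindex.ts`), everything else is an assumption:

```ts
// Hypothetical sketch of api.ts, inferred from the call sites in this diff.

interface Triple {
  subject: string
  predicate: string
  object: string
  file: string
  source: string
}

interface GraphDB {
  getAllTriples(): Promise<Triple[]>
}

let graphDB: GraphDB | null = null

/** Called by file-indexer.ts once the indexer exposes a graph database. */
export function initGraphAPI(db: GraphDB): void {
  graphDB = db
}

export function isGraphAPIAvailable(): boolean {
  return graphDB !== null
}

/** All graph edges touching a file (sketch; assumed filtering). */
export async function getGraphEntries(file: string): Promise<Triple[]> {
  if (!graphDB) return []
  const all = await graphDB.getAllTriples()
  return all.filter(t => t.file === file)
}

/** Targets connected to `file` via any edge (sketch; assumed object semantics). */
export async function getRelatedFiles(file: string): Promise<string[]> {
  const entries = await getGraphEntries(file)
  return [...new Set(entries.map(t => t.object))].filter(o => o !== file)
}
```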
package/index.ts
CHANGED
```diff
@@ -4,6 +4,7 @@ import search from "./tools/search"
 import codeindex from "./tools/codeindex"
 import readInterceptor from "./tools/read-interceptor"
 import FileIndexerPlugin from "./file-indexer"
+import { getRelatedFiles, getGraphEntries, isGraphAPIAvailable } from "./api.js"
 
 const UsethisSearchPlugin: Plugin = async (ctx) => {
   const fileIndexerHooks = await FileIndexerPlugin(ctx as any)
@@ -18,4 +19,7 @@ const UsethisSearchPlugin: Plugin = async (ctx) => {
   }
 }
 
-export default UsethisSearchPlugin
+export default UsethisSearchPlugin
+
+// Export graph API for other plugins (e.g., Mind)
+export { getRelatedFiles, getGraphEntries, isGraphAPIAvailable }
```
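The re-export makes the graph API importable by other OpenCode plugins. A hedged consumer-side sketch follows; the Mind plugin is only mentioned by name in this diff, so the import specifier and call signatures are assumptions:

```ts
// Hypothetical consumer (e.g., the Mind plugin named in the comments above).
// Guarding with isGraphAPIAvailable() matches file-indexer.ts, which only
// calls initGraphAPI() once an indexer with a graphDB has been initialized.
import { getRelatedFiles, isGraphAPIAvailable } from "@comfanion/usethis_search"

async function contextFor(file: string): Promise<string[]> {
  if (!isGraphAPIAvailable()) return [] // index not built yet this session
  return getRelatedFiles(file)
}
```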
package/package.json
CHANGED
```diff
@@ -1,6 +1,6 @@
 {
   "name": "@comfanion/usethis_search",
-  "version": "3.0.0-dev.0",
+  "version": "3.0.0-dev.1",
   "description": "OpenCode plugin: semantic search with graph-based context (v3: graph relations, 1-hop context, LSP + regex analyzers)",
   "type": "module",
   "main": "./index.ts",
@@ -24,9 +24,11 @@
     "vectorizer/query-cache.ts",
     "vectorizer/search-metrics.ts",
     "vectorizer/graph-db.ts",
+    "vectorizer/usage-tracker.ts",
     "vectorizer/graph-builder.ts",
     "vectorizer/analyzers/regex-analyzer.ts",
     "vectorizer/analyzers/lsp-analyzer.ts",
+    "vectorizer/analyzers/lsp-client.ts",
     "vectorizer/chunkers/markdown-chunker.ts",
     "vectorizer/chunkers/code-chunker.ts",
     "vectorizer/chunkers/chunker-factory.ts",
```
package/tools/codeindex.ts
CHANGED
```diff
@@ -61,6 +61,7 @@ Actions:
 - "list" → List all available indexes with stats
 - "reindex" → Re-index files using local vectorizer
 - "test" → Run gold dataset quality tests (if configured)
+- "validate-graph" → Validate graph consistency (orphaned triples, broken chunk refs)
 
 Available indexes:
 - "code" - Source code files
@@ -68,7 +69,7 @@ Available indexes:
 - "config" - Configuration files`,
 
   args: {
-    action: tool.schema.enum(["status", "list", "reindex", "test"]).describe("Action to perform"),
+    action: tool.schema.enum(["status", "list", "reindex", "test", "validate-graph"]).describe("Action to perform"),
     index: tool.schema.string().optional().default("code").describe("Index name: code, docs, config"),
     dir: tool.schema.string().optional().describe("Directory to index (default: project root)"),
   },
@@ -170,11 +171,27 @@ Available indexes:
 
         let indexed = 0
         let skipped = 0
-        for (const filePath of files) {
+        const total = files.length
+
+        // FR-053: Progress reporting during indexing + graph building
+        const progressLines: string[] = []
+        for (let i = 0; i < files.length; i++) {
+          const filePath = files[i]
          try {
            const wasIndexed = await indexer.indexFile(filePath)
-            if (wasIndexed) indexed++
-            else skipped++
+            if (wasIndexed) {
+              indexed++
+              // Log progress at 10%, 25%, 50%, 75%, 100% milestones
+              const pct = Math.round(((i + 1) / total) * 100)
+              if (pct === 10 || pct === 25 || pct === 50 || pct === 75 || pct === 100) {
+                const msg = `Building index + graph: ${i + 1}/${total} files (${pct}%)`
+                if (progressLines.length === 0 || progressLines[progressLines.length - 1] !== msg) {
+                  progressLines.push(msg)
+                }
+              }
+            } else {
+              skipped++
+            }
          } catch {}
        }
 
@@ -184,13 +201,21 @@ Available indexes:
         let output = `## Re-indexing Complete\n\n`
         output += `**Index:** ${indexName}\n`
         output += `**Directory:** ${args.dir || "(project root)"}\n`
-        output += `**Files found:** ${files.length}\n`
+        output += `**Files found:** ${total}\n`
         output += `**Files indexed:** ${indexed}\n`
         output += `**Files unchanged:** ${skipped}\n`
         output += `**Total chunks:** ${stats.chunkCount}\n`
         if (stats.features) {
           output += `**Chunking:** ${stats.features.chunking}\n`
         }
+
+        if (progressLines.length > 0) {
+          output += `\n**Build Progress:**\n`
+          for (const line of progressLines) {
+            output += `- ${line}\n`
+          }
+        }
+
         return output
       } catch (error: any) {
         return `Re-indexing failed: ${error.message || String(error)}`
@@ -273,6 +298,130 @@ Available indexes:
       }
     }
 
-
+    // NFR-031: Graph validation
+    if (args.action === "validate-graph") {
+      try {
+        const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
+
+        // Access internal graphDB and db
+        const graphDB = (indexer as any).graphDB
+        const db = (indexer as any).db
+
+        if (!graphDB) {
+          await indexer.unloadModel()
+          return `## Graph Validation: "${indexName}"\n\nNo graph database found. Run reindex first.`
+        }
+
+        // 1. Get all triples from graph
+        const allTriples = await graphDB.getAllTriples()
+
+        // 2. Get all chunk IDs from vector DB
+        const knownChunkIds = new Set<string>()
+        const tables = await db.tableNames()
+        if (tables.includes("chunks")) {
+          const table = await db.openTable("chunks")
+          const rows = await table.search([0]).limit(100000).execute()
+          for (const row of rows) {
+            if (row.chunk_id) knownChunkIds.add(row.chunk_id)
+          }
+        }
+
+        // 3. Validate: find orphaned triples (subject or object points to non-existent chunk)
+        const orphanedSubjects: Array<{ triple: string; missingId: string }> = []
+        const orphanedObjects: Array<{ triple: string; missingId: string }> = []
+        const predicateCounts: Record<string, number> = {}
+        const sourceCounts: Record<string, number> = {}
+        const fileCounts: Record<string, number> = {}
+
+        for (const t of allTriples) {
+          // Count predicates/sources
+          predicateCounts[t.predicate] = (predicateCounts[t.predicate] || 0) + 1
+          sourceCounts[t.source] = (sourceCounts[t.source] || 0) + 1
+          fileCounts[t.file] = (fileCounts[t.file] || 0) + 1
+
+          // Check subject (skip meta: prefixed subjects)
+          if (!t.subject.startsWith("meta:") && t.subject.startsWith("chunk_") && !knownChunkIds.has(t.subject)) {
+            orphanedSubjects.push({
+              triple: `${t.subject} --[${t.predicate}]--> ${t.object}`,
+              missingId: t.subject,
+            })
+          }
+
+          // Check object (skip non-chunk objects like file paths, hashes)
+          if (t.object.startsWith("chunk_") && !knownChunkIds.has(t.object)) {
+            orphanedObjects.push({
+              triple: `${t.subject} --[${t.predicate}]--> ${t.object}`,
+              missingId: t.object,
+            })
+          }
+        }
+
+        // 4. Get file metadata stats
+        const fileMeta = await graphDB.getAllFileMeta()
+
+        await indexer.unloadModel()
+
+        // 5. Build report
+        const totalOrphaned = orphanedSubjects.length + orphanedObjects.length
+        const isHealthy = totalOrphaned === 0
+
+        let output = `## Graph Validation: "${indexName}"\n\n`
+        output += `**Status:** ${isHealthy ? "HEALTHY" : "ISSUES FOUND"}\n\n`
+
+        output += `### Statistics\n`
+        output += `- **Total triples:** ${allTriples.length}\n`
+        output += `- **Known chunk IDs:** ${knownChunkIds.size}\n`
+        output += `- **Files with graph metadata:** ${fileMeta.length}\n`
+        output += `- **Unique files in graph:** ${Object.keys(fileCounts).length}\n\n`
+
+        output += `### Edge Types\n`
+        for (const [pred, count] of Object.entries(predicateCounts).sort((a, b) => b[1] - a[1])) {
+          output += `- **${pred}:** ${count}\n`
+        }
+        output += `\n`
+
+        output += `### Edge Sources\n`
+        for (const [source, count] of Object.entries(sourceCounts).sort((a, b) => b[1] - a[1])) {
+          output += `- **${source}:** ${count}\n`
+        }
+        output += `\n`
+
+        if (totalOrphaned > 0) {
+          output += `### Orphaned References (${totalOrphaned})\n\n`
+
+          if (orphanedSubjects.length > 0) {
+            output += `**Broken subjects** (${orphanedSubjects.length}):\n`
+            for (const o of orphanedSubjects.slice(0, 10)) {
+              output += `- \`${o.missingId}\` in: ${o.triple}\n`
+            }
+            if (orphanedSubjects.length > 10) {
+              output += `- ... and ${orphanedSubjects.length - 10} more\n`
+            }
+            output += `\n`
+          }
+
+          if (orphanedObjects.length > 0) {
+            output += `**Broken objects** (${orphanedObjects.length}):\n`
+            for (const o of orphanedObjects.slice(0, 10)) {
+              output += `- \`${o.missingId}\` in: ${o.triple}\n`
+            }
+            if (orphanedObjects.length > 10) {
+              output += `- ... and ${orphanedObjects.length - 10} more\n`
+            }
+            output += `\n`
+          }
+
+          output += `**Recommendation:** Run \`codeindex({ action: "reindex", index: "${indexName}" })\` to rebuild the graph.\n`
+        } else {
+          output += `### Integrity\nAll chunk references are valid. No orphaned triples found.\n`
+        }
+
+        return output
+      } catch (error: any) {
+        return `Graph validation failed: ${error.message || String(error)}`
+      }
+    }
+
+    return `Unknown action: ${args.action}. Use: status, list, reindex, test, or validate-graph`
   },
 })
```
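The integrity check at the heart of `validate-graph` is independent of LanceDB and the tool plumbing: it is a set-membership scan over the triples. A self-contained restatement of that technique, with assumed minimal types and the same `chunk_`/`meta:` prefix conventions as above:

```ts
interface Triple { subject: string; predicate: string; object: string }

/** Return triples whose subject or object names a chunk that no longer exists. */
function findOrphans(triples: Triple[], knownChunkIds: Set<string>): Triple[] {
  const refersToMissingChunk = (id: string) =>
    id.startsWith("chunk_") && !knownChunkIds.has(id)
  return triples.filter(
    t =>
      (!t.subject.startsWith("meta:") && refersToMissingChunk(t.subject)) ||
      refersToMissingChunk(t.object)
  )
}

// Example: one valid edge, one dangling object reference.
const known = new Set(["chunk_a", "chunk_b"])
const triples: Triple[] = [
  { subject: "chunk_a", predicate: "imports", object: "chunk_b" },
  { subject: "chunk_a", predicate: "imports", object: "chunk_gone" },
]
console.log(findOrphans(triples, known).length) // 1
```

A healthy report shows `**Status:** HEALTHY` plus the triple/chunk statistics and per-predicate and per-source edge counts; when orphans are found, the report lists up to ten broken subjects and objects and recommends `codeindex({ action: "reindex", index: "<name>" })`.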
package/tools/read-interceptor.ts
CHANGED
```diff
@@ -1,8 +1,53 @@
 import { tool } from "@opencode-ai/plugin"
 import path from "path"
+import fs from "fs/promises"
 
 import { CodebaseIndexer } from "../vectorizer/index.js"
 
+// FR-043: Logging for intercepted Read() calls
+const DEBUG = process.env.DEBUG?.includes("vectorizer") || process.env.DEBUG === "*"
+
+interface ReadLogEntry {
+  timestamp: number
+  filePath: string
+  relPath: string
+  chunksFound: number
+  relatedContextCount: number
+  durationMs: number
+  fallback: boolean
+}
+
+const LOG_MAX_ENTRIES = 500
+
+/**
+ * Append a log entry to the Read() interception log file.
+ * Non-blocking, non-fatal — errors are silently ignored.
+ */
+async function logReadInterception(projectRoot: string, entry: ReadLogEntry): Promise<void> {
+  try {
+    const logPath = path.join(projectRoot, ".opencode", "vectors", "read-intercept.log.json")
+    await fs.mkdir(path.dirname(logPath), { recursive: true })
+
+    let entries: ReadLogEntry[] = []
+    try {
+      const raw = await fs.readFile(logPath, "utf-8")
+      entries = JSON.parse(raw)
+    } catch {
+      // file doesn't exist or is invalid — start fresh
+    }
+
+    entries.push(entry)
+    // Cap log size to avoid unbounded growth
+    if (entries.length > LOG_MAX_ENTRIES) {
+      entries = entries.slice(-LOG_MAX_ENTRIES)
+    }
+
+    await fs.writeFile(logPath, JSON.stringify(entries, null, 2), "utf-8")
+  } catch {
+    // non-fatal — logging must never break Read
+  }
+}
+
 export default tool({
   description: `Read file with graph-aware context attachment. When available, this tool searches the file in the index and returns content + related context from the graph (imports, links, etc.).
 
@@ -13,17 +58,46 @@ Use this instead of the standard Read tool for better context awareness.`,
   },
 
   async execute(args) {
+    const startTime = Date.now()
     const projectRoot = process.cwd()
     const filePath = path.isAbsolute(args.filePath) ? args.filePath : path.join(projectRoot, args.filePath)
 
     const relPath = path.relative(projectRoot, filePath)
 
+    if (DEBUG) {
+      console.log(`[read-interceptor] Intercepted Read("${relPath}")`)
+    }
+
     const indexer = await new CodebaseIndexer(projectRoot, "code").init()
     const results = await indexer.search(relPath, 20, false, {})
    const fileChunks = results.filter(r => r.file === relPath)
     await indexer.unloadModel()
 
-    if (fileChunks.length === 0) {
+    const allRelated = fileChunks
+      .flatMap(c => c.relatedContext || [])
+      .filter((r, i, arr) => arr.findIndex(x => x.chunk_id === r.chunk_id) === i)
+
+    const durationMs = Date.now() - startTime
+    const fallback = fileChunks.length === 0
+
+    // FR-043: Log the interception asynchronously (non-blocking)
+    logReadInterception(projectRoot, {
+      timestamp: startTime,
+      filePath: args.filePath,
+      relPath,
+      chunksFound: fileChunks.length,
+      relatedContextCount: allRelated.length,
+      durationMs,
+      fallback,
+    }).catch(() => {})
+
+    if (DEBUG) {
+      console.log(
+        `[read-interceptor] ${relPath}: ${fileChunks.length} chunks, ${allRelated.length} related, ${durationMs}ms${fallback ? " (fallback)" : ""}`
+      )
+    }
+
+    if (fallback) {
       return `File "${relPath}" not indexed. Use original Read tool or run codeindex({ action: "reindex", index: "code" })`
     }
 
@@ -34,10 +108,6 @@ Use this instead of the standard Read tool for better context awareness.`,
       output += chunk.content + "\n\n"
     }
 
-    const allRelated = fileChunks
-      .flatMap(c => c.relatedContext || [])
-      .filter((r, i, arr) => arr.findIndex(x => x.chunk_id === r.chunk_id) === i)
-
     if (allRelated.length > 0) {
       output += `### Related Context\n\n`
       for (const rel of allRelated) {
@@ -52,3 +122,6 @@ Use this instead of the standard Read tool for better context awareness.`,
     return output
   },
 })
+
+// Export for testing
+export { logReadInterception, ReadLogEntry }
```
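Because the log is a plain JSON array capped at 500 entries, it can be inspected without any plugin machinery. A short sketch using the path hard-coded in `logReadInterception` and the `ReadLogEntry` fields defined above (the helper name is hypothetical):

```ts
// Sketch: summarize .opencode/vectors/read-intercept.log.json
import fs from "fs/promises"
import path from "path"

async function summarizeReadLog(projectRoot: string): Promise<void> {
  const logPath = path.join(projectRoot, ".opencode", "vectors", "read-intercept.log.json")
  const entries: Array<{ durationMs: number; fallback: boolean }> = JSON.parse(
    await fs.readFile(logPath, "utf-8")
  )
  const fallbacks = entries.filter(e => e.fallback).length
  const avgMs = entries.reduce((s, e) => s + e.durationMs, 0) / Math.max(entries.length, 1)
  console.log(`${entries.length} reads, ${fallbacks} fallbacks, avg ${avgMs.toFixed(1)}ms`)
}
```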