@comfanion/usethis_search 3.0.0-dev.1 → 3.0.0-dev.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/api.ts +92 -0
- package/file-indexer.ts +2 -2
- package/index.ts +19 -9
- package/package.json +3 -3
- package/tools/codeindex.ts +23 -6
- package/tools/search.ts +1 -1
- package/vectorizer/{index.js → index.ts} +22 -3
- package/tools/read-interceptor.ts +0 -127
package/api.ts
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* usethis_search API
|
|
3
|
+
*
|
|
4
|
+
* Exports internal functions for plugin-to-plugin communication.
|
|
5
|
+
* Used by Mind plugin for graph-based workspace management.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { GraphDB } from "./vectorizer/graph-db"
|
|
9
|
+
|
|
10
|
+
// Global GraphDB instance (shared across plugins)
|
|
11
|
+
let graphDBInstance: GraphDB | null = null
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* Initialize API with GraphDB instance
|
|
15
|
+
*/
|
|
16
|
+
export function initGraphAPI(db: GraphDB): void {
|
|
17
|
+
graphDBInstance = db
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* Get related files for a given file path
|
|
22
|
+
*
|
|
23
|
+
* @param filePath - File path to get relations for
|
|
24
|
+
* @param maxDepth - Maximum graph depth to traverse (default: 1)
|
|
25
|
+
* @returns Array of related files with relation type and weight
|
|
26
|
+
*
|
|
27
|
+
* Example:
|
|
28
|
+
* ```javascript
|
|
29
|
+
* const related = await getRelatedFiles("src/auth/login.ts", 1)
|
|
30
|
+
* // Returns:
|
|
31
|
+
* [
|
|
32
|
+
* { path: "src/types/User.ts", relation: "imports", weight: 0.9 },
|
|
33
|
+
* { path: "src/auth/BaseAuth.ts", relation: "extends", weight: 0.95 },
|
|
34
|
+
* { path: "src/routes/api.ts", relation: "used_by", weight: 0.8 }
|
|
35
|
+
* ]
|
|
36
|
+
* ```
|
|
37
|
+
*/
|
|
38
|
+
export async function getRelatedFiles(
|
|
39
|
+
filePath: string,
|
|
40
|
+
maxDepth: number = 1
|
|
41
|
+
): Promise<{path: string, relation: string, weight: number}[]> {
|
|
42
|
+
if (!graphDBInstance) {
|
|
43
|
+
console.warn("[usethis_search API] GraphDB not initialized. Returning empty array.")
|
|
44
|
+
return []
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
try {
|
|
48
|
+
const chunkId = `file:${filePath}`
|
|
49
|
+
const related = await graphDBInstance.getRelatedFiles(chunkId, maxDepth)
|
|
50
|
+
|
|
51
|
+
// Filter out the input file itself (it might appear in the graph)
|
|
52
|
+
const filtered = related.filter(r => r.path !== filePath)
|
|
53
|
+
|
|
54
|
+
return filtered
|
|
55
|
+
} catch (error) {
|
|
56
|
+
console.error(`[usethis_search API] Error getting related files for ${filePath}:`, error)
|
|
57
|
+
return []
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
/**
|
|
62
|
+
* Check if graph API is available
|
|
63
|
+
*/
|
|
64
|
+
export function isGraphAPIAvailable(): boolean {
|
|
65
|
+
return graphDBInstance !== null
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
/**
|
|
69
|
+
* Get all graph entries for a file (both incoming and outgoing)
|
|
70
|
+
*/
|
|
71
|
+
export async function getGraphEntries(filePath: string) {
|
|
72
|
+
if (!graphDBInstance) {
|
|
73
|
+
return null
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
try {
|
|
77
|
+
const chunkId = `file:${filePath}`
|
|
78
|
+
const [outgoing, incoming] = await Promise.all([
|
|
79
|
+
graphDBInstance.getOutgoing(chunkId),
|
|
80
|
+
graphDBInstance.getIncoming(chunkId),
|
|
81
|
+
])
|
|
82
|
+
|
|
83
|
+
return {
|
|
84
|
+
imports: outgoing.filter(t => t.predicate === "imports"),
|
|
85
|
+
extends: outgoing.filter(t => t.predicate === "extends"),
|
|
86
|
+
used_by: incoming,
|
|
87
|
+
}
|
|
88
|
+
} catch (error) {
|
|
89
|
+
console.error(`[usethis_search API] Error getting graph entries for ${filePath}:`, error)
|
|
90
|
+
return null
|
|
91
|
+
}
|
|
92
|
+
}
|
package/file-indexer.ts
CHANGED
|
@@ -3,8 +3,8 @@ import path from "path"
|
|
|
3
3
|
import fs from "fs/promises"
|
|
4
4
|
import fsSync from "fs"
|
|
5
5
|
|
|
6
|
-
import { CodebaseIndexer } from "./vectorizer/index.js"
|
|
7
|
-
import { initGraphAPI } from "./api.js"
|
|
6
|
+
import { CodebaseIndexer } from "./vectorizer/index.ts"
|
|
7
|
+
import { initGraphAPI } from "./api"
|
|
8
8
|
|
|
9
9
|
/**
|
|
10
10
|
* File Indexer Plugin
|
package/index.ts
CHANGED
|
@@ -2,24 +2,34 @@ import type { Plugin } from "@opencode-ai/plugin"
|
|
|
2
2
|
|
|
3
3
|
import search from "./tools/search"
|
|
4
4
|
import codeindex from "./tools/codeindex"
|
|
5
|
-
import readInterceptor from "./tools/read-interceptor"
|
|
6
5
|
import FileIndexerPlugin from "./file-indexer"
|
|
7
|
-
import { getRelatedFiles, getGraphEntries, isGraphAPIAvailable } from "./api.js"
|
|
8
6
|
|
|
9
|
-
const UsethisSearchPlugin: Plugin = async (
|
|
10
|
-
|
|
7
|
+
const UsethisSearchPlugin: Plugin = async ({ directory, client }) => {
|
|
8
|
+
// Start file indexer (background indexing + event handling)
|
|
9
|
+
let fileIndexerEvent: ((args: any) => Promise<void>) | null = null
|
|
10
|
+
try {
|
|
11
|
+
const hooks = await FileIndexerPlugin({ directory, client } as any)
|
|
12
|
+
fileIndexerEvent = hooks?.event || null
|
|
13
|
+
} catch {
|
|
14
|
+
// file indexer init failed — tools still work, just no auto-indexing
|
|
15
|
+
}
|
|
11
16
|
|
|
12
17
|
return {
|
|
13
|
-
...fileIndexerHooks,
|
|
14
18
|
tool: {
|
|
15
19
|
search,
|
|
16
20
|
codeindex,
|
|
17
|
-
|
|
21
|
+
},
|
|
22
|
+
|
|
23
|
+
event: async (args: any) => {
|
|
24
|
+
if (fileIndexerEvent) {
|
|
25
|
+
try {
|
|
26
|
+
await fileIndexerEvent(args)
|
|
27
|
+
} catch {
|
|
28
|
+
// non-fatal
|
|
29
|
+
}
|
|
30
|
+
}
|
|
18
31
|
},
|
|
19
32
|
}
|
|
20
33
|
}
|
|
21
34
|
|
|
22
35
|
export default UsethisSearchPlugin
|
|
23
|
-
|
|
24
|
-
// Export graph API for other plugins (e.g., Mind)
|
|
25
|
-
export { getRelatedFiles, getGraphEntries, isGraphAPIAvailable }
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@comfanion/usethis_search",
|
|
3
|
-
"version": "3.0.0-dev.1",
|
|
3
|
+
"version": "3.0.0-dev.10",
|
|
4
4
|
"description": "OpenCode plugin: semantic search with graph-based context (v3: graph relations, 1-hop context, LSP + regex analyzers)",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./index.ts",
|
|
@@ -12,11 +12,11 @@
|
|
|
12
12
|
},
|
|
13
13
|
"files": [
|
|
14
14
|
"index.ts",
|
|
15
|
+
"api.ts",
|
|
15
16
|
"file-indexer.ts",
|
|
16
17
|
"tools/search.ts",
|
|
17
18
|
"tools/codeindex.ts",
|
|
18
|
-
"tools/read-interceptor.ts",
|
|
19
|
-
"vectorizer/index.js",
|
|
19
|
+
"vectorizer/index.ts",
|
|
20
20
|
"vectorizer/content-cleaner.ts",
|
|
21
21
|
"vectorizer/metadata-extractor.ts",
|
|
22
22
|
"vectorizer/bm25-index.ts",
|
package/tools/codeindex.ts
CHANGED
|
@@ -9,7 +9,7 @@ import { tool } from "@opencode-ai/plugin"
|
|
|
9
9
|
import path from "path"
|
|
10
10
|
import fs from "fs/promises"
|
|
11
11
|
|
|
12
|
-
import { CodebaseIndexer } from "../vectorizer/index.js"
|
|
12
|
+
import { CodebaseIndexer } from "../vectorizer/index.ts"
|
|
13
13
|
|
|
14
14
|
const INDEX_EXTENSIONS: Record<string, string[]> = {
|
|
15
15
|
code: [".js", ".ts", ".jsx", ".tsx", ".go", ".py", ".rs", ".java", ".kt", ".swift", ".c", ".cpp", ".h", ".cs", ".rb", ".php"],
|
|
@@ -313,16 +313,27 @@ Available indexes:
|
|
|
313
313
|
}
|
|
314
314
|
|
|
315
315
|
// 1. Get all triples from graph
|
|
316
|
-
|
|
316
|
+
let allTriples: any[] = []
|
|
317
|
+
try {
|
|
318
|
+
allTriples = await graphDB.getAllTriples()
|
|
319
|
+
} catch (e: any) {
|
|
320
|
+
await indexer.unloadModel()
|
|
321
|
+
return `## Graph Validation: "${indexName}"\n\n**Error:** Failed to read graph database: ${e.message || String(e)}\n\nThe graph database may be corrupted. Run: codeindex({ action: "reindex", index: "${indexName}" })`
|
|
322
|
+
}
|
|
317
323
|
|
|
318
324
|
// 2. Get all chunk IDs from vector DB
|
|
319
325
|
const knownChunkIds = new Set<string>()
|
|
320
326
|
const tables = await db.tableNames()
|
|
321
327
|
if (tables.includes("chunks")) {
|
|
322
328
|
const table = await db.openTable("chunks")
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
329
|
+
try {
|
|
330
|
+
const rows = await table.search([0]).limit(100000).execute()
|
|
331
|
+
for (const row of rows) {
|
|
332
|
+
if (row.chunk_id) knownChunkIds.add(row.chunk_id)
|
|
333
|
+
}
|
|
334
|
+
} catch (e: any) {
|
|
335
|
+
await indexer.unloadModel()
|
|
336
|
+
return `## Graph Validation: "${indexName}"\n\n**Error:** Failed to read vector database: ${e.message || String(e)}\n\nThe vector database may be corrupted. Run: codeindex({ action: "reindex", index: "${indexName}" })`
|
|
326
337
|
}
|
|
327
338
|
}
|
|
328
339
|
|
|
@@ -357,7 +368,13 @@ Available indexes:
|
|
|
357
368
|
}
|
|
358
369
|
|
|
359
370
|
// 4. Get file metadata stats
|
|
360
|
-
|
|
371
|
+
let fileMeta: Array<{ filePath: string; hash: string; timestamp: number }> = []
|
|
372
|
+
try {
|
|
373
|
+
fileMeta = await graphDB.getAllFileMeta()
|
|
374
|
+
} catch (e: any) {
|
|
375
|
+
// Non-fatal - continue validation without metadata
|
|
376
|
+
console.warn(`Warning: Failed to get file metadata: ${e.message || String(e)}`)
|
|
377
|
+
}
|
|
361
378
|
|
|
362
379
|
await indexer.unloadModel()
|
|
363
380
|
|
package/tools/search.ts
CHANGED
|
@@ -10,7 +10,7 @@ import { tool } from "@opencode-ai/plugin"
|
|
|
10
10
|
import path from "path"
|
|
11
11
|
import fs from "fs/promises"
|
|
12
12
|
|
|
13
|
-
import { CodebaseIndexer } from "../vectorizer/index.js"
|
|
13
|
+
import { CodebaseIndexer } from "../vectorizer/index.ts"
|
|
14
14
|
|
|
15
15
|
export default tool({
|
|
16
16
|
description: `Search the codebase semantically. Use this to find relevant code snippets, functions, or files based on meaning, not just text matching.
|
package/vectorizer/{index.js → index.ts}
CHANGED
|
@@ -603,7 +603,13 @@ class CodebaseIndexer {
|
|
|
603
603
|
if (!tables.includes(tableName)) return null;
|
|
604
604
|
|
|
605
605
|
const table = await this.db.openTable(tableName);
|
|
606
|
-
|
|
606
|
+
let allRows;
|
|
607
|
+
try {
|
|
608
|
+
allRows = await table.search([0]).limit(100000).execute();
|
|
609
|
+
} catch (e) {
|
|
610
|
+
if (DEBUG) console.log("[vectorizer] BM25 index build failed (corrupted table?):", e.message);
|
|
611
|
+
return null;
|
|
612
|
+
}
|
|
607
613
|
|
|
608
614
|
if (allRows.length === 0) return null;
|
|
609
615
|
|
|
@@ -643,7 +649,14 @@ class CodebaseIndexer {
|
|
|
643
649
|
(options.tags && options.tags.length > 0);
|
|
644
650
|
const isHybrid = HYBRID_CONFIG.enabled || options.hybrid;
|
|
645
651
|
const fetchLimit = (hasFilters || isHybrid) ? Math.max(limit * 3, 50) : limit;
|
|
646
|
-
let results = await table.search(queryEmbedding).limit(fetchLimit).execute();
|
|
652
|
+
let results;
|
|
653
|
+
try {
|
|
654
|
+
results = await table.search(queryEmbedding).limit(fetchLimit).execute();
|
|
655
|
+
} catch (e) {
|
|
656
|
+
// LanceDB schema error (e.g. missing vector column) — index is corrupted
|
|
657
|
+
if (DEBUG) console.log("[vectorizer] Vector search failed (corrupted index?):", e.message);
|
|
658
|
+
return [];
|
|
659
|
+
}
|
|
647
660
|
|
|
648
661
|
// ── Hybrid search ───────────────────────────────────────────────────────
|
|
649
662
|
if (HYBRID_CONFIG.enabled || options.hybrid) {
|
|
@@ -833,7 +846,13 @@ class CodebaseIndexer {
|
|
|
833
846
|
if (!tables.includes(tableName)) return null;
|
|
834
847
|
|
|
835
848
|
const table = await this.db.openTable(tableName);
|
|
836
|
-
|
|
849
|
+
let rows;
|
|
850
|
+
try {
|
|
851
|
+
rows = await table.search([0]).limit(100000).execute();
|
|
852
|
+
} catch (e) {
|
|
853
|
+
if (DEBUG) console.log("[vectorizer] Chunk cache build failed (corrupted table?):", e.message);
|
|
854
|
+
return null;
|
|
855
|
+
}
|
|
837
856
|
this._chunkCache = new Map();
|
|
838
857
|
for (const row of rows) {
|
|
839
858
|
if (row.chunk_id) {
|
package/tools/read-interceptor.ts
DELETED
|
@@ -1,127 +0,0 @@
|
|
|
1
|
-
import { tool } from "@opencode-ai/plugin"
|
|
2
|
-
import path from "path"
|
|
3
|
-
import fs from "fs/promises"
|
|
4
|
-
|
|
5
|
-
import { CodebaseIndexer } from "../vectorizer/index.js"
|
|
6
|
-
|
|
7
|
-
// FR-043: Logging for intercepted Read() calls
|
|
8
|
-
const DEBUG = process.env.DEBUG?.includes("vectorizer") || process.env.DEBUG === "*"
|
|
9
|
-
|
|
10
|
-
interface ReadLogEntry {
|
|
11
|
-
timestamp: number
|
|
12
|
-
filePath: string
|
|
13
|
-
relPath: string
|
|
14
|
-
chunksFound: number
|
|
15
|
-
relatedContextCount: number
|
|
16
|
-
durationMs: number
|
|
17
|
-
fallback: boolean
|
|
18
|
-
}
|
|
19
|
-
|
|
20
|
-
const LOG_MAX_ENTRIES = 500
|
|
21
|
-
|
|
22
|
-
/**
|
|
23
|
-
* Append a log entry to the Read() interception log file.
|
|
24
|
-
* Non-blocking, non-fatal — errors are silently ignored.
|
|
25
|
-
*/
|
|
26
|
-
async function logReadInterception(projectRoot: string, entry: ReadLogEntry): Promise<void> {
|
|
27
|
-
try {
|
|
28
|
-
const logPath = path.join(projectRoot, ".opencode", "vectors", "read-intercept.log.json")
|
|
29
|
-
await fs.mkdir(path.dirname(logPath), { recursive: true })
|
|
30
|
-
|
|
31
|
-
let entries: ReadLogEntry[] = []
|
|
32
|
-
try {
|
|
33
|
-
const raw = await fs.readFile(logPath, "utf-8")
|
|
34
|
-
entries = JSON.parse(raw)
|
|
35
|
-
} catch {
|
|
36
|
-
// file doesn't exist or is invalid — start fresh
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
entries.push(entry)
|
|
40
|
-
// Cap log size to avoid unbounded growth
|
|
41
|
-
if (entries.length > LOG_MAX_ENTRIES) {
|
|
42
|
-
entries = entries.slice(-LOG_MAX_ENTRIES)
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
await fs.writeFile(logPath, JSON.stringify(entries, null, 2), "utf-8")
|
|
46
|
-
} catch {
|
|
47
|
-
// non-fatal — logging must never break Read
|
|
48
|
-
}
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
export default tool({
|
|
52
|
-
description: `Read file with graph-aware context attachment. When available, this tool searches the file in the index and returns content + related context from the graph (imports, links, etc.).
|
|
53
|
-
|
|
54
|
-
Use this instead of the standard Read tool for better context awareness.`,
|
|
55
|
-
|
|
56
|
-
args: {
|
|
57
|
-
filePath: tool.schema.string().describe("Path to the file to read"),
|
|
58
|
-
},
|
|
59
|
-
|
|
60
|
-
async execute(args) {
|
|
61
|
-
const startTime = Date.now()
|
|
62
|
-
const projectRoot = process.cwd()
|
|
63
|
-
const filePath = path.isAbsolute(args.filePath) ? args.filePath : path.join(projectRoot, args.filePath)
|
|
64
|
-
|
|
65
|
-
const relPath = path.relative(projectRoot, filePath)
|
|
66
|
-
|
|
67
|
-
if (DEBUG) {
|
|
68
|
-
console.log(`[read-interceptor] Intercepted Read("${relPath}")`)
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
const indexer = await new CodebaseIndexer(projectRoot, "code").init()
|
|
72
|
-
const results = await indexer.search(relPath, 20, false, {})
|
|
73
|
-
const fileChunks = results.filter(r => r.file === relPath)
|
|
74
|
-
await indexer.unloadModel()
|
|
75
|
-
|
|
76
|
-
const allRelated = fileChunks
|
|
77
|
-
.flatMap(c => c.relatedContext || [])
|
|
78
|
-
.filter((r, i, arr) => arr.findIndex(x => x.chunk_id === r.chunk_id) === i)
|
|
79
|
-
|
|
80
|
-
const durationMs = Date.now() - startTime
|
|
81
|
-
const fallback = fileChunks.length === 0
|
|
82
|
-
|
|
83
|
-
// FR-043: Log the interception asynchronously (non-blocking)
|
|
84
|
-
logReadInterception(projectRoot, {
|
|
85
|
-
timestamp: startTime,
|
|
86
|
-
filePath: args.filePath,
|
|
87
|
-
relPath,
|
|
88
|
-
chunksFound: fileChunks.length,
|
|
89
|
-
relatedContextCount: allRelated.length,
|
|
90
|
-
durationMs,
|
|
91
|
-
fallback,
|
|
92
|
-
}).catch(() => {})
|
|
93
|
-
|
|
94
|
-
if (DEBUG) {
|
|
95
|
-
console.log(
|
|
96
|
-
`[read-interceptor] ${relPath}: ${fileChunks.length} chunks, ${allRelated.length} related, ${durationMs}ms${fallback ? " (fallback)" : ""}`
|
|
97
|
-
)
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
if (fallback) {
|
|
101
|
-
return `File "${relPath}" not indexed. Use original Read tool or run codeindex({ action: "reindex", index: "code" })`
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
let output = `## ${relPath}\n\n`
|
|
105
|
-
|
|
106
|
-
output += `### Content\n\n`
|
|
107
|
-
for (const chunk of fileChunks) {
|
|
108
|
-
output += chunk.content + "\n\n"
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
if (allRelated.length > 0) {
|
|
112
|
-
output += `### Related Context\n\n`
|
|
113
|
-
for (const rel of allRelated) {
|
|
114
|
-
const snippet = rel.content.length > 300
|
|
115
|
-
? rel.content.substring(0, 300) + "..."
|
|
116
|
-
: rel.content
|
|
117
|
-
output += `**${rel.file}** (${rel.relation})\n`
|
|
118
|
-
output += `\`\`\`\n${snippet}\n\`\`\`\n\n`
|
|
119
|
-
}
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
return output
|
|
123
|
-
},
|
|
124
|
-
})
|
|
125
|
-
|
|
126
|
-
// Export for testing
|
|
127
|
-
export { logReadInterception, ReadLogEntry }
|