@comfanion/usethis_search 3.0.0-dev.9 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.ts +263 -0
- package/file-indexer.ts +1 -1
- package/index.ts +0 -2
- package/package.json +12 -5
- package/tools/codeindex.ts +2 -2
- package/tools/search.ts +254 -66
- package/vectorizer/analyzers/lsp-analyzer.ts +7 -7
- package/vectorizer/analyzers/regex-analyzer.ts +358 -61
- package/vectorizer/chunk-store.ts +207 -0
- package/vectorizer/chunkers/code-chunker.ts +74 -24
- package/vectorizer/chunkers/markdown-chunker.ts +69 -7
- package/vectorizer/graph-builder.ts +207 -15
- package/vectorizer/graph-db.ts +161 -164
- package/vectorizer/hybrid-search.ts +1 -1
- package/vectorizer/{index.js → index.ts} +796 -160
- package/vectorizer.yaml +20 -2
package/cli.ts
ADDED
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* CLI for usethis_search index management.
|
|
4
|
+
*
|
|
5
|
+
* Usage:
|
|
6
|
+
* bun run cli.ts <action> [options]
|
|
7
|
+
*
|
|
8
|
+
* Actions:
|
|
9
|
+
* reindex [--index code|docs|config] Re-index files
|
|
10
|
+
* status [--index code|docs|config] Show index stats
|
|
11
|
+
* list List all indexes
|
|
12
|
+
* clear [--index code|docs|config] Clear index data (run reindex afterwards to rebuild)
|
|
13
|
+
* validate-graph [--index code] Validate graph consistency
|
|
14
|
+
*
|
|
15
|
+
* Examples:
|
|
16
|
+
* bun run cli.ts reindex
|
|
17
|
+
* bun run cli.ts reindex --index docs
|
|
18
|
+
* bun run cli.ts status --index code
|
|
19
|
+
* bun run cli.ts list
|
|
20
|
+
* bun run cli.ts clear --index code
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import path from "path"
|
|
24
|
+
import fs from "fs/promises"
|
|
25
|
+
import { CodebaseIndexer } from "./vectorizer/index.ts"
|
|
26
|
+
|
|
27
|
+
const args = process.argv.slice(2)
|
|
28
|
+
const action = args[0]
|
|
29
|
+
const indexFlag = args.indexOf("--index")
|
|
30
|
+
const indexName = indexFlag !== -1 && args[indexFlag + 1] ? args[indexFlag + 1] : "code"
|
|
31
|
+
const projectRoot = process.cwd()
|
|
32
|
+
|
|
33
|
+
function formatDuration(ms: number): string {
|
|
34
|
+
const seconds = ms / 1000
|
|
35
|
+
if (seconds < 60) return `${seconds.toFixed(1)}s`
|
|
36
|
+
const mins = Math.floor(seconds / 60)
|
|
37
|
+
const secs = Math.round(seconds % 60)
|
|
38
|
+
return secs > 0 ? `${mins}m ${secs}s` : `${mins}m`
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
async function reindex() {
|
|
42
|
+
console.log(`\n Reindexing "${indexName}"...\n`)
|
|
43
|
+
const start = Date.now()
|
|
44
|
+
|
|
45
|
+
const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
|
|
46
|
+
const stats = await indexer.indexAll(
|
|
47
|
+
(indexed: number, total: number, file: string, fileNum: number) => {
|
|
48
|
+
const pct = Math.round((fileNum / total) * 100)
|
|
49
|
+
process.stdout.write(`\r [${pct}%] ${fileNum}/${total} — ${file}`)
|
|
50
|
+
},
|
|
51
|
+
)
|
|
52
|
+
await indexer.unloadModel()
|
|
53
|
+
|
|
54
|
+
const elapsed = formatDuration(Date.now() - start)
|
|
55
|
+
console.log(`\n\n Done in ${elapsed}`)
|
|
56
|
+
console.log(` Files indexed: ${stats.indexed}`)
|
|
57
|
+
console.log(` Files unchanged: ${stats.skipped}`)
|
|
58
|
+
console.log(` Total files: ${stats.total}`)
|
|
59
|
+
if (stats.semanticEdges > 0) {
|
|
60
|
+
console.log(` Semantic edges: ${stats.semanticEdges}`)
|
|
61
|
+
}
|
|
62
|
+
console.log()
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
async function status() {
|
|
66
|
+
try {
|
|
67
|
+
const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
|
|
68
|
+
const stats = await indexer.getStats()
|
|
69
|
+
await indexer.unloadModel()
|
|
70
|
+
|
|
71
|
+
console.log(`\n Index: "${indexName}"`)
|
|
72
|
+
console.log(` Description: ${stats.description}`)
|
|
73
|
+
console.log(` Model: ${stats.model}`)
|
|
74
|
+
console.log(` Files: ${stats.fileCount}`)
|
|
75
|
+
console.log(` Chunks: ${stats.chunkCount}`)
|
|
76
|
+
if (stats.features) {
|
|
77
|
+
console.log(` Chunking: ${stats.features.chunking}`)
|
|
78
|
+
console.log(` Hybrid: ${stats.features.hybrid ? "on" : "off"}`)
|
|
79
|
+
console.log(` BM25 weight: ${stats.features.bm25_weight}`)
|
|
80
|
+
console.log(` Metrics: ${stats.features.metrics ? "on" : "off"}`)
|
|
81
|
+
console.log(` Cache: ${stats.features.cache ? "on" : "off"}`)
|
|
82
|
+
}
|
|
83
|
+
console.log()
|
|
84
|
+
} catch {
|
|
85
|
+
console.log(`\n Index "${indexName}" not found. Run: bun run cli.ts reindex --index ${indexName}\n`)
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
async function list() {
|
|
90
|
+
const vectorsDir = path.join(projectRoot, ".opencode", "vectors")
|
|
91
|
+
let indexes: string[] = []
|
|
92
|
+
|
|
93
|
+
try {
|
|
94
|
+
const entries = await fs.readdir(vectorsDir, { withFileTypes: true })
|
|
95
|
+
indexes = entries.filter(e => e.isDirectory()).map(e => e.name)
|
|
96
|
+
} catch {
|
|
97
|
+
console.log(`\n No indexes found. Run: bun run cli.ts reindex\n`)
|
|
98
|
+
return
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
if (indexes.length === 0) {
|
|
102
|
+
console.log(`\n No indexes found. Run: bun run cli.ts reindex\n`)
|
|
103
|
+
return
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
console.log(`\n Indexes:\n`)
|
|
107
|
+
for (const idx of indexes) {
|
|
108
|
+
try {
|
|
109
|
+
const indexer = await new CodebaseIndexer(projectRoot, idx).init()
|
|
110
|
+
const stats = await indexer.getStats()
|
|
111
|
+
await indexer.unloadModel()
|
|
112
|
+
const hybrid = stats.features?.hybrid ? " [hybrid]" : ""
|
|
113
|
+
console.log(` ${idx.padEnd(10)} ${stats.fileCount} files, ${stats.chunkCount} chunks${hybrid}`)
|
|
114
|
+
} catch {
|
|
115
|
+
console.log(` ${idx.padEnd(10)} (error reading)`)
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
console.log()
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
async function clear() {
|
|
122
|
+
console.log(`\n Clearing "${indexName}" index...`)
|
|
123
|
+
const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
|
|
124
|
+
await indexer.clear()
|
|
125
|
+
await indexer.unloadModel()
|
|
126
|
+
console.log(` Cleared. Run: bun run cli.ts reindex --index ${indexName}\n`)
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
async function validateGraph() {
|
|
130
|
+
console.log(`\n Validating graph for "${indexName}"...\n`)
|
|
131
|
+
|
|
132
|
+
const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
|
|
133
|
+
const graphDB = (indexer as any).graphDB
|
|
134
|
+
const db = (indexer as any).db
|
|
135
|
+
|
|
136
|
+
if (!graphDB) {
|
|
137
|
+
await indexer.unloadModel()
|
|
138
|
+
console.log(` No graph database found. Run reindex first.\n`)
|
|
139
|
+
return
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
// Get all triples
|
|
143
|
+
let allTriples: any[] = []
|
|
144
|
+
try {
|
|
145
|
+
allTriples = await graphDB.getAllTriples()
|
|
146
|
+
} catch (e: any) {
|
|
147
|
+
await indexer.unloadModel()
|
|
148
|
+
console.log(` Error reading graph: ${e.message}\n`)
|
|
149
|
+
return
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
// Get all chunk IDs from vector DB
|
|
153
|
+
const knownChunkIds = new Set<string>()
|
|
154
|
+
const tables = await db.tableNames()
|
|
155
|
+
if (tables.includes("chunks")) {
|
|
156
|
+
const table = await db.openTable("chunks")
|
|
157
|
+
try {
|
|
158
|
+
const rows = await table.filter("true").limit(100000).execute()
|
|
159
|
+
for (const row of rows) {
|
|
160
|
+
if (row.chunk_id) knownChunkIds.add(row.chunk_id)
|
|
161
|
+
}
|
|
162
|
+
} catch (e: any) {
|
|
163
|
+
await indexer.unloadModel()
|
|
164
|
+
console.log(` Error reading vector DB: ${e.message}\n`)
|
|
165
|
+
return
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
// Find orphaned triples
|
|
170
|
+
const predicateCounts: Record<string, number> = {}
|
|
171
|
+
const sourceCounts: Record<string, number> = {}
|
|
172
|
+
let orphaned = 0
|
|
173
|
+
|
|
174
|
+
for (const t of allTriples) {
|
|
175
|
+
predicateCounts[t.predicate] = (predicateCounts[t.predicate] || 0) + 1
|
|
176
|
+
sourceCounts[t.source] = (sourceCounts[t.source] || 0) + 1
|
|
177
|
+
|
|
178
|
+
if (t.subject.startsWith("chunk:") && !knownChunkIds.has(t.subject)) orphaned++
|
|
179
|
+
if (t.object.startsWith("chunk:") && !knownChunkIds.has(t.object)) orphaned++
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
await indexer.unloadModel()
|
|
183
|
+
|
|
184
|
+
console.log(` Total triples: ${allTriples.length}`)
|
|
185
|
+
console.log(` Known chunks: ${knownChunkIds.size}`)
|
|
186
|
+
console.log(` Orphaned refs: ${orphaned}`)
|
|
187
|
+
console.log()
|
|
188
|
+
|
|
189
|
+
console.log(` Edge types:`)
|
|
190
|
+
for (const [pred, count] of Object.entries(predicateCounts).sort((a, b) => b[1] - a[1])) {
|
|
191
|
+
console.log(` ${pred.padEnd(22)} ${count}`)
|
|
192
|
+
}
|
|
193
|
+
console.log()
|
|
194
|
+
|
|
195
|
+
console.log(` Edge sources:`)
|
|
196
|
+
for (const [source, count] of Object.entries(sourceCounts).sort((a, b) => b[1] - a[1])) {
|
|
197
|
+
console.log(` ${source.padEnd(22)} ${count}`)
|
|
198
|
+
}
|
|
199
|
+
console.log()
|
|
200
|
+
|
|
201
|
+
if (orphaned > 0) {
|
|
202
|
+
console.log(` ⚠ Found ${orphaned} orphaned references. Run: bun run cli.ts reindex --index ${indexName}`)
|
|
203
|
+
} else {
|
|
204
|
+
console.log(` ✓ Graph is healthy`)
|
|
205
|
+
}
|
|
206
|
+
console.log()
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
// ── Main ────────────────────────────────────────────────────────────────────
|
|
210
|
+
|
|
211
|
+
async function main() {
|
|
212
|
+
if (!action || action === "help" || action === "--help" || action === "-h") {
|
|
213
|
+
console.log(`
|
|
214
|
+
usethis_search CLI — index management
|
|
215
|
+
|
|
216
|
+
Usage: bun run cli.ts <action> [--index <name>]
|
|
217
|
+
|
|
218
|
+
Actions:
|
|
219
|
+
reindex Re-index files (default: code)
|
|
220
|
+
status Show index statistics
|
|
221
|
+
list List all indexes
|
|
222
|
+
clear Clear index data
|
|
223
|
+
validate-graph Check graph consistency
|
|
224
|
+
|
|
225
|
+
Options:
|
|
226
|
+
--index <name> Index name: code, docs, config (default: code)
|
|
227
|
+
|
|
228
|
+
Examples:
|
|
229
|
+
bun run cli.ts reindex
|
|
230
|
+
bun run cli.ts reindex --index docs
|
|
231
|
+
bun run cli.ts status
|
|
232
|
+
bun run cli.ts list
|
|
233
|
+
bun run cli.ts clear --index code
|
|
234
|
+
bun run cli.ts validate-graph
|
|
235
|
+
`)
|
|
236
|
+
return
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
switch (action) {
|
|
240
|
+
case "reindex":
|
|
241
|
+
await reindex()
|
|
242
|
+
break
|
|
243
|
+
case "status":
|
|
244
|
+
await status()
|
|
245
|
+
break
|
|
246
|
+
case "list":
|
|
247
|
+
await list()
|
|
248
|
+
break
|
|
249
|
+
case "clear":
|
|
250
|
+
await clear()
|
|
251
|
+
break
|
|
252
|
+
case "validate-graph":
|
|
253
|
+
await validateGraph()
|
|
254
|
+
break
|
|
255
|
+
default:
|
|
256
|
+
console.log(`\n Unknown action: "${action}". Run: bun run cli.ts help\n`)
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
main().catch((e) => {
|
|
261
|
+
console.error(`\n Fatal error: ${e.message}\n`)
|
|
262
|
+
process.exit(1)
|
|
263
|
+
})
|
package/file-indexer.ts
CHANGED
package/index.ts
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import type { Plugin } from "@opencode-ai/plugin"
|
|
2
2
|
|
|
3
3
|
import search from "./tools/search"
|
|
4
|
-
import codeindex from "./tools/codeindex"
|
|
5
4
|
import FileIndexerPlugin from "./file-indexer"
|
|
6
5
|
|
|
7
6
|
const UsethisSearchPlugin: Plugin = async ({ directory, client }) => {
|
|
@@ -17,7 +16,6 @@ const UsethisSearchPlugin: Plugin = async ({ directory, client }) => {
|
|
|
17
16
|
return {
|
|
18
17
|
tool: {
|
|
19
18
|
search,
|
|
20
|
-
codeindex,
|
|
21
19
|
},
|
|
22
20
|
|
|
23
21
|
event: async (args: any) => {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@comfanion/usethis_search",
|
|
3
|
-
"version": "3.0.0
|
|
3
|
+
"version": "3.0.0",
|
|
4
4
|
"description": "OpenCode plugin: semantic search with graph-based context (v3: graph relations, 1-hop context, LSP + regex analyzers)",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./index.ts",
|
|
@@ -8,15 +8,23 @@
|
|
|
8
8
|
".": "./index.ts"
|
|
9
9
|
},
|
|
10
10
|
"scripts": {
|
|
11
|
-
"test": "bun test"
|
|
11
|
+
"test": "bun test",
|
|
12
|
+
"reindex": "bun run cli.ts reindex",
|
|
13
|
+
"status": "bun run cli.ts status",
|
|
14
|
+
"index:list": "bun run cli.ts list",
|
|
15
|
+
"index:clear": "bun run cli.ts clear"
|
|
16
|
+
},
|
|
17
|
+
"bin": {
|
|
18
|
+
"usethis-search": "./cli.ts"
|
|
12
19
|
},
|
|
13
20
|
"files": [
|
|
14
21
|
"index.ts",
|
|
22
|
+
"cli.ts",
|
|
15
23
|
"api.ts",
|
|
16
24
|
"file-indexer.ts",
|
|
17
25
|
"tools/search.ts",
|
|
18
26
|
"tools/codeindex.ts",
|
|
19
|
-
"vectorizer/index.
|
|
27
|
+
"vectorizer/index.ts",
|
|
20
28
|
"vectorizer/content-cleaner.ts",
|
|
21
29
|
"vectorizer/metadata-extractor.ts",
|
|
22
30
|
"vectorizer/bm25-index.ts",
|
|
@@ -24,6 +32,7 @@
|
|
|
24
32
|
"vectorizer/query-cache.ts",
|
|
25
33
|
"vectorizer/search-metrics.ts",
|
|
26
34
|
"vectorizer/graph-db.ts",
|
|
35
|
+
"vectorizer/chunk-store.ts",
|
|
27
36
|
"vectorizer/usage-tracker.ts",
|
|
28
37
|
"vectorizer/graph-builder.ts",
|
|
29
38
|
"vectorizer/analyzers/regex-analyzer.ts",
|
|
@@ -40,8 +49,6 @@
|
|
|
40
49
|
"@opencode-ai/plugin": ">=1.1.0",
|
|
41
50
|
"@xenova/transformers": "^2.17.0",
|
|
42
51
|
"glob": "^10.3.10",
|
|
43
|
-
"level": "^8.0.1",
|
|
44
|
-
"levelgraph": "^4.0.0",
|
|
45
52
|
"vectordb": "^0.4.0"
|
|
46
53
|
},
|
|
47
54
|
"peerDependencies": {
|
package/tools/codeindex.ts
CHANGED
|
@@ -9,7 +9,7 @@ import { tool } from "@opencode-ai/plugin"
|
|
|
9
9
|
import path from "path"
|
|
10
10
|
import fs from "fs/promises"
|
|
11
11
|
|
|
12
|
-
import { CodebaseIndexer } from "../vectorizer/index.
|
|
12
|
+
import { CodebaseIndexer } from "../vectorizer/index.ts"
|
|
13
13
|
|
|
14
14
|
const INDEX_EXTENSIONS: Record<string, string[]> = {
|
|
15
15
|
code: [".js", ".ts", ".jsx", ".tsx", ".go", ".py", ".rs", ".java", ".kt", ".swift", ".c", ".cpp", ".h", ".cs", ".rb", ".php"],
|
|
@@ -327,7 +327,7 @@ Available indexes:
|
|
|
327
327
|
if (tables.includes("chunks")) {
|
|
328
328
|
const table = await db.openTable("chunks")
|
|
329
329
|
try {
|
|
330
|
-
const rows = await table.
|
|
330
|
+
const rows = await table.filter("").limit(100000).execute()
|
|
331
331
|
for (const row of rows) {
|
|
332
332
|
if (row.chunk_id) knownChunkIds.add(row.chunk_id)
|
|
333
333
|
}
|