@comfanion/usethis_search 0.1.5 → 3.0.0-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +98 -7
- package/file-indexer.ts +21 -1
- package/index.ts +2 -0
- package/package.json +20 -3
- package/tools/codeindex.ts +135 -16
- package/tools/read-interceptor.ts +54 -0
- package/tools/search.ts +60 -12
- package/vectorizer/analyzers/lsp-analyzer.ts +162 -0
- package/vectorizer/analyzers/regex-analyzer.ts +255 -0
- package/vectorizer/bm25-index.ts +155 -0
- package/vectorizer/chunkers/chunker-factory.ts +98 -0
- package/vectorizer/chunkers/code-chunker.ts +325 -0
- package/vectorizer/chunkers/markdown-chunker.ts +177 -0
- package/vectorizer/content-cleaner.ts +136 -0
- package/vectorizer/graph-builder.ts +95 -0
- package/vectorizer/graph-db.ts +97 -0
- package/vectorizer/hybrid-search.ts +97 -0
- package/vectorizer/index.js +470 -17
- package/vectorizer/metadata-extractor.ts +125 -0
- package/vectorizer/query-cache.ts +126 -0
- package/vectorizer/search-metrics.ts +155 -0
- package/vectorizer.yaml +95 -0
package/README.md
CHANGED
|
@@ -11,9 +11,13 @@ Forget about `grep` and `find` — search code by meaning, not by text!
|
|
|
11
11
|
An OpenCode plugin that adds **smart search** to your project:
|
|
12
12
|
|
|
13
13
|
- 🧠 **Semantic search** — finds code by meaning, even when words don't match
|
|
14
|
+
- 🔀 **Hybrid search (v2)** — combines vector similarity + BM25 keyword matching
|
|
15
|
+
- 🧩 **Semantic chunking (v2)** — structure-aware splitting for Markdown (headings) and code (functions/classes)
|
|
16
|
+
- 🏷️ **Rich metadata (v2)** — filter by file type, language, date, tags
|
|
14
17
|
- ⚡ **Automatic indexing** — files are indexed on change (zero effort)
|
|
15
18
|
- 📦 **Local vectorization** — works offline, no API keys needed
|
|
16
19
|
- 🎯 **Three indexes** — separate for code, docs, and configs
|
|
20
|
+
- 📊 **Quality metrics (v2)** — track search relevance and usage
|
|
17
21
|
- 🌍 **Multilingual** — supports Ukrainian, Russian, and English
|
|
18
22
|
|
|
19
23
|
---
|
|
@@ -79,6 +83,31 @@ search({
|
|
|
79
83
|
query: "database connection",
|
|
80
84
|
searchAll: true
|
|
81
85
|
})
|
|
86
|
+
|
|
87
|
+
// v2: Hybrid search (vector + keyword matching)
|
|
88
|
+
search({
|
|
89
|
+
query: "getUserById",
|
|
90
|
+
hybrid: true
|
|
91
|
+
})
|
|
92
|
+
|
|
93
|
+
// v2: Filter by file type and language
|
|
94
|
+
search({
|
|
95
|
+
query: "authentication logic",
|
|
96
|
+
fileType: "code",
|
|
97
|
+
language: "typescript"
|
|
98
|
+
})
|
|
99
|
+
|
|
100
|
+
// v2: Filter by date
|
|
101
|
+
search({
|
|
102
|
+
query: "recent changes",
|
|
103
|
+
modifiedAfter: "2024-06-01"
|
|
104
|
+
})
|
|
105
|
+
|
|
106
|
+
// v2: Filter by frontmatter tags
|
|
107
|
+
search({
|
|
108
|
+
query: "security",
|
|
109
|
+
tags: "auth,security"
|
|
110
|
+
})
|
|
82
111
|
```
|
|
83
112
|
|
|
84
113
|
### Index Management
|
|
@@ -99,6 +128,9 @@ codeindex({
|
|
|
99
128
|
index: "docs",
|
|
100
129
|
dir: "docs/"
|
|
101
130
|
})
|
|
131
|
+
|
|
132
|
+
// v2: Run quality tests against gold dataset
|
|
133
|
+
codeindex({ action: "test", index: "code" })
|
|
102
134
|
```
|
|
103
135
|
|
|
104
136
|
---
|
|
@@ -108,9 +140,12 @@ codeindex({
|
|
|
108
140
|
### Semantic Search
|
|
109
141
|
|
|
110
142
|
Instead of searching for exact text matches, the plugin:
|
|
111
|
-
1.
|
|
112
|
-
2.
|
|
113
|
-
3.
|
|
143
|
+
1. **Cleans** content (removes TOC, noise, auto-generated markers)
|
|
144
|
+
2. **Chunks** intelligently (Markdown by headings, code by functions/classes)
|
|
145
|
+
3. Converts chunks into **vectors** (numerical representations of meaning)
|
|
146
|
+
4. Compares vectors of your query with vectors of code
|
|
147
|
+
5. Optionally combines with **BM25 keyword search** (hybrid mode)
|
|
148
|
+
6. Returns the most **semantically similar** fragments with rich metadata
|
|
114
149
|
|
|
115
150
|
**Example:**
|
|
116
151
|
```javascript
|
|
@@ -138,16 +173,44 @@ vectorizer:
|
|
|
138
173
|
auto_index: true # Automatic indexing
|
|
139
174
|
debounce_ms: 1000 # Delay before indexing (ms)
|
|
140
175
|
|
|
176
|
+
# v2: Content cleaning
|
|
177
|
+
cleaning:
|
|
178
|
+
remove_toc: true
|
|
179
|
+
remove_frontmatter_metadata: false
|
|
180
|
+
remove_imports: false
|
|
181
|
+
remove_comments: false
|
|
182
|
+
|
|
183
|
+
# v2: Semantic chunking
|
|
184
|
+
chunking:
|
|
185
|
+
strategy: "semantic" # fixed | semantic
|
|
186
|
+
markdown:
|
|
187
|
+
split_by_headings: true
|
|
188
|
+
min_chunk_size: 200
|
|
189
|
+
max_chunk_size: 2000
|
|
190
|
+
preserve_heading_hierarchy: true
|
|
191
|
+
code:
|
|
192
|
+
split_by_functions: true
|
|
193
|
+
include_function_signature: true
|
|
194
|
+
min_chunk_size: 300
|
|
195
|
+
max_chunk_size: 1500
|
|
196
|
+
|
|
197
|
+
# v2: Hybrid search
|
|
198
|
+
search:
|
|
199
|
+
hybrid: false # vector + BM25
|
|
200
|
+
bm25_weight: 0.3
|
|
201
|
+
|
|
202
|
+
# v2: Quality monitoring
|
|
203
|
+
quality:
|
|
204
|
+
enable_metrics: false
|
|
205
|
+
enable_cache: true
|
|
206
|
+
|
|
141
207
|
indexes:
|
|
142
208
|
code:
|
|
143
209
|
enabled: true
|
|
144
|
-
extensions: [.js, .ts, .jsx, .tsx, .py, .go, ...]
|
|
145
210
|
docs:
|
|
146
211
|
enabled: true
|
|
147
|
-
extensions: [.md, .mdx, .txt, .rst, .adoc]
|
|
148
212
|
config:
|
|
149
|
-
enabled: false
|
|
150
|
-
extensions: [.yaml, .yml, .json, .toml, ...]
|
|
213
|
+
enabled: false
|
|
151
214
|
|
|
152
215
|
exclude:
|
|
153
216
|
- node_modules
|
|
@@ -324,6 +387,34 @@ codeindex({ action: "list" })
|
|
|
324
387
|
- **Model size:** ~23 MB (downloaded once)
|
|
325
388
|
- **Speed:** ~0.5 sec/file (after model loading)
|
|
326
389
|
|
|
390
|
+
### v2 Architecture
|
|
391
|
+
|
|
392
|
+
```
|
|
393
|
+
File → Content Cleaner → Chunker Factory → Embedder → LanceDB
|
|
394
|
+
├── Markdown Chunker (heading-aware)
|
|
395
|
+
├── Code Chunker (function/class-aware)
|
|
396
|
+
└── Fixed Chunker (fallback)
|
|
397
|
+
|
|
398
|
+
Query → Query Cache → Embedder → Vector Search ─┐
|
|
399
|
+
└──────────→ BM25 Search ────┤→ Hybrid Merge → Filter → Results
|
|
400
|
+
│
|
|
401
|
+
Metadata Filter (type, lang, date, tags)
|
|
402
|
+
```
|
|
403
|
+
|
|
404
|
+
### New Modules (v2)
|
|
405
|
+
|
|
406
|
+
| Module | Purpose |
|
|
407
|
+
|--------|---------|
|
|
408
|
+
| `content-cleaner.ts` | Remove noise (TOC, breadcrumbs, markers) |
|
|
409
|
+
| `metadata-extractor.ts` | Extract file_type, language, tags, dates |
|
|
410
|
+
| `markdown-chunker.ts` | Heading-aware splitting with hierarchy |
|
|
411
|
+
| `code-chunker.ts` | Function/class-aware splitting |
|
|
412
|
+
| `chunker-factory.ts` | Route to correct chunker by file type |
|
|
413
|
+
| `bm25-index.ts` | Inverted index for keyword search |
|
|
414
|
+
| `hybrid-search.ts` | Merge vector + BM25 scores |
|
|
415
|
+
| `query-cache.ts` | LRU cache for query embeddings |
|
|
416
|
+
| `search-metrics.ts` | Track search quality metrics |
|
|
417
|
+
|
|
327
418
|
---
|
|
328
419
|
|
|
329
420
|
## 🤝 Contributing
|
package/file-indexer.ts
CHANGED
|
@@ -326,6 +326,8 @@ async function ensureIndexOnSessionStart(
|
|
|
326
326
|
return { totalFiles, elapsedSeconds, action }
|
|
327
327
|
}
|
|
328
328
|
|
|
329
|
+
const STALE_THRESHOLD_MS = 5 * 60 * 1000 // 5 minutes — evict stuck entries
|
|
330
|
+
|
|
329
331
|
async function processPendingFiles(projectRoot: string, config: VectorizerConfig): Promise<void> {
|
|
330
332
|
if (pendingFiles.size === 0) return
|
|
331
333
|
if (SKIP_AUTO_INDEX) {
|
|
@@ -335,6 +337,7 @@ async function processPendingFiles(projectRoot: string, config: VectorizerConfig
|
|
|
335
337
|
|
|
336
338
|
const now = Date.now()
|
|
337
339
|
const filesToProcess: Map<string, string[]> = new Map()
|
|
340
|
+
const staleKeys: string[] = []
|
|
338
341
|
|
|
339
342
|
for (const [filePath, info] of pendingFiles.entries()) {
|
|
340
343
|
if (now - info.timestamp >= config.debounce_ms) {
|
|
@@ -342,9 +345,17 @@ async function processPendingFiles(projectRoot: string, config: VectorizerConfig
|
|
|
342
345
|
files.push(filePath)
|
|
343
346
|
filesToProcess.set(info.indexName, files)
|
|
344
347
|
pendingFiles.delete(filePath)
|
|
348
|
+
} else if (now - info.timestamp > STALE_THRESHOLD_MS) {
|
|
349
|
+
staleKeys.push(filePath)
|
|
345
350
|
}
|
|
346
351
|
}
|
|
347
352
|
|
|
353
|
+
// Evict entries stuck for >5 minutes (prevents unbounded growth)
|
|
354
|
+
for (const key of staleKeys) {
|
|
355
|
+
debug(`Evicting stale pending file: ${key}`)
|
|
356
|
+
pendingFiles.delete(key)
|
|
357
|
+
}
|
|
358
|
+
|
|
348
359
|
if (filesToProcess.size === 0) return
|
|
349
360
|
|
|
350
361
|
debug(`Processing ${filesToProcess.size} index(es)...`)
|
|
@@ -425,6 +436,9 @@ export const FileIndexerPlugin: Plugin = async ({ directory, client }) => {
|
|
|
425
436
|
}, 1000)
|
|
426
437
|
}
|
|
427
438
|
|
|
439
|
+
let lastProcessTime = Date.now()
|
|
440
|
+
const MAX_DEBOUNCE_WAIT_MS = 5000 // Force processing after 5s of rapid edits
|
|
441
|
+
|
|
428
442
|
function queueFileForIndexing(filePath: string): void {
|
|
429
443
|
const relativePath = path.relative(directory, filePath)
|
|
430
444
|
if (relativePath.startsWith("..") || path.isAbsolute(relativePath)) return
|
|
@@ -439,9 +453,15 @@ export const FileIndexerPlugin: Plugin = async ({ directory, client }) => {
|
|
|
439
453
|
if (processingTimeout) {
|
|
440
454
|
clearTimeout(processingTimeout)
|
|
441
455
|
}
|
|
456
|
+
|
|
457
|
+
// If rapid edits keep resetting the timer, force processing after MAX_DEBOUNCE_WAIT_MS
|
|
458
|
+
const timeSinceLast = Date.now() - lastProcessTime
|
|
459
|
+
const waitTime = timeSinceLast > MAX_DEBOUNCE_WAIT_MS ? 0 : config.debounce_ms + 100
|
|
460
|
+
|
|
442
461
|
processingTimeout = setTimeout(async () => {
|
|
462
|
+
lastProcessTime = Date.now()
|
|
443
463
|
await processPendingFiles(directory, config)
|
|
444
|
-
},
|
|
464
|
+
}, waitTime)
|
|
445
465
|
}
|
|
446
466
|
|
|
447
467
|
return {
|
package/index.ts
CHANGED
|
@@ -2,6 +2,7 @@ import type { Plugin } from "@opencode-ai/plugin"
|
|
|
2
2
|
|
|
3
3
|
import search from "./tools/search"
|
|
4
4
|
import codeindex from "./tools/codeindex"
|
|
5
|
+
import readInterceptor from "./tools/read-interceptor"
|
|
5
6
|
import FileIndexerPlugin from "./file-indexer"
|
|
6
7
|
|
|
7
8
|
const UsethisSearchPlugin: Plugin = async (ctx) => {
|
|
@@ -12,6 +13,7 @@ const UsethisSearchPlugin: Plugin = async (ctx) => {
|
|
|
12
13
|
tool: {
|
|
13
14
|
search,
|
|
14
15
|
codeindex,
|
|
16
|
+
read: readInterceptor,
|
|
15
17
|
},
|
|
16
18
|
}
|
|
17
19
|
}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@comfanion/usethis_search",
|
|
3
|
-
"version": "0.1.5",
|
|
4
|
-
"description": "OpenCode plugin: semantic search +
|
|
3
|
+
"version": "3.0.0-dev.0",
|
|
4
|
+
"description": "OpenCode plugin: semantic search with graph-based context (v3: graph relations, 1-hop context, LSP + regex analyzers)",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./index.ts",
|
|
7
7
|
"exports": {
|
|
@@ -15,14 +15,31 @@
|
|
|
15
15
|
"file-indexer.ts",
|
|
16
16
|
"tools/search.ts",
|
|
17
17
|
"tools/codeindex.ts",
|
|
18
|
+
"tools/read-interceptor.ts",
|
|
18
19
|
"vectorizer/index.js",
|
|
20
|
+
"vectorizer/content-cleaner.ts",
|
|
21
|
+
"vectorizer/metadata-extractor.ts",
|
|
22
|
+
"vectorizer/bm25-index.ts",
|
|
23
|
+
"vectorizer/hybrid-search.ts",
|
|
24
|
+
"vectorizer/query-cache.ts",
|
|
25
|
+
"vectorizer/search-metrics.ts",
|
|
26
|
+
"vectorizer/graph-db.ts",
|
|
27
|
+
"vectorizer/graph-builder.ts",
|
|
28
|
+
"vectorizer/analyzers/regex-analyzer.ts",
|
|
29
|
+
"vectorizer/analyzers/lsp-analyzer.ts",
|
|
30
|
+
"vectorizer/chunkers/markdown-chunker.ts",
|
|
31
|
+
"vectorizer/chunkers/code-chunker.ts",
|
|
32
|
+
"vectorizer/chunkers/chunker-factory.ts",
|
|
33
|
+
"vectorizer.yaml",
|
|
19
34
|
"README.md",
|
|
20
35
|
"LICENSE"
|
|
21
36
|
],
|
|
22
37
|
"dependencies": {
|
|
23
|
-
"@opencode-ai/plugin": "1.1.
|
|
38
|
+
"@opencode-ai/plugin": ">=1.1.0",
|
|
24
39
|
"@xenova/transformers": "^2.17.0",
|
|
25
40
|
"glob": "^10.3.10",
|
|
41
|
+
"level": "^8.0.1",
|
|
42
|
+
"levelgraph": "^4.0.0",
|
|
26
43
|
"vectordb": "^0.4.0"
|
|
27
44
|
},
|
|
28
45
|
"peerDependencies": {
|
package/tools/codeindex.ts
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Code Index Status & Management Tool
|
|
2
|
+
* Code Index Status & Management Tool (v2)
|
|
3
3
|
*
|
|
4
4
|
* Uses bundled vectorizer. Index data is stored in `.opencode/vectors/<index>/`.
|
|
5
|
+
* v2: added "test" action for gold dataset testing, richer stats.
|
|
5
6
|
*/
|
|
6
7
|
|
|
7
8
|
import { tool } from "@opencode-ai/plugin"
|
|
@@ -59,6 +60,7 @@ Actions:
|
|
|
59
60
|
- "status" → Show index statistics
|
|
60
61
|
- "list" → List all available indexes with stats
|
|
61
62
|
- "reindex" → Re-index files using local vectorizer
|
|
63
|
+
- "test" → Run gold dataset quality tests (if configured)
|
|
62
64
|
|
|
63
65
|
Available indexes:
|
|
64
66
|
- "code" - Source code files
|
|
@@ -66,7 +68,7 @@ Available indexes:
|
|
|
66
68
|
- "config" - Configuration files`,
|
|
67
69
|
|
|
68
70
|
args: {
|
|
69
|
-
action: tool.schema.enum(["status", "list", "reindex"]).describe("Action to perform"),
|
|
71
|
+
action: tool.schema.enum(["status", "list", "reindex", "test"]).describe("Action to perform"),
|
|
70
72
|
index: tool.schema.string().optional().default("code").describe("Index name: code, docs, config"),
|
|
71
73
|
dir: tool.schema.string().optional().describe("Directory to index (default: project root)"),
|
|
72
74
|
},
|
|
@@ -87,7 +89,7 @@ Available indexes:
|
|
|
87
89
|
} catch {}
|
|
88
90
|
|
|
89
91
|
if (indexes.length === 0) {
|
|
90
|
-
output +=
|
|
92
|
+
output += `No indexes created yet\n\nCreate indexes:\n\n\`\`\`\n`
|
|
91
93
|
output += `codeindex({ action: "reindex", index: "code" })\n`
|
|
92
94
|
output += `codeindex({ action: "reindex", index: "docs", dir: "docs/" })\n`
|
|
93
95
|
output += `\`\`\`\n`
|
|
@@ -95,31 +97,62 @@ Available indexes:
|
|
|
95
97
|
output += `### Active Indexes\n\n`
|
|
96
98
|
for (const idx of indexes) {
|
|
97
99
|
try {
|
|
98
|
-
const
|
|
99
|
-
const
|
|
100
|
-
|
|
100
|
+
const indexer = await new CodebaseIndexer(projectRoot, idx).init()
|
|
101
|
+
const stats = await indexer.getStats()
|
|
102
|
+
await indexer.unloadModel()
|
|
101
103
|
const desc = INDEX_DESCRIPTIONS[idx] || "Custom index"
|
|
102
|
-
|
|
104
|
+
const features = stats.features
|
|
105
|
+
? ` | chunking: ${stats.features.chunking}, hybrid: ${stats.features.hybrid ? "on" : "off"}`
|
|
106
|
+
: ""
|
|
107
|
+
output += `- **${idx}** - ${desc} (files: ${stats.fileCount}, chunks: ${stats.chunkCount}${features})\n`
|
|
103
108
|
} catch {
|
|
104
109
|
output += `- ${idx}\n`
|
|
105
110
|
}
|
|
106
111
|
}
|
|
107
112
|
}
|
|
108
113
|
|
|
109
|
-
output += `\n### Usage\n\n\`\`\`\nsearch({ query: "your query", index: "code" })\n\`\`\``
|
|
114
|
+
output += `\n### Usage\n\n\`\`\`\nsearch({ query: "your query", index: "code" })\nsearch({ query: "your query", hybrid: true }) // v2: hybrid search\nsearch({ query: "your query", fileType: "code", language: "typescript" }) // v2: filters\n\`\`\``
|
|
110
115
|
return output
|
|
111
116
|
}
|
|
112
117
|
|
|
113
118
|
if (args.action === "status") {
|
|
114
119
|
const hashesFile = path.join(vectorsDir, indexName, "hashes.json")
|
|
115
120
|
try {
|
|
116
|
-
const
|
|
117
|
-
const
|
|
118
|
-
|
|
119
|
-
|
|
121
|
+
const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
|
|
122
|
+
const stats = await indexer.getStats()
|
|
123
|
+
await indexer.unloadModel()
|
|
124
|
+
|
|
125
|
+
const sampleFiles = Object.keys(JSON.parse(await fs.readFile(hashesFile, "utf8"))).slice(0, 5)
|
|
120
126
|
const desc = INDEX_DESCRIPTIONS[indexName] || "Custom index"
|
|
121
127
|
|
|
122
|
-
|
|
128
|
+
let output = `## Index Status: "${indexName}"\n\n`
|
|
129
|
+
output += `**Description:** ${desc}\n`
|
|
130
|
+
output += `**Files indexed:** ${stats.fileCount}\n`
|
|
131
|
+
output += `**Total chunks:** ${stats.chunkCount}\n`
|
|
132
|
+
output += `**Model:** ${stats.model}\n`
|
|
133
|
+
|
|
134
|
+
if (stats.features) {
|
|
135
|
+
output += `\n**Features:**\n`
|
|
136
|
+
output += `- Chunking strategy: ${stats.features.chunking}\n`
|
|
137
|
+
output += `- Hybrid search: ${stats.features.hybrid ? "enabled" : "disabled"}\n`
|
|
138
|
+
output += `- Metrics: ${stats.features.metrics ? "enabled" : "disabled"}\n`
|
|
139
|
+
output += `- Query cache: ${stats.features.cache ? "enabled" : "disabled"}\n`
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
// Show metrics summary if available
|
|
143
|
+
try {
|
|
144
|
+
const metrics = await indexer.getMetrics()
|
|
145
|
+
if (metrics.total_queries > 0) {
|
|
146
|
+
output += `\n**Search Metrics:**\n`
|
|
147
|
+
output += `- Total queries: ${metrics.total_queries}\n`
|
|
148
|
+
output += `- Avg results/query: ${metrics.avg_results_per_query.toFixed(1)}\n`
|
|
149
|
+
output += `- Zero results rate: ${(metrics.zero_results_rate * 100).toFixed(1)}%\n`
|
|
150
|
+
output += `- Avg relevance: ${metrics.avg_relevance.toFixed(3)}\n`
|
|
151
|
+
}
|
|
152
|
+
} catch {}
|
|
153
|
+
|
|
154
|
+
output += `\n**Sample indexed files:**\n${sampleFiles.map((f) => `- ${f}`).join("\n")}${stats.fileCount > 5 ? `\n- ... and ${stats.fileCount - 5} more` : ""}`
|
|
155
|
+
return output
|
|
123
156
|
} catch {
|
|
124
157
|
return `## Index Status: "${indexName}"\n\nIndex "${indexName}" not created yet. Create it with: codeindex({ action: "reindex", index: "${indexName}" })`
|
|
125
158
|
}
|
|
@@ -148,12 +181,98 @@ Available indexes:
|
|
|
148
181
|
await indexer.unloadModel()
|
|
149
182
|
const stats = await indexer.getStats()
|
|
150
183
|
|
|
151
|
-
|
|
184
|
+
let output = `## Re-indexing Complete\n\n`
|
|
185
|
+
output += `**Index:** ${indexName}\n`
|
|
186
|
+
output += `**Directory:** ${args.dir || "(project root)"}\n`
|
|
187
|
+
output += `**Files found:** ${files.length}\n`
|
|
188
|
+
output += `**Files indexed:** ${indexed}\n`
|
|
189
|
+
output += `**Files unchanged:** ${skipped}\n`
|
|
190
|
+
output += `**Total chunks:** ${stats.chunkCount}\n`
|
|
191
|
+
if (stats.features) {
|
|
192
|
+
output += `**Chunking:** ${stats.features.chunking}\n`
|
|
193
|
+
}
|
|
194
|
+
return output
|
|
195
|
+
} catch (error: any) {
|
|
196
|
+
return `Re-indexing failed: ${error.message || String(error)}`
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
if (args.action === "test") {
|
|
201
|
+
try {
|
|
202
|
+
const goldPath = path.join(projectRoot, ".opencode", "vectors", "gold-dataset.yaml")
|
|
203
|
+
let goldContent: string
|
|
204
|
+
try {
|
|
205
|
+
goldContent = await fs.readFile(goldPath, "utf8")
|
|
206
|
+
} catch {
|
|
207
|
+
return `## Gold Dataset Test\n\nNo gold dataset found at: ${goldPath}\n\nCreate one with test queries and expected results.\nSee docs/search-plugin-upgrade-plan.md for format.`
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
// Simple YAML parsing for test queries
|
|
211
|
+
const tests: { query: string; expected_files: string[]; min_relevance: number; description?: string }[] = []
|
|
212
|
+
const queryBlocks = goldContent.split(/\n\s+-\s+query:\s*/)
|
|
213
|
+
for (const block of queryBlocks.slice(1)) {
|
|
214
|
+
const queryMatch = block.match(/^["']?([^"'\n]+)["']?/)
|
|
215
|
+
const filesMatch = block.match(/expected_files:\s*\n((?:\s+-\s+.+\n?)+)/)
|
|
216
|
+
const relMatch = block.match(/min_relevance:\s*([\d.]+)/)
|
|
217
|
+
const descMatch = block.match(/description:\s*["']?([^"'\n]+)/)
|
|
218
|
+
|
|
219
|
+
if (queryMatch) {
|
|
220
|
+
const expectedFiles = filesMatch
|
|
221
|
+
? filesMatch[1].split("\n").map(l => l.replace(/^\s+-\s+["']?/, "").replace(/["']$/, "").trim()).filter(Boolean)
|
|
222
|
+
: []
|
|
223
|
+
tests.push({
|
|
224
|
+
query: queryMatch[1].trim(),
|
|
225
|
+
expected_files: expectedFiles,
|
|
226
|
+
min_relevance: relMatch ? parseFloat(relMatch[1]) : 0.7,
|
|
227
|
+
description: descMatch ? descMatch[1].trim() : undefined,
|
|
228
|
+
})
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
if (tests.length === 0) {
|
|
233
|
+
return `## Gold Dataset Test\n\nNo test queries found in gold dataset.`
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
|
|
237
|
+
let passed = 0
|
|
238
|
+
let failed = 0
|
|
239
|
+
let output = `## Gold Dataset Test Results\n\n`
|
|
240
|
+
|
|
241
|
+
for (const t of tests) {
|
|
242
|
+
const results = await indexer.search(t.query, 10, false)
|
|
243
|
+
const foundFiles = results.map((r: any) => r.file)
|
|
244
|
+
const foundExpected = t.expected_files.filter(f => foundFiles.includes(f))
|
|
245
|
+
const topScore = results.length > 0 && results[0]._distance != null
|
|
246
|
+
? 1 - results[0]._distance
|
|
247
|
+
: 0
|
|
248
|
+
|
|
249
|
+
const pass = foundExpected.length >= Math.ceil(t.expected_files.length * 0.5) && topScore >= t.min_relevance
|
|
250
|
+
|
|
251
|
+
if (pass) {
|
|
252
|
+
passed++
|
|
253
|
+
output += `**PASS** Query: "${t.query}"\n`
|
|
254
|
+
} else {
|
|
255
|
+
failed++
|
|
256
|
+
output += `**FAIL** Query: "${t.query}"\n`
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
output += ` Found: ${foundFiles.slice(0, 3).map((f: string) => `${f} (${(1 - (results.find((r: any) => r.file === f)?._distance ?? 1)).toFixed(2)})`).join(", ")}\n`
|
|
260
|
+
if (foundExpected.length < t.expected_files.length) {
|
|
261
|
+
const missing = t.expected_files.filter(f => !foundFiles.includes(f))
|
|
262
|
+
output += ` Missing: ${missing.join(", ")}\n`
|
|
263
|
+
}
|
|
264
|
+
output += `\n`
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
await indexer.unloadModel()
|
|
268
|
+
|
|
269
|
+
output += `---\n**Summary:** ${passed}/${tests.length} tests passed (${Math.round(passed / tests.length * 100)}%)\n`
|
|
270
|
+
return output
|
|
152
271
|
} catch (error: any) {
|
|
153
|
-
return
|
|
272
|
+
return `Gold dataset test failed: ${error.message || String(error)}`
|
|
154
273
|
}
|
|
155
274
|
}
|
|
156
275
|
|
|
157
|
-
return `Unknown action: ${args.action}. Use: status, list, or reindex`
|
|
276
|
+
return `Unknown action: ${args.action}. Use: status, list, reindex, or test`
|
|
158
277
|
},
|
|
159
278
|
})
|
package/tools/read-interceptor.ts
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import { tool } from "@opencode-ai/plugin"
|
|
2
|
+
import path from "path"
|
|
3
|
+
|
|
4
|
+
import { CodebaseIndexer } from "../vectorizer/index.js"
|
|
5
|
+
|
|
6
|
+
/**
 * Graph-aware "read" tool.
 *
 * Looks the requested file up in the "code" index and returns the text of
 * the chunks found for it, followed by any related context attached to
 * those chunks (each related entry is shown once, truncated to 300 chars).
 *
 * NOTE(review): the content is rebuilt from at most 20 search hits, in the
 * order `search` returns them, so for large files the output may be partial
 * and is not guaranteed to follow file order — confirm this is acceptable
 * for a tool registered under the name `read`.
 */
export default tool({
  description: `Read file with graph-aware context attachment. When available, this tool searches the file in the index and returns content + related context from the graph (imports, links, etc.).

Use this instead of the standard Read tool for better context awareness.`,

  args: {
    filePath: tool.schema.string().describe("Path to the file to read"),
  },

  /**
   * @param args.filePath Absolute path, or path relative to the current working directory.
   * @returns Markdown with the indexed content and related context, or a
   *          short message telling the caller to fall back to the original Read tool.
   */
  async execute(args) {
    const projectRoot = process.cwd()
    const filePath = path.isAbsolute(args.filePath) ? args.filePath : path.join(projectRoot, args.filePath)
    const relPath = path.relative(projectRoot, filePath)

    // Files outside the project root are never indexed (the file indexer
    // skips them too), so answer immediately instead of loading the model.
    // An absolute result happens on Windows when the path is on another drive.
    const escapesRoot = relPath === ".." || relPath.startsWith(".." + path.sep) || path.isAbsolute(relPath)
    if (escapesRoot) {
      return `File "${args.filePath}" is outside the project root. Use original Read tool.`
    }

    const indexer = await new CodebaseIndexer(projectRoot, "code").init()
    let fileChunks
    try {
      const results = await indexer.search(relPath, 20, false, {})
      // The search may return chunks from other files; keep only this file's.
      fileChunks = results.filter(r => r.file === relPath)
    } finally {
      // Always release the model, even when the search throws.
      await indexer.unloadModel()
    }

    if (fileChunks.length === 0) {
      return `File "${relPath}" not indexed. Use original Read tool or run codeindex({ action: "reindex", index: "code" })`
    }

    let output = `## ${relPath}\n\n`

    output += `### Content\n\n`
    for (const chunk of fileChunks) {
      output += chunk.content + "\n\n"
    }

    // De-duplicate related context by chunk_id, keeping the first occurrence.
    const relatedById = new Map()
    for (const chunk of fileChunks) {
      for (const rel of chunk.relatedContext || []) {
        if (!relatedById.has(rel.chunk_id)) {
          relatedById.set(rel.chunk_id, rel)
        }
      }
    }

    if (relatedById.size > 0) {
      output += `### Related Context\n\n`
      for (const rel of relatedById.values()) {
        // A related entry without content must not abort the whole read.
        const content = rel.content ?? ""
        const snippet = content.length > 300
          ? content.substring(0, 300) + "..."
          : content
        output += `**${rel.file}** (${rel.relation})\n`
        output += `\`\`\`\n${snippet}\n\`\`\`\n\n`
      }
    }

    return output
  },
})
|