@comfanion/usethis_search 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +22 -0
- package/file-indexer.ts +460 -0
- package/index.ts +19 -0
- package/package.json +33 -0
- package/tools/codeindex.ts +159 -0
- package/tools/search.ts +115 -0
- package/vectorizer/index.js +555 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) Comfanion
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# @comfanion/usethis_search
|
|
2
|
+
|
|
3
|
+
OpenCode plugin that provides semantic search and index management tools.
|
|
4
|
+
|
|
5
|
+
## Tools
|
|
6
|
+
|
|
7
|
+
- `search` (semantic search)
|
|
8
|
+
- `codeindex` (index status, list, reindex)
|
|
9
|
+
|
|
10
|
+
## Storage
|
|
11
|
+
|
|
12
|
+
- Vectors are stored in `.opencode/vectors/<index>/` in the project.
|
|
13
|
+
|
|
14
|
+
## Install (OpenCode)
|
|
15
|
+
|
|
16
|
+
Add to `opencode.json`:
|
|
17
|
+
|
|
18
|
+
```json
|
|
19
|
+
{
|
|
20
|
+
"plugin": ["@comfanion/usethis_search"]
|
|
21
|
+
}
|
|
22
|
+
```
|
package/file-indexer.ts
ADDED
|
@@ -0,0 +1,460 @@
|
|
|
1
|
+
import type { Plugin } from "@opencode-ai/plugin"
|
|
2
|
+
import path from "path"
|
|
3
|
+
import fs from "fs/promises"
|
|
4
|
+
import fsSync from "fs"
|
|
5
|
+
|
|
6
|
+
import { CodebaseIndexer } from "./vectorizer/index.js"
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* File Indexer Plugin
|
|
10
|
+
*
|
|
11
|
+
* Automatically manages semantic search indexes:
|
|
12
|
+
* - On plugin load (opencode startup): freshen existing indexes
|
|
13
|
+
* - On file edit: queue file for reindexing (debounced)
|
|
14
|
+
*
|
|
15
|
+
* Configuration in .opencode/vectorizer.yaml:
|
|
16
|
+
* vectorizer:
|
|
17
|
+
* enabled: true # Master switch
|
|
18
|
+
* auto_index: true # Enable this plugin
|
|
19
|
+
* debounce_ms: 1000 # Wait time before indexing
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
const DEBUG = process.env.DEBUG?.includes("file-indexer") || process.env.DEBUG === "*"
|
|
23
|
+
const SKIP_AUTO_INDEX = process.env.OPENCODE_SKIP_AUTO_INDEX === "1"
|
|
24
|
+
|
|
25
|
+
let logFilePath: string | null = null
|
|
26
|
+
|
|
27
|
+
function logFile(msg: string): void {
|
|
28
|
+
if (logFilePath) {
|
|
29
|
+
try {
|
|
30
|
+
const timestamp = new Date().toISOString().slice(11, 19)
|
|
31
|
+
fsSync.appendFileSync(logFilePath, `${timestamp} ${msg}\n`)
|
|
32
|
+
} catch {
|
|
33
|
+
// ignore
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
function log(msg: string): void {
|
|
39
|
+
if (DEBUG) console.log(`[file-indexer] ${msg}`)
|
|
40
|
+
logFile(msg)
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
function debug(msg: string): void {
|
|
44
|
+
if (DEBUG) log(msg)
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
const DEFAULT_CONFIG = {
|
|
48
|
+
enabled: true,
|
|
49
|
+
auto_index: true,
|
|
50
|
+
debounce_ms: 1000,
|
|
51
|
+
indexes: {
|
|
52
|
+
code: { enabled: true, extensions: [".js", ".ts", ".jsx", ".tsx", ".py", ".go", ".rs", ".java", ".kt", ".swift", ".c", ".cpp", ".h", ".hpp", ".cs", ".rb", ".php", ".scala", ".clj"] },
|
|
53
|
+
docs: { enabled: true, extensions: [".md", ".mdx", ".txt", ".rst", ".adoc"] },
|
|
54
|
+
config: { enabled: false, extensions: [".yaml", ".yml", ".json", ".toml", ".ini", ".xml"] },
|
|
55
|
+
},
|
|
56
|
+
exclude: ["node_modules", "vendor", "dist", "build", "out", "__pycache__"],
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
interface VectorizerConfig {
|
|
60
|
+
enabled: boolean
|
|
61
|
+
auto_index: boolean
|
|
62
|
+
debounce_ms: number
|
|
63
|
+
indexes: Record<string, { enabled: boolean; extensions: string[] }>
|
|
64
|
+
exclude: string[]
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
const FUN_MESSAGES = {
|
|
68
|
+
en: {
|
|
69
|
+
indexing: (files: number) => `Indexing ${files} files...`,
|
|
70
|
+
fun: (files: number, mins: number) => {
|
|
71
|
+
if (files < 20) return `Quick coffee? ☕`
|
|
72
|
+
if (files < 100) return `~${mins}min. Stretch break? 🧘`
|
|
73
|
+
if (files < 500) return `~${mins}min. Make coffee ☕ and relax 🛋️`
|
|
74
|
+
return `~${mins}min. Go touch grass 🌿 or take a nap 😴`
|
|
75
|
+
},
|
|
76
|
+
done: (files: number, duration: string) => {
|
|
77
|
+
if (files < 20) return `Done! ${files} files in ${duration}. Fast! 🚀`
|
|
78
|
+
if (files < 100) return `Indexed ${files} files in ${duration}. Let's go! 🎸`
|
|
79
|
+
return `${files} files in ${duration}. Worth the wait! 🎉`
|
|
80
|
+
},
|
|
81
|
+
fresh: () => `Everything's fresh! Nothing to do 😎`,
|
|
82
|
+
error: (msg: string) => `Oops! ${msg} 😬`,
|
|
83
|
+
},
|
|
84
|
+
uk: {
|
|
85
|
+
indexing: (files: number) => `Індексую ${files} файлів...`,
|
|
86
|
+
fun: (files: number, mins: number) => {
|
|
87
|
+
if (files < 20) return `Швидка кава? ☕`
|
|
88
|
+
if (files < 100) return `~${mins}хв. Розімнись! 🧘`
|
|
89
|
+
if (files < 500) return `~${mins}хв. Зроби каву ☕ і відпочинь 🛋️`
|
|
90
|
+
return `~${mins}хв. Йди погуляй 🌿 або поспи 😴`
|
|
91
|
+
},
|
|
92
|
+
done: (files: number, duration: string) => {
|
|
93
|
+
if (files < 20) return `Готово! ${files} файлів за ${duration}. Швидко! 🚀`
|
|
94
|
+
if (files < 100) return `${files} файлів за ${duration}. Поїхали! 🎸`
|
|
95
|
+
return `${files} файлів за ${duration}. Варто було чекати! 🎉`
|
|
96
|
+
},
|
|
97
|
+
fresh: () => `Все свіже! Нічого робити 😎`,
|
|
98
|
+
error: (msg: string) => `Ой! ${msg} 😬`,
|
|
99
|
+
},
|
|
100
|
+
ru: {
|
|
101
|
+
indexing: (files: number) => `Индексирую ${files} файлов...`,
|
|
102
|
+
fun: (files: number, mins: number) => {
|
|
103
|
+
if (files < 20) return `Кофе? ☕`
|
|
104
|
+
if (files < 100) return `~${mins}мин. Разомнись! 🧘`
|
|
105
|
+
if (files < 500) return `~${mins}мин. Сделай кофе ☕ и отдохни 🛋️`
|
|
106
|
+
return `~${mins}мин. Иди погуляй 🌿 или поспи 😴`
|
|
107
|
+
},
|
|
108
|
+
done: (files: number, duration: string) => {
|
|
109
|
+
if (files < 20) return `Готово! ${files} файлов за ${duration}. Быстро! 🚀`
|
|
110
|
+
if (files < 100) return `${files} файлов за ${duration}. Поехали! 🎸`
|
|
111
|
+
return `${files} файлов за ${duration}. Стоило подождать! 🎉`
|
|
112
|
+
},
|
|
113
|
+
fresh: () => `Всё свежее! Делать нечего 😎`,
|
|
114
|
+
error: (msg: string) => `Ой! ${msg} 😬`,
|
|
115
|
+
},
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
type Lang = keyof typeof FUN_MESSAGES
|
|
119
|
+
|
|
120
|
+
async function getLanguage(projectRoot: string): Promise<Lang> {
|
|
121
|
+
try {
|
|
122
|
+
const configPath = path.join(projectRoot, ".opencode", "config.yaml")
|
|
123
|
+
const content = await fs.readFile(configPath, "utf8")
|
|
124
|
+
const match = content.match(/communication_language:\s*["']?(\w+)["']?/i)
|
|
125
|
+
const lang = match?.[1]?.toLowerCase()
|
|
126
|
+
if (lang === "ukrainian" || lang === "uk") return "uk"
|
|
127
|
+
if (lang === "russian" || lang === "ru") return "ru"
|
|
128
|
+
return "en"
|
|
129
|
+
} catch {
|
|
130
|
+
return "en"
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
function estimateTime(fileCount: number): number {
|
|
135
|
+
const modelLoadTime = 30
|
|
136
|
+
const perFileTime = 0.5
|
|
137
|
+
const totalSeconds = modelLoadTime + fileCount * perFileTime
|
|
138
|
+
return Math.ceil(totalSeconds / 60)
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
function formatDuration(seconds: number): string {
|
|
142
|
+
if (seconds < 60) return `${Math.round(seconds)}s`
|
|
143
|
+
const mins = Math.floor(seconds / 60)
|
|
144
|
+
const secs = Math.round(seconds % 60)
|
|
145
|
+
return secs > 0 ? `${mins}m ${secs}s` : `${mins}m`
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
const pendingFiles: Map<string, { indexName: string; timestamp: number }> = new Map()
|
|
149
|
+
|
|
150
|
+
/**
 * Load vectorizer settings from `.opencode/vectorizer.yaml`.
 *
 * The file is parsed with regular expressions (no YAML dependency), so only
 * these flat keys inside the top-level `vectorizer:` mapping are recognized:
 *   - enabled / auto_index (booleans)
 *   - debounce_ms (integer)
 *   - exclude (a "- item" list)
 * Per-index settings are NOT read from the file — `indexes` always comes
 * from DEFAULT_CONFIG.
 *
 * Never throws: any read or parse failure falls back to DEFAULT_CONFIG.
 */
async function loadConfig(projectRoot: string): Promise<VectorizerConfig> {
  try {
    // Single candidate today; kept as a list so more locations can be added.
    const candidates = [path.join(projectRoot, ".opencode", "vectorizer.yaml")]

    let content: string | null = null
    for (const configPath of candidates) {
      try {
        content = await fs.readFile(configPath, "utf8")
        break
      } catch {
        // try next
      }
    }

    if (!content) {
      debug("No vectorizer config file found, using defaults")
      return DEFAULT_CONFIG
    }

    // Capture the body of the `vectorizer:` section up to the next
    // top-level key (or end of file).
    const vectorizerMatch = content.match(/vectorizer:\s*\n([\s\S]*?)(?=\n[a-zA-Z_\-]+:|$)/i)
    if (!vectorizerMatch) {
      debug("No vectorizer section, using defaults")
      return DEFAULT_CONFIG
    }

    const section = vectorizerMatch[1]

    // Each key is optional; absent keys keep their DEFAULT_CONFIG value.
    const enabledMatch = section.match(/^\s+enabled:\s*(true|false)/m)
    const enabled = enabledMatch ? enabledMatch[1] === "true" : DEFAULT_CONFIG.enabled

    const autoIndexMatch = section.match(/^\s+auto_index:\s*(true|false)/m)
    const auto_index = autoIndexMatch ? autoIndexMatch[1] === "true" : DEFAULT_CONFIG.auto_index

    const debounceMatch = section.match(/^\s+debounce_ms:\s*(\d+)/m)
    const debounce_ms = debounceMatch ? parseInt(debounceMatch[1]) : DEFAULT_CONFIG.debounce_ms

    // exclude is a YAML sequence: capture the indented "- item" lines.
    const excludeMatch = section.match(/exclude:\s*\n((?:\s+-\s+.+\n?)+)/m)
    let exclude = DEFAULT_CONFIG.exclude
    if (excludeMatch) {
      exclude = excludeMatch[1].match(/-\s+(.+)/g)?.map((m) => m.replace(/^-\s+/, "").trim()) || DEFAULT_CONFIG.exclude
    }

    return { enabled, auto_index, debounce_ms, indexes: DEFAULT_CONFIG.indexes, exclude }
  } catch (e) {
    debug(`Failed to load config: ${(e as Error).message}`)
    return DEFAULT_CONFIG
  }
}
|
|
198
|
+
|
|
199
|
+
function getIndexForFile(filePath: string, config: VectorizerConfig): string | null {
|
|
200
|
+
const ext = path.extname(filePath).toLowerCase()
|
|
201
|
+
for (const [indexName, indexConfig] of Object.entries(config.indexes)) {
|
|
202
|
+
if (indexConfig.enabled && indexConfig.extensions.includes(ext)) {
|
|
203
|
+
return indexName
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
return null
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
function isExcluded(relativePath: string, config: VectorizerConfig): boolean {
|
|
210
|
+
const norm = relativePath.replace(/\\/g, "/")
|
|
211
|
+
return config.exclude.some((pattern) => {
|
|
212
|
+
const p = pattern.replace(/\\/g, "/").replace(/\/+$/, "")
|
|
213
|
+
return norm === p || norm.startsWith(`${p}/`) || norm.includes(`/${p}/`)
|
|
214
|
+
})
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
async function hasIndex(projectRoot: string, indexName: string): Promise<boolean> {
|
|
218
|
+
try {
|
|
219
|
+
await fs.access(path.join(projectRoot, ".opencode", "vectors", indexName, "hashes.json"))
|
|
220
|
+
return true
|
|
221
|
+
} catch {
|
|
222
|
+
return false
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
interface IndexResult {
|
|
227
|
+
totalFiles: number
|
|
228
|
+
elapsedSeconds: number
|
|
229
|
+
action: "created" | "rebuilt" | "freshened" | "skipped"
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
/**
 * Bring every enabled index up to date at plugin startup.
 *
 * Two passes over `config.indexes`:
 *   1. Count how much (re)indexing work is needed; if any, report the file
 *      count and a minute estimate through `onStart`.
 *   2. Create missing indexes, rebuild unhealthy ones, or freshen healthy
 *      ones (incremental update of changed/deleted files).
 *
 * Returns aggregate stats. `action` reflects the LAST non-trivial action
 * taken across all indexes; it stays "skipped" when nothing needed doing
 * or OPENCODE_SKIP_AUTO_INDEX=1 is set.
 */
async function ensureIndexOnSessionStart(
  projectRoot: string,
  config: VectorizerConfig,
  onStart?: (totalFiles: number, estimatedMins: number) => void,
): Promise<IndexResult> {
  // Escape hatch for CI / scripted runs: skip all index work.
  if (SKIP_AUTO_INDEX) {
    return { totalFiles: 0, elapsedSeconds: 0, action: "skipped" }
  }

  let totalFiles = 0
  let elapsedSeconds = 0
  let action: IndexResult["action"] = "skipped"

  const overallStart = Date.now()

  // First pass - count files and check health
  let needsWork = false
  let totalExpectedFiles = 0

  for (const [indexName, indexConfig] of Object.entries(config.indexes)) {
    if (!indexConfig.enabled) continue
    const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
    try {
      const indexExists = await hasIndex(projectRoot, indexName)
      const health = await indexer.checkHealth(config.exclude)

      if (!indexExists || health.needsReindex) {
        totalExpectedFiles += health.expectedCount
        needsWork = true
      }
    } finally {
      // Always release the embedding model, even if the health check throws.
      await indexer.unloadModel()
    }
  }

  if (needsWork && onStart) {
    onStart(totalExpectedFiles, estimateTime(totalExpectedFiles))
  }

  // Second pass - do the actual work
  for (const [indexName, indexConfig] of Object.entries(config.indexes)) {
    if (!indexConfig.enabled) continue

    const indexExists = await hasIndex(projectRoot, indexName)
    const startTime = Date.now()

    const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
    try {
      if (!indexExists) {
        // No index on disk yet: full build.
        log(`Creating "${indexName}" index...`)
        const stats = await indexer.indexAll((indexed: number, total: number, file: string) => {
          // Progress goes to the file log every 10 files and on completion.
          if (indexed % 10 === 0 || indexed === total) {
            logFile(`"${indexName}": ${indexed}/${total} - ${file}`)
          }
        }, config.exclude)
        const elapsed = ((Date.now() - startTime) / 1000).toFixed(1)
        log(`"${indexName}": done ${stats.indexed} files (${elapsed}s)`)
        totalFiles += stats.indexed
        action = "created"
      } else {
        const health = await indexer.checkHealth(config.exclude)

        if (health.needsReindex) {
          // Index exists but is out of sync with the tree: rebuild fully.
          log(`Rebuilding "${indexName}" (${health.reason}: ${health.currentCount} vs ${health.expectedCount} files)...`)
          const stats = await indexer.indexAll((indexed: number, total: number, file: string) => {
            if (indexed % 10 === 0 || indexed === total) {
              logFile(`"${indexName}": ${indexed}/${total} - ${file}`)
            }
          }, config.exclude)
          const elapsed = ((Date.now() - startTime) / 1000).toFixed(1)
          log(`"${indexName}": rebuilt ${stats.indexed} files (${elapsed}s)`)
          totalFiles += stats.indexed
          action = "rebuilt"
        } else {
          // Healthy index: incremental update only.
          log(`Freshening "${indexName}"...`)
          const stats = await indexer.freshen()
          const elapsed = ((Date.now() - startTime) / 1000).toFixed(1)

          if (stats.updated > 0 || stats.deleted > 0) {
            log(`"${indexName}": +${stats.updated} -${stats.deleted} (${elapsed}s)`)
            action = "freshened"
          } else {
            // Nothing changed; `action` intentionally left as-is.
            log(`"${indexName}": fresh (${elapsed}s)`)
          }
        }
      }
    } finally {
      await indexer.unloadModel()
    }
  }

  elapsedSeconds = (Date.now() - overallStart) / 1000
  log(`Indexes ready!`)
  return { totalFiles, elapsedSeconds, action }
}
|
|
327
|
+
|
|
328
|
+
/**
 * Flush the debounced `pendingFiles` queue.
 *
 * Files whose debounce window (`config.debounce_ms`) has elapsed are
 * grouped by target index — so each index's model is loaded only once —
 * and reindexed one by one. Files still inside their window remain queued
 * for a later call. Per-file errors are logged and do not abort the batch.
 */
async function processPendingFiles(projectRoot: string, config: VectorizerConfig): Promise<void> {
  if (pendingFiles.size === 0) return
  if (SKIP_AUTO_INDEX) {
    // Auto-indexing disabled: drop the queue instead of processing it.
    pendingFiles.clear()
    return
  }

  const now = Date.now()
  // indexName -> absolute paths of files ready to process
  const filesToProcess: Map<string, string[]> = new Map()

  for (const [filePath, info] of pendingFiles.entries()) {
    if (now - info.timestamp >= config.debounce_ms) {
      const files = filesToProcess.get(info.indexName) || []
      files.push(filePath)
      filesToProcess.set(info.indexName, files)
      pendingFiles.delete(filePath)
    }
  }

  if (filesToProcess.size === 0) return

  debug(`Processing ${filesToProcess.size} index(es)...`)

  for (const [indexName, files] of filesToProcess.entries()) {
    const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
    try {
      for (const filePath of files) {
        try {
          // indexSingleFile returns false when the file's content is unchanged.
          const wasIndexed = await indexer.indexSingleFile(filePath)
          if (wasIndexed) {
            log(`Reindexed: ${path.relative(projectRoot, filePath)} → ${indexName}`)
          } else {
            logFile(`Skipped (unchanged): ${path.relative(projectRoot, filePath)}`)
          }
        } catch (e) {
          log(`Error reindexing ${path.relative(projectRoot, filePath)}: ${(e as Error).message}`)
        }
      }
    } finally {
      // Release the embedding model even if a file blew up mid-batch.
      await indexer.unloadModel()
    }
  }
}
|
|
371
|
+
|
|
372
|
+
/**
 * OpenCode plugin entry point.
 *
 * On load: kicks off a background index build/refresh (after a 1s delay so
 * the TUI is ready to show toasts). Afterwards, listens for file-edit
 * events and queues changed files for debounced reindexing.
 */
export const FileIndexerPlugin: Plugin = async ({ directory, client }) => {
  let processingTimeout: NodeJS.Timeout | null = null
  const config = await loadConfig(directory)

  // Best-effort TUI notification; never throws.
  const toast = async (message: string, variant: "info" | "success" | "error" = "info") => {
    try {
      await client?.tui?.showToast?.({ body: { message, variant } })
    } catch {}
  }

  log(`Plugin loaded for: ${path.basename(directory)}`)

  if (!config.enabled || !config.auto_index) {
    log(`Plugin DISABLED (enabled: ${config.enabled}, auto_index: ${config.auto_index})`)
    // Return a no-op event hook so the host still gets the expected shape.
    return { event: async () => {} }
  }

  // Truncate the per-project log file; disable file logging if unwritable.
  logFilePath = path.join(directory, ".opencode", "indexer.log")
  try {
    fsSync.writeFileSync(logFilePath, "")
  } catch {
    logFilePath = null
  }

  log(`Plugin ACTIVE`)

  const lang = await getLanguage(directory)
  const messages = FUN_MESSAGES[lang]

  if (!SKIP_AUTO_INDEX) {
    // Defer startup indexing so plugin load isn't blocked and toasts land
    // after the TUI is up.
    setTimeout(async () => {
      try {
        const result = await ensureIndexOnSessionStart(
          directory,
          config,
          async (totalFiles, estimatedMins) => {
            await toast(messages.indexing(totalFiles), "info")
            if (totalFiles > 0) {
              // Follow-up "fun" message shortly after the first toast.
              setTimeout(() => toast(messages.fun(totalFiles, estimatedMins), "info"), 1500)
            }
          },
        )

        if (result.action === "skipped") {
          toast(messages.fresh(), "success")
        } else {
          const duration = formatDuration(result.elapsedSeconds)
          toast(messages.done(result.totalFiles, duration), "success")
        }
      } catch (e: any) {
        toast(messages.error(e.message), "error")
      }
    }, 1000)
  }

  // Record an edited file for later (debounced) reindexing.
  function queueFileForIndexing(filePath: string): void {
    const relativePath = path.relative(directory, filePath)
    // Ignore anything outside the project root.
    if (relativePath.startsWith("..") || path.isAbsolute(relativePath)) return
    if (isExcluded(relativePath, config)) return

    const indexName = getIndexForFile(filePath, config)
    if (!indexName) return // extension not covered by any enabled index

    debug(`Queued: ${relativePath} -> ${indexName}`)
    pendingFiles.set(filePath, { indexName, timestamp: Date.now() })

    // Restart the flush timer so a burst of edits is processed once.
    if (processingTimeout) {
      clearTimeout(processingTimeout)
    }
    processingTimeout = setTimeout(async () => {
      await processPendingFiles(directory, config)
    }, config.debounce_ms + 100)
  }

  return {
    event: async ({ event }) => {
      if (event.type === "file.edited" || event.type === "file.watcher.updated") {
        // The path property name varies between event types — accept any.
        const props = (event as any).properties || {}
        const filePath = props.file || props.path || props.filePath
        if (filePath) {
          log(`Event: ${event.type} → ${filePath}`)
          queueFileForIndexing(filePath)
        }
      }
    },
  }
}
|
|
459
|
+
|
|
460
|
+
export default FileIndexerPlugin
|
package/index.ts
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { Plugin } from "@opencode-ai/plugin"
|
|
2
|
+
|
|
3
|
+
import search from "./tools/search"
|
|
4
|
+
import codeindex from "./tools/codeindex"
|
|
5
|
+
import FileIndexerPlugin from "./file-indexer"
|
|
6
|
+
|
|
7
|
+
/**
 * Combined plugin: the file-indexer's lifecycle hooks plus the `search`
 * and `codeindex` tools, exposed as a single OpenCode plugin.
 */
export const UsethisSearchPlugin: Plugin = async (ctx) => {
  // Delegate to the file-indexer plugin so auto-indexing stays active.
  const fileIndexerHooks = await FileIndexerPlugin(ctx as any)

  return {
    ...fileIndexerHooks,
    tool: {
      search,
      codeindex,
    },
  }
}

export default UsethisSearchPlugin
|
package/package.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@comfanion/usethis_search",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "OpenCode plugin: semantic search + code index management",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./index.ts",
|
|
7
|
+
"exports": "./index.ts",
|
|
8
|
+
"scripts": {
|
|
9
|
+
"test": "bun test"
|
|
10
|
+
},
|
|
11
|
+
"files": [
|
|
12
|
+
"index.ts",
|
|
13
|
+
"file-indexer.ts",
|
|
14
|
+
"tools/search.ts",
|
|
15
|
+
"tools/codeindex.ts",
|
|
16
|
+
"vectorizer/index.js",
|
|
17
|
+
"README.md",
|
|
18
|
+
"LICENSE"
|
|
19
|
+
],
|
|
20
|
+
"dependencies": {
|
|
21
|
+
"@opencode-ai/plugin": "1.1.39",
|
|
22
|
+
"@xenova/transformers": "^2.17.0",
|
|
23
|
+
"glob": "^10.3.10",
|
|
24
|
+
"vectordb": "^0.4.0"
|
|
25
|
+
},
|
|
26
|
+
"peerDependencies": {
|
|
27
|
+
"@opencode-ai/plugin": ">=1.1.0"
|
|
28
|
+
},
|
|
29
|
+
"engines": {
|
|
30
|
+
"node": ">=18"
|
|
31
|
+
},
|
|
32
|
+
"license": "MIT"
|
|
33
|
+
}
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Code Index Status & Management Tool
|
|
3
|
+
*
|
|
4
|
+
* Uses bundled vectorizer. Index data is stored in `.opencode/vectors/<index>/`.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { tool } from "@opencode-ai/plugin"
|
|
8
|
+
import path from "path"
|
|
9
|
+
import fs from "fs/promises"
|
|
10
|
+
|
|
11
|
+
import { CodebaseIndexer } from "../vectorizer/index.js"
|
|
12
|
+
|
|
13
|
+
const INDEX_EXTENSIONS: Record<string, string[]> = {
|
|
14
|
+
code: [".js", ".ts", ".jsx", ".tsx", ".go", ".py", ".rs", ".java", ".kt", ".swift", ".c", ".cpp", ".h", ".cs", ".rb", ".php"],
|
|
15
|
+
docs: [".md", ".mdx", ".txt", ".rst", ".adoc"],
|
|
16
|
+
config: [".yaml", ".yml", ".json", ".toml", ".ini", ".xml"],
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
const INDEX_DESCRIPTIONS: Record<string, string> = {
|
|
20
|
+
code: "Source code files",
|
|
21
|
+
docs: "Documentation files",
|
|
22
|
+
config: "Configuration files",
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
async function walkDir(dir: string, extensions: string[], ignore: string[] = []): Promise<string[]> {
|
|
26
|
+
const files: string[] = []
|
|
27
|
+
|
|
28
|
+
async function walk(currentDir: string) {
|
|
29
|
+
try {
|
|
30
|
+
const entries = await fs.readdir(currentDir, { withFileTypes: true })
|
|
31
|
+
for (const entry of entries) {
|
|
32
|
+
const fullPath = path.join(currentDir, entry.name)
|
|
33
|
+
const relativePath = path.relative(dir, fullPath)
|
|
34
|
+
|
|
35
|
+
if (ignore.some((ig) => relativePath.startsWith(ig) || entry.name === ig)) {
|
|
36
|
+
continue
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
if (entry.isDirectory()) {
|
|
40
|
+
await walk(fullPath)
|
|
41
|
+
} else if (entry.isFile()) {
|
|
42
|
+
const ext = path.extname(entry.name).toLowerCase()
|
|
43
|
+
if (extensions.includes(ext)) {
|
|
44
|
+
files.push(fullPath)
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
} catch {}
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
await walk(dir)
|
|
52
|
+
return files
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
/**
 * `codeindex` tool: inspect or rebuild the semantic-search indexes stored
 * under `.opencode/vectors/<index>/`.
 */
export default tool({
  description: `Check codebase index status or trigger re-indexing for semantic search.

Actions:
- "status" → Show index statistics
- "list" → List all available indexes with stats
- "reindex" → Re-index files using local vectorizer

Available indexes:
- "code" - Source code files
- "docs" - Documentation files
- "config" - Configuration files`,

  args: {
    action: tool.schema.enum(["status", "list", "reindex"]).describe("Action to perform"),
    index: tool.schema.string().optional().default("code").describe("Index name: code, docs, config"),
    dir: tool.schema.string().optional().describe("Directory to index (default: project root)"),
  },

  async execute(args) {
    // NOTE(review): uses process.cwd() as the project root — assumes the
    // tool host always runs from the project directory; confirm with host.
    const projectRoot = process.cwd()
    const vectorsDir = path.join(projectRoot, ".opencode", "vectors")
    const indexName = args.index || "code"

    if (args.action === "list") {
      // Each subdirectory of .opencode/vectors/ is one index.
      let output = `## Codebase Index Overview\n\n`
      const indexes: string[] = []
      try {
        const entries = await fs.readdir(vectorsDir, { withFileTypes: true })
        for (const entry of entries) {
          if (entry.isDirectory()) indexes.push(entry.name)
        }
      } catch {}

      if (indexes.length === 0) {
        output += `⚠️ No indexes created yet\n\nCreate indexes:\n\n\`\`\`\n`
        output += `codeindex({ action: "reindex", index: "code" })\n`
        output += `codeindex({ action: "reindex", index: "docs", dir: "docs/" })\n`
        output += `\`\`\`\n`
      } else {
        output += `### Active Indexes\n\n`
        for (const idx of indexes) {
          try {
            // hashes.json keys are the indexed file paths.
            const hashesPath = path.join(vectorsDir, idx, "hashes.json")
            const hashes = JSON.parse(await fs.readFile(hashesPath, "utf8"))
            const fileCount = Object.keys(hashes).length
            const desc = INDEX_DESCRIPTIONS[idx] || "Custom index"
            output += `- ${idx} - ${desc} (files: ${fileCount})\n`
          } catch {
            // Directory exists but manifest is missing/unreadable.
            output += `- ${idx}\n`
          }
        }
      }

      output += `\n### Usage\n\n\`\`\`\nsearch({ query: "your query", index: "code" })\n\`\`\``
      return output
    }

    if (args.action === "status") {
      const hashesFile = path.join(vectorsDir, indexName, "hashes.json")
      try {
        const hashesContent = await fs.readFile(hashesFile, "utf8")
        const hashes = JSON.parse(hashesContent)
        const fileCount = Object.keys(hashes).length
        // Show up to five indexed paths as a sample.
        const sampleFiles = Object.keys(hashes).slice(0, 5)
        const desc = INDEX_DESCRIPTIONS[indexName] || "Custom index"

        return `## Index Status: "${indexName}"\n\n**Description:** ${desc}\n**Files indexed:** ${fileCount}\n\n**Sample indexed files:**\n${sampleFiles.map((f) => `- ${f}`).join("\n")}${fileCount > 5 ? `\n- ... and ${fileCount - 5} more` : ""}`
      } catch {
        return `## Index Status: "${indexName}"\n\nIndex "${indexName}" not created yet. Create it with: codeindex({ action: "reindex", index: "${indexName}" })`
      }
    }

    if (args.action === "reindex") {
      try {
        const indexer = await new CodebaseIndexer(projectRoot, indexName).init()

        const baseDir = args.dir ? path.resolve(projectRoot, args.dir) : projectRoot
        const extensions = INDEX_EXTENSIONS[indexName] || INDEX_EXTENSIONS.code

        const ignoreList = ["node_modules", ".git", "dist", "build", ".opencode", "vendor", "__pycache__"]
        const files = await walkDir(baseDir, extensions, ignoreList)

        let indexed = 0
        let skipped = 0
        for (const filePath of files) {
          try {
            // indexFile returns false when the file's content is unchanged.
            const wasIndexed = await indexer.indexFile(filePath)
            if (wasIndexed) indexed++
            else skipped++
          } catch {}
        }

        await indexer.unloadModel()
        const stats = await indexer.getStats()

        return `## Re-indexing Complete ✅\n\n**Index:** ${indexName}\n**Directory:** ${args.dir || "(project root)"}\n**Files found:** ${files.length}\n**Files indexed:** ${indexed}\n**Files unchanged:** ${skipped}\n**Total chunks:** ${stats.chunkCount}`
      } catch (error: any) {
        return `❌ Re-indexing failed: ${error.message || String(error)}`
      }
    }

    return `Unknown action: ${args.action}. Use: status, list, or reindex`
  },
})
|
package/tools/search.ts
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic Code Search Tool
|
|
3
|
+
*
|
|
4
|
+
* Uses local embeddings + LanceDB vector store via bundled vectorizer.
|
|
5
|
+
* Index data is stored in `.opencode/vectors/<index>/`.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { tool } from "@opencode-ai/plugin"
|
|
9
|
+
import path from "path"
|
|
10
|
+
import fs from "fs/promises"
|
|
11
|
+
|
|
12
|
+
import { CodebaseIndexer } from "../vectorizer/index.js"
|
|
13
|
+
|
|
14
|
+
export default tool({
|
|
15
|
+
description: `Search the codebase semantically. Use this to find relevant code snippets, functions, or files based on meaning, not just text matching.
|
|
16
|
+
|
|
17
|
+
Available indexes:
|
|
18
|
+
- "code" (default) - Source code files (*.js, *.ts, *.py, *.go, etc.)
|
|
19
|
+
- "docs" - Documentation files (*.md, *.txt, etc.)
|
|
20
|
+
- "config" - Configuration files (*.yaml, *.json, etc.)
|
|
21
|
+
- searchAll: true - Search across all indexes
|
|
22
|
+
|
|
23
|
+
Examples:
|
|
24
|
+
- "authentication logic" → finds auth-related code
|
|
25
|
+
- "database connection handling" → finds DB setup code
|
|
26
|
+
- "how to deploy" with index: "docs" → finds deployment docs
|
|
27
|
+
- "API keys" with index: "config" → finds config with API settings`,
|
|
28
|
+
|
|
29
|
+
args: {
|
|
30
|
+
query: tool.schema.string().describe("Semantic search query describing what you're looking for"),
|
|
31
|
+
index: tool.schema.string().optional().default("code").describe("Index to search: code, docs, config, or custom name"),
|
|
32
|
+
limit: tool.schema.number().optional().default(10).describe("Number of results to return (default: 10)"),
|
|
33
|
+
searchAll: tool.schema.boolean().optional().default(false).describe("Search all indexes instead of just one"),
|
|
34
|
+
freshen: tool.schema.boolean().optional().default(true).describe("Auto-update stale files before searching (default: true)"),
|
|
35
|
+
includeArchived: tool.schema.boolean().optional().default(false).describe("Include archived files in results (default: false). Files are archived if in /archive/ folder or have 'archived: true' in frontmatter."),
|
|
36
|
+
},
|
|
37
|
+
|
|
38
|
+
async execute(args) {
|
|
39
|
+
const projectRoot = process.cwd()
|
|
40
|
+
|
|
41
|
+
try {
|
|
42
|
+
let allResults: any[] = []
|
|
43
|
+
const limit = args.limit || 10
|
|
44
|
+
const indexName = args.index || "code"
|
|
45
|
+
|
|
46
|
+
// Auto-freshen stale files before searching
|
|
47
|
+
if (args.freshen !== false) {
|
|
48
|
+
const tempIndexer = await new CodebaseIndexer(projectRoot, indexName).init()
|
|
49
|
+
await tempIndexer.freshen()
|
|
50
|
+
await tempIndexer.unloadModel()
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
if (args.searchAll) {
|
|
54
|
+
const tempIndexer = await new CodebaseIndexer(projectRoot, "code").init()
|
|
55
|
+
const indexes = await tempIndexer.listIndexes()
|
|
56
|
+
await tempIndexer.unloadModel()
|
|
57
|
+
|
|
58
|
+
if (indexes.length === 0) {
|
|
59
|
+
return `❌ No indexes found. Create one with: codeindex({ action: "reindex", index: "code" })`
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
for (const idx of indexes) {
|
|
63
|
+
const indexer = await new CodebaseIndexer(projectRoot, idx).init()
|
|
64
|
+
if (args.freshen !== false) {
|
|
65
|
+
await indexer.freshen()
|
|
66
|
+
}
|
|
67
|
+
const results = await indexer.search(args.query, limit, args.includeArchived)
|
|
68
|
+
allResults.push(...results.map((r: any) => ({ ...r, _index: idx })))
|
|
69
|
+
await indexer.unloadModel()
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
allResults.sort((a, b) => (a._distance || 0) - (b._distance || 0))
|
|
73
|
+
allResults = allResults.slice(0, limit)
|
|
74
|
+
} else {
|
|
75
|
+
const hashesFile = path.join(projectRoot, ".opencode", "vectors", indexName, "hashes.json")
|
|
76
|
+
try {
|
|
77
|
+
await fs.access(hashesFile)
|
|
78
|
+
} catch {
|
|
79
|
+
return `❌ Index "${indexName}" not found. Create it with: codeindex({ action: "reindex", index: "${indexName}" })`
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
|
|
83
|
+
const results = await indexer.search(args.query, limit, args.includeArchived)
|
|
84
|
+
allResults = results.map((r: any) => ({ ...r, _index: indexName }))
|
|
85
|
+
await indexer.unloadModel()
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
if (allResults.length === 0) {
|
|
89
|
+
const scope = args.searchAll ? "any index" : `index "${indexName}"`
|
|
90
|
+
return `No results found in ${scope} for: "${args.query}"\n\nTry:\n- Different keywords\n- Re-index with: codeindex({ action: "reindex", index: "${indexName}" })`
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
const scope = args.searchAll ? "all indexes" : `index "${indexName}"`
|
|
94
|
+
let output = `## Search Results for: "${args.query}" (${scope})\n\n`
|
|
95
|
+
|
|
96
|
+
for (let i = 0; i < allResults.length; i++) {
|
|
97
|
+
const r = allResults[i]
|
|
98
|
+
const score = r._distance ? (1 - r._distance).toFixed(3) : "N/A"
|
|
99
|
+
const indexLabel = args.searchAll ? ` [${r._index}]` : ""
|
|
100
|
+
|
|
101
|
+
output += `### ${i + 1}. ${r.file}${indexLabel}\n`
|
|
102
|
+
output += `**Relevance:** ${score}\n\n`
|
|
103
|
+
output += "```\n"
|
|
104
|
+
const content = r.content.length > 500 ? r.content.substring(0, 500) + "\n... (truncated)" : r.content
|
|
105
|
+
output += content
|
|
106
|
+
output += "\n```\n\n"
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
output += `---\n*Found ${allResults.length} results. Use Read tool to see full files.*`
|
|
110
|
+
return output
|
|
111
|
+
} catch (error: any) {
|
|
112
|
+
return `❌ Search failed: ${error.message || String(error)}`
|
|
113
|
+
}
|
|
114
|
+
},
|
|
115
|
+
})
|
|
@@ -0,0 +1,555 @@
|
|
|
1
|
+
// OpenCode Vectorizer - Semantic Code Search with Multi-Index Support
|
|
2
|
+
|
|
3
|
+
import { pipeline, env } from "@xenova/transformers";
|
|
4
|
+
import * as lancedb from "vectordb";
|
|
5
|
+
import fs from "fs/promises";
|
|
6
|
+
import path from "path";
|
|
7
|
+
import crypto from "crypto";
|
|
8
|
+
|
|
9
|
+
// Verbose vectorizer logging is opted into via DEBUG=vectorizer or DEBUG=*.
const DEBUG = process.env.DEBUG === "*" || Boolean(process.env.DEBUG?.includes("vectorizer"));
if (!DEBUG) {
  // Keep transformers.js quiet during normal operation.
  env.allowLocalModels = true;
  env.useBrowserCache = false;
  env.logLevel = "error";
}
|
|
16
|
+
|
|
17
|
+
/**
 * Built-in index presets, keyed by index name. Each preset carries a glob
 * `pattern`, an `ignore` list of glob exclusions, and a human-readable
 * `description`. These values can be overridden by .opencode/vectorizer.yaml.
 */
const makePreset = (pattern, ignore, description) => ({ pattern, ignore, description });

// Ignore globs shared by the "config" and "all" presets.
const BASE_IGNORE = ["**/node_modules/**", "**/.git/**", "**/.opencode/**"];

const DEFAULT_PRESETS = {
  code: makePreset(
    "**/*.{js,ts,jsx,tsx,mjs,cjs,py,go,rs,java,kt,swift,c,cpp,h,hpp,cs,rb,php,scala,clj}",
    [
      "**/node_modules/**",
      "**/.git/**",
      "**/dist/**",
      "**/build/**",
      "**/.opencode/**",
      "**/docs/**",
      "**/vendor/**",
      "**/__pycache__/**",
    ],
    "Source code files (excludes docs, vendor, node_modules)",
  ),
  docs: makePreset(
    "docs/**/*.{md,mdx,txt,rst,adoc}",
    [],
    "Documentation in docs/ folder",
  ),
  config: makePreset(
    "**/*.{yaml,yml,json,toml,ini,env,xml}",
    [...BASE_IGNORE],
    "Configuration files",
  ),
  all: makePreset(
    "**/*.{js,ts,jsx,tsx,mjs,cjs,py,go,rs,java,kt,swift,c,cpp,h,hpp,cs,rb,php,scala,clj,md,mdx,txt,rst,adoc,yaml,yml,json,toml}",
    [...BASE_IGNORE],
    "All supported files",
  ),
};
|
|
52
|
+
|
|
53
|
+
// Effective presets: starts as a shallow copy of DEFAULT_PRESETS; individual
// entries are overwritten by loadConfig() when .opencode/vectorizer.yaml
// provides per-index settings.
let INDEX_PRESETS = { ...DEFAULT_PRESETS };
// Extra ignore globs applied to every index; filled by loadConfig() from the
// config's top-level `exclude:` list (bare names become **/name/** globs).
let GLOBAL_IGNORE = [];

// Default embedding model (fast). Can be overridden by config via loadConfig().
let EMBEDDING_MODEL = "Xenova/all-MiniLM-L6-v2";
|
|
59
|
+
|
|
60
|
+
/**
 * Render the contents of the default .opencode/vectorizer.yaml.
 *
 * The indentation here must agree with the regex parser in loadConfig():
 * top-level vectorizer keys at 2 spaces, index names at 4, index fields at 6,
 * per-index ignore items at 8, and global exclude items at 4 — otherwise the
 * generated config cannot be read back.
 */
function defaultVectorizerYaml() {
  return (
    `vectorizer:\n` +
    `  enabled: true\n` +
    `  auto_index: true\n` +
    `  model: \"${EMBEDDING_MODEL}\"\n` +
    `  debounce_ms: 1000\n` +
    `  indexes:\n` +
    `    code:\n` +
    `      enabled: true\n` +
    `      pattern: \"${DEFAULT_PRESETS.code.pattern}\"\n` +
    `      ignore:\n` +
    DEFAULT_PRESETS.code.ignore.map((p) => `        - \"${p}\"\n`).join("") +
    `    docs:\n` +
    `      enabled: true\n` +
    `      pattern: \"${DEFAULT_PRESETS.docs.pattern}\"\n` +
    `      ignore: []\n` +
    `    config:\n` +
    `      enabled: false\n` +
    `      pattern: \"${DEFAULT_PRESETS.config.pattern}\"\n` +
    `      ignore:\n` +
    DEFAULT_PRESETS.config.ignore.map((p) => `        - \"${p}\"\n`).join("") +
    `  exclude:\n` +
    `    - node_modules\n` +
    `    - vendor\n` +
    `    - dist\n` +
    `    - build\n` +
    `    - out\n` +
    `    - __pycache__\n`
  );
}
|
|
91
|
+
|
|
92
|
+
/**
 * Write .opencode/vectorizer.yaml with default contents if — and only if —
 * the file does not already exist. Best-effort: every filesystem failure is
 * swallowed, since a missing config simply means defaults are used.
 */
async function ensureDefaultConfig(projectRoot) {
  const configDir = path.join(projectRoot, ".opencode");
  const configPath = path.join(configDir, "vectorizer.yaml");
  try {
    await fs.mkdir(configDir, { recursive: true });

    // fs.access throws when the file is absent; only then do we write it.
    let exists = true;
    try {
      await fs.access(configPath);
    } catch {
      exists = false;
    }
    if (!exists) {
      await fs.writeFile(configPath, defaultVectorizerYaml(), "utf8");
      if (DEBUG) console.log("[vectorizer] Wrote default config:", configPath);
    }
  } catch {
    // non-fatal
  }
}
|
|
106
|
+
|
|
107
|
+
/**
 * Load index configuration from .opencode/vectorizer.yaml.
 *
 * Mutates module-level state: EMBEDDING_MODEL, GLOBAL_IGNORE and
 * INDEX_PRESETS. When no config file exists (or the vectorizer: section is
 * missing), a default config file is written and the built-in defaults stay
 * in effect. Any parse failure is non-fatal and keeps the defaults.
 */
async function loadConfig(projectRoot) {
  try {
    const candidates = [path.join(projectRoot, ".opencode", "vectorizer.yaml")];

    let content = null;
    for (const configPath of candidates) {
      try {
        content = await fs.readFile(configPath, "utf8");
        break;
      } catch {
        // try next
      }
    }

    if (!content) {
      await ensureDefaultConfig(projectRoot);
      return;
    }

    // Extract the `vectorizer:` section: everything up to the next top-level
    // key or end of input.
    // FIX: JavaScript regexes do not support \Z — the old `...|\Z` lookahead
    // matched a literal "Z", so a section running to end-of-file was never
    // matched (or was cut short at the first "Z" in the content). The
    // end-of-input assertion (?![\s\S]) is used instead, here and below.
    const vectorizerMatch = content.match(/^vectorizer:([\s\S]*?)(?=^[a-zA-Z_\-]+:|(?![\s\S]))/m);
    if (!vectorizerMatch) {
      await ensureDefaultConfig(projectRoot);
      return;
    }

    const section = vectorizerMatch[1];

    // Parse embedding model (2-space indented key).
    const modelMatch = section.match(/^\s{2}model:\s*["']?([^"'\n]+)["']?/m);
    if (modelMatch) {
      EMBEDDING_MODEL = modelMatch[1].trim();
      if (DEBUG) console.log("[vectorizer] Using model from config:", EMBEDDING_MODEL);
    }

    // Parse global exclude list; bare directory names become **/name/** globs.
    const excludeMatch = section.match(/^\s{2}exclude:\s*\n((?:\s{4}-\s+.+\n?)*)/m);
    if (excludeMatch) {
      GLOBAL_IGNORE = excludeMatch[1]
        .split("\n")
        .map((line) => line.replace(/^\s*-\s*/, "").trim())
        .filter(Boolean)
        .map((p) => (p.includes("*") ? p : `**/${p}/**`));
    }

    // Extract the `indexes:` subsection (same \Z fix as above; the
    // `exclude:` alternative is now anchored to a line start — previously it
    // could match two spaces + "exclude:" anywhere in the text).
    const indexesMatch = section.match(
      /^\s{2}indexes:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|^\s{2}exclude:|(?![\s\S]))/m,
    );
    if (!indexesMatch) return;

    const indexesSection = indexesMatch[1];

    // Parse each known index (code, docs, config).
    for (const indexName of ["code", "docs", "config"]) {
      // Slice out this index's 4-space-indented subsection (end-of-input fix
      // applied here as well).
      const indexRegex = new RegExp(
        `^\\s{4}${indexName}:\\s*\\n([\\s\\S]*?)(?=^\\s{4}[a-zA-Z_\\-]+:|(?![\\s\\S]))`,
        "m",
      );
      const indexMatch = indexesSection.match(indexRegex);
      if (!indexMatch) continue;

      const indexSection = indexMatch[1];

      // `enabled` defaults to true when omitted.
      const enabledMatch = indexSection.match(/^\s+enabled:\s*(true|false)/m);
      const enabled = enabledMatch ? enabledMatch[1] === "true" : true;

      // `pattern` falls back to the built-in preset's pattern.
      const patternMatch = indexSection.match(/^\s+pattern:\s*["']?([^"'\n]+)["']?/m);
      const pattern = patternMatch ? patternMatch[1].trim() : DEFAULT_PRESETS[indexName]?.pattern;

      // Optional per-index ignore list (quoted or bare items).
      const ignoreMatch = indexSection.match(/^\s+ignore:\s*\n((?:\s+-\s+.+\n?)*)/m);
      let ignore = [];
      if (ignoreMatch) {
        ignore = ignoreMatch[1]
          .split("\n")
          .map((line) => line.replace(/^\s*-\s*/, "").replace(/["']/g, "").trim())
          .filter(Boolean);
      }

      if (enabled && pattern) {
        INDEX_PRESETS[indexName] = {
          pattern,
          ignore,
          description: `${indexName} files from config`,
        };
      }
    }

    if (DEBUG) console.log("[vectorizer] Loaded config:", { INDEX_PRESETS, GLOBAL_IGNORE });
  } catch {
    if (DEBUG) console.log("[vectorizer] Using default presets (config load failed)");
  }
}
|
|
204
|
+
|
|
205
|
+
/**
 * Per-index semantic indexer backed by a local LanceDB table ("chunks") and
 * a transformers.js embedding pipeline.
 *
 * On-disk layout for each index:
 *   .opencode/vectors/<indexName>/lancedb/     - vector database
 *   .opencode/vectors/<indexName>/hashes.json  - relPath -> md5 of last-indexed content
 */
class CodebaseIndexer {
  /**
   * @param {string} projectRoot Root directory of the project being indexed.
   * @param {string} indexName   Preset/index name (default "code").
   */
  constructor(projectRoot, indexName = "code") {
    this.root = projectRoot;
    this.indexName = indexName;
    this.baseDir = path.join(projectRoot, ".opencode", "vectors");
    this.cacheDir = path.join(this.baseDir, indexName);
    this.model = null; // lazily-created embedding pipeline (see loadModel)
    this.db = null; // LanceDB connection, set in init()
    this.hashes = {}; // relPath -> md5 hex digest of last indexed content
    this.configLoaded = false; // ensures loadConfig() runs once per instance
  }

  /**
   * Load config (once per instance), create the cache directory, connect to
   * the LanceDB store and read the hash manifest.
   * @returns {Promise<CodebaseIndexer>} this, for `new X(...).init()` chaining.
   */
  async init() {
    if (!this.configLoaded) {
      await loadConfig(this.root);
      this.configLoaded = true;
    }
    await fs.mkdir(this.cacheDir, { recursive: true });
    this.db = await lancedb.connect(path.join(this.cacheDir, "lancedb"));
    await this.loadHashes();
    return this;
  }

  /**
   * Lazily create (and cache) the feature-extraction pipeline for
   * EMBEDDING_MODEL. Subsequent calls reuse the cached pipeline.
   */
  async loadModel() {
    if (!this.model) {
      if (DEBUG) console.log(`[vectorizer] Loading embedding model: ${EMBEDDING_MODEL}...`);
      this.model = await pipeline("feature-extraction", EMBEDDING_MODEL, {
        // null suppresses download-progress output; undefined leaves the
        // library default in place when debugging — presumably intentional,
        // confirm against @xenova/transformers semantics.
        progress_callback: DEBUG ? undefined : null,
      });
      if (DEBUG) console.log(`[vectorizer] Model loaded: ${EMBEDDING_MODEL}`);
    }
    return this.model;
  }

  /**
   * Drop the pipeline reference so the model can be reclaimed. global.gc is
   * only present when node runs with --expose-gc; otherwise this just
   * releases the reference.
   */
  async unloadModel() {
    this.model = null;
    if (global.gc) global.gc();
  }

  /**
   * Read hashes.json into this.hashes; a missing/corrupt file resets the
   * manifest to empty (forcing a full re-index on next pass).
   */
  async loadHashes() {
    try {
      const hashFile = path.join(this.cacheDir, "hashes.json");
      const data = await fs.readFile(hashFile, "utf8");
      this.hashes = JSON.parse(data);
    } catch {
      this.hashes = {};
    }
  }

  /** Persist this.hashes to hashes.json (pretty-printed). */
  async saveHashes() {
    const hashFile = path.join(this.cacheDir, "hashes.json");
    await fs.writeFile(hashFile, JSON.stringify(this.hashes, null, 2));
  }

  /**
   * md5 hex digest of file content. Used only for change detection, not
   * security, so md5 is acceptable here.
   */
  fileHash(content) {
    return crypto.createHash("md5").update(content).digest("hex");
  }

  /**
   * A file counts as archived when it lives under an archive/ directory or
   * its YAML frontmatter (--- ... --- at the top) contains `archived: true`.
   */
  isArchived(relPath, content) {
    if (relPath.includes("/archive/") || relPath.startsWith("archive/")) {
      return true;
    }
    const frontmatterMatch = content.match(/^---\n([\s\S]*?)\n---/);
    if (frontmatterMatch) {
      const frontmatter = frontmatterMatch[1];
      if (/^archived:\s*true/m.test(frontmatter)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Embed a text chunk with mean pooling + L2 normalization and return the
   * vector as a plain number array.
   */
  async embed(text) {
    const model = await this.loadModel();
    const result = await model(text, { pooling: "mean", normalize: true });
    return Array.from(result.data);
  }

  /**
   * Split content into line-aligned chunks of at most ~maxChars characters.
   * A single line longer than maxChars still becomes its own (oversized)
   * chunk — lines are never split.
   */
  chunkCode(content, maxChars = 1500) {
    const chunks = [];
    const lines = content.split("\n");
    let current = [];
    let currentLen = 0;

    for (const line of lines) {
      if (currentLen + line.length > maxChars && current.length > 0) {
        chunks.push(current.join("\n"));
        current = [];
        currentLen = 0;
      }
      current.push(line);
      // +1 accounts for the newline re-inserted by join("\n")
      currentLen += line.length + 1;
    }

    if (current.length > 0) {
      chunks.push(current.join("\n"));
    }

    return chunks;
  }

  /** True when the file's content hash differs from the last indexed hash. */
  needsIndex(filePath, content) {
    const relPath = path.relative(this.root, filePath);
    const currentHash = this.fileHash(content);
    return this.hashes[relPath] !== currentHash;
  }

  /**
   * Index one file: skip when unreadable or unchanged, otherwise embed each
   * chunk and append the rows to the "chunks" table, then record the hash.
   * @returns {Promise<boolean>} true if the file was (re)indexed.
   *
   * NOTE(review): rows from a previously-indexed version of the same file are
   * not deleted before table.add(), so re-indexing a changed file leaves
   * stale duplicate chunks in the table — confirm whether a delete-by-file
   * should precede the add.
   */
  async indexFile(filePath) {
    const relPath = path.relative(this.root, filePath);

    let content;
    try {
      content = await fs.readFile(filePath, "utf8");
    } catch {
      return false;
    }

    const hash = this.fileHash(content);
    if (this.hashes[relPath] === hash) {
      return false;
    }

    const chunks = this.chunkCode(content);
    const archived = this.isArchived(relPath, content);
    const data = [];

    for (let i = 0; i < chunks.length; i++) {
      const embedding = await this.embed(chunks[i]);
      data.push({
        file: relPath,
        chunk_index: i,
        content: chunks[i],
        vector: embedding,
        archived: archived,
      });
    }

    const tableName = "chunks";
    const tables = await this.db.tableNames();
    if (tables.includes(tableName)) {
      const table = await this.db.openTable(tableName);
      await table.add(data);
    } else {
      // First file ever indexed: the table schema is inferred from `data`.
      await this.db.createTable(tableName, data);
    }

    this.hashes[relPath] = hash;
    await this.saveHashes();

    return true;
  }

  /**
   * Vector-search the "chunks" table for `query`.
   * @returns {Promise<Array>} up to `limit` rows (with `_distance` set by
   * LanceDB); empty when the table does not exist yet.
   */
  async search(query, limit = 5, includeArchived = false) {
    const tableName = "chunks";
    const tables = await this.db.tableNames();
    if (!tables.includes(tableName)) {
      return [];
    }

    const queryEmbedding = await this.embed(query);
    const table = await this.db.openTable(tableName);

    // Over-fetch 3x when archived rows will be filtered out, so the final
    // slice can still fill `limit`.
    const fetchLimit = includeArchived ? limit : limit * 3;
    let results = await table.search(queryEmbedding).limit(fetchLimit).execute();

    if (!includeArchived) {
      results = results.filter((r) => !r.archived);
    }

    return results.slice(0, limit);
  }

  /**
   * Compare the number of files currently indexed against the number the
   * preset's glob would match, and recommend a re-index when the index is
   * empty or drifts by more than max(5, 20% of expected).
   */
  async checkHealth(extraIgnore = []) {
    const { glob } = await import("glob");
    const preset = INDEX_PRESETS[this.indexName] || DEFAULT_PRESETS.code;

    const ignore = [
      ...(preset.ignore || []),
      ...GLOBAL_IGNORE,
      // Bare directory names are widened to **/name/** globs.
      ...extraIgnore.map((p) => (p.includes("*") ? p : `**/${p}/**`)),
    ];

    const expectedFiles = await glob(preset.pattern, {
      cwd: this.root,
      nodir: true,
      ignore,
    });

    const indexedFiles = Object.keys(this.hashes);
    const currentCount = indexedFiles.length;
    const expectedCount = expectedFiles.length;

    const diff = Math.abs(currentCount - expectedCount);
    const threshold = Math.max(5, expectedCount * 0.2);

    if (currentCount === 0 && expectedCount > 0) {
      return { needsReindex: true, reason: "empty", currentCount, expectedCount };
    }

    if (diff > threshold) {
      return { needsReindex: true, reason: "mismatch", currentCount, expectedCount };
    }

    return { needsReindex: false, reason: "ok", currentCount, expectedCount };
  }

  /**
   * Re-check every already-indexed file: re-index the ones whose content
   * changed and drop manifest entries for files that no longer exist.
   * Does NOT pick up files that were never indexed (use indexAll for that).
   * @returns {Promise<{checked:number, updated:number, deleted:number}>}
   */
  async freshen() {
    let checked = 0;
    let updated = 0;
    let deleted = 0;

    const indexedFiles = Object.keys(this.hashes);
    for (const relPath of indexedFiles) {
      checked++;
      const filePath = path.join(this.root, relPath);

      try {
        const content = await fs.readFile(filePath, "utf8");
        const currentHash = this.fileHash(content);

        if (this.hashes[relPath] !== currentHash) {
          await this.indexFile(filePath);
          updated++;
        }
      } catch {
        // Unreadable/deleted file: drop it from the manifest.
        // NOTE(review): its chunks remain in the LanceDB table — confirm
        // whether they should be deleted here too.
        delete this.hashes[relPath];
        deleted++;
      }
    }

    if (deleted > 0) {
      await this.saveHashes();
    }

    return { checked, updated, deleted };
  }

  /**
   * Glob the preset's pattern and index every matching file. Files whose
   * hash is unchanged (or that fail to index) are counted as skipped.
   * @param {Function|null} onProgress Optional (indexed, total, relPath) callback.
   * @returns {Promise<{indexed:number, skipped:number, total:number}>}
   */
  async indexAll(onProgress = null, extraIgnore = []) {
    const { glob } = await import("glob");
    const preset = INDEX_PRESETS[this.indexName] || DEFAULT_PRESETS.code;

    const ignore = [
      ...(preset.ignore || []),
      ...GLOBAL_IGNORE,
      ...extraIgnore.map((p) => (p.includes("*") ? p : `**/${p}/**`)),
    ];

    const files = await glob(preset.pattern, {
      cwd: this.root,
      nodir: true,
      ignore,
    });

    let indexed = 0;
    let skipped = 0;

    for (const relPath of files) {
      const filePath = path.join(this.root, relPath);
      try {
        const wasIndexed = await this.indexFile(filePath);
        if (wasIndexed) {
          indexed++;
          if (onProgress) onProgress(indexed, files.length, relPath);
        } else {
          skipped++;
        }
      } catch {
        skipped++;
      }
    }

    return { indexed, skipped, total: files.length };
  }

  /** Index one file by absolute or project-relative path. */
  async indexSingleFile(filePath) {
    const absPath = path.isAbsolute(filePath) ? filePath : path.join(this.root, filePath);
    return await this.indexFile(absPath);
  }

  /**
   * Summary for this index: file count (from the manifest) and chunk count
   * (rows in the LanceDB table; 0 if the table is missing or unreadable).
   */
  async getStats() {
    const fileCount = Object.keys(this.hashes).length;
    let chunkCount = 0;

    try {
      const tables = await this.db.tableNames();
      if (tables.includes("chunks")) {
        const table = await this.db.openTable("chunks");
        chunkCount = await table.countRows();
      }
    } catch {}

    const preset = INDEX_PRESETS[this.indexName];
    return {
      indexName: this.indexName,
      description: preset?.description || "Custom index",
      model: EMBEDDING_MODEL,
      fileCount,
      chunkCount,
    };
  }

  /**
   * Stats for every non-empty index under .opencode/vectors/. A directory
   * named "lancedb" is skipped — presumably a guard against an older layout
   * that stored the database directly under baseDir; confirm.
   */
  async getAllStats() {
    const stats = [];
    try {
      const entries = await fs.readdir(this.baseDir, { withFileTypes: true });
      for (const entry of entries) {
        if (entry.isDirectory() && entry.name !== "lancedb") {
          try {
            const indexer = await new CodebaseIndexer(this.root, entry.name).init();
            const stat = await indexer.getStats();
            if (stat.fileCount > 0 || stat.chunkCount > 0) {
              stats.push(stat);
            }
          } catch {}
        }
      }
    } catch {}
    return stats;
  }

  /** Delete this index's cache directory and re-initialize it empty. */
  async clear() {
    await fs.rm(this.cacheDir, { recursive: true, force: true });
    this.hashes = {};
    await this.init();
  }

  /**
   * Delete ALL indexes (the entire .opencode/vectors/ tree), then
   * re-initialize this instance's own index empty.
   */
  async clearAll() {
    await fs.rm(this.baseDir, { recursive: true, force: true });
    this.hashes = {};
    await this.init();
  }

  /**
   * List index names = subdirectories of .opencode/vectors/ (skipping a
   * "lancedb" directory, as in getAllStats). Empty when the base dir is
   * missing.
   */
  async listIndexes() {
    const indexes = [];
    try {
      const entries = await fs.readdir(this.baseDir, { withFileTypes: true });
      for (const entry of entries) {
        if (entry.isDirectory() && entry.name !== "lancedb") {
          indexes.push(entry.name);
        }
      }
    } catch {}
    return indexes;
  }
}
|
|
550
|
+
|
|
551
|
+
/**
 * Accessor for the embedding model id currently in effect. Reads the
 * module-level EMBEDDING_MODEL, which loadConfig() may have replaced with a
 * value from .opencode/vectorizer.yaml.
 */
function getEmbeddingModel() {
  return EMBEDDING_MODEL;
}
|
|
554
|
+
|
|
555
|
+
export { CodebaseIndexer, INDEX_PRESETS, getEmbeddingModel };
|