@comfanion/usethis_search 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) Comfanion
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,22 @@
1
+ # @comfanion/usethis_search
2
+
3
+ OpenCode plugin that provides semantic search and index management tools.
4
+
5
+ ## Tools
6
+
7
+ - `search` (semantic search)
8
+ - `codeindex` (index status, list, reindex)
9
+
10
+ ## Storage
11
+
12
+ - Vectors are stored in `.opencode/vectors/<index>/` in the project.
13
+
14
+ ## Install (OpenCode)
15
+
16
+ Add to `opencode.json`:
17
+
18
+ ```json
19
+ {
20
+ "plugin": ["@comfanion/usethis_search"]
21
+ }
22
+ ```
@@ -0,0 +1,460 @@
1
+ import type { Plugin } from "@opencode-ai/plugin"
2
+ import path from "path"
3
+ import fs from "fs/promises"
4
+ import fsSync from "fs"
5
+
6
+ import { CodebaseIndexer } from "./vectorizer/index.js"
7
+
8
+ /**
9
+ * File Indexer Plugin
10
+ *
11
+ * Automatically manages semantic search indexes:
12
+ * - On plugin load (opencode startup): freshen existing indexes
13
+ * - On file edit: queue file for reindexing (debounced)
14
+ *
15
+ * Configuration in .opencode/vectorizer.yaml:
16
+ * vectorizer:
17
+ * enabled: true # Master switch
18
+ * auto_index: true # Enable this plugin
19
+ * debounce_ms: 1000 # Wait time before indexing
20
+ */
21
+
22
+ const DEBUG = process.env.DEBUG?.includes("file-indexer") || process.env.DEBUG === "*"
23
+ const SKIP_AUTO_INDEX = process.env.OPENCODE_SKIP_AUTO_INDEX === "1"
24
+
25
+ let logFilePath: string | null = null
26
+
27
+ function logFile(msg: string): void {
28
+ if (logFilePath) {
29
+ try {
30
+ const timestamp = new Date().toISOString().slice(11, 19)
31
+ fsSync.appendFileSync(logFilePath, `${timestamp} ${msg}\n`)
32
+ } catch {
33
+ // ignore
34
+ }
35
+ }
36
+ }
37
+
38
+ function log(msg: string): void {
39
+ if (DEBUG) console.log(`[file-indexer] ${msg}`)
40
+ logFile(msg)
41
+ }
42
+
43
+ function debug(msg: string): void {
44
+ if (DEBUG) log(msg)
45
+ }
46
+
47
// Baseline configuration used when .opencode/vectorizer.yaml is absent or unparsable.
// The per-index extension lists decide which edited files are routed to which index.
const DEFAULT_CONFIG = {
  enabled: true, // master switch for the whole vectorizer feature
  auto_index: true, // enables this plugin's automatic (re)indexing
  debounce_ms: 1000, // quiet period after an edit before reindexing kicks in
  indexes: {
    code: { enabled: true, extensions: [".js", ".ts", ".jsx", ".tsx", ".py", ".go", ".rs", ".java", ".kt", ".swift", ".c", ".cpp", ".h", ".hpp", ".cs", ".rb", ".php", ".scala", ".clj"] },
    docs: { enabled: true, extensions: [".md", ".mdx", ".txt", ".rst", ".adoc"] },
    // Disabled by default: config files tend to be noisy for semantic search.
    config: { enabled: false, extensions: [".yaml", ".yml", ".json", ".toml", ".ini", ".xml"] },
  },
  // Directory names never indexed (see isExcluded for how they are matched).
  exclude: ["node_modules", "vendor", "dist", "build", "out", "__pycache__"],
}
58
+
59
// Shape of the parsed vectorizer configuration (mirrors DEFAULT_CONFIG).
interface VectorizerConfig {
  enabled: boolean // master switch for the whole feature
  auto_index: boolean // whether this plugin auto-indexes on file edits
  debounce_ms: number // debounce window for edit-triggered reindexing
  indexes: Record<string, { enabled: boolean; extensions: string[] }> // index name -> routing rule
  exclude: string[] // excluded directory names/patterns
}
66
+
67
// User-facing toast messages, keyed by language code. Each language provides:
//   indexing(files)       - shown when a bulk (re)index starts
//   fun(files, mins)      - light-hearted wait-time hint, scaled by file count
//   done(files, duration) - completion message
//   fresh()               - nothing needed to be indexed
//   error(msg)            - failure message
// NOTE: these are runtime strings shown to the user; keep translations in sync.
const FUN_MESSAGES = {
  en: {
    indexing: (files: number) => `Indexing ${files} files...`,
    fun: (files: number, mins: number) => {
      if (files < 20) return `Quick coffee? ☕`
      if (files < 100) return `~${mins}min. Stretch break? 🧘`
      if (files < 500) return `~${mins}min. Make coffee ☕ and relax 🛋️`
      return `~${mins}min. Go touch grass 🌿 or take a nap 😴`
    },
    done: (files: number, duration: string) => {
      if (files < 20) return `Done! ${files} files in ${duration}. Fast! 🚀`
      if (files < 100) return `Indexed ${files} files in ${duration}. Let's go! 🎸`
      return `${files} files in ${duration}. Worth the wait! 🎉`
    },
    fresh: () => `Everything's fresh! Nothing to do 😎`,
    error: (msg: string) => `Oops! ${msg} 😬`,
  },
  // Ukrainian
  uk: {
    indexing: (files: number) => `Індексую ${files} файлів...`,
    fun: (files: number, mins: number) => {
      if (files < 20) return `Швидка кава? ☕`
      if (files < 100) return `~${mins}хв. Розімнись! 🧘`
      if (files < 500) return `~${mins}хв. Зроби каву ☕ і відпочинь 🛋️`
      return `~${mins}хв. Йди погуляй 🌿 або поспи 😴`
    },
    done: (files: number, duration: string) => {
      if (files < 20) return `Готово! ${files} файлів за ${duration}. Швидко! 🚀`
      if (files < 100) return `${files} файлів за ${duration}. Поїхали! 🎸`
      return `${files} файлів за ${duration}. Варто було чекати! 🎉`
    },
    fresh: () => `Все свіже! Нічого робити 😎`,
    error: (msg: string) => `Ой! ${msg} 😬`,
  },
  // Russian
  ru: {
    indexing: (files: number) => `Индексирую ${files} файлов...`,
    fun: (files: number, mins: number) => {
      if (files < 20) return `Кофе? ☕`
      if (files < 100) return `~${mins}мин. Разомнись! 🧘`
      if (files < 500) return `~${mins}мин. Сделай кофе ☕ и отдохни 🛋️`
      return `~${mins}мин. Иди погуляй 🌿 или поспи 😴`
    },
    done: (files: number, duration: string) => {
      if (files < 20) return `Готово! ${files} файлов за ${duration}. Быстро! 🚀`
      if (files < 100) return `${files} файлов за ${duration}. Поехали! 🎸`
      return `${files} файлов за ${duration}. Стоило подождать! 🎉`
    },
    fresh: () => `Всё свежее! Делать нечего 😎`,
    error: (msg: string) => `Ой! ${msg} 😬`,
  },
}

// Supported language codes, derived from the message table above.
type Lang = keyof typeof FUN_MESSAGES
119
+
120
+ async function getLanguage(projectRoot: string): Promise<Lang> {
121
+ try {
122
+ const configPath = path.join(projectRoot, ".opencode", "config.yaml")
123
+ const content = await fs.readFile(configPath, "utf8")
124
+ const match = content.match(/communication_language:\s*["']?(\w+)["']?/i)
125
+ const lang = match?.[1]?.toLowerCase()
126
+ if (lang === "ukrainian" || lang === "uk") return "uk"
127
+ if (lang === "russian" || lang === "ru") return "ru"
128
+ return "en"
129
+ } catch {
130
+ return "en"
131
+ }
132
+ }
133
+
134
+ function estimateTime(fileCount: number): number {
135
+ const modelLoadTime = 30
136
+ const perFileTime = 0.5
137
+ const totalSeconds = modelLoadTime + fileCount * perFileTime
138
+ return Math.ceil(totalSeconds / 60)
139
+ }
140
+
141
+ function formatDuration(seconds: number): string {
142
+ if (seconds < 60) return `${Math.round(seconds)}s`
143
+ const mins = Math.floor(seconds / 60)
144
+ const secs = Math.round(seconds % 60)
145
+ return secs > 0 ? `${mins}m ${secs}s` : `${mins}m`
146
+ }
147
+
148
+ const pendingFiles: Map<string, { indexName: string; timestamp: number }> = new Map()
149
+
150
+ async function loadConfig(projectRoot: string): Promise<VectorizerConfig> {
151
+ try {
152
+ const candidates = [path.join(projectRoot, ".opencode", "vectorizer.yaml")]
153
+
154
+ let content: string | null = null
155
+ for (const configPath of candidates) {
156
+ try {
157
+ content = await fs.readFile(configPath, "utf8")
158
+ break
159
+ } catch {
160
+ // try next
161
+ }
162
+ }
163
+
164
+ if (!content) {
165
+ debug("No vectorizer config file found, using defaults")
166
+ return DEFAULT_CONFIG
167
+ }
168
+
169
+ const vectorizerMatch = content.match(/vectorizer:\s*\n([\s\S]*?)(?=\n[a-zA-Z_\-]+:|$)/i)
170
+ if (!vectorizerMatch) {
171
+ debug("No vectorizer section, using defaults")
172
+ return DEFAULT_CONFIG
173
+ }
174
+
175
+ const section = vectorizerMatch[1]
176
+
177
+ const enabledMatch = section.match(/^\s+enabled:\s*(true|false)/m)
178
+ const enabled = enabledMatch ? enabledMatch[1] === "true" : DEFAULT_CONFIG.enabled
179
+
180
+ const autoIndexMatch = section.match(/^\s+auto_index:\s*(true|false)/m)
181
+ const auto_index = autoIndexMatch ? autoIndexMatch[1] === "true" : DEFAULT_CONFIG.auto_index
182
+
183
+ const debounceMatch = section.match(/^\s+debounce_ms:\s*(\d+)/m)
184
+ const debounce_ms = debounceMatch ? parseInt(debounceMatch[1]) : DEFAULT_CONFIG.debounce_ms
185
+
186
+ const excludeMatch = section.match(/exclude:\s*\n((?:\s+-\s+.+\n?)+)/m)
187
+ let exclude = DEFAULT_CONFIG.exclude
188
+ if (excludeMatch) {
189
+ exclude = excludeMatch[1].match(/-\s+(.+)/g)?.map((m) => m.replace(/^-\s+/, "").trim()) || DEFAULT_CONFIG.exclude
190
+ }
191
+
192
+ return { enabled, auto_index, debounce_ms, indexes: DEFAULT_CONFIG.indexes, exclude }
193
+ } catch (e) {
194
+ debug(`Failed to load config: ${(e as Error).message}`)
195
+ return DEFAULT_CONFIG
196
+ }
197
+ }
198
+
199
+ function getIndexForFile(filePath: string, config: VectorizerConfig): string | null {
200
+ const ext = path.extname(filePath).toLowerCase()
201
+ for (const [indexName, indexConfig] of Object.entries(config.indexes)) {
202
+ if (indexConfig.enabled && indexConfig.extensions.includes(ext)) {
203
+ return indexName
204
+ }
205
+ }
206
+ return null
207
+ }
208
+
209
+ function isExcluded(relativePath: string, config: VectorizerConfig): boolean {
210
+ const norm = relativePath.replace(/\\/g, "/")
211
+ return config.exclude.some((pattern) => {
212
+ const p = pattern.replace(/\\/g, "/").replace(/\/+$/, "")
213
+ return norm === p || norm.startsWith(`${p}/`) || norm.includes(`/${p}/`)
214
+ })
215
+ }
216
+
217
+ async function hasIndex(projectRoot: string, indexName: string): Promise<boolean> {
218
+ try {
219
+ await fs.access(path.join(projectRoot, ".opencode", "vectors", indexName, "hashes.json"))
220
+ return true
221
+ } catch {
222
+ return false
223
+ }
224
+ }
225
+
226
// Summary of what the session-start indexing pass did.
interface IndexResult {
  totalFiles: number // files (re)indexed across all indexes (0 when only freshened/skipped)
  elapsedSeconds: number // wall-clock time for the whole pass
  // Last action assigned while iterating indexes — when several indexes are
  // processed, later assignments win.
  action: "created" | "rebuilt" | "freshened" | "skipped"
}
231
+
232
/**
 * Bring all enabled indexes up to date at plugin startup.
 *
 * Runs two passes over the enabled indexes:
 *   1. Health check only — counts how many files WOULD need indexing, so the
 *      `onStart` callback can announce an estimate before any heavy work.
 *   2. Actual work — per index: create it if missing, rebuild it when the
 *      health check says it is stale, otherwise just freshen changed files.
 *
 * A CodebaseIndexer (and its embedding model) is loaded and unloaded per
 * index in EACH pass, so the model is loaded twice per index overall.
 *
 * @param projectRoot absolute project directory
 * @param config      parsed vectorizer configuration
 * @param onStart     optional callback invoked once (before pass 2) when any
 *                    index needs work, with the expected file count and an
 *                    estimated duration in minutes
 * @returns totals plus the LAST action taken (see IndexResult)
 */
async function ensureIndexOnSessionStart(
  projectRoot: string,
  config: VectorizerConfig,
  onStart?: (totalFiles: number, estimatedMins: number) => void,
): Promise<IndexResult> {
  // Escape hatch: OPENCODE_SKIP_AUTO_INDEX=1 disables all startup indexing.
  if (SKIP_AUTO_INDEX) {
    return { totalFiles: 0, elapsedSeconds: 0, action: "skipped" }
  }

  let totalFiles = 0
  let elapsedSeconds = 0
  let action: IndexResult["action"] = "skipped"

  const overallStart = Date.now()

  // First pass - count files and check health
  let needsWork = false
  let totalExpectedFiles = 0

  for (const [indexName, indexConfig] of Object.entries(config.indexes)) {
    if (!indexConfig.enabled) continue
    const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
    try {
      const indexExists = await hasIndex(projectRoot, indexName)
      const health = await indexer.checkHealth(config.exclude)

      if (!indexExists || health.needsReindex) {
        totalExpectedFiles += health.expectedCount
        needsWork = true
      }
    } finally {
      // Always release the embedding model, even if the health check throws.
      await indexer.unloadModel()
    }
  }

  // Announce the upcoming work (toast/estimate) before doing anything slow.
  if (needsWork && onStart) {
    onStart(totalExpectedFiles, estimateTime(totalExpectedFiles))
  }

  // Second pass - do the actual work
  for (const [indexName, indexConfig] of Object.entries(config.indexes)) {
    if (!indexConfig.enabled) continue

    const indexExists = await hasIndex(projectRoot, indexName)
    const startTime = Date.now()

    const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
    try {
      if (!indexExists) {
        // No index on disk yet: build it from scratch.
        log(`Creating "${indexName}" index...`)
        const stats = await indexer.indexAll((indexed: number, total: number, file: string) => {
          // Progress goes to the log file only, every 10 files (and at the end).
          if (indexed % 10 === 0 || indexed === total) {
            logFile(`"${indexName}": ${indexed}/${total} - ${file}`)
          }
        }, config.exclude)
        const elapsed = ((Date.now() - startTime) / 1000).toFixed(1)
        log(`"${indexName}": done ${stats.indexed} files (${elapsed}s)`)
        totalFiles += stats.indexed
        action = "created"
      } else {
        const health = await indexer.checkHealth(config.exclude)

        if (health.needsReindex) {
          // Index exists but is inconsistent with the working tree: full rebuild.
          log(`Rebuilding "${indexName}" (${health.reason}: ${health.currentCount} vs ${health.expectedCount} files)...`)
          const stats = await indexer.indexAll((indexed: number, total: number, file: string) => {
            if (indexed % 10 === 0 || indexed === total) {
              logFile(`"${indexName}": ${indexed}/${total} - ${file}`)
            }
          }, config.exclude)
          const elapsed = ((Date.now() - startTime) / 1000).toFixed(1)
          log(`"${indexName}": rebuilt ${stats.indexed} files (${elapsed}s)`)
          totalFiles += stats.indexed
          action = "rebuilt"
        } else {
          // Healthy index: incrementally update changed/deleted files only.
          log(`Freshening "${indexName}"...`)
          const stats = await indexer.freshen()
          const elapsed = ((Date.now() - startTime) / 1000).toFixed(1)

          if (stats.updated > 0 || stats.deleted > 0) {
            log(`"${indexName}": +${stats.updated} -${stats.deleted} (${elapsed}s)`)
            action = "freshened"
          } else {
            log(`"${indexName}": fresh (${elapsed}s)`)
          }
        }
      }
    } finally {
      await indexer.unloadModel()
    }
  }

  elapsedSeconds = (Date.now() - overallStart) / 1000
  log(`Indexes ready!`)
  return { totalFiles, elapsedSeconds, action }
}
327
+
328
/**
 * Drain the debounced edit queue: reindex every queued file whose quiet
 * period (config.debounce_ms) has elapsed.
 *
 * Files are grouped per target index so each CodebaseIndexer (and its model)
 * is loaded at most once per call. Files queued too recently stay in
 * `pendingFiles` for a later call. Per-file errors are logged and do not
 * abort the batch.
 */
async function processPendingFiles(projectRoot: string, config: VectorizerConfig): Promise<void> {
  if (pendingFiles.size === 0) return
  if (SKIP_AUTO_INDEX) {
    // Auto-indexing disabled via env: drop the queue instead of processing it.
    pendingFiles.clear()
    return
  }

  const now = Date.now()
  // index name -> absolute file paths ready to be reindexed
  const filesToProcess: Map<string, string[]> = new Map()

  for (const [filePath, info] of pendingFiles.entries()) {
    if (now - info.timestamp >= config.debounce_ms) {
      const files = filesToProcess.get(info.indexName) || []
      files.push(filePath)
      filesToProcess.set(info.indexName, files)
      // Safe: Map supports deletion during entries() iteration.
      pendingFiles.delete(filePath)
    }
  }

  if (filesToProcess.size === 0) return

  debug(`Processing ${filesToProcess.size} index(es)...`)

  for (const [indexName, files] of filesToProcess.entries()) {
    const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
    try {
      for (const filePath of files) {
        try {
          // indexSingleFile returns false when the file's hash is unchanged.
          const wasIndexed = await indexer.indexSingleFile(filePath)
          if (wasIndexed) {
            log(`Reindexed: ${path.relative(projectRoot, filePath)} → ${indexName}`)
          } else {
            logFile(`Skipped (unchanged): ${path.relative(projectRoot, filePath)}`)
          }
        } catch (e) {
          // One bad file must not stop the rest of the batch.
          log(`Error reindexing ${path.relative(projectRoot, filePath)}: ${(e as Error).message}`)
        }
      }
    } finally {
      await indexer.unloadModel()
    }
  }
}
371
+
372
/**
 * OpenCode plugin entry point.
 *
 * On load: reads the vectorizer config, and (unless disabled) schedules a
 * session-start index pass one second after startup, reporting progress via
 * TUI toasts in the user's configured language.
 *
 * At runtime: listens for file edit events and queues changed files for
 * debounced reindexing.
 */
export const FileIndexerPlugin: Plugin = async ({ directory, client }) => {
  // Single shared timer: every new edit pushes the queue-drain further out.
  let processingTimeout: NodeJS.Timeout | null = null
  const config = await loadConfig(directory)

  // Best-effort TUI notification; failures (e.g. headless runs) are ignored.
  const toast = async (message: string, variant: "info" | "success" | "error" = "info") => {
    try {
      await client?.tui?.showToast?.({ body: { message, variant } })
    } catch {}
  }

  log(`Plugin loaded for: ${path.basename(directory)}`)

  if (!config.enabled || !config.auto_index) {
    // Still return a (no-op) event hook so the host has a valid plugin shape.
    log(`Plugin DISABLED (enabled: ${config.enabled}, auto_index: ${config.auto_index})`)
    return { event: async () => {} }
  }

  // Truncate (or create) the log file for this session; disable file logging
  // entirely if the path is not writable. NOTE: logFilePath is module state,
  // shared by the logFile() helper.
  logFilePath = path.join(directory, ".opencode", "indexer.log")
  try {
    fsSync.writeFileSync(logFilePath, "")
  } catch {
    logFilePath = null
  }

  log(`Plugin ACTIVE`)

  const lang = await getLanguage(directory)
  const messages = FUN_MESSAGES[lang]

  if (!SKIP_AUTO_INDEX) {
    // Delay the startup pass so plugin load itself stays fast.
    setTimeout(async () => {
      try {
        const result = await ensureIndexOnSessionStart(
          directory,
          config,
          async (totalFiles, estimatedMins) => {
            await toast(messages.indexing(totalFiles), "info")
            if (totalFiles > 0) {
              // Follow-up "fun" toast shortly after the first one.
              setTimeout(() => toast(messages.fun(totalFiles, estimatedMins), "info"), 1500)
            }
          },
        )

        if (result.action === "skipped") {
          toast(messages.fresh(), "success")
        } else {
          const duration = formatDuration(result.elapsedSeconds)
          toast(messages.done(result.totalFiles, duration), "success")
        }
      } catch (e: any) {
        toast(messages.error(e.message), "error")
      }
    }, 1000)
  }

  // Record an edited file for debounced reindexing; files outside the
  // project, excluded paths, and unrouted extensions are ignored.
  function queueFileForIndexing(filePath: string): void {
    const relativePath = path.relative(directory, filePath)
    if (relativePath.startsWith("..") || path.isAbsolute(relativePath)) return
    if (isExcluded(relativePath, config)) return

    const indexName = getIndexForFile(filePath, config)
    if (!indexName) return

    debug(`Queued: ${relativePath} -> ${indexName}`)
    pendingFiles.set(filePath, { indexName, timestamp: Date.now() })

    // Reset the debounce timer: the queue drains debounce_ms+100 after the
    // LAST edit, so bursts of edits are processed together.
    if (processingTimeout) {
      clearTimeout(processingTimeout)
    }
    processingTimeout = setTimeout(async () => {
      await processPendingFiles(directory, config)
    }, config.debounce_ms + 100)
  }

  return {
    event: async ({ event }) => {
      if (event.type === "file.edited" || event.type === "file.watcher.updated") {
        // Different event types carry the path under different property
        // names — presumably host-version dependent; accept any of them.
        const props = (event as any).properties || {}
        const filePath = props.file || props.path || props.filePath
        if (filePath) {
          log(`Event: ${event.type} → ${filePath}`)
          queueFileForIndexing(filePath)
        }
      }
    },
  }
}

export default FileIndexerPlugin
package/index.ts ADDED
@@ -0,0 +1,19 @@
1
+ import type { Plugin } from "@opencode-ai/plugin"
2
+
3
+ import search from "./tools/search"
4
+ import codeindex from "./tools/codeindex"
5
+ import FileIndexerPlugin from "./file-indexer"
6
+
7
+ export const UsethisSearchPlugin: Plugin = async (ctx) => {
8
+ const fileIndexerHooks = await FileIndexerPlugin(ctx as any)
9
+
10
+ return {
11
+ ...fileIndexerHooks,
12
+ tool: {
13
+ search,
14
+ codeindex,
15
+ },
16
+ }
17
+ }
18
+
19
+ export default UsethisSearchPlugin
package/package.json ADDED
@@ -0,0 +1,33 @@
1
+ {
2
+ "name": "@comfanion/usethis_search",
3
+ "version": "0.1.0",
4
+ "description": "OpenCode plugin: semantic search + code index management",
5
+ "type": "module",
6
+ "main": "./index.ts",
7
+ "exports": "./index.ts",
8
+ "scripts": {
9
+ "test": "bun test"
10
+ },
11
+ "files": [
12
+ "index.ts",
13
+ "file-indexer.ts",
14
+ "tools/search.ts",
15
+ "tools/codeindex.ts",
16
+ "vectorizer/index.js",
17
+ "README.md",
18
+ "LICENSE"
19
+ ],
20
+ "dependencies": {
21
+ "@opencode-ai/plugin": "1.1.39",
22
+ "@xenova/transformers": "^2.17.0",
23
+ "glob": "^10.3.10",
24
+ "vectordb": "^0.4.0"
25
+ },
26
+ "peerDependencies": {
27
+ "@opencode-ai/plugin": ">=1.1.0"
28
+ },
29
+ "engines": {
30
+ "node": ">=18"
31
+ },
32
+ "license": "MIT"
33
+ }
@@ -0,0 +1,159 @@
1
+ /**
2
+ * Code Index Status & Management Tool
3
+ *
4
+ * Uses bundled vectorizer. Index data is stored in `.opencode/vectors/<index>/`.
5
+ */
6
+
7
+ import { tool } from "@opencode-ai/plugin"
8
+ import path from "path"
9
+ import fs from "fs/promises"
10
+
11
+ import { CodebaseIndexer } from "../vectorizer/index.js"
12
+
13
// File extensions routed to each built-in index when re-indexing.
// Unknown index names fall back to the "code" list in the reindex action.
const INDEX_EXTENSIONS: Record<string, string[]> = {
  code: [".js", ".ts", ".jsx", ".tsx", ".go", ".py", ".rs", ".java", ".kt", ".swift", ".c", ".cpp", ".h", ".cs", ".rb", ".php"],
  docs: [".md", ".mdx", ".txt", ".rst", ".adoc"],
  config: [".yaml", ".yml", ".json", ".toml", ".ini", ".xml"],
}

// Human-readable labels shown in "status" and "list" output.
const INDEX_DESCRIPTIONS: Record<string, string> = {
  code: "Source code files",
  docs: "Documentation files",
  config: "Configuration files",
}
24
+
25
+ async function walkDir(dir: string, extensions: string[], ignore: string[] = []): Promise<string[]> {
26
+ const files: string[] = []
27
+
28
+ async function walk(currentDir: string) {
29
+ try {
30
+ const entries = await fs.readdir(currentDir, { withFileTypes: true })
31
+ for (const entry of entries) {
32
+ const fullPath = path.join(currentDir, entry.name)
33
+ const relativePath = path.relative(dir, fullPath)
34
+
35
+ if (ignore.some((ig) => relativePath.startsWith(ig) || entry.name === ig)) {
36
+ continue
37
+ }
38
+
39
+ if (entry.isDirectory()) {
40
+ await walk(fullPath)
41
+ } else if (entry.isFile()) {
42
+ const ext = path.extname(entry.name).toLowerCase()
43
+ if (extensions.includes(ext)) {
44
+ files.push(fullPath)
45
+ }
46
+ }
47
+ }
48
+ } catch {}
49
+ }
50
+
51
+ await walk(dir)
52
+ return files
53
+ }
54
+
55
// Tool definition: index status inspection and re-indexing.
// "list" and "status" only read `.opencode/vectors/<index>/hashes.json`;
// "reindex" walks the tree and feeds files to the bundled CodebaseIndexer.
export default tool({
  description: `Check codebase index status or trigger re-indexing for semantic search.

Actions:
- "status" → Show index statistics
- "list" → List all available indexes with stats
- "reindex" → Re-index files using local vectorizer

Available indexes:
- "code" - Source code files
- "docs" - Documentation files
- "config" - Configuration files`,

  args: {
    action: tool.schema.enum(["status", "list", "reindex"]).describe("Action to perform"),
    index: tool.schema.string().optional().default("code").describe("Index name: code, docs, config"),
    dir: tool.schema.string().optional().describe("Directory to index (default: project root)"),
  },

  async execute(args) {
    // NOTE(review): uses process.cwd() as the project root — assumes the host
    // runs tools with cwd at the project directory; confirm against the host.
    const projectRoot = process.cwd()
    const vectorsDir = path.join(projectRoot, ".opencode", "vectors")
    const indexName = args.index || "code"

    if (args.action === "list") {
      let output = `## Codebase Index Overview\n\n`
      // Every subdirectory of .opencode/vectors is treated as an index.
      const indexes: string[] = []
      try {
        const entries = await fs.readdir(vectorsDir, { withFileTypes: true })
        for (const entry of entries) {
          if (entry.isDirectory()) indexes.push(entry.name)
        }
      } catch {}

      if (indexes.length === 0) {
        output += `⚠️ No indexes created yet\n\nCreate indexes:\n\n\`\`\`\n`
        output += `codeindex({ action: "reindex", index: "code" })\n`
        output += `codeindex({ action: "reindex", index: "docs", dir: "docs/" })\n`
        output += `\`\`\`\n`
      } else {
        output += `### Active Indexes\n\n`
        for (const idx of indexes) {
          try {
            // hashes.json maps indexed file -> content hash; its key count is
            // the number of indexed files.
            const hashesPath = path.join(vectorsDir, idx, "hashes.json")
            const hashes = JSON.parse(await fs.readFile(hashesPath, "utf8"))
            const fileCount = Object.keys(hashes).length
            const desc = INDEX_DESCRIPTIONS[idx] || "Custom index"
            output += `- ${idx} - ${desc} (files: ${fileCount})\n`
          } catch {
            // Index directory exists but has no readable hashes.json.
            output += `- ${idx}\n`
          }
        }
      }

      output += `\n### Usage\n\n\`\`\`\nsearch({ query: "your query", index: "code" })\n\`\`\``
      return output
    }

    if (args.action === "status") {
      const hashesFile = path.join(vectorsDir, indexName, "hashes.json")
      try {
        const hashesContent = await fs.readFile(hashesFile, "utf8")
        const hashes = JSON.parse(hashesContent)
        const fileCount = Object.keys(hashes).length
        // Show at most 5 sample files to keep the output short.
        const sampleFiles = Object.keys(hashes).slice(0, 5)
        const desc = INDEX_DESCRIPTIONS[indexName] || "Custom index"

        return `## Index Status: "${indexName}"\n\n**Description:** ${desc}\n**Files indexed:** ${fileCount}\n\n**Sample indexed files:**\n${sampleFiles.map((f) => `- ${f}`).join("\n")}${fileCount > 5 ? `\n- ... and ${fileCount - 5} more` : ""}`
      } catch {
        return `## Index Status: "${indexName}"\n\nIndex "${indexName}" not created yet. Create it with: codeindex({ action: "reindex", index: "${indexName}" })`
      }
    }

    if (args.action === "reindex") {
      try {
        const indexer = await new CodebaseIndexer(projectRoot, indexName).init()

        const baseDir = args.dir ? path.resolve(projectRoot, args.dir) : projectRoot
        // Unknown index names fall back to the "code" extension set.
        const extensions = INDEX_EXTENSIONS[indexName] || INDEX_EXTENSIONS.code

        const ignoreList = ["node_modules", ".git", "dist", "build", ".opencode", "vendor", "__pycache__"]
        const files = await walkDir(baseDir, extensions, ignoreList)

        let indexed = 0
        let skipped = 0
        for (const filePath of files) {
          try {
            // indexFile returns false when the file is unchanged since the
            // last run (hash match) — counted as "unchanged" below.
            const wasIndexed = await indexer.indexFile(filePath)
            if (wasIndexed) indexed++
            else skipped++
          } catch {}
        }

        await indexer.unloadModel()
        const stats = await indexer.getStats()

        return `## Re-indexing Complete ✅\n\n**Index:** ${indexName}\n**Directory:** ${args.dir || "(project root)"}\n**Files found:** ${files.length}\n**Files indexed:** ${indexed}\n**Files unchanged:** ${skipped}\n**Total chunks:** ${stats.chunkCount}`
      } catch (error: any) {
        return `❌ Re-indexing failed: ${error.message || String(error)}`
      }
    }

    return `Unknown action: ${args.action}. Use: status, list, or reindex`
  },
})
@@ -0,0 +1,115 @@
1
+ /**
2
+ * Semantic Code Search Tool
3
+ *
4
+ * Uses local embeddings + LanceDB vector store via bundled vectorizer.
5
+ * Index data is stored in `.opencode/vectors/<index>/`.
6
+ */
7
+
8
+ import { tool } from "@opencode-ai/plugin"
9
+ import path from "path"
10
+ import fs from "fs/promises"
11
+
12
+ import { CodebaseIndexer } from "../vectorizer/index.js"
13
+
14
// Tool definition: semantic search over one or all indexes.
// Results come back from CodebaseIndexer.search with a `_distance` field
// (smaller = closer); output renders relevance as 1 - distance.
export default tool({
  description: `Search the codebase semantically. Use this to find relevant code snippets, functions, or files based on meaning, not just text matching.

Available indexes:
- "code" (default) - Source code files (*.js, *.ts, *.py, *.go, etc.)
- "docs" - Documentation files (*.md, *.txt, etc.)
- "config" - Configuration files (*.yaml, *.json, etc.)
- searchAll: true - Search across all indexes

Examples:
- "authentication logic" → finds auth-related code
- "database connection handling" → finds DB setup code
- "how to deploy" with index: "docs" → finds deployment docs
- "API keys" with index: "config" → finds config with API settings`,

  args: {
    query: tool.schema.string().describe("Semantic search query describing what you're looking for"),
    index: tool.schema.string().optional().default("code").describe("Index to search: code, docs, config, or custom name"),
    limit: tool.schema.number().optional().default(10).describe("Number of results to return (default: 10)"),
    searchAll: tool.schema.boolean().optional().default(false).describe("Search all indexes instead of just one"),
    freshen: tool.schema.boolean().optional().default(true).describe("Auto-update stale files before searching (default: true)"),
    includeArchived: tool.schema.boolean().optional().default(false).describe("Include archived files in results (default: false). Files are archived if in /archive/ folder or have 'archived: true' in frontmatter."),
  },

  async execute(args) {
    // NOTE(review): assumes the host runs tools with cwd at the project root.
    const projectRoot = process.cwd()

    try {
      let allResults: any[] = []
      const limit = args.limit || 10
      const indexName = args.index || "code"

      // Auto-freshen stale files before searching
      if (args.freshen !== false) {
        const tempIndexer = await new CodebaseIndexer(projectRoot, indexName).init()
        await tempIndexer.freshen()
        await tempIndexer.unloadModel()
      }

      if (args.searchAll) {
        // Use a throwaway indexer just to enumerate existing indexes.
        const tempIndexer = await new CodebaseIndexer(projectRoot, "code").init()
        const indexes = await tempIndexer.listIndexes()
        await tempIndexer.unloadModel()

        if (indexes.length === 0) {
          return `❌ No indexes found. Create one with: codeindex({ action: "reindex", index: "code" })`
        }

        // Query each index with the full limit, tagging results with their
        // source index; merged and truncated below.
        for (const idx of indexes) {
          const indexer = await new CodebaseIndexer(projectRoot, idx).init()
          if (args.freshen !== false) {
            await indexer.freshen()
          }
          const results = await indexer.search(args.query, limit, args.includeArchived)
          allResults.push(...results.map((r: any) => ({ ...r, _index: idx })))
          await indexer.unloadModel()
        }

        // Smaller distance = better match; keep the best `limit` overall.
        allResults.sort((a, b) => (a._distance || 0) - (b._distance || 0))
        allResults = allResults.slice(0, limit)
      } else {
        // Fail fast with a helpful message when the index was never built.
        const hashesFile = path.join(projectRoot, ".opencode", "vectors", indexName, "hashes.json")
        try {
          await fs.access(hashesFile)
        } catch {
          return `❌ Index "${indexName}" not found. Create it with: codeindex({ action: "reindex", index: "${indexName}" })`
        }

        const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
        const results = await indexer.search(args.query, limit, args.includeArchived)
        allResults = results.map((r: any) => ({ ...r, _index: indexName }))
        await indexer.unloadModel()
      }

      if (allResults.length === 0) {
        const scope = args.searchAll ? "any index" : `index "${indexName}"`
        return `No results found in ${scope} for: "${args.query}"\n\nTry:\n- Different keywords\n- Re-index with: codeindex({ action: "reindex", index: "${indexName}" })`
      }

      const scope = args.searchAll ? "all indexes" : `index "${indexName}"`
      let output = `## Search Results for: "${args.query}" (${scope})\n\n`

      for (let i = 0; i < allResults.length; i++) {
        const r = allResults[i]
        // Present distance as a similarity-like score in [0, 1].
        const score = r._distance ? (1 - r._distance).toFixed(3) : "N/A"
        const indexLabel = args.searchAll ? ` [${r._index}]` : ""

        output += `### ${i + 1}. ${r.file}${indexLabel}\n`
        output += `**Relevance:** ${score}\n\n`
        output += "```\n"
        // Cap each snippet at 500 chars to keep tool output readable.
        const content = r.content.length > 500 ? r.content.substring(0, 500) + "\n... (truncated)" : r.content
        output += content
        output += "\n```\n\n"
      }

      output += `---\n*Found ${allResults.length} results. Use Read tool to see full files.*`
      return output
    } catch (error: any) {
      return `❌ Search failed: ${error.message || String(error)}`
    }
  },
})
@@ -0,0 +1,555 @@
1
+ // OpenCode Vectorizer - Semantic Code Search with Multi-Index Support
2
+
3
+ import { pipeline, env } from "@xenova/transformers";
4
+ import * as lancedb from "vectordb";
5
+ import fs from "fs/promises";
6
+ import path from "path";
7
+ import crypto from "crypto";
8
+
9
// Suppress transformers.js logs unless DEBUG is set
// DEBUG is on when the DEBUG env var mentions "vectorizer" or is the wildcard "*".
const DEBUG = process.env.DEBUG?.includes("vectorizer") || process.env.DEBUG === "*";
if (!DEBUG) {
  // NOTE(review): allowLocalModels/useBrowserCache affect how models are
  // loaded/cached, not just log verbosity, yet they are only applied when
  // DEBUG is off — confirm this asymmetry is intentional.
  env.allowLocalModels = true;
  env.useBrowserCache = false;
  env.logLevel = "error";
}
16
+
17
/**
 * Default index presets (can be overridden by config).
 * Each preset is { pattern, ignore, description }: a glob of files to index,
 * glob patterns to skip, and a human-readable label used in stats output.
 */
const DEFAULT_PRESETS = {
  // Source code across common languages; deps/build output/docs excluded.
  code: {
    pattern: "**/*.{js,ts,jsx,tsx,mjs,cjs,py,go,rs,java,kt,swift,c,cpp,h,hpp,cs,rb,php,scala,clj}",
    ignore: [
      "**/node_modules/**",
      "**/.git/**",
      "**/dist/**",
      "**/build/**",
      "**/.opencode/**",
      "**/docs/**",
      "**/vendor/**",
      "**/__pycache__/**",
    ],
    description: "Source code files (excludes docs, vendor, node_modules)",
  },
  // Documentation restricted to the docs/ tree; nothing to ignore inside it.
  docs: {
    pattern: "docs/**/*.{md,mdx,txt,rst,adoc}",
    ignore: [],
    description: "Documentation in docs/ folder",
  },
  // Configuration formats anywhere in the project.
  config: {
    pattern: "**/*.{yaml,yml,json,toml,ini,env,xml}",
    ignore: ["**/node_modules/**", "**/.git/**", "**/.opencode/**"],
    description: "Configuration files",
  },
  // Union of the code, docs and config extensions.
  all: {
    pattern:
      "**/*.{js,ts,jsx,tsx,mjs,cjs,py,go,rs,java,kt,swift,c,cpp,h,hpp,cs,rb,php,scala,clj,md,mdx,txt,rst,adoc,yaml,yml,json,toml}",
    ignore: ["**/node_modules/**", "**/.git/**", "**/.opencode/**"],
    description: "All supported files",
  },
};
52
+
53
// Mutable module-level state: loadConfig() overwrites these from
// .opencode/vectorizer.yaml when that file is present and parsable.
// Will be populated from config if available
let INDEX_PRESETS = { ...DEFAULT_PRESETS };
// Extra glob patterns applied on top of every preset's own ignore list.
let GLOBAL_IGNORE = [];

// Default embedding model (fast). Can be overridden by config.
let EMBEDDING_MODEL = "Xenova/all-MiniLM-L6-v2";
59
+
60
/**
 * Render the default .opencode/vectorizer.yaml contents.
 * Reflects the current EMBEDDING_MODEL and the built-in DEFAULT_PRESETS
 * (code/docs enabled, config disabled, plus a global exclude list).
 */
function defaultVectorizerYaml() {
  const quoted = (value) => `"${value}"`;
  const lines = [
    "vectorizer:",
    "  enabled: true",
    "  auto_index: true",
    `  model: ${quoted(EMBEDDING_MODEL)}`,
    "  debounce_ms: 1000",
    "  indexes:",
    "    code:",
    "      enabled: true",
    `      pattern: ${quoted(DEFAULT_PRESETS.code.pattern)}`,
    "      ignore:",
    // One "- pattern" entry per built-in code ignore glob.
    ...DEFAULT_PRESETS.code.ignore.map((p) => `        - ${quoted(p)}`),
    "    docs:",
    "      enabled: true",
    `      pattern: ${quoted(DEFAULT_PRESETS.docs.pattern)}`,
    "      ignore: []",
    "    config:",
    "      enabled: false",
    `      pattern: ${quoted(DEFAULT_PRESETS.config.pattern)}`,
    "      ignore:",
    ...DEFAULT_PRESETS.config.ignore.map((p) => `        - ${quoted(p)}`),
    "  exclude:",
    "    - node_modules",
    "    - vendor",
    "    - dist",
    "    - build",
    "    - out",
    "    - __pycache__",
  ];
  // Original builds the document by string concatenation with a trailing
  // newline after the last entry; join + "\n" reproduces that exactly.
  return lines.join("\n") + "\n";
}
91
+
92
/**
 * Write a default vectorizer.yaml into <projectRoot>/.opencode if one does
 * not already exist. Any filesystem failure is swallowed (non-fatal).
 *
 * @param {string} projectRoot - Project root that owns the .opencode dir.
 */
async function ensureDefaultConfig(projectRoot) {
  try {
    const configDir = path.join(projectRoot, ".opencode");
    const outPath = path.join(configDir, "vectorizer.yaml");
    await fs.mkdir(configDir, { recursive: true });

    let exists = true;
    try {
      await fs.access(outPath);
    } catch {
      exists = false;
    }
    // Only write when the file is absent — never clobber a user's config.
    if (!exists) {
      await fs.writeFile(outPath, defaultVectorizerYaml(), "utf8");
      if (DEBUG) console.log("[vectorizer] Wrote default config:", outPath);
    }
  } catch {
    // non-fatal
  }
}
106
+
107
/**
 * Load index configuration from .opencode/vectorizer.yaml.
 *
 * Mutates module-level state: EMBEDDING_MODEL, GLOBAL_IGNORE and
 * INDEX_PRESETS. When the config file is missing or has no vectorizer
 * section, a default config is written and the built-in presets remain in
 * effect. Parse failures are swallowed (defaults stay active).
 *
 * @param {string} projectRoot - Project root that owns the .opencode dir.
 */
async function loadConfig(projectRoot) {
  try {
    const candidates = [path.join(projectRoot, ".opencode", "vectorizer.yaml")];

    let content = null;
    for (const configPath of candidates) {
      try {
        content = await fs.readFile(configPath, "utf8");
        break;
      } catch {
        // try next candidate
      }
    }

    if (!content) {
      await ensureDefaultConfig(projectRoot);
      return;
    }

    // Extract the top-level "vectorizer:" section: everything up to the next
    // top-level key or end of input. BUG FIX: JavaScript regexes do not
    // support \Z (it matches a literal "Z"), so a config whose last section
    // was "vectorizer:" never matched; end-of-input is written as
    // $(?![\s\S]) — "end of a line with no character after it".
    const vectorizerMatch = content.match(
      /^vectorizer:([\s\S]*?)(?=^[a-zA-Z_\-]+:|$(?![\s\S]))/m,
    );
    if (!vectorizerMatch) {
      await ensureDefaultConfig(projectRoot);
      return;
    }

    const section = vectorizerMatch[1];

    // Optional embedding model override.
    const modelMatch = section.match(/^\s{2}model:\s*["']?([^"'\n]+)["']?/m);
    if (modelMatch) {
      EMBEDDING_MODEL = modelMatch[1].trim();
      if (DEBUG) console.log("[vectorizer] Using model from config:", EMBEDDING_MODEL);
    }

    // Global exclude list; bare names are widened to **/<name>/** globs.
    // Quotes are stripped for consistency with the per-index ignore parsing.
    const excludeMatch = section.match(/^\s{2}exclude:\s*\n((?:\s{4}-\s+.+\n?)*)/m);
    if (excludeMatch) {
      GLOBAL_IGNORE = excludeMatch[1]
        .split("\n")
        .map((line) => line.replace(/^\s*-\s*/, "").replace(/["']/g, "").trim())
        .filter(Boolean)
        .map((p) => (p.includes("*") ? p : `**/${p}/**`));
    }

    // "indexes:" block. The exclude alternative is now anchored with ^ so an
    // "exclude:" occurring deeper in the document cannot truncate the block,
    // and \Z is replaced with the JS end-of-input idiom (see above).
    const indexesMatch = section.match(
      /^\s{2}indexes:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|^\s{2}exclude:|$(?![\s\S]))/m,
    );
    if (!indexesMatch) return;

    const indexesSection = indexesMatch[1];

    // Each known index may override enabled / pattern / ignore.
    for (const indexName of ["code", "docs", "config"]) {
      const indexRegex = new RegExp(
        `^\\s{4}${indexName}:\\s*\\n([\\s\\S]*?)(?=^\\s{4}[a-zA-Z_\\-]+:|$(?![\\s\\S]))`,
        "m",
      );
      const indexMatch = indexesSection.match(indexRegex);
      if (!indexMatch) continue;

      const indexSection = indexMatch[1];

      // "enabled" defaults to true when omitted.
      const enabledMatch = indexSection.match(/^\s+enabled:\s*(true|false)/m);
      const enabled = enabledMatch ? enabledMatch[1] === "true" : true;

      // "pattern" falls back to the built-in preset for this index.
      const patternMatch = indexSection.match(/^\s+pattern:\s*["']?([^"'\n]+)["']?/m);
      const pattern = patternMatch ? patternMatch[1].trim() : DEFAULT_PRESETS[indexName]?.pattern;

      // "ignore": YAML list of glob patterns; quotes stripped.
      const ignoreMatch = indexSection.match(/^\s+ignore:\s*\n((?:\s+-\s+.+\n?)*)/m);
      let ignore = [];
      if (ignoreMatch) {
        ignore = ignoreMatch[1]
          .split("\n")
          .map((line) => line.replace(/^\s*-\s*/, "").replace(/["']/g, "").trim())
          .filter(Boolean);
      }

      // Disabled indexes keep their current preset untouched.
      if (enabled && pattern) {
        INDEX_PRESETS[indexName] = {
          pattern,
          ignore,
          description: `${indexName} files from config`,
        };
      }
    }

    if (DEBUG) console.log("[vectorizer] Loaded config:", { INDEX_PRESETS, GLOBAL_IGNORE });
  } catch {
    if (DEBUG) console.log("[vectorizer] Using default presets (config load failed)");
  }
}
204
+
205
/**
 * Per-index semantic indexer backed by LanceDB (table "chunks") and a
 * transformers.js embedding pipeline.
 *
 * On-disk layout: <root>/.opencode/vectors/<indexName>/ contains a "lancedb"
 * database plus hashes.json mapping relative file path -> content hash; the
 * hash map is used to skip unchanged files on re-index.
 */
class CodebaseIndexer {
  /**
   * @param {string} projectRoot - Root directory whose files are indexed.
   * @param {string} [indexName="code"] - Which preset/index this instance manages.
   */
  constructor(projectRoot, indexName = "code") {
    this.root = projectRoot;
    this.indexName = indexName;
    this.baseDir = path.join(projectRoot, ".opencode", "vectors");
    this.cacheDir = path.join(this.baseDir, indexName);
    this.model = null; // lazily loaded embedding pipeline (see loadModel)
    this.db = null; // LanceDB connection, set by init()
    this.hashes = {}; // relPath -> hash of content at last index time
    this.configLoaded = false; // guards one-time loadConfig() in init()
  }

  /**
   * Load config (once), create the cache dir, open the LanceDB database and
   * read the hash map. Returns `this` so calls can be chained:
   * `await new CodebaseIndexer(root).init()`.
   */
  async init() {
    if (!this.configLoaded) {
      await loadConfig(this.root);
      this.configLoaded = true;
    }
    await fs.mkdir(this.cacheDir, { recursive: true });
    this.db = await lancedb.connect(path.join(this.cacheDir, "lancedb"));
    await this.loadHashes();
    return this;
  }

  /** Lazily instantiate the feature-extraction pipeline for EMBEDDING_MODEL. */
  async loadModel() {
    if (!this.model) {
      if (DEBUG) console.log(`[vectorizer] Loading embedding model: ${EMBEDDING_MODEL}...`);
      this.model = await pipeline("feature-extraction", EMBEDDING_MODEL, {
        // null suppresses download/progress output; DEBUG keeps the default
        progress_callback: DEBUG ? undefined : null,
      });
      if (DEBUG) console.log(`[vectorizer] Model loaded: ${EMBEDDING_MODEL}`);
    }
    return this.model;
  }

  /**
   * Drop the model reference so the (large) pipeline can be collected.
   * global.gc only exists when node runs with --expose-gc; otherwise this
   * just clears the reference.
   */
  async unloadModel() {
    this.model = null;
    if (global.gc) global.gc();
  }

  /** Read hashes.json; a missing or corrupt file resets to an empty map. */
  async loadHashes() {
    try {
      const hashFile = path.join(this.cacheDir, "hashes.json");
      const data = await fs.readFile(hashFile, "utf8");
      this.hashes = JSON.parse(data);
    } catch {
      this.hashes = {};
    }
  }

  /** Persist the in-memory hash map to hashes.json (pretty-printed). */
  async saveHashes() {
    const hashFile = path.join(this.cacheDir, "hashes.json");
    await fs.writeFile(hashFile, JSON.stringify(this.hashes, null, 2));
  }

  /** Content fingerprint. md5 is fine here: change detection, not security. */
  fileHash(content) {
    return crypto.createHash("md5").update(content).digest("hex");
  }

  /**
   * A file counts as archived when it lives under an archive/ directory or
   * its leading YAML frontmatter contains `archived: true`.
   */
  isArchived(relPath, content) {
    if (relPath.includes("/archive/") || relPath.startsWith("archive/")) {
      return true;
    }
    const frontmatterMatch = content.match(/^---\n([\s\S]*?)\n---/);
    if (frontmatterMatch) {
      const frontmatter = frontmatterMatch[1];
      if (/^archived:\s*true/m.test(frontmatter)) {
        return true;
      }
    }
    return false;
  }

  /** Embed one text chunk into a plain number[] (mean-pooled, normalized). */
  async embed(text) {
    const model = await this.loadModel();
    const result = await model(text, { pooling: "mean", normalize: true });
    return Array.from(result.data);
  }

  /**
   * Split content into chunks of roughly maxChars, breaking only on line
   * boundaries. A single line longer than maxChars still becomes its own
   * chunk (the cap is best-effort, not strict).
   */
  chunkCode(content, maxChars = 1500) {
    const chunks = [];
    const lines = content.split("\n");
    let current = [];
    let currentLen = 0;

    for (const line of lines) {
      if (currentLen + line.length > maxChars && current.length > 0) {
        chunks.push(current.join("\n"));
        current = [];
        currentLen = 0;
      }
      current.push(line);
      currentLen += line.length + 1; // +1 for the newline lost by split()
    }

    if (current.length > 0) {
      chunks.push(current.join("\n"));
    }

    return chunks;
  }

  /** True when the file's content differs from the hash recorded last time. */
  needsIndex(filePath, content) {
    const relPath = path.relative(this.root, filePath);
    const currentHash = this.fileHash(content);
    return this.hashes[relPath] !== currentHash;
  }

  /**
   * Index one file: chunk, embed, store chunk rows, then record the hash.
   * Returns false for unreadable or unchanged files, true when (re)indexed.
   *
   * NOTE(review): re-indexing a changed file appends fresh rows without
   * deleting the rows previously stored for the same file, so stale chunks
   * accumulate in the table until the index is cleared and rebuilt.
   */
  async indexFile(filePath) {
    const relPath = path.relative(this.root, filePath);

    let content;
    try {
      content = await fs.readFile(filePath, "utf8");
    } catch {
      return false;
    }

    const hash = this.fileHash(content);
    if (this.hashes[relPath] === hash) {
      return false; // unchanged since last index
    }

    const chunks = this.chunkCode(content);
    const archived = this.isArchived(relPath, content);
    const data = [];

    // Embeddings are computed sequentially; each row carries its chunk text
    // so search results can be shown without re-reading the file.
    for (let i = 0; i < chunks.length; i++) {
      const embedding = await this.embed(chunks[i]);
      data.push({
        file: relPath,
        chunk_index: i,
        content: chunks[i],
        vector: embedding,
        archived: archived,
      });
    }

    const tableName = "chunks";
    const tables = await this.db.tableNames();
    if (tables.includes(tableName)) {
      const table = await this.db.openTable(tableName);
      await table.add(data);
    } else {
      // First file ever indexed creates the table (schema inferred from data).
      await this.db.createTable(tableName, data);
    }

    this.hashes[relPath] = hash;
    await this.saveHashes();

    return true;
  }

  /**
   * Semantic search over the "chunks" table; returns up to `limit` rows.
   * When archived chunks are excluded, 3x limit rows are fetched so the
   * post-filter still has enough candidates left.
   */
  async search(query, limit = 5, includeArchived = false) {
    const tableName = "chunks";
    const tables = await this.db.tableNames();
    if (!tables.includes(tableName)) {
      return []; // nothing indexed yet
    }

    const queryEmbedding = await this.embed(query);
    const table = await this.db.openTable(tableName);

    const fetchLimit = includeArchived ? limit : limit * 3;
    let results = await table.search(queryEmbedding).limit(fetchLimit).execute();

    if (!includeArchived) {
      results = results.filter((r) => !r.archived);
    }

    return results.slice(0, limit);
  }

  /**
   * Compare the indexed file count against what the preset's glob currently
   * matches. Flags a reindex when the index is empty (but files exist) or
   * when the counts diverge by more than max(5, 20% of expected).
   *
   * @param {string[]} [extraIgnore] - Extra ignores; bare names become globs.
   * @returns {{needsReindex: boolean, reason: string, currentCount: number, expectedCount: number}}
   */
  async checkHealth(extraIgnore = []) {
    const { glob } = await import("glob");
    const preset = INDEX_PRESETS[this.indexName] || DEFAULT_PRESETS.code;

    const ignore = [
      ...(preset.ignore || []),
      ...GLOBAL_IGNORE,
      ...extraIgnore.map((p) => (p.includes("*") ? p : `**/${p}/**`)),
    ];

    const expectedFiles = await glob(preset.pattern, {
      cwd: this.root,
      nodir: true,
      ignore,
    });

    const indexedFiles = Object.keys(this.hashes);
    const currentCount = indexedFiles.length;
    const expectedCount = expectedFiles.length;

    const diff = Math.abs(currentCount - expectedCount);
    const threshold = Math.max(5, expectedCount * 0.2);

    if (currentCount === 0 && expectedCount > 0) {
      return { needsReindex: true, reason: "empty", currentCount, expectedCount };
    }

    if (diff > threshold) {
      return { needsReindex: true, reason: "mismatch", currentCount, expectedCount };
    }

    return { needsReindex: false, reason: "ok", currentCount, expectedCount };
  }

  /**
   * Re-hash every already-indexed file: reindex the changed ones, drop hash
   * entries for files that can no longer be read (deleted/moved).
   *
   * NOTE(review): deleting the hash does not remove the file's chunk rows
   * from the table — those linger until a full clear/reindex.
   */
  async freshen() {
    let checked = 0;
    let updated = 0;
    let deleted = 0;

    const indexedFiles = Object.keys(this.hashes);
    for (const relPath of indexedFiles) {
      checked++;
      const filePath = path.join(this.root, relPath);

      try {
        const content = await fs.readFile(filePath, "utf8");
        const currentHash = this.fileHash(content);

        if (this.hashes[relPath] !== currentHash) {
          await this.indexFile(filePath);
          updated++;
        }
      } catch {
        // unreadable -> treat as removed from the project
        delete this.hashes[relPath];
        deleted++;
      }
    }

    if (deleted > 0) {
      await this.saveHashes();
    }

    return { checked, updated, deleted };
  }

  /**
   * Glob the preset's pattern and index every match (unchanged files are
   * skipped by indexFile's hash check).
   *
   * @param {?(indexed: number, total: number, relPath: string) => void} onProgress
   *   Invoked after each newly indexed file.
   * @param {string[]} [extraIgnore] - Extra ignores; bare names become globs.
   * @returns {{indexed: number, skipped: number, total: number}}
   */
  async indexAll(onProgress = null, extraIgnore = []) {
    const { glob } = await import("glob");
    const preset = INDEX_PRESETS[this.indexName] || DEFAULT_PRESETS.code;

    const ignore = [
      ...(preset.ignore || []),
      ...GLOBAL_IGNORE,
      ...extraIgnore.map((p) => (p.includes("*") ? p : `**/${p}/**`)),
    ];

    const files = await glob(preset.pattern, {
      cwd: this.root,
      nodir: true,
      ignore,
    });

    let indexed = 0;
    let skipped = 0;

    for (const relPath of files) {
      const filePath = path.join(this.root, relPath);
      try {
        const wasIndexed = await this.indexFile(filePath);
        if (wasIndexed) {
          indexed++;
          if (onProgress) onProgress(indexed, files.length, relPath);
        } else {
          skipped++;
        }
      } catch {
        // a single bad file must not abort the whole run
        skipped++;
      }
    }

    return { indexed, skipped, total: files.length };
  }

  /** Index a single file given an absolute or project-relative path. */
  async indexSingleFile(filePath) {
    const absPath = path.isAbsolute(filePath) ? filePath : path.join(this.root, filePath);
    return await this.indexFile(absPath);
  }

  /**
   * Stats for this index: file count from the hash map, chunk count from the
   * DB (0 when the table doesn't exist or the DB is unreadable).
   */
  async getStats() {
    const fileCount = Object.keys(this.hashes).length;
    let chunkCount = 0;

    try {
      const tables = await this.db.tableNames();
      if (tables.includes("chunks")) {
        const table = await this.db.openTable("chunks");
        chunkCount = await table.countRows();
      }
    } catch {}

    const preset = INDEX_PRESETS[this.indexName];
    return {
      indexName: this.indexName,
      description: preset?.description || "Custom index",
      model: EMBEDDING_MODEL,
      fileCount,
      chunkCount,
    };
  }

  /**
   * Stats for every non-empty index directory under .opencode/vectors.
   * The "lancedb" name is skipped — it is a database dir, not an index.
   */
  async getAllStats() {
    const stats = [];
    try {
      const entries = await fs.readdir(this.baseDir, { withFileTypes: true });
      for (const entry of entries) {
        if (entry.isDirectory() && entry.name !== "lancedb") {
          try {
            const indexer = await new CodebaseIndexer(this.root, entry.name).init();
            const stat = await indexer.getStats();
            if (stat.fileCount > 0 || stat.chunkCount > 0) {
              stats.push(stat);
            }
          } catch {}
        }
      }
    } catch {}
    return stats;
  }

  /** Delete this index's cache dir (vectors + hashes) and reinitialize empty. */
  async clear() {
    await fs.rm(this.cacheDir, { recursive: true, force: true });
    this.hashes = {};
    await this.init();
  }

  /** Delete ALL indexes under .opencode/vectors, then reinitialize this one. */
  async clearAll() {
    await fs.rm(this.baseDir, { recursive: true, force: true });
    this.hashes = {};
    await this.init();
  }

  /** Names of the index directories currently present under .opencode/vectors. */
  async listIndexes() {
    const indexes = [];
    try {
      const entries = await fs.readdir(this.baseDir, { withFileTypes: true });
      for (const entry of entries) {
        if (entry.isDirectory() && entry.name !== "lancedb") {
          indexes.push(entry.name);
        }
      }
    } catch {}
    return indexes;
  }
}
550
+
551
/** Current embedding model id (the default, or the config override once loadConfig ran). */
const getEmbeddingModel = () => EMBEDDING_MODEL;
554
+
555
+ export { CodebaseIndexer, INDEX_PRESETS, getEmbeddingModel };