@comfanion/workflow 4.3.0 → 4.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +341 -1
- package/package.json +8 -3
- package/src/build-info.json +2 -1
- package/src/opencode/FLOW.yaml +147 -92
- package/src/opencode/agents/analyst.md +24 -20
- package/src/opencode/agents/architect.md +104 -39
- package/src/opencode/agents/change-manager.md +112 -250
- package/src/opencode/agents/coder.md +36 -19
- package/src/opencode/agents/crawler.md +180 -97
- package/src/opencode/agents/dev.md +117 -29
- package/src/opencode/agents/pm.md +25 -32
- package/src/opencode/agents/researcher.md +116 -241
- package/src/opencode/commands/dev-story.md +1 -5
- package/src/opencode/commands/unit-docs.md +170 -0
- package/src/opencode/config.yaml +29 -0
- package/src/opencode/opencode.json +5 -0
- package/src/opencode/{workflows/dev-story/instructions.md → skills/dev-story/SKILL.md} +2 -2
- package/src/opencode/tools/codeindex.ts +255 -0
- package/src/opencode/tools/codesearch.ts +134 -0
- package/src/vectorizer/index.js +287 -0
- package/src/vectorizer/package.json +15 -0
package/src/opencode/tools/codeindex.ts
@@ -0,0 +1,255 @@
+/**
+ * Code Index Status & Management Tool
+ *
+ * Check indexing status and trigger re-indexing.
+ * Supports multiple indexes: code, docs, config.
+ *
+ * Usage by model:
+ *   codeindex({ action: "status" })
+ *   codeindex({ action: "status", index: "docs" })
+ *   codeindex({ action: "reindex", index: "code" })
+ *   codeindex({ action: "list" })
+ */
+
+import { tool } from "@opencode-ai/plugin"
+import path from "path"
+import fs from "fs/promises"
+import { glob } from "glob"
+import ignore from "ignore"
+
+// Index presets (duplicated from vectorizer for independence)
+const INDEX_PRESETS: Record<string, { pattern: string; description: string }> = {
+  code: {
+    pattern: '**/*.{js,ts,jsx,tsx,mjs,cjs,py,go,rs,java,kt,swift,c,cpp,h,hpp,cs,rb,php,scala,clj}',
+    description: 'Source code files'
+  },
+  docs: {
+    pattern: '**/*.{md,mdx,txt,rst,adoc}',
+    description: 'Documentation files'
+  },
+  config: {
+    pattern: '**/*.{yaml,yml,json,toml,ini,env,xml}',
+    description: 'Configuration files'
+  },
+  all: {
+    pattern: '**/*.{js,ts,jsx,tsx,mjs,cjs,py,go,rs,java,kt,swift,c,cpp,h,hpp,cs,rb,php,scala,clj,md,mdx,txt,rst,adoc,yaml,yml,json,toml}',
+    description: 'All supported files'
+  }
+}
+
+export default tool({
+  description: `Check codebase index status or trigger re-indexing for semantic search.
+
+Actions:
+- "status" → Show index statistics (specify index or see all)
+- "list" → List all available indexes with stats
+- "reindex" → Re-index files (specify which index)
+
+Available indexes:
+- "code" - Source code files
+- "docs" - Documentation files
+- "config" - Configuration files
+
+Note: Initial indexing takes ~30s to load the embedding model.`,
+
+  args: {
+    action: tool.schema.enum(["status", "list", "reindex"]).describe("Action to perform"),
+    index: tool.schema.string().optional().default("code").describe("Index name for status/reindex: code, docs, config"),
+  },
+
+  async execute(args, context) {
+    const projectRoot = process.cwd()
+    const vectorizerDir = path.join(projectRoot, ".opencode", "vectorizer")
+    const vectorsDir = path.join(projectRoot, ".opencode", "vectors")
+
+    // Check if vectorizer is installed
+    const isInstalled = await fs.access(path.join(vectorizerDir, "node_modules"))
+      .then(() => true)
+      .catch(() => false)
+
+    if (!isInstalled) {
+      return `❌ Vectorizer not installed.
+
+To install:
+\`\`\`bash
+npx opencode-workflow vectorizer install
+\`\`\`
+
+This will download the embedding model (~100MB) and set up the vector database.`
+    }
+
+    try {
+      const vectorizerModule = path.join(vectorizerDir, "index.js")
+      const { CodebaseIndexer, INDEX_PRESETS: PRESETS } = await import(`file://${vectorizerModule}`)
+
+      // LIST: Show all indexes
+      if (args.action === "list") {
+        const tempIndexer = await new CodebaseIndexer(projectRoot, "code").init()
+        const allStats = await tempIndexer.getAllStats()
+
+        let output = `## Codebase Index Overview\n\n`
+        output += `✅ **Vectorizer installed**\n\n`
+
+        if (allStats.length === 0) {
+          output += `⚠️ **No indexes created yet**\n\n`
+          output += `Create indexes with:\n`
+          output += `\`\`\`bash\n`
+          output += `npx opencode-workflow index --index code # Source code\n`
+          output += `npx opencode-workflow index --index docs # Documentation\n`
+          output += `npx opencode-workflow index --index config # Config files\n`
+          output += `\`\`\`\n\n`
+        } else {
+          output += `### Active Indexes\n\n`
+          for (const stat of allStats) {
+            output += `**📁 ${stat.indexName}** - ${stat.description}\n`
+            output += ` Files: ${stat.fileCount}, Chunks: ${stat.chunkCount}\n\n`
+          }
+        }
+
+        output += `### Available Presets\n\n`
+        for (const [name, preset] of Object.entries(PRESETS || INDEX_PRESETS) as [string, any][]) {
+          const exists = allStats.find((s: any) => s.indexName === name)
+          const status = exists ? "✅" : "⬜"
+          output += `${status} **${name}**: ${preset.description}\n`
+        }
+
+        output += `\n### Usage\n`
+        output += `\`\`\`\n`
+        output += `codesearch({ query: "your query", index: "code" })\n`
+        output += `codesearch({ query: "deployment guide", index: "docs" })\n`
+        output += `codesearch({ query: "api keys", searchAll: true })\n`
+        output += `\`\`\``
+
+        return output
+      }
+
+      // STATUS: Show specific index status
+      if (args.action === "status") {
+        const indexName = args.index || "code"
+        const hashesFile = path.join(vectorsDir, indexName, "hashes.json")
+
+        try {
+          const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
+          const stats = await indexer.getStats()
+
+          // Get sample files
+          const hashesContent = await fs.readFile(hashesFile, "utf8")
+          const hashes = JSON.parse(hashesContent)
+          const sampleFiles = Object.keys(hashes).slice(0, 5)
+
+          return `## Index Status: "${indexName}"
+
+✅ **Vectorizer installed**
+✅ **Index active**
+
+**Description:** ${stats.description}
+**Files indexed:** ${stats.fileCount}
+**Chunks:** ${stats.chunkCount}
+
+**Sample indexed files:**
+${sampleFiles.map(f => `- ${f}`).join("\n")}
+${stats.fileCount > 5 ? `- ... and ${stats.fileCount - 5} more` : ""}
+
+**Usage:**
+\`\`\`
+codesearch({ query: "your search query", index: "${indexName}" })
+\`\`\`
+
+To re-index:
+\`\`\`
+codeindex({ action: "reindex", index: "${indexName}" })
+\`\`\``
+
+        } catch {
+          return `## Index Status: "${indexName}"
+
+✅ **Vectorizer installed**
+⚠️ **Index "${indexName}" not created yet**
+
+To create this index:
+\`\`\`bash
+npx opencode-workflow index --index ${indexName}
+\`\`\`
+
+Or use:
+\`\`\`
+codeindex({ action: "reindex", index: "${indexName}" })
+\`\`\``
+        }
+      }
+
+      // REINDEX: Re-index specific index (do it directly, no shell)
+      if (args.action === "reindex") {
+        const indexName = args.index || "code"
+
+        try {
+          const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
+
+          // Get pattern from preset
+          const preset = (PRESETS || INDEX_PRESETS)[indexName]
+          const pattern = preset?.pattern || '**/*.{js,ts,py,go,md,yaml,json}'
+
+          // Load .gitignore
+          let ig = ignore()
+          try {
+            const gitignore = await fs.readFile(path.join(projectRoot, '.gitignore'), 'utf8')
+            ig = ig.add(gitignore)
+          } catch {}
+          ig.add(['node_modules', '.git', 'dist', 'build', '.opencode/vectors', '.opencode/vectorizer'])
+
+          // Find files
+          const files = await glob(pattern, { cwd: projectRoot, nodir: true })
+          const filtered = files.filter((f: string) => !ig.ignores(f))
+
+          let indexed = 0
+          let skipped = 0
+
+          for (const file of filtered) {
+            const filePath = path.join(projectRoot, file)
+            try {
+              const wasIndexed = await indexer.indexFile(filePath)
+              if (wasIndexed) {
+                indexed++
+              } else {
+                skipped++
+              }
+            } catch {
+              // Skip files that can't be read
+            }
+          }
+
+          // Unload model to free memory
+          await indexer.unloadModel()
+
+          const stats = await indexer.getStats()
+
+          return `## Re-indexing Complete ✅
+
+**Index:** ${indexName}
+**Description:** ${stats.description}
+**Files found:** ${filtered.length}
+**Files indexed:** ${indexed}
+**Files unchanged:** ${skipped}
+**Total chunks:** ${stats.chunkCount}
+
+You can now use semantic search:
+\`\`\`
+codesearch({ query: "your search query", index: "${indexName}" })
+\`\`\``
+
+        } catch (error: any) {
+          return `❌ Re-indexing failed: ${error.message}
+
+Try:
+1. Check if vectorizer is installed: \`npx opencode-workflow vectorizer status\`
+2. Re-install vectorizer: \`npx opencode-workflow vectorizer install\``
+        }
+      }
+
+      return `Unknown action: ${args.action}. Use: status, list, or reindex`
+
+    } catch (error: any) {
+      return `❌ Error: ${error.message}`
+    }
+  },
+})
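The reindex branch follows a simple discovery-then-index pattern: expand the preset glob, filter the matches through the project's `.gitignore` plus a few hard-coded exclusions, and feed each surviving path to `indexer.indexFile()`. Pulled out on its own, that discovery step looks roughly like the sketch below (same `glob` and `ignore` packages the tool already imports; the `listIndexableFiles` helper name is illustrative, not part of the package):

```ts
import path from "path"
import fs from "fs/promises"
import { glob } from "glob"
import ignore from "ignore"

// Illustrative helper mirroring the file-discovery step of the reindex action above.
async function listIndexableFiles(projectRoot: string, pattern: string): Promise<string[]> {
  const ig = ignore()
  try {
    // Respect the project's .gitignore when one exists.
    ig.add(await fs.readFile(path.join(projectRoot, ".gitignore"), "utf8"))
  } catch {
    // No .gitignore — rely on the built-in exclusions only.
  }
  ig.add(["node_modules", ".git", "dist", "build", ".opencode/vectors", ".opencode/vectorizer"])

  // Glob relative to the project root, then drop ignored paths.
  const files = await glob(pattern, { cwd: projectRoot, nodir: true })
  return files.filter((f) => !ig.ignores(f))
}
```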
package/src/opencode/tools/codesearch.ts
@@ -0,0 +1,134 @@
+/**
+ * Semantic Code Search Tool
+ *
+ * Allows the AI model to search the codebase using semantic similarity.
+ * Uses local embeddings (all-MiniLM-L6-v2) and LanceDB vector store.
+ * Supports multiple indexes: code, docs, config, or search all.
+ *
+ * Usage by model:
+ *   codesearch({ query: "authentication middleware", limit: 5 })
+ *   codesearch({ query: "how to deploy", index: "docs" })
+ *   codesearch({ query: "database config", index: "config" })
+ *   codesearch({ query: "error handling", searchAll: true })
+ *
+ * Prerequisites:
+ *   npx opencode-workflow vectorizer install
+ *   npx opencode-workflow index --index code
+ *   npx opencode-workflow index --index docs
+ */
+
+import { tool } from "@opencode-ai/plugin"
+import path from "path"
+import fs from "fs/promises"
+
+export default tool({
+  description: `Search the codebase semantically. Use this to find relevant code snippets, functions, or files based on meaning, not just text matching.
+
+Available indexes:
+- "code" (default) - Source code files (*.js, *.ts, *.py, *.go, etc.)
+- "docs" - Documentation files (*.md, *.txt, etc.)
+- "config" - Configuration files (*.yaml, *.json, etc.)
+- searchAll: true - Search across all indexes
+
+Examples:
+- "authentication logic" → finds auth-related code
+- "database connection handling" → finds DB setup code
+- "how to deploy" with index: "docs" → finds deployment docs
+- "API keys" with index: "config" → finds config with API settings
+
+Prerequisites: Run 'npx opencode-workflow index --index <name>' first.`,
+
+  args: {
+    query: tool.schema.string().describe("Semantic search query describing what you're looking for"),
+    index: tool.schema.string().optional().default("code").describe("Index to search: code, docs, config, or custom name"),
+    limit: tool.schema.number().optional().default(5).describe("Number of results to return (default: 5)"),
+    searchAll: tool.schema.boolean().optional().default(false).describe("Search all indexes instead of just one"),
+  },
+
+  async execute(args, context) {
+    const projectRoot = process.cwd()
+    const vectorizerDir = path.join(projectRoot, ".opencode", "vectorizer")
+    const vectorizerModule = path.join(vectorizerDir, "index.js")
+
+    // Check if vectorizer is installed
+    try {
+      await fs.access(path.join(vectorizerDir, "node_modules"))
+    } catch {
+      return `❌ Vectorizer not installed. Run: npx opencode-workflow vectorizer install`
+    }
+
+    try {
+      // Dynamic import of the vectorizer
+      const { CodebaseIndexer } = await import(`file://${vectorizerModule}`)
+
+      let allResults: any[] = []
+      const limit = args.limit || 5
+      const indexName = args.index || "code"
+
+      if (args.searchAll) {
+        // Search all indexes
+        const tempIndexer = await new CodebaseIndexer(projectRoot, "code").init()
+        const indexes = await tempIndexer.listIndexes()
+
+        if (indexes.length === 0) {
+          return `❌ No indexes found. Run: npx opencode-workflow index --index code`
+        }
+
+        for (const idx of indexes) {
+          const indexer = await new CodebaseIndexer(projectRoot, idx).init()
+          const results = await indexer.search(args.query, limit)
+          allResults.push(...results.map((r: any) => ({ ...r, _index: idx })))
+        }
+
+        // Sort by distance and take top N
+        allResults.sort((a, b) => (a._distance || 0) - (b._distance || 0))
+        allResults = allResults.slice(0, limit)
+
+      } else {
+        // Search specific index
+        const hashesFile = path.join(projectRoot, ".opencode", "vectors", indexName, "hashes.json")
+        try {
+          await fs.access(hashesFile)
+        } catch {
+          return `❌ Index "${indexName}" not found. Run: npx opencode-workflow index --index ${indexName}`
+        }
+
+        const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
+        const results = await indexer.search(args.query, limit)
+        allResults = results.map((r: any) => ({ ...r, _index: indexName }))
+      }
+
+      if (allResults.length === 0) {
+        const scope = args.searchAll ? "any index" : `index "${indexName}"`
+        return `No results found in ${scope} for: "${args.query}"\n\nTry:\n- Different keywords\n- Re-index with: npx opencode-workflow index --index ${indexName} --force`
+      }
+
+      // Format results for the model
+      const scope = args.searchAll ? "all indexes" : `index "${indexName}"`
+      let output = `## Search Results for: "${args.query}" (${scope})\n\n`
+
+      for (let i = 0; i < allResults.length; i++) {
+        const r = allResults[i]
+        const score = r._distance ? (1 - r._distance).toFixed(3) : "N/A"
+        const indexLabel = args.searchAll ? ` [${r._index}]` : ""
+
+        output += `### ${i + 1}. ${r.file}${indexLabel}\n`
+        output += `**Relevance:** ${score}\n\n`
+        output += "```\n"
+        // Truncate long content
+        const content = r.content.length > 500
+          ? r.content.substring(0, 500) + "\n... (truncated)"
+          : r.content
+        output += content
+        output += "\n```\n\n"
+      }
+
+      output += `---\n*Found ${allResults.length} results. Use Read tool to see full files.*`
+
+      return output
+
+    } catch (error: any) {
+      return `❌ Search failed: ${error.message}\n\nTry re-indexing: npx opencode-workflow index --index ${args.index || "code"} --force`
+    }
+  },
+})
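Together the two tools form a check → (re)build → query loop. A rough sketch of driving them from a test harness is shown below; it assumes the definition object returned by `tool()` exposes its `execute` handler directly and that an empty context object is acceptable — both are illustrative shortcuts, not documented behavior of @opencode-ai/plugin:

```ts
import codeindex from "./codeindex"
import codesearch from "./codesearch"

// Illustrative harness: each call returns a Markdown string intended for the model.
async function demo() {
  // 1. See which indexes already exist.
  console.log(await codeindex.execute({ action: "list" } as any, {} as any))

  // 2. Build or refresh the docs index (loads the embedding model on first use).
  console.log(await codeindex.execute({ action: "reindex", index: "docs" } as any, {} as any))

  // 3. Query it semantically.
  console.log(await codesearch.execute({ query: "how to deploy", index: "docs", limit: 3 } as any, {} as any))
}

demo().catch(console.error)
```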
package/src/vectorizer/index.js
@@ -0,0 +1,287 @@
+// OpenCode Vectorizer - Semantic Code Search with Multi-Index Support
+// Part of @comfanion/workflow
+
+import { pipeline } from '@xenova/transformers';
+import * as lancedb from 'vectordb';
+import fs from 'fs/promises';
+import path from 'path';
+import crypto from 'crypto';
+
+/**
+ * Index presets for different content types
+ */
+const INDEX_PRESETS = {
+  code: {
+    pattern: '**/*.{js,ts,jsx,tsx,mjs,cjs,py,go,rs,java,kt,swift,c,cpp,h,hpp,cs,rb,php,scala,clj}',
+    description: 'Source code files'
+  },
+  docs: {
+    pattern: '**/*.{md,mdx,txt,rst,adoc}',
+    description: 'Documentation files'
+  },
+  config: {
+    pattern: '**/*.{yaml,yml,json,toml,ini,env,xml}',
+    description: 'Configuration files'
+  },
+  all: {
+    pattern: '**/*.{js,ts,jsx,tsx,mjs,cjs,py,go,rs,java,kt,swift,c,cpp,h,hpp,cs,rb,php,scala,clj,md,mdx,txt,rst,adoc,yaml,yml,json,toml}',
+    description: 'All supported files'
+  }
+};
+
+class CodebaseIndexer {
+  /**
+   * @param {string} projectRoot - Project root directory
+   * @param {string} indexName - Name of the index (e.g., 'code', 'docs', 'config')
+   */
+  constructor(projectRoot, indexName = 'code') {
+    this.root = projectRoot;
+    this.indexName = indexName;
+    this.baseDir = path.join(projectRoot, '.opencode', 'vectors');
+    this.cacheDir = path.join(this.baseDir, indexName);
+    this.model = null;
+    this.db = null;
+    this.hashes = {};
+  }
+
+  async init() {
+    await fs.mkdir(this.cacheDir, { recursive: true });
+    this.db = await lancedb.connect(path.join(this.cacheDir, 'lancedb'));
+    await this.loadHashes();
+    return this;
+  }
+
+  async loadModel() {
+    if (!this.model) {
+      console.log('Loading embedding model (first time takes ~30s)...');
+      this.model = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
+    }
+    return this.model;
+  }
+
+  async unloadModel() {
+    this.model = null;
+    if (global.gc) global.gc();
+  }
+
+  async loadHashes() {
+    try {
+      const hashFile = path.join(this.cacheDir, 'hashes.json');
+      const data = await fs.readFile(hashFile, 'utf8');
+      this.hashes = JSON.parse(data);
+    } catch {
+      this.hashes = {};
+    }
+  }
+
+  async saveHashes() {
+    const hashFile = path.join(this.cacheDir, 'hashes.json');
+    await fs.writeFile(hashFile, JSON.stringify(this.hashes, null, 2));
+  }
+
+  fileHash(content) {
+    return crypto.createHash('md5').update(content).digest('hex');
+  }
+
+  async embed(text) {
+    const model = await this.loadModel();
+    const result = await model(text, { pooling: 'mean', normalize: true });
+    return Array.from(result.data);
+  }
+
+  /**
+   * Chunk code into smaller pieces for embedding
+   * Tries to split on function/class boundaries when possible
+   */
+  chunkCode(content, maxChars = 1500) {
+    const chunks = [];
+    const lines = content.split('\n');
+    let current = [];
+    let currentLen = 0;
+
+    for (const line of lines) {
+      if (currentLen + line.length > maxChars && current.length > 0) {
+        chunks.push(current.join('\n'));
+        current = [];
+        currentLen = 0;
+      }
+      current.push(line);
+      currentLen += line.length + 1;
+    }
+
+    if (current.length > 0) {
+      chunks.push(current.join('\n'));
+    }
+
+    return chunks;
+  }
+
+  /**
+   * Check if file needs re-indexing based on content hash
+   */
+  needsIndex(filePath, content) {
+    const relPath = path.relative(this.root, filePath);
+    const currentHash = this.fileHash(content);
+    return this.hashes[relPath] !== currentHash;
+  }
+
+  /**
+   * Index a single file
+   * Returns true if file was indexed, false if skipped (unchanged)
+   */
+  async indexFile(filePath) {
+    const relPath = path.relative(this.root, filePath);
+
+    let content;
+    try {
+      content = await fs.readFile(filePath, 'utf8');
+    } catch (e) {
+      console.warn(`Cannot read ${relPath}: ${e.message}`);
+      return false;
+    }
+
+    const hash = this.fileHash(content);
+
+    // Skip if unchanged
+    if (this.hashes[relPath] === hash) {
+      return false;
+    }
+
+    const chunks = this.chunkCode(content);
+    const data = [];
+
+    for (let i = 0; i < chunks.length; i++) {
+      const embedding = await this.embed(chunks[i]);
+      data.push({
+        file: relPath,
+        chunk_index: i,
+        content: chunks[i],
+        vector: embedding
+      });
+    }
+
+    // Add to database
+    const tableName = 'chunks';
+    const tables = await this.db.tableNames();
+    if (tables.includes(tableName)) {
+      const table = await this.db.openTable(tableName);
+      // Note: LanceDB doesn't support delete by filter in all versions
+      // So we just add new chunks (may have duplicates until reindex --force)
+      await table.add(data);
+    } else {
+      await this.db.createTable(tableName, data);
+    }
+
+    // Update hash cache
+    this.hashes[relPath] = hash;
+    await this.saveHashes();
+
+    return true;
+  }
+
+  /**
+   * Semantic search across indexed codebase
+   */
+  async search(query, limit = 5) {
+    const tableName = 'chunks';
+    const tables = await this.db.tableNames();
+    if (!tables.includes(tableName)) {
+      return [];
+    }
+
+    const queryEmbedding = await this.embed(query);
+    const table = await this.db.openTable(tableName);
+    const results = await table.search(queryEmbedding).limit(limit).execute();
+
+    return results;
+  }
+
+  /**
+   * Get indexing statistics for this index
+   */
+  async getStats() {
+    const fileCount = Object.keys(this.hashes).length;
+    let chunkCount = 0;
+
+    try {
+      const tables = await this.db.tableNames();
+      if (tables.includes('chunks')) {
+        const table = await this.db.openTable('chunks');
+        chunkCount = await table.countRows();
+      }
+    } catch {}
+
+    const preset = INDEX_PRESETS[this.indexName];
+
+    return {
+      indexName: this.indexName,
+      description: preset?.description || 'Custom index',
+      fileCount,
+      chunkCount
+    };
+  }
+
+  /**
+   * Get statistics for all indexes
+   */
+  async getAllStats() {
+    const stats = [];
+
+    try {
+      const entries = await fs.readdir(this.baseDir, { withFileTypes: true });
+
+      for (const entry of entries) {
+        if (entry.isDirectory() && entry.name !== 'lancedb') {
+          try {
+            const indexer = await new CodebaseIndexer(this.root, entry.name).init();
+            const stat = await indexer.getStats();
+            if (stat.fileCount > 0 || stat.chunkCount > 0) {
+              stats.push(stat);
+            }
+          } catch {}
+        }
+      }
+    } catch {}
+
+    return stats;
+  }
+
+  /**
+   * Clear this index's data
+   */
+  async clear() {
+    await fs.rm(this.cacheDir, { recursive: true, force: true });
+    this.hashes = {};
+    await this.init();
+  }
+
+  /**
+   * Clear all indexes
+   */
+  async clearAll() {
+    await fs.rm(this.baseDir, { recursive: true, force: true });
+    this.hashes = {};
+    await this.init();
+  }
+
+  /**
+   * List all available index names
+   */
+  async listIndexes() {
+    const indexes = [];
+
+    try {
+      const entries = await fs.readdir(this.baseDir, { withFileTypes: true });
+
+      for (const entry of entries) {
+        if (entry.isDirectory() && entry.name !== 'lancedb') {
+          indexes.push(entry.name);
+        }
+      }
+    } catch {}
+
+    return indexes;
+  }
+}
+
+export { CodebaseIndexer, INDEX_PRESETS };
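The class is meant to be driven from the outside — both tools above load it with a dynamic `file://` import from the installed copy at `.opencode/vectorizer/index.js`. A minimal sketch of the same programmatic use follows (the `indexAndQuery` name and the `README.md` target are just examples, not part of the package):

```ts
import path from "path"

async function indexAndQuery(projectRoot: string) {
  // Load the installed vectorizer the same way the codeindex/codesearch tools do.
  const modulePath = path.join(projectRoot, ".opencode", "vectorizer", "index.js")
  const { CodebaseIndexer } = await import(`file://${modulePath}`)

  // One indexer per named index; its data lives under .opencode/vectors/<name>/.
  const indexer = await new CodebaseIndexer(projectRoot, "docs").init()

  // indexFile() hashes the file, embeds it in ~1500-char chunks when changed,
  // and returns false when the content hash matches the cached one.
  const changed = await indexer.indexFile(path.join(projectRoot, "README.md"))
  console.log(changed ? "indexed" : "unchanged")

  // search() embeds the query and returns LanceDB rows
  // ({ file, chunk_index, content, _distance, ... }).
  const hits = await indexer.search("installation steps", 3)
  for (const hit of hits) console.log(hit.file, hit._distance)

  // Release the in-memory embedding model when done.
  await indexer.unloadModel()
}
```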
package/src/vectorizer/package.json
@@ -0,0 +1,15 @@
+{
+  "name": "opencode-vectorizer",
+  "version": "1.0.0",
+  "description": "Semantic code search for OpenCode Workflow",
+  "type": "module",
+  "private": true,
+  "main": "index.js",
+  "dependencies": {
+    "@xenova/transformers": "^2.17.0",
+    "vectordb": "^0.4.0"
+  },
+  "engines": {
+    "node": ">=18"
+  }
+}