@comfanion/workflow 4.5.0 → 4.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +6 -2
- package/package.json +1 -1
- package/src/build-info.json +1 -1
- package/src/vectorizer/index.js +287 -0
- package/src/vectorizer/package.json +15 -0
package/bin/cli.js
CHANGED
@@ -15,6 +15,10 @@ const OPENCODE_SRC = path.join(PACKAGE_DIR, 'src', 'opencode');
 const REPO_TEMPLATES_SRC = path.join(PACKAGE_DIR, 'src', 'repo-structure');
 const VECTORIZER_SRC = path.join(PACKAGE_DIR, 'src', 'vectorizer');

+// Read version from package.json
+const packageJson = JSON.parse(fs.readFileSync(path.join(PACKAGE_DIR, 'package.json'), 'utf8'));
+const VERSION = packageJson.version;
+
 /**
  * Install vectorizer module with dependencies
  */
@@ -70,7 +74,7 @@ const program = new Command();
 program
   .name('create-opencode-workflow')
   .description('Initialize OpenCode Workflow system for AI-assisted development')
-  .version(
+  .version(VERSION);

 program
   .command('init')
@@ -82,7 +86,7 @@ program
   .option('--full', 'Create full repo structure')
   .option('--vectorizer', 'Install vectorizer for semantic code search')
   .action(async (options) => {
-    console.log(chalk.blue.bold(
+    console.log(chalk.blue.bold(`\n🚀 OpenCode Workflow v${VERSION}\n`));

     const targetDir = path.join(process.cwd(), '.opencode');
     const existingConfigPath = path.join(targetDir, 'config.yaml');
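
The 4.5.2 change replaces a hardcoded version string with a VERSION constant read from package.json at startup, so the --version flag and the init banner can no longer drift from the published package version. A minimal standalone sketch of the same pattern (shown in CommonJS form; the actual module style of bin/cli.js is not visible in this diff):

// Sketch: derive the CLI version from package.json instead of hardcoding it.
// Assumes bin/cli.js sits next to a package.json one directory up, as here.
const fs = require('fs');
const path = require('path');
const { Command } = require('commander');

const PACKAGE_DIR = path.join(__dirname, '..');
const packageJson = JSON.parse(
  fs.readFileSync(path.join(PACKAGE_DIR, 'package.json'), 'utf8')
);
const VERSION = packageJson.version;

new Command()
  .name('create-opencode-workflow')
  .version(VERSION) // --version now always matches the published package
  .parse(process.argv);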
package/package.json
CHANGED
package/src/build-info.json
CHANGED
package/src/vectorizer/index.js
ADDED
@@ -0,0 +1,287 @@
+// OpenCode Vectorizer - Semantic Code Search with Multi-Index Support
+// Part of @comfanion/workflow
+
+import { pipeline } from '@xenova/transformers';
+import * as lancedb from 'vectordb';
+import fs from 'fs/promises';
+import path from 'path';
+import crypto from 'crypto';
+
+/**
+ * Index presets for different content types
+ */
+const INDEX_PRESETS = {
+  code: {
+    pattern: '**/*.{js,ts,jsx,tsx,mjs,cjs,py,go,rs,java,kt,swift,c,cpp,h,hpp,cs,rb,php,scala,clj}',
+    description: 'Source code files'
+  },
+  docs: {
+    pattern: '**/*.{md,mdx,txt,rst,adoc}',
+    description: 'Documentation files'
+  },
+  config: {
+    pattern: '**/*.{yaml,yml,json,toml,ini,env,xml}',
+    description: 'Configuration files'
+  },
+  all: {
+    pattern: '**/*.{js,ts,jsx,tsx,mjs,cjs,py,go,rs,java,kt,swift,c,cpp,h,hpp,cs,rb,php,scala,clj,md,mdx,txt,rst,adoc,yaml,yml,json,toml}',
+    description: 'All supported files'
+  }
+};
+
+class CodebaseIndexer {
+  /**
+   * @param {string} projectRoot - Project root directory
+   * @param {string} indexName - Name of the index (e.g., 'code', 'docs', 'config')
+   */
+  constructor(projectRoot, indexName = 'code') {
+    this.root = projectRoot;
+    this.indexName = indexName;
+    this.baseDir = path.join(projectRoot, '.opencode', 'vectors');
+    this.cacheDir = path.join(this.baseDir, indexName);
+    this.model = null;
+    this.db = null;
+    this.hashes = {};
+  }
+
+  async init() {
+    await fs.mkdir(this.cacheDir, { recursive: true });
+    this.db = await lancedb.connect(path.join(this.cacheDir, 'lancedb'));
+    await this.loadHashes();
+    return this;
+  }
+
+  async loadModel() {
+    if (!this.model) {
+      console.log('Loading embedding model (first time takes ~30s)...');
+      this.model = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
+    }
+    return this.model;
+  }
+
+  async unloadModel() {
+    this.model = null;
+    if (global.gc) global.gc();
+  }
+
+  async loadHashes() {
+    try {
+      const hashFile = path.join(this.cacheDir, 'hashes.json');
+      const data = await fs.readFile(hashFile, 'utf8');
+      this.hashes = JSON.parse(data);
+    } catch {
+      this.hashes = {};
+    }
+  }
+
+  async saveHashes() {
+    const hashFile = path.join(this.cacheDir, 'hashes.json');
+    await fs.writeFile(hashFile, JSON.stringify(this.hashes, null, 2));
+  }
+
+  fileHash(content) {
+    return crypto.createHash('md5').update(content).digest('hex');
+  }
+
+  async embed(text) {
+    const model = await this.loadModel();
+    const result = await model(text, { pooling: 'mean', normalize: true });
+    return Array.from(result.data);
+  }
+
+  /**
+   * Chunk code into smaller pieces for embedding
+   * Tries to split on function/class boundaries when possible
+   */
+  chunkCode(content, maxChars = 1500) {
+    const chunks = [];
+    const lines = content.split('\n');
+    let current = [];
+    let currentLen = 0;
+
+    for (const line of lines) {
+      if (currentLen + line.length > maxChars && current.length > 0) {
+        chunks.push(current.join('\n'));
+        current = [];
+        currentLen = 0;
+      }
+      current.push(line);
+      currentLen += line.length + 1;
+    }
+
+    if (current.length > 0) {
+      chunks.push(current.join('\n'));
+    }
+
+    return chunks;
+  }
+
+  /**
+   * Check if file needs re-indexing based on content hash
+   */
+  needsIndex(filePath, content) {
+    const relPath = path.relative(this.root, filePath);
+    const currentHash = this.fileHash(content);
+    return this.hashes[relPath] !== currentHash;
+  }
+
+  /**
+   * Index a single file
+   * Returns true if file was indexed, false if skipped (unchanged)
+   */
+  async indexFile(filePath) {
+    const relPath = path.relative(this.root, filePath);
+
+    let content;
+    try {
+      content = await fs.readFile(filePath, 'utf8');
+    } catch (e) {
+      console.warn(`Cannot read ${relPath}: ${e.message}`);
+      return false;
+    }
+
+    const hash = this.fileHash(content);
+
+    // Skip if unchanged
+    if (this.hashes[relPath] === hash) {
+      return false;
+    }
+
+    const chunks = this.chunkCode(content);
+    const data = [];
+
+    for (let i = 0; i < chunks.length; i++) {
+      const embedding = await this.embed(chunks[i]);
+      data.push({
+        file: relPath,
+        chunk_index: i,
+        content: chunks[i],
+        vector: embedding
+      });
+    }
+
+    // Add to database
+    const tableName = 'chunks';
+    const tables = await this.db.tableNames();
+    if (tables.includes(tableName)) {
+      const table = await this.db.openTable(tableName);
+      // Note: LanceDB doesn't support delete by filter in all versions
+      // So we just add new chunks (may have duplicates until reindex --force)
+      await table.add(data);
+    } else {
+      await this.db.createTable(tableName, data);
+    }
+
+    // Update hash cache
+    this.hashes[relPath] = hash;
+    await this.saveHashes();
+
+    return true;
+  }
+
+  /**
+   * Semantic search across indexed codebase
+   */
+  async search(query, limit = 5) {
+    const tableName = 'chunks';
+    const tables = await this.db.tableNames();
+    if (!tables.includes(tableName)) {
+      return [];
+    }
+
+    const queryEmbedding = await this.embed(query);
+    const table = await this.db.openTable(tableName);
+    const results = await table.search(queryEmbedding).limit(limit).execute();
+
+    return results;
+  }
+
+  /**
+   * Get indexing statistics for this index
+   */
+  async getStats() {
+    const fileCount = Object.keys(this.hashes).length;
+    let chunkCount = 0;
+
+    try {
+      const tables = await this.db.tableNames();
+      if (tables.includes('chunks')) {
+        const table = await this.db.openTable('chunks');
+        chunkCount = await table.countRows();
+      }
+    } catch {}
+
+    const preset = INDEX_PRESETS[this.indexName];
+
+    return {
+      indexName: this.indexName,
+      description: preset?.description || 'Custom index',
+      fileCount,
+      chunkCount
+    };
+  }
+
+  /**
+   * Get statistics for all indexes
+   */
+  async getAllStats() {
+    const stats = [];
+
+    try {
+      const entries = await fs.readdir(this.baseDir, { withFileTypes: true });
+
+      for (const entry of entries) {
+        if (entry.isDirectory() && entry.name !== 'lancedb') {
+          try {
+            const indexer = await new CodebaseIndexer(this.root, entry.name).init();
+            const stat = await indexer.getStats();
+            if (stat.fileCount > 0 || stat.chunkCount > 0) {
+              stats.push(stat);
+            }
+          } catch {}
+        }
+      }
+    } catch {}
+
+    return stats;
+  }
+
+  /**
+   * Clear this index's data
+   */
+  async clear() {
+    await fs.rm(this.cacheDir, { recursive: true, force: true });
+    this.hashes = {};
+    await this.init();
+  }
+
+  /**
+   * Clear all indexes
+   */
+  async clearAll() {
+    await fs.rm(this.baseDir, { recursive: true, force: true });
+    this.hashes = {};
+    await this.init();
+  }
+
+  /**
+   * List all available index names
+   */
+  async listIndexes() {
+    const indexes = [];
+
+    try {
+      const entries = await fs.readdir(this.baseDir, { withFileTypes: true });
+
+      for (const entry of entries) {
+        if (entry.isDirectory() && entry.name !== 'lancedb') {
+          indexes.push(entry.name);
+        }
+      }
+    } catch {}

+    return indexes;
+  }
+}
+
+export { CodebaseIndexer, INDEX_PRESETS };
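
The new vectorizer module exports CodebaseIndexer and INDEX_PRESETS as its entire public surface. A minimal usage sketch built only from the methods shown above; the import path and the example file names are assumptions, and note that file discovery (walking the preset globs) is left to the caller, not this module:

// Sketch: build the default 'code' index for the current project and run a
// semantic search against it (Node >=18, ESM, matching the module itself).
import path from 'path';
import { CodebaseIndexer } from './src/vectorizer/index.js'; // path is an assumption

const indexer = await new CodebaseIndexer(process.cwd(), 'code').init();

// Index files; unchanged files are skipped via the MD5 hash cache.
for (const file of ['src/app.js', 'src/utils.js']) { // hypothetical paths
  await indexer.indexFile(path.join(process.cwd(), file));
}

const hits = await indexer.search('where is the config file parsed?', 3);
for (const hit of hits) {
  console.log(`${hit.file}#${hit.chunk_index}`);
}

await indexer.unloadModel(); // release the embedding model when done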
package/src/vectorizer/package.json
ADDED
@@ -0,0 +1,15 @@
+{
+  "name": "opencode-vectorizer",
+  "version": "1.0.0",
+  "description": "Semantic code search for OpenCode Workflow",
+  "type": "module",
+  "private": true,
+  "main": "index.js",
+  "dependencies": {
+    "@xenova/transformers": "^2.17.0",
+    "vectordb": "^0.4.0"
+  },
+  "engines": {
+    "node": ">=18"
+  }
+}
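
The nested package.json keeps the heavy @xenova/transformers and vectordb dependencies out of the main package's install; they are fetched only when a user opts in with --vectorizer. The installer itself is not shown in this diff, but given the VECTORIZER_SRC constant and the "Install vectorizer module with dependencies" docstring in bin/cli.js, it plausibly copies this directory into the project and runs npm install there. A hedged sketch of that flow (the destination directory and npm flags are assumptions):

// Sketch: copy the bundled vectorizer into the target project and install
// its dependencies on demand. VECTORIZER_SRC mirrors the bin/cli.js constant.
const fs = require('fs');
const path = require('path');
const { execSync } = require('child_process');

const PACKAGE_DIR = path.join(__dirname, '..');
const VECTORIZER_SRC = path.join(PACKAGE_DIR, 'src', 'vectorizer');

function installVectorizer(targetDir) {
  const dest = path.join(targetDir, 'vectorizer'); // destination is an assumption
  fs.cpSync(VECTORIZER_SRC, dest, { recursive: true }); // Node >=16.7
  execSync('npm install --omit=dev', { cwd: dest, stdio: 'inherit' });
}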