@comfanion/workflow 4.38.3-dev.2 → 4.38.4-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/build-info.json +4 -5
- package/src/opencode/config.yaml +0 -69
- package/src/opencode/gitignore +2 -0
- package/src/opencode/opencode.json +3 -5
- package/src/opencode/vectorizer.yaml +45 -0
- package/src/opencode/plugins/README.md +0 -182
- package/src/opencode/plugins/__tests__/custom-compaction.test.ts +0 -829
- package/src/opencode/plugins/__tests__/file-indexer.test.ts +0 -425
- package/src/opencode/plugins/__tests__/helpers/mock-ctx.ts +0 -171
- package/src/opencode/plugins/__tests__/leak-stress.test.ts +0 -315
- package/src/opencode/plugins/__tests__/usethis-todo.test.ts +0 -205
- package/src/opencode/plugins/__tests__/version-check.test.ts +0 -223
- package/src/opencode/plugins/custom-compaction.ts +0 -1080
- package/src/opencode/plugins/file-indexer.ts +0 -516
- package/src/opencode/plugins/usethis-todo-publish.ts +0 -44
- package/src/opencode/plugins/usethis-todo-ui.ts +0 -37
- package/src/opencode/plugins/version-check.ts +0 -230
- package/src/opencode/tools/codeindex.ts +0 -264
- package/src/opencode/tools/search.ts +0 -149
- package/src/opencode/tools/usethis_todo.ts +0 -538
- package/src/vectorizer/index.js +0 -573
- package/src/vectorizer/package.json +0 -16
package/src/vectorizer/index.js
DELETED
|
@@ -1,573 +0,0 @@
|
|
|
1
|
-
// OpenCode Vectorizer - Semantic Code Search with Multi-Index Support
// Part of @comfanion/workflow

import { pipeline, env } from '@xenova/transformers';
import * as lancedb from 'vectordb';
import fs from 'fs/promises';
import path from 'path';
import crypto from 'crypto';

// Debug logging is opt-in: DEBUG must mention "vectorizer" or be the "*" wildcard.
const debugFlag = process.env.DEBUG ?? '';
const DEBUG = debugFlag === '*' || debugFlag.includes('vectorizer');

// Outside of debug runs, quiet down transformers.js and avoid browser caching.
if (!DEBUG) {
  env.allowLocalModels = true;
  env.useBrowserCache = false;
  // Disable progress callbacks and logs
  env.logLevel = 'error';
}
|
|
18
|
-
|
|
19
|
-
/**
 * Built-in index presets, keyed by index name.
 * Each preset carries a glob `pattern`, an `ignore` list, and a human
 * description. A project's config.yaml may override these (see loadConfig).
 */
const DEFAULT_PRESETS = {
  code: {
    pattern: '**/*.{js,ts,jsx,tsx,mjs,cjs,py,go,rs,java,kt,swift,c,cpp,h,hpp,cs,rb,php,scala,clj}',
    ignore: [
      '**/node_modules/**',
      '**/.git/**',
      '**/dist/**',
      '**/build/**',
      '**/.opencode/**',
      '**/docs/**',
      '**/vendor/**',
      '**/__pycache__/**'
    ],
    description: 'Source code files (excludes docs, vendor, node_modules)'
  },
  docs: {
    pattern: 'docs/**/*.{md,mdx,txt,rst,adoc}',
    ignore: [],
    description: 'Documentation in docs/ folder'
  },
  config: {
    pattern: '**/*.{yaml,yml,json,toml,ini,env,xml}',
    ignore: [
      '**/node_modules/**',
      '**/.git/**',
      '**/.opencode/**'
    ],
    description: 'Configuration files'
  },
  all: {
    pattern: '**/*.{js,ts,jsx,tsx,mjs,cjs,py,go,rs,java,kt,swift,c,cpp,h,hpp,cs,rb,php,scala,clj,md,mdx,txt,rst,adoc,yaml,yml,json,toml}',
    ignore: [
      '**/node_modules/**',
      '**/.git/**',
      '**/.opencode/**'
    ],
    description: 'All supported files'
  }
};

// Mutable module state, overridden by loadConfig() when config.yaml exists.
let INDEX_PRESETS = { ...DEFAULT_PRESETS }; // effective presets (defaults until config loads)
let GLOBAL_IGNORE = [];                     // project-wide exclude globs from config.yaml
let EMBEDDING_MODEL = 'Xenova/all-MiniLM-L6-v2'; // Default: fast model
|
|
49
|
-
|
|
50
|
-
/**
 * Load index configuration from <projectRoot>/.opencode/config.yaml.
 *
 * Parses the `vectorizer:` section with regexes (no YAML dependency) and
 * overwrites the module-level EMBEDDING_MODEL, GLOBAL_IGNORE and
 * INDEX_PRESETS. A missing file or missing section leaves defaults intact.
 *
 * FIX: the original regexes used \Z as an end-of-input anchor, but
 * JavaScript RegExp does not support \Z (it is an identity escape matching
 * a literal 'Z'), so a section that ended at EOF was silently never parsed.
 * (?![\s\S]) is the portable end-of-string assertion used instead.
 *
 * @param {string} projectRoot - Project root directory
 */
async function loadConfig(projectRoot) {
  try {
    const configPath = path.join(projectRoot, '.opencode', 'config.yaml');
    const content = await fs.readFile(configPath, 'utf8');

    // Parse vectorizer section from YAML: everything up to the next
    // top-level key, or end of file.
    const vectorizerMatch = content.match(/^vectorizer:([\s\S]*?)(?=^[a-z]|(?![\s\S]))/m);
    if (!vectorizerMatch) return;

    const section = vectorizerMatch[1];

    // Parse embedding model (2-space indented "model:" key)
    const modelMatch = section.match(/^\s{2}model:\s*["']?([^"'\n]+)["']?/m);
    if (modelMatch) {
      EMBEDDING_MODEL = modelMatch[1].trim();
      if (DEBUG) console.log('[vectorizer] Using model from config:', EMBEDDING_MODEL);
    }

    // Parse global exclude list ("exclude:" followed by "- item" lines)
    const excludeMatch = section.match(/^\s{2}exclude:\s*\n((?:\s{4}-\s+.+\n?)*)/m);
    if (excludeMatch) {
      GLOBAL_IGNORE = excludeMatch[1]
        .split('\n')
        .map(line => line.replace(/^\s*-\s*/, '').trim())
        .filter(Boolean)
        // Bare names become directory globs; explicit globs pass through.
        .map(p => p.includes('*') ? p : `**/${p}/**`);
    }

    // Parse indexes section (up to the next 2-space key, exclude:, or EOF)
    const indexesMatch = section.match(/^\s{2}indexes:\s*\n([\s\S]*?)(?=^\s{2}[a-z]|\s{2}exclude:|(?![\s\S]))/m);
    if (!indexesMatch) return;

    const indexesSection = indexesMatch[1];

    // Parse each index (code, docs, config)
    for (const indexName of ['code', 'docs', 'config']) {
      const indexRegex = new RegExp(`^\\s{4}${indexName}:\\s*\\n([\\s\\S]*?)(?=^\\s{4}[a-z]|(?![\\s\\S]))`, 'm');
      const indexMatch = indexesSection.match(indexRegex);
      if (!indexMatch) continue;

      const indexSection = indexMatch[1];

      // Parse enabled flag (default: enabled when the key is absent)
      const enabledMatch = indexSection.match(/^\s+enabled:\s*(true|false)/m);
      const enabled = enabledMatch ? enabledMatch[1] === 'true' : true;

      // Parse glob pattern, falling back to the built-in preset's pattern
      const patternMatch = indexSection.match(/^\s+pattern:\s*["']?([^"'\n]+)["']?/m);
      const pattern = patternMatch ? patternMatch[1].trim() : DEFAULT_PRESETS[indexName]?.pattern;

      // Parse ignore array ("ignore:" followed by "- item" lines)
      const ignoreMatch = indexSection.match(/^\s+ignore:\s*\n((?:\s+-\s+.+\n?)*)/m);
      let ignore = [];
      if (ignoreMatch) {
        ignore = ignoreMatch[1]
          .split('\n')
          .map(line => line.replace(/^\s*-\s*/, '').replace(/["']/g, '').trim())
          .filter(Boolean);
      }

      if (enabled && pattern) {
        INDEX_PRESETS[indexName] = {
          pattern,
          ignore,
          description: `${indexName} files from config.yaml`
        };
      }
    }

    if (DEBUG) console.log('[vectorizer] Loaded config:', { INDEX_PRESETS, GLOBAL_IGNORE });
  } catch (e) {
    // Best-effort: any read/parse failure keeps the built-in defaults.
    if (DEBUG) console.log('[vectorizer] Using default presets (no config.yaml)');
  }
}
|
|
128
|
-
|
|
129
|
-
class CodebaseIndexer {
|
|
130
|
-
/**
|
|
131
|
-
* @param {string} projectRoot - Project root directory
|
|
132
|
-
* @param {string} indexName - Name of the index (e.g., 'code', 'docs', 'config')
|
|
133
|
-
*/
|
|
134
|
-
constructor(projectRoot, indexName = 'code') {
|
|
135
|
-
this.root = projectRoot;
|
|
136
|
-
this.indexName = indexName;
|
|
137
|
-
this.baseDir = path.join(projectRoot, '.opencode', 'vectors');
|
|
138
|
-
this.cacheDir = path.join(this.baseDir, indexName);
|
|
139
|
-
this.model = null;
|
|
140
|
-
this.db = null;
|
|
141
|
-
this.hashes = {};
|
|
142
|
-
this.configLoaded = false;
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
async init() {
|
|
146
|
-
// Load config on first init
|
|
147
|
-
if (!this.configLoaded) {
|
|
148
|
-
await loadConfig(this.root);
|
|
149
|
-
this.configLoaded = true;
|
|
150
|
-
}
|
|
151
|
-
await fs.mkdir(this.cacheDir, { recursive: true });
|
|
152
|
-
this.db = await lancedb.connect(path.join(this.cacheDir, 'lancedb'));
|
|
153
|
-
await this.loadHashes();
|
|
154
|
-
return this;
|
|
155
|
-
}
|
|
156
|
-
|
|
157
|
-
async loadModel() {
|
|
158
|
-
if (!this.model) {
|
|
159
|
-
if (DEBUG) console.log(`[vectorizer] Loading embedding model: ${EMBEDDING_MODEL}...`);
|
|
160
|
-
// Model options:
|
|
161
|
-
// - Xenova/all-MiniLM-L6-v2: fast, 384 dims, ~10 files/10sec
|
|
162
|
-
// - Xenova/bge-base-en-v1.5: quality, 768 dims, ~3 files/10sec
|
|
163
|
-
this.model = await pipeline('feature-extraction', EMBEDDING_MODEL, {
|
|
164
|
-
progress_callback: DEBUG ? undefined : null // Suppress progress bar unless DEBUG
|
|
165
|
-
});
|
|
166
|
-
if (DEBUG) console.log(`[vectorizer] Model loaded: ${EMBEDDING_MODEL}`);
|
|
167
|
-
}
|
|
168
|
-
return this.model;
|
|
169
|
-
}
|
|
170
|
-
|
|
171
|
-
async unloadModel() {
|
|
172
|
-
this.model = null;
|
|
173
|
-
if (global.gc) global.gc();
|
|
174
|
-
}
|
|
175
|
-
|
|
176
|
-
async loadHashes() {
|
|
177
|
-
try {
|
|
178
|
-
const hashFile = path.join(this.cacheDir, 'hashes.json');
|
|
179
|
-
const data = await fs.readFile(hashFile, 'utf8');
|
|
180
|
-
this.hashes = JSON.parse(data);
|
|
181
|
-
} catch {
|
|
182
|
-
this.hashes = {};
|
|
183
|
-
}
|
|
184
|
-
}
|
|
185
|
-
|
|
186
|
-
async saveHashes() {
|
|
187
|
-
const hashFile = path.join(this.cacheDir, 'hashes.json');
|
|
188
|
-
await fs.writeFile(hashFile, JSON.stringify(this.hashes, null, 2));
|
|
189
|
-
}
|
|
190
|
-
|
|
191
|
-
fileHash(content) {
|
|
192
|
-
return crypto.createHash('md5').update(content).digest('hex');
|
|
193
|
-
}
|
|
194
|
-
|
|
195
|
-
/**
|
|
196
|
-
* Check if file is archived (should be excluded from default search)
|
|
197
|
-
* Archived if:
|
|
198
|
-
* - Path contains /archive/ folder
|
|
199
|
-
* - File has frontmatter with archived: true
|
|
200
|
-
*/
|
|
201
|
-
isArchived(relPath, content) {
|
|
202
|
-
// Check path
|
|
203
|
-
if (relPath.includes('/archive/') || relPath.startsWith('archive/')) {
|
|
204
|
-
return true;
|
|
205
|
-
}
|
|
206
|
-
|
|
207
|
-
// Check frontmatter (YAML between --- markers at start of file)
|
|
208
|
-
const frontmatterMatch = content.match(/^---\n([\s\S]*?)\n---/);
|
|
209
|
-
if (frontmatterMatch) {
|
|
210
|
-
const frontmatter = frontmatterMatch[1];
|
|
211
|
-
if (/^archived:\s*true/m.test(frontmatter)) {
|
|
212
|
-
return true;
|
|
213
|
-
}
|
|
214
|
-
}
|
|
215
|
-
|
|
216
|
-
return false;
|
|
217
|
-
}
|
|
218
|
-
|
|
219
|
-
async embed(text) {
|
|
220
|
-
const model = await this.loadModel();
|
|
221
|
-
const result = await model(text, { pooling: 'mean', normalize: true });
|
|
222
|
-
return Array.from(result.data);
|
|
223
|
-
}
|
|
224
|
-
|
|
225
|
-
/**
|
|
226
|
-
* Chunk code into smaller pieces for embedding
|
|
227
|
-
* Tries to split on function/class boundaries when possible
|
|
228
|
-
*/
|
|
229
|
-
chunkCode(content, maxChars = 1500) {
|
|
230
|
-
const chunks = [];
|
|
231
|
-
const lines = content.split('\n');
|
|
232
|
-
let current = [];
|
|
233
|
-
let currentLen = 0;
|
|
234
|
-
|
|
235
|
-
for (const line of lines) {
|
|
236
|
-
if (currentLen + line.length > maxChars && current.length > 0) {
|
|
237
|
-
chunks.push(current.join('\n'));
|
|
238
|
-
current = [];
|
|
239
|
-
currentLen = 0;
|
|
240
|
-
}
|
|
241
|
-
current.push(line);
|
|
242
|
-
currentLen += line.length + 1;
|
|
243
|
-
}
|
|
244
|
-
|
|
245
|
-
if (current.length > 0) {
|
|
246
|
-
chunks.push(current.join('\n'));
|
|
247
|
-
}
|
|
248
|
-
|
|
249
|
-
return chunks;
|
|
250
|
-
}
|
|
251
|
-
|
|
252
|
-
/**
|
|
253
|
-
* Check if file needs re-indexing based on content hash
|
|
254
|
-
*/
|
|
255
|
-
needsIndex(filePath, content) {
|
|
256
|
-
const relPath = path.relative(this.root, filePath);
|
|
257
|
-
const currentHash = this.fileHash(content);
|
|
258
|
-
return this.hashes[relPath] !== currentHash;
|
|
259
|
-
}
|
|
260
|
-
|
|
261
|
-
/**
|
|
262
|
-
* Index a single file
|
|
263
|
-
* Returns true if file was indexed, false if skipped (unchanged)
|
|
264
|
-
*/
|
|
265
|
-
async indexFile(filePath) {
|
|
266
|
-
const relPath = path.relative(this.root, filePath);
|
|
267
|
-
|
|
268
|
-
let content;
|
|
269
|
-
try {
|
|
270
|
-
content = await fs.readFile(filePath, 'utf8');
|
|
271
|
-
} catch (e) {
|
|
272
|
-
console.warn(`Cannot read ${relPath}: ${e.message}`);
|
|
273
|
-
return false;
|
|
274
|
-
}
|
|
275
|
-
|
|
276
|
-
const hash = this.fileHash(content);
|
|
277
|
-
|
|
278
|
-
// Skip if unchanged
|
|
279
|
-
if (this.hashes[relPath] === hash) {
|
|
280
|
-
return false;
|
|
281
|
-
}
|
|
282
|
-
|
|
283
|
-
const chunks = this.chunkCode(content);
|
|
284
|
-
const archived = this.isArchived(relPath, content);
|
|
285
|
-
const data = [];
|
|
286
|
-
|
|
287
|
-
for (let i = 0; i < chunks.length; i++) {
|
|
288
|
-
const embedding = await this.embed(chunks[i]);
|
|
289
|
-
data.push({
|
|
290
|
-
file: relPath,
|
|
291
|
-
chunk_index: i,
|
|
292
|
-
content: chunks[i],
|
|
293
|
-
vector: embedding,
|
|
294
|
-
archived: archived
|
|
295
|
-
});
|
|
296
|
-
}
|
|
297
|
-
|
|
298
|
-
// Add to database
|
|
299
|
-
const tableName = 'chunks';
|
|
300
|
-
const tables = await this.db.tableNames();
|
|
301
|
-
if (tables.includes(tableName)) {
|
|
302
|
-
const table = await this.db.openTable(tableName);
|
|
303
|
-
// Note: LanceDB doesn't support delete by filter in all versions
|
|
304
|
-
// So we just add new chunks (may have duplicates until reindex --force)
|
|
305
|
-
await table.add(data);
|
|
306
|
-
} else {
|
|
307
|
-
await this.db.createTable(tableName, data);
|
|
308
|
-
}
|
|
309
|
-
|
|
310
|
-
// Update hash cache
|
|
311
|
-
this.hashes[relPath] = hash;
|
|
312
|
-
await this.saveHashes();
|
|
313
|
-
|
|
314
|
-
return true;
|
|
315
|
-
}
|
|
316
|
-
|
|
317
|
-
/**
|
|
318
|
-
* Semantic search across indexed codebase
|
|
319
|
-
* @param {string} query - Search query
|
|
320
|
-
* @param {number} limit - Max results (default 5)
|
|
321
|
-
* @param {boolean} includeArchived - Include archived files (default false)
|
|
322
|
-
*/
|
|
323
|
-
async search(query, limit = 5, includeArchived = false) {
|
|
324
|
-
const tableName = 'chunks';
|
|
325
|
-
const tables = await this.db.tableNames();
|
|
326
|
-
if (!tables.includes(tableName)) {
|
|
327
|
-
return [];
|
|
328
|
-
}
|
|
329
|
-
|
|
330
|
-
const queryEmbedding = await this.embed(query);
|
|
331
|
-
const table = await this.db.openTable(tableName);
|
|
332
|
-
|
|
333
|
-
// Fetch more results if we need to filter archived
|
|
334
|
-
const fetchLimit = includeArchived ? limit : limit * 3;
|
|
335
|
-
let results = await table.search(queryEmbedding).limit(fetchLimit).execute();
|
|
336
|
-
|
|
337
|
-
// Filter out archived files unless explicitly requested
|
|
338
|
-
if (!includeArchived) {
|
|
339
|
-
results = results.filter(r => !r.archived);
|
|
340
|
-
}
|
|
341
|
-
|
|
342
|
-
// Trim to requested limit
|
|
343
|
-
return results.slice(0, limit);
|
|
344
|
-
}
|
|
345
|
-
|
|
346
|
-
/**
|
|
347
|
-
* Check if index needs full reindex (files don't match current patterns)
|
|
348
|
-
* @param {string[]} extraIgnore - Additional patterns to ignore
|
|
349
|
-
* Returns { needsReindex, reason, currentCount, expectedCount }
|
|
350
|
-
*/
|
|
351
|
-
async checkHealth(extraIgnore = []) {
|
|
352
|
-
const { glob } = await import('glob');
|
|
353
|
-
const preset = INDEX_PRESETS[this.indexName] || DEFAULT_PRESETS.code;
|
|
354
|
-
|
|
355
|
-
// Combine: preset ignore + global ignore + extra ignore
|
|
356
|
-
const ignore = [
|
|
357
|
-
...(preset.ignore || []),
|
|
358
|
-
...GLOBAL_IGNORE,
|
|
359
|
-
...extraIgnore.map(p => p.includes('*') ? p : `**/${p}/**`)
|
|
360
|
-
];
|
|
361
|
-
|
|
362
|
-
const expectedFiles = await glob(preset.pattern, {
|
|
363
|
-
cwd: this.root,
|
|
364
|
-
nodir: true,
|
|
365
|
-
ignore
|
|
366
|
-
});
|
|
367
|
-
|
|
368
|
-
const indexedFiles = Object.keys(this.hashes);
|
|
369
|
-
const currentCount = indexedFiles.length;
|
|
370
|
-
const expectedCount = expectedFiles.length;
|
|
371
|
-
|
|
372
|
-
// Check if counts differ significantly (>20% difference or index is empty)
|
|
373
|
-
const diff = Math.abs(currentCount - expectedCount);
|
|
374
|
-
const threshold = Math.max(5, expectedCount * 0.2); // 20% or at least 5 files
|
|
375
|
-
|
|
376
|
-
if (currentCount === 0 && expectedCount > 0) {
|
|
377
|
-
return { needsReindex: true, reason: 'empty', currentCount, expectedCount };
|
|
378
|
-
}
|
|
379
|
-
|
|
380
|
-
if (diff > threshold) {
|
|
381
|
-
return { needsReindex: true, reason: 'mismatch', currentCount, expectedCount };
|
|
382
|
-
}
|
|
383
|
-
|
|
384
|
-
return { needsReindex: false, reason: 'ok', currentCount, expectedCount };
|
|
385
|
-
}
|
|
386
|
-
|
|
387
|
-
/**
|
|
388
|
-
* Freshen index - check for stale files and reindex only changed ones
|
|
389
|
-
* Returns { checked, updated, deleted } counts
|
|
390
|
-
*/
|
|
391
|
-
async freshen() {
|
|
392
|
-
let checked = 0;
|
|
393
|
-
let updated = 0;
|
|
394
|
-
let deleted = 0;
|
|
395
|
-
|
|
396
|
-
const indexedFiles = Object.keys(this.hashes);
|
|
397
|
-
|
|
398
|
-
for (const relPath of indexedFiles) {
|
|
399
|
-
checked++;
|
|
400
|
-
const filePath = path.join(this.root, relPath);
|
|
401
|
-
|
|
402
|
-
try {
|
|
403
|
-
const content = await fs.readFile(filePath, 'utf8');
|
|
404
|
-
const currentHash = this.fileHash(content);
|
|
405
|
-
|
|
406
|
-
if (this.hashes[relPath] !== currentHash) {
|
|
407
|
-
// File changed - reindex it
|
|
408
|
-
await this.indexFile(filePath);
|
|
409
|
-
updated++;
|
|
410
|
-
}
|
|
411
|
-
} catch (e) {
|
|
412
|
-
// File deleted or unreadable - remove from index
|
|
413
|
-
delete this.hashes[relPath];
|
|
414
|
-
deleted++;
|
|
415
|
-
}
|
|
416
|
-
}
|
|
417
|
-
|
|
418
|
-
if (deleted > 0) {
|
|
419
|
-
await this.saveHashes();
|
|
420
|
-
}
|
|
421
|
-
|
|
422
|
-
return { checked, updated, deleted };
|
|
423
|
-
}
|
|
424
|
-
|
|
425
|
-
/**
|
|
426
|
-
* Index all files matching the preset pattern
|
|
427
|
-
* @param {function} onProgress - Optional callback(indexed, total, currentFile)
|
|
428
|
-
* @param {string[]} extraIgnore - Additional patterns to ignore
|
|
429
|
-
* Returns { indexed, skipped } counts
|
|
430
|
-
*/
|
|
431
|
-
async indexAll(onProgress = null, extraIgnore = []) {
|
|
432
|
-
const { glob } = await import('glob');
|
|
433
|
-
const preset = INDEX_PRESETS[this.indexName] || DEFAULT_PRESETS.code;
|
|
434
|
-
|
|
435
|
-
// Combine: preset ignore + global ignore + extra ignore
|
|
436
|
-
const ignore = [
|
|
437
|
-
...(preset.ignore || []),
|
|
438
|
-
...GLOBAL_IGNORE,
|
|
439
|
-
...extraIgnore.map(p => p.includes('*') ? p : `**/${p}/**`)
|
|
440
|
-
];
|
|
441
|
-
|
|
442
|
-
const files = await glob(preset.pattern, {
|
|
443
|
-
cwd: this.root,
|
|
444
|
-
nodir: true,
|
|
445
|
-
ignore
|
|
446
|
-
});
|
|
447
|
-
|
|
448
|
-
let indexed = 0;
|
|
449
|
-
let skipped = 0;
|
|
450
|
-
|
|
451
|
-
for (const relPath of files) {
|
|
452
|
-
const filePath = path.join(this.root, relPath);
|
|
453
|
-
try {
|
|
454
|
-
const wasIndexed = await this.indexFile(filePath);
|
|
455
|
-
if (wasIndexed) {
|
|
456
|
-
indexed++;
|
|
457
|
-
if (onProgress) onProgress(indexed, files.length, relPath);
|
|
458
|
-
} else {
|
|
459
|
-
skipped++;
|
|
460
|
-
}
|
|
461
|
-
} catch (e) {
|
|
462
|
-
skipped++;
|
|
463
|
-
}
|
|
464
|
-
}
|
|
465
|
-
|
|
466
|
-
return { indexed, skipped, total: files.length };
|
|
467
|
-
}
|
|
468
|
-
|
|
469
|
-
/**
|
|
470
|
-
* Index a single file by path (convenience method)
|
|
471
|
-
*/
|
|
472
|
-
async indexSingleFile(filePath) {
|
|
473
|
-
const absPath = path.isAbsolute(filePath)
|
|
474
|
-
? filePath
|
|
475
|
-
: path.join(this.root, filePath);
|
|
476
|
-
return await this.indexFile(absPath);
|
|
477
|
-
}
|
|
478
|
-
|
|
479
|
-
/**
|
|
480
|
-
* Get indexing statistics for this index
|
|
481
|
-
*/
|
|
482
|
-
async getStats() {
|
|
483
|
-
const fileCount = Object.keys(this.hashes).length;
|
|
484
|
-
let chunkCount = 0;
|
|
485
|
-
|
|
486
|
-
try {
|
|
487
|
-
const tables = await this.db.tableNames();
|
|
488
|
-
if (tables.includes('chunks')) {
|
|
489
|
-
const table = await this.db.openTable('chunks');
|
|
490
|
-
chunkCount = await table.countRows();
|
|
491
|
-
}
|
|
492
|
-
} catch {}
|
|
493
|
-
|
|
494
|
-
const preset = INDEX_PRESETS[this.indexName];
|
|
495
|
-
|
|
496
|
-
return {
|
|
497
|
-
indexName: this.indexName,
|
|
498
|
-
description: preset?.description || 'Custom index',
|
|
499
|
-
model: EMBEDDING_MODEL,
|
|
500
|
-
fileCount,
|
|
501
|
-
chunkCount
|
|
502
|
-
};
|
|
503
|
-
}
|
|
504
|
-
|
|
505
|
-
/**
|
|
506
|
-
* Get statistics for all indexes
|
|
507
|
-
*/
|
|
508
|
-
async getAllStats() {
|
|
509
|
-
const stats = [];
|
|
510
|
-
|
|
511
|
-
try {
|
|
512
|
-
const entries = await fs.readdir(this.baseDir, { withFileTypes: true });
|
|
513
|
-
|
|
514
|
-
for (const entry of entries) {
|
|
515
|
-
if (entry.isDirectory() && entry.name !== 'lancedb') {
|
|
516
|
-
try {
|
|
517
|
-
const indexer = await new CodebaseIndexer(this.root, entry.name).init();
|
|
518
|
-
const stat = await indexer.getStats();
|
|
519
|
-
if (stat.fileCount > 0 || stat.chunkCount > 0) {
|
|
520
|
-
stats.push(stat);
|
|
521
|
-
}
|
|
522
|
-
} catch {}
|
|
523
|
-
}
|
|
524
|
-
}
|
|
525
|
-
} catch {}
|
|
526
|
-
|
|
527
|
-
return stats;
|
|
528
|
-
}
|
|
529
|
-
|
|
530
|
-
/**
|
|
531
|
-
* Clear this index's data
|
|
532
|
-
*/
|
|
533
|
-
async clear() {
|
|
534
|
-
await fs.rm(this.cacheDir, { recursive: true, force: true });
|
|
535
|
-
this.hashes = {};
|
|
536
|
-
await this.init();
|
|
537
|
-
}
|
|
538
|
-
|
|
539
|
-
/**
|
|
540
|
-
* Clear all indexes
|
|
541
|
-
*/
|
|
542
|
-
async clearAll() {
|
|
543
|
-
await fs.rm(this.baseDir, { recursive: true, force: true });
|
|
544
|
-
this.hashes = {};
|
|
545
|
-
await this.init();
|
|
546
|
-
}
|
|
547
|
-
|
|
548
|
-
/**
|
|
549
|
-
* List all available index names
|
|
550
|
-
*/
|
|
551
|
-
async listIndexes() {
|
|
552
|
-
const indexes = [];
|
|
553
|
-
|
|
554
|
-
try {
|
|
555
|
-
const entries = await fs.readdir(this.baseDir, { withFileTypes: true });
|
|
556
|
-
|
|
557
|
-
for (const entry of entries) {
|
|
558
|
-
if (entry.isDirectory() && entry.name !== 'lancedb') {
|
|
559
|
-
indexes.push(entry.name);
|
|
560
|
-
}
|
|
561
|
-
}
|
|
562
|
-
} catch {}
|
|
563
|
-
|
|
564
|
-
return indexes;
|
|
565
|
-
}
|
|
566
|
-
}
|
|
567
|
-
|
|
568
|
-
/**
 * Report the embedding model id currently in effect (reflects any
 * override applied by loadConfig()).
 * @returns {string} model identifier
 */
function getEmbeddingModel() {
  return EMBEDDING_MODEL;
}
|
|
572
|
-
|
|
573
|
-
export { CodebaseIndexer, INDEX_PRESETS, getEmbeddingModel };
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "opencode-vectorizer",
|
|
3
|
-
"version": "1.0.0",
|
|
4
|
-
"description": "Semantic code search for OpenCode Workflow",
|
|
5
|
-
"type": "module",
|
|
6
|
-
"private": true,
|
|
7
|
-
"main": "index.js",
|
|
8
|
-
"dependencies": {
|
|
9
|
-
"@xenova/transformers": "^2.17.0",
|
|
10
|
-
"glob": "^10.3.10",
|
|
11
|
-
"vectordb": "^0.4.0"
|
|
12
|
-
},
|
|
13
|
-
"engines": {
|
|
14
|
-
"node": ">=18"
|
|
15
|
-
}
|
|
16
|
-
}
|