@comfanion/workflow 4.36.43 → 4.36.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +41 -0
- package/package.json +1 -1
- package/src/build-info.json +2 -2
- package/src/vectorizer/index.js +21 -4
package/bin/cli.js
CHANGED
|
@@ -124,6 +124,7 @@ program
|
|
|
124
124
|
install_vectorizer: true, // Vectorizer ON by default
|
|
125
125
|
vectorizer_enabled: true,
|
|
126
126
|
vectorizer_auto_index: true,
|
|
127
|
+
vectorizer_model: 'Xenova/bge-small-en-v1.5', // Default: balanced (quality + speed)
|
|
127
128
|
project_name: path.basename(process.cwd())
|
|
128
129
|
};
|
|
129
130
|
|
|
@@ -146,6 +147,7 @@ program
|
|
|
146
147
|
// Parse vectorizer settings
|
|
147
148
|
const vectorizerEnabledMatch = existingContent.match(/vectorizer:[\s\S]*?enabled:\s*(true|false)/);
|
|
148
149
|
const vectorizerAutoIndexMatch = existingContent.match(/vectorizer:[\s\S]*?auto_index:\s*(true|false)/);
|
|
150
|
+
const vectorizerModelMatch = existingContent.match(/vectorizer:[\s\S]*?model:\s*["']?([^"'\n]+)["']?/);
|
|
149
151
|
|
|
150
152
|
if (nameMatch) config.user_name = nameMatch[1];
|
|
151
153
|
if (langMatch) config.communication_language = langMatch[1];
|
|
@@ -155,6 +157,7 @@ program
|
|
|
155
157
|
if (jiraProjMatch) config.jira_project = jiraProjMatch[1];
|
|
156
158
|
if (vectorizerEnabledMatch) config.vectorizer_enabled = vectorizerEnabledMatch[1] === 'true';
|
|
157
159
|
if (vectorizerAutoIndexMatch) config.vectorizer_auto_index = vectorizerAutoIndexMatch[1] === 'true';
|
|
160
|
+
if (vectorizerModelMatch) config.vectorizer_model = vectorizerModelMatch[1].trim();
|
|
158
161
|
|
|
159
162
|
isUpdate = true;
|
|
160
163
|
} catch (e) {
|
|
@@ -249,6 +252,27 @@ program
|
|
|
249
252
|
message: 'Install vectorizer? (semantic code search, ~100MB)',
|
|
250
253
|
default: true
|
|
251
254
|
},
|
|
255
|
+
{
|
|
256
|
+
type: 'list',
|
|
257
|
+
name: 'vectorizer_model',
|
|
258
|
+
message: 'Embedding model for semantic search:',
|
|
259
|
+
when: (answers) => answers.install_vectorizer,
|
|
260
|
+
choices: [
|
|
261
|
+
{
|
|
262
|
+
name: 'MiniLM-L6 (Fast) - ~10 files/10sec, 384 dims, good quality',
|
|
263
|
+
value: 'Xenova/all-MiniLM-L6-v2'
|
|
264
|
+
},
|
|
265
|
+
{
|
|
266
|
+
name: 'BGE-small (Balanced) - ~9 files/10sec, 384 dims, better quality',
|
|
267
|
+
value: 'Xenova/bge-small-en-v1.5'
|
|
268
|
+
},
|
|
269
|
+
{
|
|
270
|
+
name: 'BGE-base (Quality) - ~3 files/10sec, 768 dims, best quality',
|
|
271
|
+
value: 'Xenova/bge-base-en-v1.5'
|
|
272
|
+
}
|
|
273
|
+
],
|
|
274
|
+
default: 'Xenova/bge-small-en-v1.5'
|
|
275
|
+
},
|
|
252
276
|
{
|
|
253
277
|
type: 'confirm',
|
|
254
278
|
name: 'vectorizer_auto_index',
|
|
@@ -449,6 +473,23 @@ program
|
|
|
449
473
|
.replace(/(# Auto-index files.*\n\s+auto_index:)\s*(true|false)/,
|
|
450
474
|
`$1 ${config.vectorizer_auto_index}`);
|
|
451
475
|
|
|
476
|
+
// Add/update vectorizer model
|
|
477
|
+
if (config.vectorizer_model) {
|
|
478
|
+
if (configContent.includes('model:') && configContent.match(/vectorizer:[\s\S]*?model:/)) {
|
|
479
|
+
// Update existing model setting
|
|
480
|
+
configContent = configContent.replace(
|
|
481
|
+
/(vectorizer:[\s\S]*?)model:\s*["']?[^"'\n]+["']?/,
|
|
482
|
+
`$1model: "${config.vectorizer_model}"`
|
|
483
|
+
);
|
|
484
|
+
} else {
|
|
485
|
+
// Add model setting after auto_index
|
|
486
|
+
configContent = configContent.replace(
|
|
487
|
+
/(auto_index:\s*(true|false))/,
|
|
488
|
+
`$1\n \n # Embedding model for semantic search\n # Options: Xenova/all-MiniLM-L6-v2 (fast), Xenova/bge-base-en-v1.5 (quality)\n model: "${config.vectorizer_model}"`
|
|
489
|
+
);
|
|
490
|
+
}
|
|
491
|
+
}
|
|
492
|
+
|
|
452
493
|
await fs.writeFile(configPath, configContent);
|
|
453
494
|
|
|
454
495
|
// Create docs structure (always)
|
package/package.json
CHANGED
package/src/build-info.json
CHANGED
package/src/vectorizer/index.js
CHANGED
|
@@ -45,6 +45,7 @@ const DEFAULT_PRESETS = {
|
|
|
45
45
|
// Will be populated from config.yaml if available
|
|
46
46
|
let INDEX_PRESETS = { ...DEFAULT_PRESETS };
|
|
47
47
|
let GLOBAL_IGNORE = [];
|
|
48
|
+
let EMBEDDING_MODEL = 'Xenova/all-MiniLM-L6-v2'; // Fallback (fast model) used until loadConfig finds a `model:` entry in the vectorizer section of config.yaml
|
|
48
49
|
|
|
49
50
|
/**
|
|
50
51
|
* Load index configuration from config.yaml
|
|
@@ -61,6 +62,13 @@ async function loadConfig(projectRoot) {
|
|
|
61
62
|
|
|
62
63
|
const section = vectorizerMatch[1];
|
|
63
64
|
|
|
65
|
+
// Parse embedding model
|
|
66
|
+
const modelMatch = section.match(/^\s{2}model:\s*["']?([^"'\n]+)["']?/m);
|
|
67
|
+
if (modelMatch) {
|
|
68
|
+
EMBEDDING_MODEL = modelMatch[1].trim();
|
|
69
|
+
if (DEBUG) console.log('[vectorizer] Using model from config:', EMBEDDING_MODEL);
|
|
70
|
+
}
|
|
71
|
+
|
|
64
72
|
// Parse global exclude
|
|
65
73
|
const excludeMatch = section.match(/^\s{2}exclude:\s*\n((?:\s{4}-\s+.+\n?)*)/m);
|
|
66
74
|
if (excludeMatch) {
|
|
@@ -148,11 +156,14 @@ class CodebaseIndexer {
|
|
|
148
156
|
|
|
149
157
|
async loadModel() {
|
|
150
158
|
if (!this.model) {
|
|
151
|
-
if (DEBUG) console.log(
|
|
152
|
-
|
|
159
|
+
if (DEBUG) console.log(`[vectorizer] Loading embedding model: ${EMBEDDING_MODEL}...`);
|
|
160
|
+
// Model options:
|
|
161
|
+
// - Xenova/all-MiniLM-L6-v2: fast, 384 dims, ~10 files/10sec
|
|
162
|
+
// - Xenova/bge-base-en-v1.5: quality, 768 dims, ~3 files/10sec
|
|
163
|
+
this.model = await pipeline('feature-extraction', EMBEDDING_MODEL, {
|
|
153
164
|
progress_callback: DEBUG ? undefined : null // Suppress progress bar unless DEBUG
|
|
154
165
|
});
|
|
155
|
-
if (DEBUG) console.log(
|
|
166
|
+
if (DEBUG) console.log(`[vectorizer] Model loaded: ${EMBEDDING_MODEL}`);
|
|
156
167
|
}
|
|
157
168
|
return this.model;
|
|
158
169
|
}
|
|
@@ -485,6 +496,7 @@ class CodebaseIndexer {
|
|
|
485
496
|
return {
|
|
486
497
|
indexName: this.indexName,
|
|
487
498
|
description: preset?.description || 'Custom index',
|
|
499
|
+
model: EMBEDDING_MODEL,
|
|
488
500
|
fileCount,
|
|
489
501
|
chunkCount
|
|
490
502
|
};
|
|
@@ -553,4 +565,9 @@ class CodebaseIndexer {
|
|
|
553
565
|
}
|
|
554
566
|
}
|
|
555
567
|
|
|
556
|
-
|
|
568
|
+
// Returns the embedding model currently in effect: the value read from config.yaml once loadConfig has run, otherwise the built-in fallback
|
|
569
|
+
function getEmbeddingModel() {
|
|
570
|
+
return EMBEDDING_MODEL;
|
|
571
|
+
}
|
|
572
|
+
|
|
573
|
+
export { CodebaseIndexer, INDEX_PRESETS, getEmbeddingModel };
|