voyageai-cli 1.30.0 → 1.30.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -4
- package/package.json +1 -1
- package/src/cli.js +8 -0
- package/src/commands/about.js +3 -3
- package/src/commands/chat.js +32 -11
- package/src/commands/code-search.js +751 -0
- package/src/commands/doctor.js +1 -1
- package/src/commands/export.js +124 -0
- package/src/commands/import.js +195 -0
- package/src/commands/index-workspace.js +243 -0
- package/src/commands/mcp-server.js +113 -3
- package/src/commands/playground.js +120 -4
- package/src/commands/quickstart.js +4 -4
- package/src/commands/workflow.js +132 -65
- package/src/lib/catalog.js +4 -2
- package/src/lib/code-search.js +315 -0
- package/src/lib/codegen.js +1 -1
- package/src/lib/explanations.js +3 -3
- package/src/lib/export/contexts/benchmark-export.js +27 -0
- package/src/lib/export/contexts/chat-export.js +41 -0
- package/src/lib/export/contexts/explore-export.js +22 -0
- package/src/lib/export/contexts/search-export.js +54 -0
- package/src/lib/export/contexts/workflow-export.js +80 -0
- package/src/lib/export/formats/clipboard-export.js +29 -0
- package/src/lib/export/formats/csv-export.js +45 -0
- package/src/lib/export/formats/json-export.js +50 -0
- package/src/lib/export/formats/markdown-export.js +189 -0
- package/src/lib/export/formats/mermaid-export.js +274 -0
- package/src/lib/export/formats/pdf-export.js +117 -0
- package/src/lib/export/formats/png-export.js +96 -0
- package/src/lib/export/formats/svg-export.js +116 -0
- package/src/lib/export/index.js +175 -0
- package/src/lib/github.js +226 -0
- package/src/lib/template-engine.js +154 -20
- package/src/lib/workflow-builder.js +753 -0
- package/src/lib/workflow-formatters.js +454 -0
- package/src/lib/workflow-input-cache.js +111 -0
- package/src/lib/workflow-scaffold.js +1 -1
- package/src/lib/workflow.js +297 -28
- package/src/mcp/install.js +280 -7
- package/src/mcp/schemas/index.js +170 -0
- package/src/mcp/server.js +19 -4
- package/src/mcp/tools/authoring.js +662 -0
- package/src/mcp/tools/code-search.js +620 -0
- package/src/mcp/tools/ingest.js +2 -5
- package/src/mcp/tools/retrieval.js +2 -15
- package/src/mcp/tools/workspace.js +452 -0
- package/src/mcp/utils.js +20 -0
- package/src/playground/announcements.md +52 -5
- package/src/playground/help/workflow-nodes.js +127 -2
- package/src/playground/index.html +17109 -12438
- package/src/playground/vendor/mermaid.min.js +2811 -0
- package/src/workflows/code-review.json +110 -0
- package/src/workflows/cost-analysis.json +5 -0
- package/src/workflows/rag-chat.json +165 -0
- package/src/workflows/tests/code-review.fresh-index.test.json +83 -0
- package/src/workflows/tests/code-review.happy-path.test.json +121 -0
- package/src/workflows/tests/code-review.no-question.test.json +70 -0
- package/src/workflows/tests/consistency-check.happy-path.test.json +28 -0
- package/src/workflows/tests/consistency-check.missing-source.test.json +26 -0
- package/src/workflows/tests/cost-analysis.happy-path.test.json +28 -0
- package/src/workflows/tests/enrich-and-ingest.happy-path.test.json +38 -0
- package/src/workflows/tests/enrich-and-ingest.notify-fails.test.json +38 -0
- package/src/workflows/tests/intelligent-ingest.all-filtered.test.json +26 -0
- package/src/workflows/tests/intelligent-ingest.happy-path.test.json +28 -0
- package/src/workflows/tests/kb-health-report.custom-queries.test.json +24 -0
- package/src/workflows/tests/kb-health-report.happy-path.test.json +26 -0
- package/src/workflows/tests/multi-collection-search.happy-path.test.json +40 -0
- package/src/workflows/tests/multi-collection-search.one-empty.test.json +28 -0
- package/src/workflows/tests/rag-chat.happy-path.test.json +26 -0
- package/src/workflows/tests/rag-chat.no-relevant-results.test.json +25 -0
- package/src/workflows/tests/research-and-summarize.happy-path.test.json +33 -0
- package/src/workflows/tests/research-and-summarize.no-results.test.json +29 -0
- package/src/workflows/tests/search-with-fallback.empty-both.test.json +24 -0
- package/src/workflows/tests/search-with-fallback.fallback-branch.test.json +24 -0
- package/src/workflows/tests/search-with-fallback.happy-path.test.json +27 -0
- package/src/workflows/tests/smart-ingest.duplicate-detected.test.json +34 -0
- package/src/workflows/tests/smart-ingest.happy-path.test.json +31 -0
- package/src/playground/assets/announcements/appstore.jpg +0 -0
- package/src/playground/assets/announcements/circuits.jpg +0 -0
- package/src/playground/assets/announcements/csvingest.jpg +0 -0
- package/src/playground/assets/announcements/green-wave.jpg +0 -0
|
@@ -0,0 +1,751 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const path = require('path');
|
|
4
|
+
const fs = require('fs');
|
|
5
|
+
const pc = require('picocolors');
|
|
6
|
+
const { generateEmbeddings, apiRequest } = require('../lib/api');
|
|
7
|
+
const { getMongoCollection } = require('../lib/mongo');
|
|
8
|
+
const { loadProject, saveProject } = require('../lib/project');
|
|
9
|
+
const { DEFAULT_RERANK_MODEL } = require('../lib/catalog');
|
|
10
|
+
const { showCombinedCostSummary } = require('../lib/cost-display');
|
|
11
|
+
const ui = require('../lib/ui');
|
|
12
|
+
const {
|
|
13
|
+
DEFAULT_CODE_MODEL,
|
|
14
|
+
DEFAULT_DB,
|
|
15
|
+
smartChunkCode,
|
|
16
|
+
extractSymbols,
|
|
17
|
+
findCodeFiles,
|
|
18
|
+
resolveConfig,
|
|
19
|
+
deriveCollectionName,
|
|
20
|
+
} = require('../lib/code-search');
|
|
21
|
+
|
|
22
|
+
// ── Command registration ──
|
|
23
|
+
|
|
24
|
+
/**
 * Attach the `code-search` command, plus its `init`, `status` and `refresh`
 * subcommands, to a Commander program.
 *
 * Invoking `code-search` with a query runs a search; invoking it without one
 * prints the command's help text.
 *
 * @param {import('commander').Command} program - Program to register the command group on.
 */
function registerCodeSearch(program) {
  // Shared parser for every numeric option. Kept as a one-argument wrapper so
  // that only the raw option value is ever handed to parseInt.
  const toInt = (value) => parseInt(value, 10);

  // ── code-search <query> ──
  const root = program.command('code-search');
  root
    .description('Semantic code search — index and search your codebase')
    .argument('[query]', 'Search query (omit for subcommands)')
    .option('-l, --limit <n>', 'Number of results', toInt, 10)
    .option('--no-rerank', 'Skip reranking')
    .option('--rerank-model <model>', 'Reranking model', DEFAULT_RERANK_MODEL)
    .option('-m, --model <model>', 'Embedding model')
    .option('--db <name>', 'MongoDB database name')
    .option('--collection <name>', 'Collection name')
    .option('--json', 'Machine-readable JSON output')
    .option('-q, --quiet', 'Suppress non-essential output')
    .action(async (query, opts) => {
      if (query) {
        await handleSearch(query, opts);
        return;
      }
      // No query given: show usage for the command group instead of searching.
      root.outputHelp();
    });

  // ── code-search init ──
  root
    .command('init [path]')
    .description('Index a codebase for semantic code search')
    .option('-m, --model <model>', 'Embedding model', DEFAULT_CODE_MODEL)
    .option('--db <name>', 'MongoDB database name')
    .option('--collection <name>', 'Collection name')
    .option('--chunk-size <n>', 'Target chunk size in characters', toInt, 512)
    .option('--chunk-overlap <n>', 'Overlap between chunks', toInt, 50)
    .option('--max-files <n>', 'Maximum files to index', toInt, 5000)
    .option('--max-file-size <bytes>', 'Maximum file size in bytes', toInt, 100000)
    .option('--batch-size <n>', 'Embedding batch size', toInt, 20)
    .option('--json', 'Machine-readable JSON output')
    .option('-q, --quiet', 'Suppress non-essential output')
    .action((workspacePath, opts) => handleInit(workspacePath, opts));

  // ── code-search status ──
  root
    .command('status')
    .description('Show index stats for the current codebase')
    .option('--db <name>', 'MongoDB database name')
    .option('--collection <name>', 'Collection name')
    .option('--json', 'Machine-readable JSON output')
    .action((opts) => handleStatus(opts));

  // ── code-search refresh ──
  root
    .command('refresh [path]')
    .description('Re-index only changed files')
    .option('-m, --model <model>', 'Embedding model')
    .option('--db <name>', 'MongoDB database name')
    .option('--collection <name>', 'Collection name')
    .option('--chunk-size <n>', 'Target chunk size in characters', toInt, 512)
    .option('--chunk-overlap <n>', 'Overlap between chunks', toInt, 50)
    .option('--batch-size <n>', 'Embedding batch size', toInt, 20)
    .option('--json', 'Machine-readable JSON output')
    .option('-q, --quiet', 'Suppress non-essential output')
    .action((workspacePath, opts) => handleRefresh(workspacePath, opts));
}
|
|
94
|
+
|
|
95
|
+
// ── Handlers ──
|
|
96
|
+
|
|
97
|
+
/**
 * Handle `code-search init [path]`: rebuild the code-search index for a workspace.
 *
 * Steps, in order: scan the workspace for code files, delete every document
 * previously stored for that workspace, chunk each file, embed and insert the
 * chunks in batches, request a vector search index, and store the resulting
 * configuration under `codeSearch` in the project's `.vai.json`.
 *
 * Per-file read/chunk failures are collected and reported; any other failure
 * rejects the returned promise. The spinner and the MongoDB client are always
 * released, whichever way the function ends.
 *
 * @param {string|undefined} workspacePath - Directory to index; defaults to the current working directory.
 * @param {object} opts - Parsed options of the `init` subcommand.
 * @returns {Promise<void>}
 */
async function handleInit(workspacePath, opts) {
  const telemetry = require('../lib/telemetry');
  telemetry.send('cli_code_search_init');

  const resolvedPath = workspacePath ? path.resolve(workspacePath) : process.cwd();
  const { db, collection: collName, model } = resolveConfig(opts, resolvedPath);
  const useSpinner = !opts.json && !opts.quiet;

  // Dimension count used in the index definition only when no chunk was embedded.
  const FALLBACK_DIMENSIONS = 1024;

  // At most one spinner is active at a time; `spin` is null whenever none is running,
  // so the cleanup in `finally` only touches a spinner that is still spinning.
  let spin = null;
  const startSpinner = (text) => {
    if (!useSpinner) return;
    spin = ui.spinner(text);
    spin.start();
  };
  const stopSpinner = () => {
    if (!spin) return;
    spin.stop();
    spin = null;
  };

  let client;
  try {
    startSpinner(`Scanning ${resolvedPath}...`);

    const start = Date.now();
    const files = await findCodeFiles(resolvedPath, {
      maxFiles: opts.maxFiles,
      maxFileSize: opts.maxFileSize,
    });

    stopSpinner();

    if (files.length === 0) {
      console.log(ui.warn(`No code files found in ${resolvedPath}`));
      return;
    }

    if (!opts.quiet && !opts.json) {
      console.log(ui.info(`Found ${files.length} code files`));
    }

    const { client: mongoClient, collection } = await getMongoCollection(db, collName);
    client = mongoClient;

    // Clear existing data for this workspace
    await collection.deleteMany({ 'metadata.workspace': resolvedPath });

    const stats = { filesIndexed: 0, chunksCreated: 0, errors: [] };
    // A zero, negative or NaN batch size would keep the batching loop below from
    // ever advancing, so anything but a positive integer falls back to 20.
    const batchSize = Number.isInteger(opts.batchSize) && opts.batchSize > 0 ? opts.batchSize : 20;

    // Process files and create chunks
    const allDocs = [];
    for (const filePath of files) {
      try {
        const content = await fs.promises.readFile(filePath, 'utf-8');
        const relativePath = path.relative(resolvedPath, filePath);
        const ext = path.extname(filePath).toLowerCase();
        const fileStats = await fs.promises.stat(filePath);
        const symbols = extractSymbols(content, filePath);
        const chunks = smartChunkCode(content, filePath, {
          chunkSize: opts.chunkSize,
          chunkOverlap: opts.chunkOverlap,
        });

        for (const chunk of chunks) {
          allDocs.push({
            text: chunk.text,
            metadata: {
              source: relativePath,
              filePath,
              workspace: resolvedPath,
              language: ext.slice(1),
              startLine: chunk.startLine,
              endLine: chunk.endLine,
              chunkType: chunk.type,
              // Keep only the file-level symbols whose name appears in this chunk's text.
              symbols: symbols.filter((s) => chunk.text.includes(s)),
              mtime: fileStats.mtimeMs,
              indexedAt: new Date().toISOString(),
            },
          });
        }
        stats.filesIndexed++;
      } catch (err) {
        // One unreadable file must not abort the whole run; report it at the end.
        stats.errors.push({ file: filePath, error: err.message });
      }
    }

    stats.chunksCreated = allDocs.length;

    // Embed and insert in batches
    startSpinner(`Embedding ${allDocs.length} chunks...`);

    let totalTokens = 0;
    let embeddingDimensions = null;
    for (let i = 0; i < allDocs.length; i += batchSize) {
      const batch = allDocs.slice(i, i + batchSize);
      const texts = batch.map((d) => d.text);
      const embedResult = await generateEmbeddings(texts, { model, inputType: 'document' });
      totalTokens += embedResult.usage?.total_tokens || 0;

      // Remember the real vector length so the index definition matches the stored data.
      if (embeddingDimensions === null) {
        const vectorLength = embedResult.data[0]?.embedding?.length;
        if (Number.isInteger(vectorLength) && vectorLength > 0) {
          embeddingDimensions = vectorLength;
        }
      }

      const docsToInsert = batch.map((doc, idx) => ({
        text: doc.text,
        embedding: embedResult.data[idx].embedding,
        metadata: doc.metadata,
      }));

      await collection.insertMany(docsToInsert);

      if (useSpinner) {
        // Restart the spinner with an updated progress count.
        stopSpinner();
        startSpinner(`Embedding chunks... ${Math.min(i + batchSize, allDocs.length)}/${allDocs.length}`);
      }
    }

    stopSpinner();

    // Create vector search index
    startSpinner('Creating vector search index...');

    try {
      await collection.createSearchIndex({
        name: 'code_search_index',
        type: 'vectorSearch',
        definition: {
          fields: [
            {
              type: 'vector',
              path: 'embedding',
              numDimensions: embeddingDimensions ?? FALLBACK_DIMENSIONS,
              similarity: 'cosine',
            },
            { type: 'filter', path: 'metadata.language' },
            { type: 'filter', path: 'metadata.workspace' },
          ],
        },
      });
    } catch (err) {
      // Index may already exist
      if (!err.message?.includes('already exists')) {
        stopSpinner();
        console.log(ui.warn(`Could not create search index: ${err.message}`));
      }
    }

    stopSpinner();

    // Save config to .vai.json
    const { config: proj, filePath: projPath } = loadProject(resolvedPath);
    proj.codeSearch = {
      db,
      collection: collName,
      model,
      lastIndexed: new Date().toISOString(),
      workspace: resolvedPath,
    };
    try {
      saveProject(proj, projPath || path.join(resolvedPath, '.vai.json'));
    } catch { /* non-critical */ }

    const timeMs = Date.now() - start;

    if (opts.json) {
      console.log(JSON.stringify({
        ...stats,
        db,
        collection: collName,
        model,
        totalTokens,
        timeMs,
      }, null, 2));
    } else {
      console.log('');
      console.log(pc.green('✓ Codebase indexed successfully!'));
      console.log('');
      console.log(ui.label('Files indexed', `${stats.filesIndexed}/${files.length}`));
      console.log(ui.label('Chunks created', String(stats.chunksCreated)));
      console.log(ui.label('Collection', `${db}.${collName}`));
      console.log(ui.label('Model', model));
      console.log(ui.label('Time', `${timeMs}ms`));
      console.log(ui.label('Tokens', String(totalTokens)));

      if (stats.errors.length > 0) {
        console.log('');
        console.log(pc.yellow(`⚠ ${stats.errors.length} file(s) had errors`));
        // Show at most five failures in detail and summarise the remainder.
        for (const e of stats.errors.slice(0, 5)) {
          console.log(` ${pc.dim(e.file)}: ${e.error}`);
        }
        if (stats.errors.length > 5) {
          console.log(` ... and ${stats.errors.length - 5} more`);
        }
      }

      console.log('');
      console.log(ui.dim('Search with: vai code-search "your query"'));
      console.log(ui.dim('Note: Vector search index may take a few minutes to become ready.'));

      showCombinedCostSummary([{ model, tokens: totalTokens, label: `embed (${model})` }], opts);
    }
  } finally {
    // Runs on success, early return and failure alike.
    stopSpinner();
    if (client) await client.close();
  }
}
|
|
292
|
+
|
|
293
|
+
/**
 * Handle `code-search <query>`: embed the query, run a `$vectorSearch`
 * against the configured collection, optionally rerank the candidates, and
 * print the results as JSON (`--json`) or as a formatted listing.
 *
 * Any error that reaches the outer `catch` is reported through telemetry and
 * stderr, after which the process exits with status 1.
 *
 * @param {string} query - Natural-language search query.
 * @param {object} opts - Parsed options of the `code-search` command.
 * @returns {Promise<void>}
 */
async function handleSearch(query, opts) {
  const telemetry = require('../lib/telemetry');
  const { db, collection: collName, model } = resolveConfig(opts);
  const doRerank = opts.rerank !== false;
  const rerankModel = opts.rerankModel || DEFAULT_RERANK_MODEL;
  const limit = opts.limit || 10;
  const useSpinner = !opts.json && !opts.quiet;

  const done = telemetry.timer('cli_code_search_query', { model, rerank: doRerank });

  // Declared outside the try block so the catch handler can stop a spinner
  // that is still running before it prints the error.
  let spin = null;
  const startSpinner = (text) => {
    if (!useSpinner) return;
    spin = ui.spinner(text);
    spin.start();
  };
  const stopSpinner = () => {
    if (!spin) return;
    spin.stop();
    spin = null;
  };

  let client;
  try {
    // Embed query
    startSpinner('Embedding query...');

    const embedResult = await generateEmbeddings([query], { model, inputType: 'query' });
    const queryVector = embedResult.data[0].embedding;
    const embedTokens = embedResult.usage?.total_tokens || 0;

    stopSpinner();

    // Vector search
    startSpinner(`Searching ${db}.${collName}...`);

    const { client: mongoClient, collection } = await getMongoCollection(db, collName);
    client = mongoClient;

    // Check if collection has documents
    const docCount = await collection.estimatedDocumentCount();
    if (docCount === 0) {
      stopSpinner();
      console.log(ui.warn('No indexed code found. Run `vai code-search init` first.'));
      return;
    }

    const numCandidates = Math.min(limit * 15, 10000);
    const pipeline = [
      {
        $vectorSearch: {
          index: 'code_search_index',
          path: 'embedding',
          queryVector,
          numCandidates,
          // Over-fetch when reranking so the reranker has more than `limit` candidates to choose from.
          limit: doRerank ? limit * 3 : limit,
        },
      },
      { $addFields: { _vsScore: { $meta: 'vectorSearchScore' } } },
    ];

    let searchResults;
    try {
      searchResults = await collection.aggregate(pipeline).toArray();
    } catch (err) {
      stopSpinner();
      if (err.message?.includes('index') || err.codeName === 'InvalidPipelineOperator') {
        console.log(ui.warn('Vector search index not ready. Run `vai code-search init` and wait a few minutes.'));
        return;
      }
      throw err;
    }

    stopSpinner();

    if (searchResults.length === 0) {
      if (opts.json) {
        console.log(JSON.stringify({ query, results: [] }, null, 2));
      } else {
        console.log(ui.yellow('No results found.'));
      }
      return;
    }

    // Rerank
    let finalResults;
    let rerankTokens = 0;

    if (doRerank && searchResults.length > 1) {
      startSpinner(`Reranking ${searchResults.length} results...`);

      const documents = searchResults.map((d) => d.text || '');
      const rerankResult = await apiRequest('/rerank', {
        query,
        documents,
        model: rerankModel,
        top_k: limit,
      });
      rerankTokens = rerankResult.usage?.total_tokens || 0;

      stopSpinner();

      // Each rerank item refers back to a candidate by its position in `documents`.
      finalResults = (rerankResult.data || []).map((item) => {
        const doc = searchResults[item.index];
        return { ...doc, _vsScore: doc._vsScore, _rerankScore: item.relevance_score };
      });
    } else {
      finalResults = searchResults.slice(0, limit);
    }

    // Output
    if (opts.json) {
      const jsonResults = finalResults.map((r, i) => ({
        rank: i + 1,
        source: r.metadata?.source,
        language: r.metadata?.language,
        startLine: r.metadata?.startLine,
        endLine: r.metadata?.endLine,
        symbols: r.metadata?.symbols,
        // `??` rather than `||`: a rerank score of exactly 0 is a real score
        // and must not be replaced by the vector score.
        score: r._rerankScore ?? r._vsScore,
        vectorScore: r._vsScore,
        rerankScore: r._rerankScore,
        text: r.text,
      }));
      console.log(JSON.stringify({
        query, model, rerankModel: doRerank ? rerankModel : null,
        db, collection: collName,
        tokens: { embed: embedTokens, rerank: rerankTokens },
        results: jsonResults,
      }, null, 2));
      done({ resultCount: finalResults.length });
      return;
    }

    // Pretty print
    console.log('');
    console.log(ui.label('Query', ui.cyan(`"${query}"`)));
    console.log(ui.label('Search', `${searchResults.length} candidates from ${ui.dim(`${db}.${collName}`)}`));
    if (doRerank && searchResults.length > 1) {
      console.log(ui.label('Rerank', `Top ${finalResults.length} via ${ui.dim(rerankModel)}`));
    }
    console.log('');

    for (let i = 0; i < finalResults.length; i++) {
      const r = finalResults[i];
      const meta = r.metadata || {};
      // Same rule as the JSON output: only fall back when there is no rerank score at all.
      const score = r._rerankScore ?? r._vsScore;
      const scoreStr = score != null ? ui.score(score) : '';
      const vsStr = r._vsScore != null ? ui.dim(`vs:${r._vsScore.toFixed(3)}`) : '';
      const rrStr = r._rerankScore != null ? ui.dim(`rr:${r._rerankScore.toFixed(3)}`) : '';
      const scores = [vsStr, rrStr].filter(Boolean).join(' ');

      // File header
      const lineRange = meta.startLine ? pc.dim(`:${meta.startLine}-${meta.endLine}`) : '';
      console.log(`${pc.bold(`#${i + 1}`)} ${pc.cyan(meta.source || 'unknown')}${lineRange} ${scoreStr} ${scores}`);

      // Symbols
      if (meta.symbols?.length > 0) {
        console.log(` ${pc.dim('symbols:')} ${meta.symbols.slice(0, 5).join(', ')}`);
      }

      // Code snippet (first 300 characters of the chunk)
      const snippet = (r.text || '').substring(0, 300);
      const ellipsis = (r.text || '').length > 300 ? '...' : '';
      const indented = snippet.split('\n').map((l) => ' ' + l).join('\n');
      console.log(pc.dim(indented + ellipsis));
      console.log('');
    }

    const totalTokens = embedTokens + rerankTokens;
    console.log(ui.dim(` Tokens: ${totalTokens} (embed: ${embedTokens}${rerankTokens ? `, rerank: ${rerankTokens}` : ''})`));
    showCombinedCostSummary([
      { model, tokens: embedTokens, label: `embed (${model})` },
      ...(rerankTokens ? [{ model: rerankModel, tokens: rerankTokens, label: `rerank (${rerankModel})` }] : []),
    ], opts);

    done({ resultCount: finalResults.length });
  } catch (err) {
    // Stop any active spinner first so the error message is not drawn over it.
    stopSpinner();
    telemetry.send('cli_error', { command: 'code-search', errorType: err.constructor.name });
    console.error(ui.error(err.message));
    // NOTE: process.exit() ends the process right here, so the `finally`
    // block below is not reached on this path.
    process.exit(1);
  } finally {
    if (client) await client.close();
  }
}
|
|
476
|
+
|
|
477
|
+
/**
 * Handle `code-search status`: report statistics about the configured
 * code-search collection — chunk count, distinct source files, languages,
 * most recent `indexedAt` value, and the search indexes on the collection.
 *
 * Output is JSON when `opts.json` is set, otherwise a formatted summary.
 * On error the message is written to stderr and the process exits with
 * status 1.
 *
 * @param {object} opts - Parsed options of the `status` subcommand.
 * @returns {Promise<void>}
 */
async function handleStatus(opts) {
  const { db, collection: collName, model } = resolveConfig(opts);
  const useSpinner = !opts.json;

  // Declared outside the try block so the catch handler can stop it on failure.
  let spin = null;
  const stopSpinner = () => {
    if (!spin) return;
    spin.stop();
    spin = null;
  };

  let client;
  try {
    if (useSpinner) {
      spin = ui.spinner('Fetching index stats...');
      spin.start();
    }

    const { client: mongoClient, collection } = await getMongoCollection(db, collName);
    client = mongoClient;

    const totalChunks = await collection.estimatedDocumentCount();

    if (totalChunks === 0) {
      stopSpinner();
      console.log(ui.warn('No indexed code found. Run `vai code-search init` first.'));
      return;
    }

    // Get unique files and last indexed time.
    // Grouping on `_id: null` folds the whole collection into a single summary document.
    const [fileStats] = await collection.aggregate([
      {
        $group: {
          _id: null,
          uniqueFiles: { $addToSet: '$metadata.source' },
          lastIndexed: { $max: '$metadata.indexedAt' },
          languages: { $addToSet: '$metadata.language' },
        },
      },
    ]).toArray();

    // Get index info
    let indexes = [];
    try {
      indexes = await collection.listSearchIndexes().toArray();
    } catch { /* might not have permissions */ }

    stopSpinner();

    const stats = {
      db,
      collection: collName,
      model,
      totalChunks,
      filesIndexed: fileStats?.uniqueFiles?.length || 0,
      lastIndexed: fileStats?.lastIndexed || 'unknown',
      languages: fileStats?.languages || [],
      indexes: indexes.map((i) => ({ name: i.name, status: i.status })),
    };

    if (opts.json) {
      console.log(JSON.stringify(stats, null, 2));
      return;
    }

    console.log('');
    console.log(pc.bold('Code Search Index Status'));
    console.log('');
    console.log(ui.label('Collection', `${db}.${collName}`));
    console.log(ui.label('Model', model));
    console.log(ui.label('Files indexed', String(stats.filesIndexed)));
    console.log(ui.label('Total chunks', String(stats.totalChunks)));
    console.log(ui.label('Languages', stats.languages.join(', ') || 'N/A'));
    console.log(ui.label('Last indexed', stats.lastIndexed));

    if (indexes.length > 0) {
      console.log('');
      for (const idx of indexes) {
        console.log(ui.label('Index', `${ui.bold(idx.name)} — ${ui.status(idx.status || 'unknown')}`));
      }
    }
    console.log('');
  } catch (err) {
    // Stop any active spinner first so the error message is not drawn over it.
    stopSpinner();
    console.error(ui.error(err.message));
    // NOTE: process.exit() ends the process right here, so the `finally`
    // block below is not reached on this path.
    process.exit(1);
  } finally {
    if (client) await client.close();
  }
}
|
|
560
|
+
|
|
561
|
+
/**
 * Handle `code-search refresh [path]`: bring an existing index up to date.
 *
 * Compares the modification time stored with each indexed file against the
 * files currently on disk, deletes the stored chunks of files that changed
 * or disappeared, then re-chunks, re-embeds and re-inserts the changed and
 * new files. `codeSearch.lastIndexed` in `.vai.json` is updated when that
 * section already exists.
 *
 * On error the message is written to stderr and the process exits with
 * status 1.
 *
 * @param {string|undefined} workspacePath - Workspace directory; defaults to the current working directory.
 * @param {object} opts - Parsed options of the `refresh` subcommand.
 * @returns {Promise<void>}
 */
async function handleRefresh(workspacePath, opts) {
  const telemetry = require('../lib/telemetry');
  telemetry.send('cli_code_search_refresh');

  const resolvedPath = workspacePath ? path.resolve(workspacePath) : process.cwd();
  const { db, collection: collName, model } = resolveConfig(opts, resolvedPath);
  const useSpinner = !opts.json && !opts.quiet;

  // Declared outside the try block so the catch handler can stop it on failure.
  let spin = null;
  const startSpinner = (text) => {
    if (!useSpinner) return;
    spin = ui.spinner(text);
    spin.start();
  };
  const stopSpinner = () => {
    if (!spin) return;
    spin.stop();
    spin = null;
  };

  let client;
  try {
    startSpinner('Checking for changed files...');

    const { client: mongoClient, collection } = await getMongoCollection(db, collName);
    client = mongoClient;

    // Get indexed file mtimes from MongoDB (one entry per source file of this workspace)
    const indexedFiles = await collection.aggregate([
      { $match: { 'metadata.workspace': resolvedPath } },
      { $group: { _id: '$metadata.source', mtime: { $max: '$metadata.mtime' } } },
    ]).toArray();

    const indexedMap = new Map(indexedFiles.map((f) => [f._id, f.mtime]));

    // Find current files
    const currentFiles = await findCodeFiles(resolvedPath, {
      maxFiles: opts.maxFiles || 5000,
      maxFileSize: opts.maxFileSize || 100000,
    });

    // Determine changed/new files
    const changedFiles = [];
    const currentPaths = new Set();

    for (const filePath of currentFiles) {
      const relativePath = path.relative(resolvedPath, filePath);
      currentPaths.add(relativePath);
      const stats = await fs.promises.stat(filePath);
      const indexedMtime = indexedMap.get(relativePath);
      // No stored mtime means the file is new; a newer on-disk mtime means it changed.
      if (!indexedMtime || stats.mtimeMs > indexedMtime) {
        changedFiles.push(filePath);
      }
    }

    // Find deleted files: indexed sources that are no longer among the current files
    const deletedFiles = [];
    for (const [source] of indexedMap) {
      if (!currentPaths.has(source)) {
        deletedFiles.push(source);
      }
    }

    stopSpinner();

    if (changedFiles.length === 0 && deletedFiles.length === 0) {
      if (opts.json) {
        console.log(JSON.stringify({ changed: 0, deleted: 0, message: 'Up to date' }, null, 2));
      } else {
        console.log(ui.success('Index is up to date — no changes detected.'));
      }
      return;
    }

    if (!opts.quiet && !opts.json) {
      console.log(ui.info(`${changedFiles.length} changed/new, ${deletedFiles.length} deleted`));
    }

    // Delete old chunks for changed & deleted files
    const filesToDelete = [
      ...changedFiles.map((f) => path.relative(resolvedPath, f)),
      ...deletedFiles,
    ];
    if (filesToDelete.length > 0) {
      await collection.deleteMany({
        'metadata.workspace': resolvedPath,
        'metadata.source': { $in: filesToDelete },
      });
    }

    // Re-index changed files
    const start = Date.now();
    // A zero, negative or NaN batch size would keep the batching loop below from
    // ever advancing, so anything but a positive integer falls back to 20.
    const batchSize = Number.isInteger(opts.batchSize) && opts.batchSize > 0 ? opts.batchSize : 20;
    const allDocs = [];
    const errors = [];

    for (const filePath of changedFiles) {
      try {
        const content = await fs.promises.readFile(filePath, 'utf-8');
        const relativePath = path.relative(resolvedPath, filePath);
        const ext = path.extname(filePath).toLowerCase();
        const fileStats = await fs.promises.stat(filePath);
        const symbols = extractSymbols(content, filePath);
        const chunks = smartChunkCode(content, filePath, {
          chunkSize: opts.chunkSize,
          chunkOverlap: opts.chunkOverlap,
        });

        for (const chunk of chunks) {
          allDocs.push({
            text: chunk.text,
            metadata: {
              source: relativePath,
              filePath,
              workspace: resolvedPath,
              language: ext.slice(1),
              startLine: chunk.startLine,
              endLine: chunk.endLine,
              chunkType: chunk.type,
              // Keep only the file-level symbols whose name appears in this chunk's text.
              symbols: symbols.filter((s) => chunk.text.includes(s)),
              mtime: fileStats.mtimeMs,
              indexedAt: new Date().toISOString(),
            },
          });
        }
      } catch (err) {
        // One unreadable file must not abort the whole refresh; report it in the summary.
        errors.push({ file: filePath, error: err.message });
      }
    }

    if (allDocs.length > 0) {
      startSpinner(`Embedding ${allDocs.length} chunks...`);
    }

    let totalTokens = 0;
    for (let i = 0; i < allDocs.length; i += batchSize) {
      const batch = allDocs.slice(i, i + batchSize);
      const texts = batch.map((d) => d.text);
      const embedResult = await generateEmbeddings(texts, { model, inputType: 'document' });
      totalTokens += embedResult.usage?.total_tokens || 0;

      const docsToInsert = batch.map((doc, idx) => ({
        text: doc.text,
        embedding: embedResult.data[idx].embedding,
        metadata: doc.metadata,
      }));

      await collection.insertMany(docsToInsert);
    }

    stopSpinner();

    // Update .vai.json (only when the project already has a codeSearch section)
    const { config: proj, filePath: projPath } = loadProject(resolvedPath);
    if (proj.codeSearch) {
      proj.codeSearch.lastIndexed = new Date().toISOString();
      try {
        saveProject(proj, projPath);
      } catch { /* non-critical */ }
    }

    const timeMs = Date.now() - start;

    if (opts.json) {
      console.log(JSON.stringify({
        changed: changedFiles.length,
        deleted: deletedFiles.length,
        chunksCreated: allDocs.length,
        totalTokens,
        timeMs,
        errors,
      }, null, 2));
    } else {
      console.log('');
      console.log(pc.green('✓ Index refreshed!'));
      console.log('');
      console.log(ui.label('Files updated', String(changedFiles.length)));
      console.log(ui.label('Files deleted', String(deletedFiles.length)));
      console.log(ui.label('Chunks created', String(allDocs.length)));
      console.log(ui.label('Time', `${timeMs}ms`));
      console.log(ui.label('Tokens', String(totalTokens)));

      if (errors.length > 0) {
        console.log('');
        console.log(pc.yellow(`⚠ ${errors.length} error(s)`));
      }

      showCombinedCostSummary([{ model, tokens: totalTokens, label: `embed (${model})` }], opts);
    }
  } catch (err) {
    // Stop any active spinner first so the error message is not drawn over it.
    stopSpinner();
    console.error(ui.error(err.message));
    // NOTE: process.exit() ends the process right here, so the `finally`
    // block below is not reached on this path.
    process.exit(1);
  } finally {
    if (client) await client.close();
  }
}
|
|
750
|
+
|
|
751
|
+
module.exports = { registerCodeSearch };
|