voyageai-cli 1.30.0 → 1.30.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/README.md +4 -4
  2. package/package.json +1 -1
  3. package/src/cli.js +8 -0
  4. package/src/commands/about.js +3 -3
  5. package/src/commands/chat.js +32 -11
  6. package/src/commands/code-search.js +751 -0
  7. package/src/commands/doctor.js +1 -1
  8. package/src/commands/export.js +124 -0
  9. package/src/commands/import.js +195 -0
  10. package/src/commands/index-workspace.js +243 -0
  11. package/src/commands/mcp-server.js +113 -3
  12. package/src/commands/playground.js +120 -4
  13. package/src/commands/quickstart.js +4 -4
  14. package/src/commands/workflow.js +132 -65
  15. package/src/lib/catalog.js +4 -2
  16. package/src/lib/code-search.js +315 -0
  17. package/src/lib/codegen.js +1 -1
  18. package/src/lib/explanations.js +3 -3
  19. package/src/lib/export/contexts/benchmark-export.js +27 -0
  20. package/src/lib/export/contexts/chat-export.js +41 -0
  21. package/src/lib/export/contexts/explore-export.js +22 -0
  22. package/src/lib/export/contexts/search-export.js +54 -0
  23. package/src/lib/export/contexts/workflow-export.js +80 -0
  24. package/src/lib/export/formats/clipboard-export.js +29 -0
  25. package/src/lib/export/formats/csv-export.js +45 -0
  26. package/src/lib/export/formats/json-export.js +50 -0
  27. package/src/lib/export/formats/markdown-export.js +189 -0
  28. package/src/lib/export/formats/mermaid-export.js +274 -0
  29. package/src/lib/export/formats/pdf-export.js +117 -0
  30. package/src/lib/export/formats/png-export.js +96 -0
  31. package/src/lib/export/formats/svg-export.js +116 -0
  32. package/src/lib/export/index.js +175 -0
  33. package/src/lib/github.js +226 -0
  34. package/src/lib/template-engine.js +154 -20
  35. package/src/lib/workflow-builder.js +753 -0
  36. package/src/lib/workflow-formatters.js +454 -0
  37. package/src/lib/workflow-input-cache.js +111 -0
  38. package/src/lib/workflow-scaffold.js +1 -1
  39. package/src/lib/workflow.js +297 -28
  40. package/src/mcp/install.js +280 -7
  41. package/src/mcp/schemas/index.js +170 -0
  42. package/src/mcp/server.js +19 -4
  43. package/src/mcp/tools/authoring.js +662 -0
  44. package/src/mcp/tools/code-search.js +620 -0
  45. package/src/mcp/tools/ingest.js +2 -5
  46. package/src/mcp/tools/retrieval.js +2 -15
  47. package/src/mcp/tools/workspace.js +452 -0
  48. package/src/mcp/utils.js +20 -0
  49. package/src/playground/announcements.md +52 -5
  50. package/src/playground/help/workflow-nodes.js +127 -2
  51. package/src/playground/index.html +17109 -12438
  52. package/src/playground/vendor/mermaid.min.js +2811 -0
  53. package/src/workflows/code-review.json +110 -0
  54. package/src/workflows/cost-analysis.json +5 -0
  55. package/src/workflows/rag-chat.json +165 -0
  56. package/src/workflows/tests/code-review.fresh-index.test.json +83 -0
  57. package/src/workflows/tests/code-review.happy-path.test.json +121 -0
  58. package/src/workflows/tests/code-review.no-question.test.json +70 -0
  59. package/src/workflows/tests/consistency-check.happy-path.test.json +28 -0
  60. package/src/workflows/tests/consistency-check.missing-source.test.json +26 -0
  61. package/src/workflows/tests/cost-analysis.happy-path.test.json +28 -0
  62. package/src/workflows/tests/enrich-and-ingest.happy-path.test.json +38 -0
  63. package/src/workflows/tests/enrich-and-ingest.notify-fails.test.json +38 -0
  64. package/src/workflows/tests/intelligent-ingest.all-filtered.test.json +26 -0
  65. package/src/workflows/tests/intelligent-ingest.happy-path.test.json +28 -0
  66. package/src/workflows/tests/kb-health-report.custom-queries.test.json +24 -0
  67. package/src/workflows/tests/kb-health-report.happy-path.test.json +26 -0
  68. package/src/workflows/tests/multi-collection-search.happy-path.test.json +40 -0
  69. package/src/workflows/tests/multi-collection-search.one-empty.test.json +28 -0
  70. package/src/workflows/tests/rag-chat.happy-path.test.json +26 -0
  71. package/src/workflows/tests/rag-chat.no-relevant-results.test.json +25 -0
  72. package/src/workflows/tests/research-and-summarize.happy-path.test.json +33 -0
  73. package/src/workflows/tests/research-and-summarize.no-results.test.json +29 -0
  74. package/src/workflows/tests/search-with-fallback.empty-both.test.json +24 -0
  75. package/src/workflows/tests/search-with-fallback.fallback-branch.test.json +24 -0
  76. package/src/workflows/tests/search-with-fallback.happy-path.test.json +27 -0
  77. package/src/workflows/tests/smart-ingest.duplicate-detected.test.json +34 -0
  78. package/src/workflows/tests/smart-ingest.happy-path.test.json +31 -0
  79. package/src/playground/assets/announcements/appstore.jpg +0 -0
  80. package/src/playground/assets/announcements/circuits.jpg +0 -0
  81. package/src/playground/assets/announcements/csvingest.jpg +0 -0
  82. package/src/playground/assets/announcements/green-wave.jpg +0 -0
@@ -0,0 +1,620 @@
1
+ 'use strict';
2
+
3
+ const path = require('path');
4
+ const fs = require('fs');
5
+ const { generateEmbeddings, apiRequest } = require('../../lib/api');
6
+ const { getMongoCollection } = require('../../lib/mongo');
7
+ const { loadProject, saveProject } = require('../../lib/project');
8
+ const {
9
+ DEFAULT_CODE_MODEL,
10
+ DEFAULT_DB,
11
+ smartChunkCode,
12
+ extractSymbols,
13
+ findCodeFiles,
14
+ resolveConfig,
15
+ deriveCollectionName,
16
+ selectCodeModel,
17
+ CODE_EXTENSIONS,
18
+ } = require('../../lib/code-search');
19
+ const {
20
+ isGitHubUrl,
21
+ parseGitHubUrl,
22
+ getAuthToken,
23
+ fetchRepoTree,
24
+ fetchFilesBatch,
25
+ fetchChangedFiles,
26
+ resolveCommitSha,
27
+ } = require('../../lib/github');
28
+
29
+ const DEFAULT_INDEX_NAME = 'code_search_index';
30
+
/**
 * Resolve the db / collection / model triple used by the MCP code-search tools.
 *
 * Only the `db`, `collection` and `model` properties of `input` are forwarded
 * to `resolveConfig`; no workspace path is supplied (second argument is
 * `undefined`), so any fallback behaviour is whatever `resolveConfig` applies
 * in that case.
 *
 * @param {object} input - Tool input; `db`, `collection` and `model` are read.
 * @returns {{ db: *, collection: *, model: * }} The three resolved values.
 */
function resolveDbColl(input) {
  const overrides = {
    db: input.db,
    collection: input.collection,
    model: input.model,
  };
  const resolved = resolveConfig(overrides, undefined);
  return {
    db: resolved.db,
    collection: resolved.collection,
    model: resolved.model,
  };
}
41
+
/**
 * Handler for vai_code_index.
 *
 * Indexes a codebase into a MongoDB collection: files are turned into chunks
 * by `processFile`, embedded in batches with `generateEmbeddings`, inserted,
 * and a vector search index named `DEFAULT_INDEX_NAME` is created.
 *
 * `input.source` is treated as a GitHub repository when `isGitHubUrl` says so,
 * otherwise as a local directory path. With `input.refresh` set, only changed
 * files are re-processed (GitHub: files changed since the stored commit SHA;
 * local: files whose mtime is newer than the indexed one).
 *
 * @param {object} input - Tool input. Reads `source`, `db`, `collection`,
 *   `model`, `branch`, `refresh`, `batchSize`, `maxFiles`, `maxFileSize`; the
 *   whole object is also passed to `processFile`.
 * @returns {Promise<{structuredContent: object, content: Array}>} Index
 *   statistics plus a one-line text summary.
 * @throws {Error} If a local `source` directory does not exist. Errors from
 *   MongoDB, the embedding API and the GitHub helpers propagate, except where
 *   noted below.
 */
async function handleCodeIndex(input) {
  const start = Date.now();
  const source = input.source;
  const isRemote = isGitHubUrl(source);
  const batchSize = input.batchSize || 20;

  let resolvedPath;
  let db;
  let collName;
  let model;

  if (isRemote) {
    const { owner, repo } = parseGitHubUrl(source);
    db = input.db || DEFAULT_DB;
    collName = input.collection || `${repo}_code`;
    model = input.model || DEFAULT_CODE_MODEL;
    resolvedPath = `github:${owner}/${repo}`;
  } else {
    resolvedPath = path.resolve(source);
    const resolved = resolveConfig(
      { db: input.db, collection: input.collection, model: input.model },
      resolvedPath
    );
    db = resolved.db;
    collName = resolved.collection;
    model = resolved.model;
  }

  const stats = { filesFound: 0, filesIndexed: 0, chunksCreated: 0, errors: [], totalTokens: 0 };
  let client;

  try {
    const { client: c, collection } = await getMongoCollection(db, collName);
    client = c;

    let allDocs = [];

    if (isRemote) {
      // GitHub remote indexing
      const { owner, repo } = parseGitHubUrl(source);
      const token = getAuthToken();
      const branch = input.branch || 'main';

      const tree = await fetchRepoTree(owner, repo, branch, token);
      const headSha = await resolveCommitSha(owner, repo, branch, token);
      const codeFiles = tree.filter(entry => {
        const ext = path.extname(entry.path).toLowerCase();
        return CODE_EXTENSIONS.includes(ext) && entry.size <= (input.maxFileSize || 100000) && entry.size > 0;
      }).slice(0, input.maxFiles || 5000);

      stats.filesFound = codeFiles.length;

      let didIncrementalRefresh = false;

      if (input.refresh) {
        // Check stored commit SHA for incremental
        const meta = await collection.findOne({ _type: 'index_meta', workspace: resolvedPath });
        if (meta?.commitSha) {
          try {
            const changed = await fetchChangedFiles(owner, repo, meta.commitSha, branch, token);
            const changedSet = new Set(changed.map(f => f.filename));
            const deletedFiles = changed.filter(f => f.status === 'removed').map(f => f.filename);
            if (deletedFiles.length > 0) {
              await collection.deleteMany({ 'metadata.workspace': resolvedPath, 'metadata.source': { $in: deletedFiles } });
            }
            const filesToFetch = codeFiles.filter(f => changedSet.has(f.path));
            if (filesToFetch.length === 0) {
              // Update stored SHA even when up to date
              await collection.updateOne(
                { _type: 'index_meta', workspace: resolvedPath },
                { $set: { commitSha: headSha, updatedAt: new Date().toISOString() } },
                { upsert: true }
              );
              return {
                structuredContent: { ...stats, source: resolvedPath, sourceType: 'github', db, collection: collName, model, timeMs: Date.now() - start, refresh: true, indexName: DEFAULT_INDEX_NAME, message: 'Up to date' },
                content: [{ type: 'text', text: 'Index is up to date, no changes detected.' }],
              };
            }
            // Only fetch changed files
            const fetched = await fetchFilesBatch(owner, repo, filesToFetch.map(f => f.path), branch, token);
            await collection.deleteMany({ 'metadata.workspace': resolvedPath, 'metadata.source': { $in: filesToFetch.map(f => f.path) } });
            for (const file of fetched) {
              if (file.error) { stats.errors.push({ file: file.path, error: file.error }); continue; }
              processFile(file.path, file.content, resolvedPath, input, allDocs, stats);
            }
            didIncrementalRefresh = true;
          } catch {
            // Incremental refresh failed part-way. Anything already queued or
            // counted belongs to an incomplete pass (and documents for the
            // changed files may already have been deleted), so discard it and
            // rebuild from scratch below rather than inserting a partial set
            // and recording the new commit SHA.
            allDocs = [];
            stats.filesIndexed = 0;
            stats.errors = [];
          }
        }
      }

      if (!didIncrementalRefresh) {
        // Full index (either not refreshing, or incremental failed/no prior SHA)
        await collection.deleteMany({ 'metadata.workspace': resolvedPath });

        const filePaths = codeFiles.map(f => f.path);
        const fetched = await fetchFilesBatch(owner, repo, filePaths, branch, token);

        for (const file of fetched) {
          if (file.error) { stats.errors.push({ file: file.path, error: file.error }); continue; }
          processFile(file.path, file.content, resolvedPath, input, allDocs, stats);
        }
      }

      // Store actual commit SHA for future refresh
      await collection.updateOne(
        { _type: 'index_meta', workspace: resolvedPath },
        { $set: { _type: 'index_meta', workspace: resolvedPath, commitSha: headSha, updatedAt: new Date().toISOString() } },
        { upsert: true }
      );

    } else {
      // Local indexing
      if (!fs.existsSync(resolvedPath)) {
        throw new Error(`Directory not found: ${resolvedPath}`);
      }

      const files = await findCodeFiles(resolvedPath, { maxFiles: input.maxFiles, maxFileSize: input.maxFileSize });
      stats.filesFound = files.length;

      // Auto model selection
      if (!input.model) {
        const { config: proj } = loadProject(resolvedPath);
        model = selectCodeModel(files, proj);
      }

      if (input.refresh) {
        // Incremental refresh
        const indexedFiles = await collection.aggregate([
          { $match: { 'metadata.workspace': resolvedPath } },
          { $group: { _id: '$metadata.source', mtime: { $max: '$metadata.mtime' } } },
        ]).toArray();
        const indexedMap = new Map(indexedFiles.map(f => [f._id, f.mtime]));
        const currentPaths = new Set();

        for (const filePath of files) {
          const relativePath = path.relative(resolvedPath, filePath);
          // Register the path before touching the file so that a file which
          // exists but cannot be read is not mistaken for a deleted one.
          currentPaths.add(relativePath);
          try {
            const fileStats = await fs.promises.stat(filePath);
            const indexedMtime = indexedMap.get(relativePath);
            if (!indexedMtime || fileStats.mtimeMs > indexedMtime) {
              const content = await fs.promises.readFile(filePath, 'utf-8');
              await collection.deleteMany({ 'metadata.workspace': resolvedPath, 'metadata.source': relativePath });
              processFile(relativePath, content, resolvedPath, input, allDocs, stats, filePath, fileStats.mtimeMs);
            }
          } catch (err) {
            // Same per-file error policy as the full-index branch: record and
            // keep going instead of aborting the whole refresh.
            stats.errors.push({ file: filePath, error: err.message });
          }
        }

        // Delete removed files
        const deletedSources = [];
        for (const [indexedSource] of indexedMap) {
          if (!currentPaths.has(indexedSource)) deletedSources.push(indexedSource);
        }
        if (deletedSources.length > 0) {
          await collection.deleteMany({ 'metadata.workspace': resolvedPath, 'metadata.source': { $in: deletedSources } });
        }
      } else {
        // Full index
        await collection.deleteMany({ 'metadata.workspace': resolvedPath });

        for (const filePath of files) {
          try {
            const content = await fs.promises.readFile(filePath, 'utf-8');
            const relativePath = path.relative(resolvedPath, filePath);
            const fileStats = await fs.promises.stat(filePath);
            processFile(relativePath, content, resolvedPath, input, allDocs, stats, filePath, fileStats.mtimeMs);
          } catch (err) {
            stats.errors.push({ file: filePath, error: err.message });
          }
        }
      }
    }

    stats.chunksCreated = allDocs.length;

    // Dimension declared in the vector index. Falls back to 1024 when nothing
    // was embedded in this run; otherwise taken from the vectors actually
    // returned, since the model may have been auto-selected.
    let embeddingDimensions = 1024;

    // Embed and insert in batches
    for (let i = 0; i < allDocs.length; i += batchSize) {
      const batch = allDocs.slice(i, i + batchSize);
      const texts = batch.map(d => d.text);
      const embedResult = await generateEmbeddings(texts, { model, inputType: 'document' });
      stats.totalTokens += embedResult.usage?.total_tokens || 0;

      const firstVector = embedResult.data[0]?.embedding;
      if (Array.isArray(firstVector) && firstVector.length > 0) {
        embeddingDimensions = firstVector.length;
      }

      const docsToInsert = batch.map((doc, idx) => ({
        text: doc.text,
        embedding: embedResult.data[idx].embedding,
        metadata: doc.metadata,
      }));

      await collection.insertMany(docsToInsert);
    }

    // Create vector search index
    try {
      await collection.createSearchIndex({
        name: DEFAULT_INDEX_NAME,
        type: 'vectorSearch',
        definition: {
          fields: [
            { type: 'vector', path: 'embedding', numDimensions: embeddingDimensions, similarity: 'cosine' },
            { type: 'filter', path: 'metadata.language' },
            { type: 'filter', path: 'metadata.workspace' },
            // The search handler pre-filters on chunk type (its `category`
            // input), which requires the field to be indexed as a filter.
            { type: 'filter', path: 'metadata.chunkType' },
          ],
        },
      });
    } catch (err) {
      // An existing index is expected on re-runs; anything else is reported
      // in the result rather than failing the whole indexing run.
      if (!err.message?.includes('already exists')) {
        stats.errors.push({ file: '_index', error: `Could not create search index: ${err.message}` });
      }
    }

    const timeMs = Date.now() - start;
    const structured = {
      source: isRemote ? source : resolvedPath,
      sourceType: isRemote ? 'github' : 'local',
      db,
      collection: collName,
      model,
      filesFound: stats.filesFound,
      filesIndexed: stats.filesIndexed,
      chunksCreated: stats.chunksCreated,
      totalTokens: stats.totalTokens,
      errors: stats.errors,
      timeMs,
      refresh: input.refresh || false,
      indexName: DEFAULT_INDEX_NAME,
    };

    return {
      structuredContent: structured,
      content: [{ type: 'text', text: `Indexed ${stats.filesIndexed} files (${stats.chunksCreated} chunks) into ${db}.${collName} using ${model} in ${timeMs}ms. Tokens: ${stats.totalTokens}.${stats.errors.length ? ` Errors: ${stats.errors.length}` : ''}` }],
    };
  } finally {
    // Always release the MongoDB connection, including on the early
    // "up to date" return and on errors.
    if (client) await client.close();
  }
}
277
+
/**
 * Chunk one file and queue a document per chunk.
 *
 * Mutates its arguments: appends `{ text, metadata }` entries to `allDocs` and
 * increments `stats.filesIndexed` by one (regardless of how many chunks were
 * produced).
 *
 * @param {string} relativePath - Path stored as `metadata.source`; its
 *   extension (lower-cased, without the dot) becomes `metadata.language`.
 * @param {string} content - File contents handed to the symbol extractor and chunker.
 * @param {string} workspace - Stored as `metadata.workspace`.
 * @param {object} input - Tool input; `chunkSize` and `chunkOverlap` are forwarded to the chunker.
 * @param {Array<object>} allDocs - Accumulator the new documents are pushed onto.
 * @param {object} stats - Counter object; `filesIndexed` is incremented.
 * @param {string} [absolutePath] - Stored as `metadata.filePath`; falls back to `relativePath`.
 * @param {number} [mtime] - Stored as `metadata.mtime`; falls back to the current time when falsy.
 */
function processFile(relativePath, content, workspace, input, allDocs, stats, absolutePath, mtime) {
  const extension = path.extname(relativePath).toLowerCase();
  const fileSymbols = extractSymbols(content, relativePath);
  const chunkOptions = {
    chunkSize: input.chunkSize,
    chunkOverlap: input.chunkOverlap,
  };
  const pieces = smartChunkCode(content, relativePath, chunkOptions);

  for (const piece of pieces) {
    // Keep only the symbols whose name literally occurs in this chunk's text.
    const symbolsInPiece = fileSymbols.filter((name) => piece.text.includes(name));
    const metadata = {
      source: relativePath,
      filePath: absolutePath || relativePath,
      workspace,
      language: extension.slice(1),
      startLine: piece.startLine,
      endLine: piece.endLine,
      chunkType: piece.type,
      symbols: symbolsInPiece,
      mtime: mtime || Date.now(),
      indexedAt: new Date().toISOString(),
    };
    allDocs.push({ text: piece.text, metadata });
  }

  stats.filesIndexed += 1;
}
308
+
/**
 * Handler for vai_code_search.
 *
 * Embeds `input.query`, runs a `$vectorSearch` against the index named
 * `DEFAULT_INDEX_NAME`, and (unless `input.rerank === false` or fewer than two
 * candidates came back) reranks the candidates through the `/rerank` endpoint.
 *
 * @param {object} input - Tool input. Reads `query`, `db`, `collection`,
 *   `model`, `limit` (default 10), `rerank`, `rerankModel` (default
 *   'rerank-2.5'), `language`, `category`, `filter`, and the internal
 *   `_candidateMultiplier` (default 3).
 * @returns {Promise<{structuredContent: object, content: Array}>} Results with
 *   per-hit source location, symbols, a 300-character content preview and scores.
 */
async function handleCodeSearch(input) {
  const start = Date.now();
  const { db, collection: collName, model } = resolveDbColl(input);
  const codeModel = model || DEFAULT_CODE_MODEL;
  const limit = input.limit || 10;
  const doRerank = input.rerank !== false;
  const rerankModel = input.rerankModel || 'rerank-2.5';
  const candidateMultiplier = input._candidateMultiplier || 3;

  // Fields shared by reranked and non-reranked hits; score fields are added
  // by the caller so the key order of each shape stays as before.
  const describeChunk = (doc) => ({
    source: doc.metadata?.source || 'unknown',
    filePath: doc.metadata?.filePath || doc.metadata?.source || 'unknown',
    language: doc.metadata?.language,
    startLine: doc.metadata?.startLine,
    endLine: doc.metadata?.endLine,
    symbols: doc.metadata?.symbols || [],
    content: (doc.text || '').substring(0, 300),
  });

  let client;
  try {
    // Embed query
    const embedResult = await generateEmbeddings([input.query], { model: codeModel, inputType: 'query' });
    const queryVector = embedResult.data[0].embedding;
    const embedTokens = embedResult.usage?.total_tokens || 0;

    const { client: c, collection } = await getMongoCollection(db, collName);
    client = c;

    // Build filter
    const filter = {};
    if (input.language) filter['metadata.language'] = input.language;
    if (input.category) filter['metadata.chunkType'] = input.category;
    if (input.filter) Object.assign(filter, input.filter);

    const numCandidates = Math.min(limit * 15, 10000);
    const wantedCandidates = doRerank ? limit * candidateMultiplier : limit;
    const vectorSearchStage = {
      index: DEFAULT_INDEX_NAME,
      path: 'embedding',
      queryVector,
      numCandidates,
      // $vectorSearch requires limit <= numCandidates; numCandidates is capped
      // at 10000, so very large limits must be clamped to stay valid.
      limit: Math.min(wantedCandidates, numCandidates),
    };
    if (Object.keys(filter).length > 0) vectorSearchStage.filter = filter;

    const searchResults = await collection.aggregate([
      { $vectorSearch: vectorSearchStage },
      { $addFields: { _vsScore: { $meta: 'vectorSearchScore' } } },
    ]).toArray();

    if (searchResults.length === 0) {
      return {
        structuredContent: { query: input.query, results: [], metadata: { collection: collName, model: codeModel, reranked: false, timeMs: Date.now() - start, resultCount: 0, tokens: { embed: embedTokens, rerank: 0 } } },
        content: [{ type: 'text', text: `No results found for "${input.query}" in ${db}.${collName}` }],
      };
    }

    let finalResults;
    let rerankTokens = 0;

    // Reranking a single candidate is pointless, so it is skipped.
    const willRerank = doRerank && searchResults.length > 1;

    if (willRerank) {
      const documents = searchResults.map(d => d.text || '');
      const rerankResult = await apiRequest('/rerank', {
        query: input.query,
        documents,
        model: rerankModel,
        top_k: limit,
      });
      rerankTokens = rerankResult.usage?.total_tokens || 0;

      // Each rerank item refers back to its candidate by position.
      finalResults = (rerankResult.data || []).map(item => {
        const doc = searchResults[item.index];
        return {
          ...describeChunk(doc),
          score: item.relevance_score,
          vectorScore: doc._vsScore,
          rerankScore: item.relevance_score,
          chunkType: doc.metadata?.chunkType,
        };
      });
    } else {
      finalResults = searchResults.slice(0, limit).map(doc => ({
        ...describeChunk(doc),
        score: doc._vsScore,
        vectorScore: doc._vsScore,
        chunkType: doc.metadata?.chunkType,
      }));
    }

    const timeMs = Date.now() - start;
    const structured = {
      query: input.query,
      results: finalResults,
      metadata: {
        collection: collName,
        model: codeModel,
        rerankModel: doRerank ? rerankModel : null,
        reranked: willRerank,
        timeMs,
        resultCount: finalResults.length,
        tokens: { embed: embedTokens, rerank: rerankTokens },
      },
    };

    const textLines = finalResults.map((r, i) =>
      `[${i + 1}] ${r.source}:${r.startLine}-${r.endLine} (${r.language}) score:${(r.score || 0).toFixed(3)}\n symbols: ${(r.symbols || []).slice(0, 5).join(', ')}\n ${r.content.split('\n').slice(0, 3).join('\n ')}`
    );

    return {
      structuredContent: structured,
      content: [{ type: 'text', text: `Found ${finalResults.length} results for "${input.query}" (${timeMs}ms):\n\n${textLines.join('\n\n')}` }],
    };
  } finally {
    if (client) await client.close();
  }
}
430
+
/**
 * Handler for vai_code_query.
 *
 * Delegates to `handleCodeSearch` with query-oriented settings: the result
 * limit defaults to 5, reranking is always on (any caller-supplied `rerank`
 * is overridden), the rerank model defaults to 'rerank-2.5', and the candidate
 * multiplier is fixed at 5.
 *
 * @param {object} input - Tool input; all other properties pass through unchanged.
 * @returns {Promise<{structuredContent: object, content: Array}>} Whatever `handleCodeSearch` returns.
 */
async function handleCodeQuery(input) {
  const forcedSettings = {
    limit: input.limit || 5,
    rerank: true,
    rerankModel: input.rerankModel || 'rerank-2.5',
    _candidateMultiplier: 5,
  };
  const searchInput = Object.assign({}, input, forcedSettings);
  return handleCodeSearch(searchInput);
}
443
+
/**
 * Handler for vai_code_find_similar.
 *
 * Embeds `input.code`, runs a `$vectorSearch` against the index named
 * `DEFAULT_INDEX_NAME`, and keeps the hits whose vector score is at least
 * `threshold`, up to `limit` of them. No reranking is performed.
 *
 * @param {object} input - Tool input. Reads `code`, `db`, `collection`,
 *   `model`, `limit` (default 10), `threshold` (default 0.5; an explicit 0 is
 *   honoured and disables score filtering), `language`, `filter`.
 * @returns {Promise<{structuredContent: object, content: Array}>} Matching
 *   chunks with source location, symbols, a 300-character preview and score.
 */
async function handleCodeFindSimilar(input) {
  const start = Date.now();
  const { db, collection: collName, model } = resolveDbColl(input);
  const codeModel = model || DEFAULT_CODE_MODEL;
  const limit = input.limit || 10;
  // `??` rather than `||`: a caller passing threshold 0 means "no cut-off"
  // and must not be silently replaced by the 0.5 default.
  const threshold = input.threshold ?? 0.5;

  let client;
  try {
    const embedResult = await generateEmbeddings([input.code], { model: codeModel, inputType: 'query' });
    const queryVector = embedResult.data[0].embedding;
    const embedTokens = embedResult.usage?.total_tokens || 0;

    const { client: c, collection } = await getMongoCollection(db, collName);
    client = c;

    const filter = {};
    if (input.language) filter['metadata.language'] = input.language;
    if (input.filter) Object.assign(filter, input.filter);

    const numCandidates = Math.min(limit * 15, 10000);
    const vectorSearchStage = {
      index: DEFAULT_INDEX_NAME,
      path: 'embedding',
      queryVector,
      numCandidates,
      // Over-fetch (2x) so threshold filtering can still fill `limit`, but
      // never ask for more than numCandidates, which $vectorSearch rejects.
      limit: Math.min(limit * 2, numCandidates),
    };
    if (Object.keys(filter).length > 0) vectorSearchStage.filter = filter;

    const results = await collection.aggregate([
      { $vectorSearch: vectorSearchStage },
      { $addFields: { _vsScore: { $meta: 'vectorSearchScore' } } },
    ]).toArray();

    const filtered = results
      .filter(r => r._vsScore >= threshold)
      .slice(0, limit)
      .map(doc => ({
        source: doc.metadata?.source || 'unknown',
        filePath: doc.metadata?.filePath || doc.metadata?.source || 'unknown',
        language: doc.metadata?.language,
        startLine: doc.metadata?.startLine,
        endLine: doc.metadata?.endLine,
        symbols: doc.metadata?.symbols || [],
        content: (doc.text || '').substring(0, 300),
        score: doc._vsScore,
        chunkType: doc.metadata?.chunkType,
      }));

    const timeMs = Date.now() - start;

    return {
      structuredContent: {
        results: filtered,
        metadata: { collection: collName, model: codeModel, threshold, timeMs, resultCount: filtered.length, tokens: { embed: embedTokens } },
      },
      content: [{ type: 'text', text: `Found ${filtered.length} similar code chunks (threshold: ${threshold}, ${timeMs}ms):\n\n${filtered.map((r, i) => `[${i + 1}] ${r.source}:${r.startLine}-${r.endLine} (score: ${r.score.toFixed(3)})\n ${r.content.split('\n').slice(0, 3).join('\n ')}`).join('\n\n')}` }],
    };
  } finally {
    if (client) await client.close();
  }
}
509
+
/**
 * Handler for vai_code_status.
 *
 * Reports what is stored in the resolved collection: number of chunk
 * documents, distinct source files, languages, workspaces, the latest
 * `metadata.indexedAt`, and the search indexes defined on the collection.
 * Bookkeeping documents (`_type: 'index_meta'`) are excluded from every figure.
 *
 * @param {object} input - Tool input; `db` / `collection` are resolved via `resolveDbColl`.
 * @returns {Promise<{structuredContent: object, content: Array}>} Status
 *   summary, or a "No indexed code found" message when there are no chunks.
 */
async function handleCodeStatus(input) {
  const start = Date.now();
  const { db, collection: collName } = resolveDbColl(input);

  // Bookkeeping documents written by the indexer are not code chunks.
  const chunkFilter = { _type: { $ne: 'index_meta' } };

  let client;
  try {
    const { client: c, collection } = await getMongoCollection(db, collName);
    client = c;

    // Count with the same filter the aggregation below uses, so the chunk
    // total is not inflated by index_meta documents and a collection that
    // only holds metadata is correctly reported as empty.
    const totalChunks = await collection.countDocuments(chunkFilter);

    if (totalChunks === 0) {
      return {
        structuredContent: { db, collection: collName, totalChunks: 0, filesIndexed: 0, message: 'No indexed code found' },
        content: [{ type: 'text', text: `No indexed code found in ${db}.${collName}. Use vai_code_index to index a codebase.` }],
      };
    }

    const [fileStats] = await collection.aggregate([
      { $match: chunkFilter },
      {
        $group: {
          _id: null,
          uniqueFiles: { $addToSet: '$metadata.source' },
          lastIndexed: { $max: '$metadata.indexedAt' },
          languages: { $addToSet: '$metadata.language' },
          workspaces: { $addToSet: '$metadata.workspace' },
        },
      },
    ]).toArray();

    let indexes = [];
    try {
      indexes = await collection.listSearchIndexes().toArray();
    } catch { /* might not have permissions */ }

    const timeMs = Date.now() - start;
    const structured = {
      db,
      collection: collName,
      totalChunks,
      filesIndexed: fileStats?.uniqueFiles?.length || 0,
      lastIndexed: fileStats?.lastIndexed || 'unknown',
      languages: fileStats?.languages?.filter(Boolean) || [],
      workspaces: fileStats?.workspaces?.filter(Boolean) || [],
      indexes: indexes.map(i => ({ name: i.name, status: i.status })),
      timeMs,
    };

    return {
      structuredContent: structured,
      content: [{ type: 'text', text: `Code Search Index: ${db}.${collName}\n Files: ${structured.filesIndexed}\n Chunks: ${totalChunks}\n Languages: ${structured.languages.join(', ') || 'N/A'}\n Last indexed: ${structured.lastIndexed}\n Indexes: ${indexes.map(i => `${i.name} (${i.status})`).join(', ') || 'none'}` }],
    };
  } finally {
    if (client) await client.close();
  }
}
570
+
/**
 * Register the five code-search tools on an MCP server.
 *
 * Tools are registered in a fixed order (index, search, query, find_similar,
 * status), each with its description, its input schema taken from `schemas`,
 * and its handler function.
 *
 * @param {import('@modelcontextprotocol/sdk/server/mcp.js').McpServer} server
 * @param {object} schemas - Must expose codeIndexSchema, codeSearchSchema,
 *   codeQuerySchema, codeFindSimilarSchema and codeStatusSchema.
 */
function registerCodeSearchTools(server, schemas) {
  const toolTable = [
    {
      name: 'vai_code_index',
      description: 'Index a codebase for semantic code search. Accepts a local directory path or a GitHub repository URL. Uses voyage-code-3 by default for code-optimized embeddings. Supports incremental refresh — only re-embeds files that changed since last indexing. Creates a MongoDB Atlas vector search index automatically.',
      schema: schemas.codeIndexSchema,
      handler: handleCodeIndex,
    },
    {
      name: 'vai_code_search',
      description: 'Semantic code search across an indexed codebase. Finds functions, classes, modules, and documentation semantically related to your natural language query. Uses voyage-code-3 by default. Supports filtering by programming language and content category. Results include file paths, line numbers, symbols, and relevance scores.',
      schema: schemas.codeSearchSchema,
      handler: handleCodeSearch,
    },
    {
      name: 'vai_code_query',
      description: 'Full RAG query against an indexed codebase. Embeds your question, performs vector search, reranks results, and returns the most relevant code with context. Optimized for answering questions like "how does X work" or "where is Y implemented". Always reranks for best quality.',
      schema: schemas.codeQuerySchema,
      handler: handleCodeQuery,
    },
    {
      name: 'vai_code_find_similar',
      description: 'Find code semantically similar to a given snippet. Paste in a function, class, or code block and find related implementations across indexed codebases. Useful for finding duplicates, alternative implementations, or understanding patterns. Uses voyage-code-3 which understands both code structure and intent.',
      schema: schemas.codeFindSimilarSchema,
      handler: handleCodeFindSimilar,
    },
    {
      name: 'vai_code_status',
      description: 'Check the status of a code search index. Shows file count, chunk count, languages indexed, last indexing time, and vector search index health. Use this before searching to verify the index is ready, or to decide if a refresh is needed.',
      schema: schemas.codeStatusSchema,
      handler: handleCodeStatus,
    },
  ];

  for (const { name, description, schema, handler } of toolTable) {
    server.tool(name, description, schema, handler);
  }
}
612
+
613
+ module.exports = {
614
+ registerCodeSearchTools,
615
+ handleCodeIndex,
616
+ handleCodeSearch,
617
+ handleCodeQuery,
618
+ handleCodeFindSimilar,
619
+ handleCodeStatus,
620
+ };
@@ -5,6 +5,7 @@ const { generateEmbeddings } = require('../../lib/api');
5
5
  const { getMongoCollection } = require('../../lib/mongo');
6
6
  const { loadProject } = require('../../lib/project');
7
7
  const { getDefaultModel } = require('../../lib/catalog');
8
+ const { resolveDbCollection } = require('../utils');
8
9
 
9
10
  /**
10
11
  * Handler for vai_ingest: chunk, embed, and store a document.
@@ -12,12 +13,8 @@ const { getDefaultModel } = require('../../lib/catalog');
12
13
  * @returns {Promise<{structuredContent: object, content: Array}>}
13
14
  */
14
15
  async function handleVaiIngest(input) {
16
+ const { db, collection: collName } = resolveDbCollection(input);
15
17
  const { config: proj } = loadProject();
16
- const db = input.db || proj.db;
17
- const collName = input.collection || proj.collection;
18
- if (!db) throw new Error('No database specified. Pass db parameter or configure via vai init.');
19
- if (!collName) throw new Error('No collection specified. Pass collection parameter or configure via vai init.');
20
-
21
18
  const model = input.model || proj.model || getDefaultModel();
22
19
  const start = Date.now();
23
20
 
@@ -4,20 +4,7 @@ const { generateEmbeddings, apiRequest } = require('../../lib/api');
4
4
  const { getMongoCollection } = require('../../lib/mongo');
5
5
  const { getDefaultModel, DEFAULT_RERANK_MODEL } = require('../../lib/catalog');
6
6
  const { loadProject } = require('../../lib/project');
7
-
8
- /**
9
- * Resolve db/collection from tool input, falling back to project config.
10
- * @param {object} input
11
- * @returns {{ db: string, collection: string }}
12
- */
13
- function resolveDbCollection(input) {
14
- const { config: proj } = loadProject();
15
- const db = input.db || proj.db;
16
- const collection = input.collection || proj.collection;
17
- if (!db) throw new Error('No database specified. Pass db parameter or configure via vai init.');
18
- if (!collection) throw new Error('No collection specified. Pass collection parameter or configure via vai init.');
19
- return { db, collection };
20
- }
7
+ const { resolveDbCollection } = require('../utils');
21
8
 
22
9
  /**
23
10
  * Handler for vai_query: full RAG query (embed, vector search, rerank).
@@ -32,7 +19,7 @@ async function handleVaiQuery(input) {
32
19
  const field = proj.field || 'embedding';
33
20
  const dimensions = proj.dimensions;
34
21
  const limit = input.limit;
35
- const candidateLimit = Math.min(limit * 4, 20);
22
+ const candidateLimit = limit * 4;
36
23
  const start = Date.now();
37
24
 
38
25
  // Step 1: Embed query