voyageai-cli 1.19.2 → 1.20.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  'use strict';
2
2
 
3
3
  const fs = require('fs');
4
- const { getDefaultModel, DEFAULT_RERANK_MODEL } = require('../lib/catalog');
4
+ const { getDefaultModel, DEFAULT_RERANK_MODEL, MODEL_CATALOG } = require('../lib/catalog');
5
5
  const { generateEmbeddings, apiRequest } = require('../lib/api');
6
6
  const { getMongoCollection } = require('../lib/mongo');
7
7
  const { loadProject } = require('../lib/project');
@@ -10,17 +10,41 @@ const ui = require('../lib/ui');
10
10
 
11
11
  /**
12
12
  * Load a test set from a JSONL file.
13
- * Each line: { "query": "...", "relevant": ["id1", "id2"] }
14
- * Or: { "query": "...", "relevant_texts": ["text1", "text2"] }
13
+ *
14
+ * Retrieval mode (default):
15
+ * { "query": "...", "relevant": ["id1", "id2"] }
16
+ * { "query": "...", "relevant_texts": ["text1", "text2"] }
17
+ *
18
+ * Rerank mode (--mode rerank):
19
+ * { "query": "...", "documents": ["doc1", "doc2", ...], "relevant": [0, 2] }
20
+ * relevant = indices into documents array that are considered relevant.
21
+ *
15
22
  * @param {string} filePath
16
- * @returns {Array<{query: string, relevant: string[], relevantTexts?: string[]}>}
23
+ * @param {string} mode - 'retrieval' or 'rerank'
24
+ * @returns {Array}
17
25
  */
18
- function loadTestSet(filePath) {
26
+ function loadTestSet(filePath, mode = 'retrieval') {
19
27
  const raw = fs.readFileSync(filePath, 'utf-8');
20
28
  const lines = raw.split('\n').filter(l => l.trim().length > 0);
21
29
  return lines.map((line, i) => {
22
30
  const item = JSON.parse(line);
23
31
  if (!item.query) throw new Error(`Line ${i + 1}: missing "query" field`);
32
+
33
+ if (mode === 'rerank') {
34
+ if (!item.documents || !Array.isArray(item.documents) || item.documents.length < 2) {
35
+ throw new Error(`Line ${i + 1}: rerank mode requires "documents" array (≥2 items)`);
36
+ }
37
+ if (!item.relevant || !Array.isArray(item.relevant) || item.relevant.length === 0) {
38
+ throw new Error(`Line ${i + 1}: rerank mode requires "relevant" array of document indices`);
39
+ }
40
+ return {
41
+ query: item.query,
42
+ documents: item.documents,
43
+ relevant: item.relevant, // indices into documents
44
+ };
45
+ }
46
+
47
+ // Retrieval mode
24
48
  if (!item.relevant && !item.relevant_texts) {
25
49
  throw new Error(`Line ${i + 1}: need "relevant" (doc IDs) or "relevant_texts" (text matches)`);
26
50
  }
@@ -39,25 +63,34 @@ function loadTestSet(filePath) {
39
63
  function registerEval(program) {
40
64
  program
41
65
  .command('eval')
42
- .description('Evaluate retrieval quality — measure MRR, NDCG, recall on your data')
66
+ .description('Evaluate retrieval & reranking quality — MRR, NDCG, Recall on your data')
43
67
  .requiredOption('--test-set <path>', 'JSONL file with queries and expected results')
44
- .option('--db <database>', 'Database name')
45
- .option('--collection <name>', 'Collection name')
46
- .option('--index <name>', 'Vector search index name')
47
- .option('--field <name>', 'Embedding field name')
48
- .option('-m, --model <model>', 'Embedding model for queries')
49
- .option('-d, --dimensions <n>', 'Output dimensions', (v) => parseInt(v, 10))
68
+ .option('--mode <mode>', 'Evaluation mode: "retrieval" (default) or "rerank"', 'retrieval')
69
+ .option('--db <database>', 'Database name (retrieval mode)')
70
+ .option('--collection <name>', 'Collection name (retrieval mode)')
71
+ .option('--index <name>', 'Vector search index name (retrieval mode)')
72
+ .option('--field <name>', 'Embedding field name (retrieval mode)')
73
+ .option('-m, --model <model>', 'Embedding model (retrieval) or rerank model (rerank mode)')
74
+ .option('--models <models>', 'Compare multiple rerank models (comma-separated)')
75
+ .option('-d, --dimensions <n>', 'Output dimensions (retrieval mode)', (v) => parseInt(v, 10))
50
76
  .option('-l, --limit <n>', 'Vector search candidates per query', (v) => parseInt(v, 10), 20)
51
77
  .option('-k, --k-values <values>', 'Comma-separated K values for @K metrics', '1,3,5,10')
52
- .option('--rerank', 'Enable reranking')
53
- .option('--no-rerank', 'Skip reranking')
54
- .option('--rerank-model <model>', 'Reranking model')
78
+ .option('--rerank', 'Enable reranking (retrieval mode)')
79
+ .option('--no-rerank', 'Skip reranking (retrieval mode)')
80
+ .option('--rerank-model <model>', 'Reranking model (retrieval mode)')
81
+ .option('--top-k <n>', 'Top-K results to return from reranker', (v) => parseInt(v, 10))
55
82
  .option('--text-field <name>', 'Document text field', 'text')
56
83
  .option('--id-field <name>', 'Document ID field for matching (default: _id)', '_id')
57
84
  .option('--compare <configs>', 'Compare configs: "model1,model2" or "rerank,no-rerank"')
58
85
  .option('--json', 'Machine-readable JSON output')
59
86
  .option('-q, --quiet', 'Suppress non-essential output')
60
87
  .action(async (opts) => {
88
+ // Dispatch to rerank eval mode
89
+ if (opts.mode === 'rerank') {
90
+ await evalRerank(opts);
91
+ return;
92
+ }
93
+
61
94
  let client;
62
95
  try {
63
96
  // Merge project config
@@ -236,23 +269,7 @@ function registerEval(program) {
236
269
  console.log(` ${label} ${bar} ${color}`);
237
270
  }
238
271
 
239
- console.log('');
240
-
241
- // Highlight key metrics
242
- const mrr = aggregated.mrr;
243
- const recall5 = aggregated['r@5'];
244
- const ndcg10 = aggregated['ndcg@10'];
245
-
246
- if (mrr !== undefined) {
247
- const grade = mrr >= 0.8 ? ui.green('Excellent') : mrr >= 0.6 ? ui.cyan('Good') : mrr >= 0.4 ? ui.yellow('Fair') : ui.red('Needs work');
248
- console.log(ui.label('MRR', `${mrr.toFixed(4)} — ${grade}`));
249
- }
250
- if (recall5 !== undefined) {
251
- console.log(ui.label('Recall@5', `${(recall5 * 100).toFixed(1)}% of relevant docs found in top 5`));
252
- }
253
- if (ndcg10 !== undefined) {
254
- console.log(ui.label('NDCG@10', `${ndcg10.toFixed(4)} — ranking quality`));
255
- }
272
+ printMetricHighlights(aggregated);
256
273
 
257
274
  // Worst queries
258
275
  if (worstQueries.length > 0 && worstQueries[0].metrics.mrr < 1) {
@@ -269,6 +286,9 @@ function registerEval(program) {
269
286
  console.log(ui.dim(` ${testSet.length} queries evaluated | Tokens: embed ${totalEmbedTokens}${totalRerankTokens ? `, rerank ${totalRerankTokens}` : ''}`));
270
287
 
271
288
  // Suggestions
289
+ const mrr = aggregated.mrr;
290
+ const recall5 = aggregated['r@5'];
291
+
272
292
  console.log('');
273
293
  if (mrr !== undefined && mrr < 0.6) {
274
294
  console.log(ui.dim(' 💡 Low MRR? Try: larger model, more candidates (--limit), or enable reranking (--rerank)'));
@@ -276,6 +296,7 @@ function registerEval(program) {
276
296
  if (recall5 !== undefined && recall5 < 0.5) {
277
297
  console.log(ui.dim(' 💡 Low recall? Try: increasing --limit, different chunking strategy, or review your test set'));
278
298
  }
299
+ console.log(ui.dim(' 💡 Evaluate reranking quality: vai eval --mode rerank --test-set rerank-test.jsonl'));
279
300
  } catch (err) {
280
301
  console.error(ui.error(err.message));
281
302
  process.exit(1);
@@ -285,6 +306,306 @@ function registerEval(program) {
285
306
  });
286
307
  }
287
308
 
309
/**
 * Evaluate reranking quality.
 *
 * Test set format (JSONL):
 *   { "query": "...", "documents": ["doc1", "doc2", ...], "relevant": [0, 2, 5] }
 *   relevant = indices into the documents array that are considered relevant.
 *
 * Sends each query + documents to the rerank API (one request per query,
 * one model after another), then scores the returned ordering with
 * computeMetrics()/aggregateMetrics().
 *
 * Output goes to stdout: JSON when opts.json is set, otherwise a detailed
 * single-model report or a multi-model comparison, followed by suggestions.
 * Any failure is reported through ui.error and ends the process with exit
 * code 1.
 *
 * @param {object} opts - Parsed CLI options.
 * @param {string} opts.testSet - Path to the JSONL test set.
 * @param {string} opts.kValues - Comma-separated K values for @K metrics.
 * @param {string} [opts.model] - Single rerank model; DEFAULT_RERANK_MODEL when omitted.
 * @param {string} [opts.models] - Comma-separated rerank models to compare.
 * @param {number} [opts.topK] - Sent to the rerank API as top_k when truthy.
 * @param {boolean} [opts.json] - Emit machine-readable JSON instead of the report.
 * @param {boolean} [opts.quiet] - Suppress the banner and progress output.
 * @returns {Promise<void>}
 */
async function evalRerank(opts) {
  try {
    const kValues = opts.kValues
      .split(',')
      .map(v => Number.parseInt(v.trim(), 10))
      .filter(v => !Number.isNaN(v));

    // Load test set in rerank mode
    let testSet;
    try {
      testSet = loadTestSet(opts.testSet, 'rerank');
    } catch (err) {
      console.error(ui.error(`Failed to load test set: ${err.message}`));
      process.exit(1);
    }

    if (testSet.length === 0) {
      console.error(ui.error('Test set is empty.'));
      process.exit(1);
    }

    // Determine which models to evaluate. Empty entries (e.g. a trailing
    // comma in --models) are dropped so no request is sent with a blank
    // model name; if nothing usable remains, fall back to --model/default.
    const requestedModels = opts.models
      ? opts.models.split(',').map(m => m.trim()).filter(m => m.length > 0)
      : [];
    const rerankModels = requestedModels.length > 0
      ? requestedModels
      : [opts.model || DEFAULT_RERANK_MODEL];

    const topK = opts.topK || undefined;
    const verbose = !opts.json && !opts.quiet;

    if (verbose) {
      console.log('');
      console.log(ui.bold('📊 Rerank Evaluation'));
      console.log(ui.dim(` Test set: ${testSet.length} queries`));
      console.log(ui.dim(` Models: ${rerankModels.join(', ')}`));
      console.log(ui.dim(` K values: ${kValues.join(', ')}`));
      if (topK) console.log(ui.dim(` Top-K: ${topK}`));
      console.log('');
    }

    // Models are evaluated one after another so the progress line and the
    // per-query latency measurements are not interleaved.
    const allModelResults = [];
    for (const rerankModel of rerankModels) {
      allModelResults.push(await evaluateRerankModel(rerankModel, testSet, kValues, topK, verbose));
    }

    // JSON output
    if (opts.json) {
      console.log(JSON.stringify({
        mode: 'rerank',
        kValues,
        models: allModelResults.map(r => ({
          model: r.model,
          summary: r.aggregated,
          tokens: r.totalTokens,
          avgLatencyMs: r.avgLatencyMs,
          queries: r.queries,
          perQuery: r.perQuery,
        })),
      }, null, 2));
      return;
    }

    // Pretty output
    if (allModelResults.length === 1) {
      printSingleRerankReport(allModelResults[0]);
    } else {
      printRerankComparison(allModelResults);
    }

    // Suggestions are based on the model with the highest nDCG@5.
    console.log('');
    const bestResult = allModelResults.reduce((a, b) =>
      (a.aggregated['ndcg@5'] || 0) >= (b.aggregated['ndcg@5'] || 0) ? a : b
    );
    const ndcg5 = bestResult.aggregated['ndcg@5'];
    const recall5 = bestResult.aggregated['r@5'];

    if (ndcg5 !== undefined && ndcg5 < 0.5) {
      console.log(ui.dim(' 💡 Low nDCG@5? Try: more documents in the candidate set, or a different reranker.'));
    }
    if (recall5 !== undefined && recall5 < 0.5) {
      console.log(ui.dim(' 💡 Low Recall@5? The reranker may need more candidates to work with (increase initial retrieval).'));
    }
    // Only point at --models when a single model was evaluated; after a
    // comparison run the hint would just repeat what the user already did.
    if (allModelResults.length === 1) {
      console.log(ui.dim(' 💡 Compare with: vai eval --mode rerank --models "rerank-2.5,rerank-2.5-lite" --test-set data.jsonl'));
    }

    console.log('');
  } catch (err) {
    console.error(ui.error(err.message));
    process.exit(1);
  }
}

/**
 * Run every test case through the rerank API for one model and score it.
 *
 * @param {string} rerankModel - Model name sent to the API.
 * @param {Array<{query: string, documents: string[], relevant: number[]}>} testSet
 * @param {number[]} kValues - K values passed to computeMetrics.
 * @param {number|undefined} topK - Sent as top_k when truthy.
 * @param {boolean} verbose - Write a progress line to stderr.
 * @returns {Promise<object>} Aggregated metrics, per-query details, token
 *   total, average latency and the catalog price (null when unknown).
 */
async function evaluateRerankModel(rerankModel, testSet, kValues, topK, verbose) {
  const perQueryResults = [];
  let totalTokens = 0;
  let totalLatency = 0;

  for (let qi = 0; qi < testSet.length; qi++) {
    const testCase = testSet[qi];

    if (verbose) {
      process.stderr.write(`\r [${rerankModel}] Evaluating query ${qi + 1}/${testSet.length}...`);
    }

    // Call rerank API
    const start = Date.now();
    const rerankResult = await apiRequest('/rerank', {
      query: testCase.query,
      documents: testCase.documents,
      model: rerankModel,
      ...(topK ? { top_k: topK } : {}),
    });
    const elapsed = Date.now() - start;
    totalLatency += elapsed;
    totalTokens += rerankResult.usage?.total_tokens || 0;

    // Each returned item is read as { index, relevance_score }; the array
    // order is taken as the reranked order.
    const rerankedItems = rerankResult.data || [];

    // The metrics helpers work on IDs, so document indices are turned into
    // synthetic string IDs on both sides of the comparison.
    const relevantIdSet = new Set(testCase.relevant.map(idx => `doc_${idx}`));
    const retrievedIds = rerankedItems.map(item => `doc_${item.index}`);

    const metrics = computeMetrics(retrievedIds, [...relevantIdSet], kValues);

    perQueryResults.push({
      query: testCase.query,
      relevant: testCase.relevant,
      rerankedOrder: rerankedItems.map(r => r.index),
      scores: rerankedItems.map(r => ({ index: r.index, score: r.relevance_score })),
      metrics,
      hits: retrievedIds.filter(id => relevantIdSet.has(id)).length,
      latencyMs: elapsed,
    });
  }

  if (verbose) {
    // Blank out the progress line.
    process.stderr.write('\r' + ' '.repeat(60) + '\r');
  }

  const aggregated = aggregateMetrics(perQueryResults.map(r => r.metrics));

  // Price is parsed from the first "$<number>" in the catalog entry's price
  // text; a missing entry, a non-matching string or a zero price yields null.
  const catalogEntry = MODEL_CATALOG.find(m => m.name === rerankModel || m.name === `rerank-${rerankModel}`);
  const priceMatch = catalogEntry ? catalogEntry.price.match(/\$([0-9.]+)/) : null;
  const pricePerM = priceMatch ? Number.parseFloat(priceMatch[1]) || null : null;

  return {
    model: rerankModel,
    aggregated,
    perQuery: perQueryResults,
    totalTokens,
    avgLatencyMs: totalLatency / testSet.length,
    pricePerMTokens: pricePerM,
    queries: testSet.length,
  };
}

/**
 * Print the detailed report for a single evaluated model: one bar per
 * aggregated metric, the metric highlights, and the lowest-MRR queries.
 *
 * @param {object} result - One entry produced by evaluateRerankModel.
 */
function printSingleRerankReport(result) {
  console.log(ui.bold(`Results: ${result.model}`));
  console.log('');

  const metricKeys = Object.keys(result.aggregated);
  const maxKeyLen = Math.max(...metricKeys.map(k => k.length));

  for (const key of metricKeys) {
    const val = result.aggregated[key];
    const bar = renderBar(val, 20);
    const label = key.toUpperCase().padEnd(maxKeyLen + 1);
    const valStr = val.toFixed(4);
    const color = val >= 0.8 ? ui.green(valStr) : val >= 0.5 ? ui.cyan(valStr) : ui.yellow(valStr);
    console.log(` ${label} ${bar} ${color}`);
  }

  printMetricHighlights(result.aggregated);

  // Worst queries: up to three with the lowest MRR, shown only when the
  // worst one is not a perfect hit.
  const sorted = [...result.perQuery].sort((a, b) => a.metrics.mrr - b.metrics.mrr);
  const worstQueries = sorted.slice(0, Math.min(3, sorted.length));
  if (worstQueries.length > 0 && worstQueries[0].metrics.mrr < 1) {
    console.log('');
    console.log(ui.bold('Hardest queries:'));
    for (const wq of worstQueries) {
      const preview = wq.query.substring(0, 60) + (wq.query.length > 60 ? '...' : '');
      const mrrStr = wq.metrics.mrr === 0 ? ui.red('miss') : ui.yellow(wq.metrics.mrr.toFixed(2));
      console.log(` ${mrrStr} "${preview}" (${wq.hits}/${wq.relevant.length} relevant found)`);
    }
  }

  console.log('');
  console.log(ui.dim(` ${result.queries} queries | ${result.totalTokens} tokens | avg ${result.avgLatencyMs.toFixed(0)}ms/query`));
}

/**
 * Print the side-by-side comparison for two or more evaluated models:
 * summary table, nDCG bar charts, top-K ranking agreement and per-query
 * token/cost averages.
 *
 * @param {object[]} allModelResults - Entries produced by evaluateRerankModel,
 *   all evaluated against the same test set.
 */
function printRerankComparison(allModelResults) {
  const queryCount = allModelResults[0].queries;

  console.log(ui.bold('Rerank Model Comparison'));
  console.log('');

  // Summary table: only metrics the first model actually reports.
  const keyMetrics = ['mrr', 'ndcg@5', 'ndcg@10', 'r@5', 'r@10', 'ap'];
  const availableMetrics = keyMetrics.filter(k => allModelResults[0].aggregated[k] !== undefined);

  // Header
  const modelColW = Math.max(22, ...allModelResults.map(r => r.model.length + 2));
  const header = ` ${'Model'.padEnd(modelColW)} ${availableMetrics.map(m => m.toUpperCase().padStart(9)).join('')} ${'Latency'.padStart(9)} ${'$/1M tok'.padStart(9)}`;
  console.log(ui.dim(header));
  console.log(ui.dim(' ' + '─'.repeat(header.length - 2)));

  // Find best value per metric for highlighting
  const bestPerMetric = {};
  for (const m of availableMetrics) {
    bestPerMetric[m] = Math.max(...allModelResults.map(r => r.aggregated[m]));
  }

  for (const result of allModelResults) {
    const cols = availableMetrics.map(m => {
      const val = result.aggregated[m];
      const str = val.toFixed(4);
      return val === bestPerMetric[m] ? ui.green(str.padStart(9)) : str.padStart(9);
    }).join('');

    const latStr = `${result.avgLatencyMs.toFixed(0)}ms`.padStart(9);
    const priceStr = result.pricePerMTokens != null ? `$${result.pricePerMTokens.toFixed(3)}`.padStart(9) : 'N/A'.padStart(9);

    console.log(` ${result.model.padEnd(modelColW)} ${cols} ${latStr} ${priceStr}`);
  }

  console.log('');

  // Per-metric visual comparison
  for (const m of ['ndcg@5', 'ndcg@10']) {
    // Skip only when the metric is absent; a score of exactly 0 is a real
    // result and must still be charted.
    if (allModelResults[0].aggregated[m] === undefined) continue;
    console.log(ui.bold(` ${m.toUpperCase()}`));
    for (const result of allModelResults) {
      const val = result.aggregated[m];
      const bar = renderBar(val, 30);
      const color = val === bestPerMetric[m] ? ui.green(val.toFixed(4)) : ui.cyan(val.toFixed(4));
      console.log(` ${result.model.padEnd(modelColW - 2)} ${bar} ${color}`);
    }
    console.log('');
  }

  // Agreement analysis: a query "agrees" when every model returns the same
  // first maxK indices in the same order. maxK is capped at 5 and at the
  // length of each model's first reranked list.
  console.log(ui.bold('Ranking Agreement'));
  const maxK = Math.min(5, ...allModelResults.map(r => r.perQuery[0]?.rerankedOrder?.length || 5));
  let agreeCount = 0;
  for (let qi = 0; qi < queryCount; qi++) {
    const orders = allModelResults.map(r => r.perQuery[qi].rerankedOrder.slice(0, maxK).join(','));
    if (orders.every(o => o === orders[0])) agreeCount++;
  }
  const agreePct = ((agreeCount / queryCount) * 100).toFixed(0);
  console.log(` ${agreeCount}/${queryCount} queries (${agreePct}%) have identical top-${maxK} rankings`);

  if (Number.parseInt(agreePct, 10) > 80) {
    console.log(ui.info(' High agreement — the cheaper/faster model may be sufficient.'));
  } else {
    console.log(ui.warn(' Significant disagreement — the premium model may capture important nuances.'));
  }

  console.log('');

  // Token/cost summary
  console.log(ui.dim(' Per-query averages:'));
  for (const result of allModelResults) {
    const tokPerQ = result.totalTokens / result.queries;
    const costPerQ = result.pricePerMTokens != null ? (tokPerQ / 1e6) * result.pricePerMTokens : null;
    const costStr = costPerQ != null ? `$${costPerQ.toFixed(6)}/query` : '';
    console.log(ui.dim(` ${result.model}: ${result.avgLatencyMs.toFixed(0)}ms, ${tokPerQ.toFixed(0)} tokens ${costStr}`));
  }
}
582
+
583
/**
 * Print a short interpretation of the headline metrics.
 *
 * Always emits one blank line first, then one labelled line per metric that
 * is present in `aggregated`, in this order: MRR (with a qualitative grade),
 * NDCG@5, NDCG@10, Recall@5 (as a percentage).
 *
 * @param {object} aggregated - Aggregated metrics keyed by 'mrr', 'ndcg@5',
 *   'ndcg@10' and 'r@5'; keys that are undefined are skipped.
 */
function printMetricHighlights(aggregated) {
  console.log('');

  const { mrr, 'ndcg@5': ndcgAt5, 'ndcg@10': ndcgAt10, 'r@5': recallAt5 } = aggregated;

  if (mrr !== undefined) {
    let grade;
    if (mrr >= 0.8) {
      grade = ui.green('Excellent');
    } else if (mrr >= 0.6) {
      grade = ui.cyan('Good');
    } else if (mrr >= 0.4) {
      grade = ui.yellow('Fair');
    } else {
      grade = ui.red('Needs work');
    }
    console.log(ui.label('MRR', `${mrr.toFixed(4)} — ${grade}`));
  }

  // Both nDCG cut-offs share the same wording apart from the cut-off itself.
  const ndcgRows = [
    ['NDCG@5', ndcgAt5, 'top 5'],
    ['NDCG@10', ndcgAt10, 'top 10'],
  ];
  for (const [name, score, scope] of ndcgRows) {
    if (score === undefined) continue;
    console.log(ui.label(name, `${score.toFixed(4)} — ranking precision (${scope})`));
  }

  if (recallAt5 !== undefined) {
    const pct = (recallAt5 * 100).toFixed(1);
    console.log(ui.label('Recall@5', `${pct}% of relevant docs found in top 5`));
  }
}
608
+
288
609
  /**
289
610
  * Render a simple ASCII bar chart.
290
611
  * @param {number} value - 0.0 to 1.0
@@ -82,6 +82,30 @@ function createPlaygroundServer() {
82
82
  return;
83
83
  }
84
84
 
85
+ // Serve icon assets: /icons/{dark|light}/{size}.png
86
+ const iconMatch = req.url.match(/^\/icons\/(dark|light)\/(\d+)\.png$/);
87
+ if (req.method === 'GET' && iconMatch) {
88
+ const variant = iconMatch[1];
89
+ const size = iconMatch[2];
90
+ // Try portable path first (src/playground/icons/), then electron/icons/
91
+ const portablePath = path.join(__dirname, '..', 'playground', 'icons', variant, `${size}.png`);
92
+ const electronPath = path.join(__dirname, '..', '..', 'electron', 'icons', variant,
93
+ 'AppIcons', 'Assets.xcassets', 'AppIcon.appiconset', `${size}.png`);
94
+ const iconPath = fs.existsSync(portablePath) ? portablePath : electronPath;
95
+ if (fs.existsSync(iconPath)) {
96
+ const data = fs.readFileSync(iconPath);
97
+ res.writeHead(200, {
98
+ 'Content-Type': 'image/png',
99
+ 'Cache-Control': 'public, max-age=86400',
100
+ });
101
+ res.end(data);
102
+ } else {
103
+ res.writeHead(404);
104
+ res.end('Icon not found');
105
+ }
106
+ return;
107
+ }
108
+
85
109
  // API: Models
86
110
  if (req.method === 'GET' && req.url === '/api/models') {
87
111
  const models = MODEL_CATALOG.filter(m => !m.legacy && !m.local && !m.unreleased);
@@ -125,6 +149,17 @@ function createPlaygroundServer() {
125
149
 
126
150
  // Parse JSON body for POST routes
127
151
  if (req.method === 'POST') {
152
+ // Check for API key before processing any API calls
153
+ const apiKeyConfigured = !!(process.env.VOYAGE_API_KEY || getConfigValue('apiKey'));
154
+ if (!apiKeyConfigured) {
155
+ res.writeHead(401, { 'Content-Type': 'application/json' });
156
+ res.end(JSON.stringify({
157
+ error: 'No API key configured. Run: vai config set api-key <your-key>',
158
+ code: 'NO_API_KEY',
159
+ }));
160
+ return;
161
+ }
162
+
128
163
  const body = await readBody(req);
129
164
  let parsed;
130
165
  try {
@@ -178,6 +213,27 @@ function createPlaygroundServer() {
178
213
  return;
179
214
  }
180
215
 
216
+ // API: Multimodal Embed
217
+ if (req.url === '/api/multimodal-embed') {
218
+ const { inputs, model, input_type, output_dimension } = parsed;
219
+ if (!inputs || !Array.isArray(inputs) || inputs.length === 0) {
220
+ res.writeHead(400, { 'Content-Type': 'application/json' });
221
+ res.end(JSON.stringify({ error: 'inputs must be a non-empty array' }));
222
+ return;
223
+ }
224
+ const { apiRequest } = require('../lib/api');
225
+ const mmBody = {
226
+ inputs,
227
+ model: model || 'voyage-multimodal-3.5',
228
+ };
229
+ if (input_type) mmBody.input_type = input_type;
230
+ if (output_dimension) mmBody.output_dimension = output_dimension;
231
+ const result = await apiRequest('/multimodalembeddings', mmBody);
232
+ res.writeHead(200, { 'Content-Type': 'application/json' });
233
+ res.end(JSON.stringify(result));
234
+ return;
235
+ }
236
+
181
237
  // API: Benchmark (single model, single round — UI calls this per model)
182
238
  if (req.url === '/api/benchmark/embed') {
183
239
  const { texts, model, inputType, dimensions } = parsed;