voyageai-cli 1.13.0 → 1.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "voyageai-cli",
3
- "version": "1.13.0",
3
+ "version": "1.15.0",
4
4
  "description": "CLI for Voyage AI embeddings, reranking, and MongoDB Atlas Vector Search",
5
5
  "bin": {
6
6
  "vai": "./src/cli.js"
package/src/cli.js CHANGED
@@ -20,6 +20,7 @@ const { registerIngest } = require('./commands/ingest');
20
20
  const { registerCompletions } = require('./commands/completions');
21
21
  const { registerPlayground } = require('./commands/playground');
22
22
  const { registerBenchmark } = require('./commands/benchmark');
23
+ const { registerEstimate } = require('./commands/estimate');
23
24
  const { registerAbout } = require('./commands/about');
24
25
  const { showBanner, showQuickStart, getVersion } = require('./lib/banner');
25
26
 
@@ -45,6 +46,7 @@ registerIngest(program);
45
46
  registerCompletions(program);
46
47
  registerPlayground(program);
47
48
  registerBenchmark(program);
49
+ registerEstimate(program);
48
50
  registerAbout(program);
49
51
 
50
52
  // Append disclaimer to all help output
package/src/commands/benchmark.js CHANGED (file header missing in diff; inferred from `registerBenchmark` context)
@@ -1212,6 +1212,170 @@ function registerBenchmark(program) {
1212
1212
  .option('--json', 'Machine-readable JSON output')
1213
1213
  .option('-q, --quiet', 'Suppress non-essential output')
1214
1214
  .action(benchmarkAsymmetric);
1215
+
1216
+ // ── benchmark space ──
1217
+ bench
1218
+ .command('space')
1219
+ .description('Validate shared embedding space — embed same text with all Voyage 4 models')
1220
+ .option('--text <text>', 'Text to embed across models')
1221
+ .option('--texts <texts>', 'Comma-separated texts to compare')
1222
+ .option('--models <models>', 'Comma-separated models', 'voyage-4-large,voyage-4,voyage-4-lite')
1223
+ .option('-d, --dimensions <n>', 'Output dimensions (must be supported by all models)')
1224
+ .option('--json', 'Machine-readable JSON output')
1225
+ .option('-q, --quiet', 'Suppress non-essential output')
1226
+ .action(benchmarkSpace);
1227
+ }
1228
+
1229
+ /**
1230
+ * benchmark space — Validate shared embedding space across Voyage 4 models.
1231
+ * Embeds the same text(s) with multiple models, then computes pairwise cosine
1232
+ * similarities to prove they produce compatible embeddings.
1233
+ */
1234
+ async function benchmarkSpace(opts) {
1235
+ const models = opts.models
1236
+ ? parseModels(opts.models)
1237
+ : ['voyage-4-large', 'voyage-4', 'voyage-4-lite'];
1238
+
1239
+ const texts = opts.texts
1240
+ ? opts.texts.split(',').map(t => t.trim())
1241
+ : opts.text
1242
+ ? [opts.text]
1243
+ : [
1244
+ 'MongoDB Atlas provides a fully managed cloud database with vector search.',
1245
+ 'Machine learning models transform raw data into semantic embeddings.',
1246
+ 'The quick brown fox jumps over the lazy dog.',
1247
+ ];
1248
+
1249
+ const dimensions = opts.dimensions ? parseInt(opts.dimensions, 10) : undefined;
1250
+
1251
+ if (!opts.json && !opts.quiet) {
1252
+ console.log('');
1253
+ console.log(ui.bold(' 🔮 Shared Embedding Space Validation'));
1254
+ console.log(ui.dim(` Models: ${models.join(', ')}`));
1255
+ console.log(ui.dim(` Texts: ${texts.length}${dimensions ? `, dimensions: ${dimensions}` : ''}`));
1256
+ console.log('');
1257
+ }
1258
+
1259
+ // Embed all texts with all models
1260
+ const embeddings = {}; // { model: [[embedding for text 0], [embedding for text 1], ...] }
1261
+
1262
+ for (const model of models) {
1263
+ const spin = (!opts.json && !opts.quiet) ? ui.spinner(` Embedding with ${model}...`) : null;
1264
+ if (spin) spin.start();
1265
+
1266
+ try {
1267
+ const embedOpts = { model, inputType: 'document' };
1268
+ if (dimensions) embedOpts.dimensions = dimensions;
1269
+ const result = await generateEmbeddings(texts, embedOpts);
1270
+ embeddings[model] = result.data.map(d => d.embedding);
1271
+ if (spin) spin.stop();
1272
+ } catch (err) {
1273
+ if (spin) spin.stop();
1274
+ console.error(ui.warn(` ${model}: ${err.message} — skipping`));
1275
+ }
1276
+ }
1277
+
1278
+ const validModels = Object.keys(embeddings);
1279
+ if (validModels.length < 2) {
1280
+ console.error(ui.error('Need at least 2 models to compare embedding spaces.'));
1281
+ process.exit(1);
1282
+ }
1283
+
1284
+ // Compute pairwise cross-model similarities for each text
1285
+ const results = [];
1286
+
1287
+ for (let t = 0; t < texts.length; t++) {
1288
+ const textResult = {
1289
+ text: texts[t],
1290
+ pairs: [],
1291
+ };
1292
+
1293
+ for (let i = 0; i < validModels.length; i++) {
1294
+ for (let j = i + 1; j < validModels.length; j++) {
1295
+ const modelA = validModels[i];
1296
+ const modelB = validModels[j];
1297
+ const sim = cosineSimilarity(embeddings[modelA][t], embeddings[modelB][t]);
1298
+ textResult.pairs.push({
1299
+ modelA,
1300
+ modelB,
1301
+ similarity: sim,
1302
+ });
1303
+ }
1304
+ }
1305
+
1306
+ results.push(textResult);
1307
+ }
1308
+
1309
+ // Also compute within-model similarity across different texts (baseline)
1310
+ const withinModelSims = [];
1311
+ if (texts.length >= 2) {
1312
+ for (const model of validModels) {
1313
+ const sim = cosineSimilarity(embeddings[model][0], embeddings[model][1]);
1314
+ withinModelSims.push({ model, text0: texts[0], text1: texts[1], similarity: sim });
1315
+ }
1316
+ }
1317
+
1318
+ if (opts.json) {
1319
+ console.log(JSON.stringify({ benchmark: 'space', models: validModels, texts, results, withinModelSims }, null, 2));
1320
+ return;
1321
+ }
1322
+
1323
+ // Display results
1324
+ console.log(ui.bold(' Cross-Model Similarity (same text, different models):'));
1325
+ console.log(ui.dim(' High similarity (>0.95) = shared embedding space confirmed'));
1326
+ console.log('');
1327
+
1328
+ let allHigh = true;
1329
+ for (const r of results) {
1330
+ const preview = r.text.substring(0, 55) + (r.text.length > 55 ? '...' : '');
1331
+ console.log(` ${ui.dim('Text:')} "${preview}"`);
1332
+
1333
+ for (const p of r.pairs) {
1334
+ const simStr = p.similarity.toFixed(4);
1335
+ const quality = p.similarity >= 0.98 ? ui.green('●')
1336
+ : p.similarity >= 0.95 ? ui.cyan('●')
1337
+ : p.similarity >= 0.90 ? ui.yellow('●')
1338
+ : ui.red('●');
1339
+ if (p.similarity < 0.95) allHigh = false;
1340
+ console.log(` ${quality} ${rpad(p.modelA, 18)} ↔ ${rpad(p.modelB, 18)} ${ui.bold(simStr)}`);
1341
+ }
1342
+ console.log('');
1343
+ }
1344
+
1345
+ // Show within-model cross-text similarity for context
1346
+ if (withinModelSims.length > 0) {
1347
+ console.log(ui.bold(' Within-Model Similarity (different texts, same model):'));
1348
+ console.log(ui.dim(' Shows that cross-model same-text similarity is much higher'));
1349
+ console.log('');
1350
+
1351
+ for (const w of withinModelSims) {
1352
+ console.log(` ${ui.dim(rpad(w.model, 18))} text₀ ↔ text₁ ${ui.dim(w.similarity.toFixed(4))}`);
1353
+ }
1354
+ console.log('');
1355
+ }
1356
+
1357
+ // Summary
1358
+ const avgCrossModel = results.flatMap(r => r.pairs).reduce((sum, p) => sum + p.similarity, 0)
1359
+ / results.flatMap(r => r.pairs).length;
1360
+ const avgWithin = withinModelSims.length > 0
1361
+ ? withinModelSims.reduce((sum, w) => sum + w.similarity, 0) / withinModelSims.length
1362
+ : null;
1363
+
1364
+ if (allHigh) {
1365
+ console.log(ui.success(`Shared embedding space confirmed! Avg cross-model similarity: ${avgCrossModel.toFixed(4)}`));
1366
+ } else {
1367
+ console.log(ui.warn(`Cross-model similarity lower than expected. Avg: ${avgCrossModel.toFixed(4)}`));
1368
+ }
1369
+
1370
+ if (avgWithin !== null) {
1371
+ const ratio = (avgCrossModel / avgWithin).toFixed(1);
1372
+ console.log(ui.dim(` Cross-model same-text similarity is ${ratio}× higher than same-model different-text similarity.`));
1373
+ }
1374
+
1375
+ console.log('');
1376
+ console.log(ui.dim(' This means you can embed docs with voyage-4-large and query with voyage-4-lite'));
1377
+ console.log(ui.dim(' — the embeddings live in the same space. See "vai explain shared-space".'));
1378
+ console.log('');
1215
1379
  }
1216
1380
 
1217
1381
  module.exports = { registerBenchmark };
package/src/commands/completions.js CHANGED (file header missing in diff; inferred — escaped `\${…}` shell text inside a template literal)
@@ -19,7 +19,7 @@ _vai_completions() {
19
19
  prev="\${COMP_WORDS[COMP_CWORD-1]}"
20
20
 
21
21
  # Top-level commands
22
- commands="embed rerank store search index models ping config demo explain similarity ingest completions help"
22
+ commands="embed rerank store search index models ping config demo explain similarity ingest estimate completions help"
23
23
 
24
24
  # Subcommands
25
25
  local index_subs="create list delete"
@@ -102,6 +102,10 @@ _vai_completions() {
102
102
  COMPREPLY=( \$(compgen -W "--file --db --collection --field --model --input-type --dimensions --batch-size --text-field --text-column --strict --dry-run --json --quiet --help" -- "\$cur") )
103
103
  return 0
104
104
  ;;
105
+ estimate)
106
+ COMPREPLY=( \$(compgen -W "--docs --queries --doc-tokens --query-tokens --doc-model --query-model --months --json --quiet --help" -- "\$cur") )
107
+ return 0
108
+ ;;
105
109
  completions)
106
110
  COMPREPLY=( \$(compgen -W "bash zsh --help" -- "\$cur") )
107
111
  return 0
@@ -172,6 +176,7 @@ _vai() {
172
176
  'explain:Learn about AI and vector search concepts'
173
177
  'similarity:Compute cosine similarity between texts'
174
178
  'ingest:Bulk import documents with progress'
179
+ 'estimate:Estimate embedding costs — symmetric vs asymmetric'
175
180
  'completions:Generate shell completion scripts'
176
181
  'help:Display help for command'
177
182
  )
@@ -375,6 +380,18 @@ _vai() {
375
380
  '--json[Machine-readable JSON output]' \\
376
381
  '(-q --quiet)'{-q,--quiet}'[Suppress non-essential output]'
377
382
  ;;
383
+ estimate)
384
+ _arguments \\
385
+ '--docs[Number of documents]:count:' \\
386
+ '--queries[Queries per month]:count:' \\
387
+ '--doc-tokens[Avg tokens per document]:tokens:' \\
388
+ '--query-tokens[Avg tokens per query]:tokens:' \\
389
+ '--doc-model[Document embedding model]:model:(\$models)' \\
390
+ '--query-model[Query embedding model]:model:(\$models)' \\
391
+ '--months[Months to project]:months:' \\
392
+ '--json[Machine-readable JSON output]' \\
393
+ '(-q --quiet)'{-q,--quiet}'[Suppress non-essential output]'
394
+ ;;
378
395
  completions)
379
396
  _arguments \\
380
397
  '1:shell:(bash zsh)'
package/src/commands/estimate.js ADDED (file header missing in diff; inferred — new file exporting `registerEstimate`, required by src/cli.js)
@@ -0,0 +1,209 @@
1
+ 'use strict';
2
+
3
+ const { MODEL_CATALOG } = require('../lib/catalog');
4
+ const ui = require('../lib/ui');
5
+
6
+ // Average tokens per document/query (rough industry estimates)
7
+ const DEFAULT_DOC_TOKENS = 500;
8
+ const DEFAULT_QUERY_TOKENS = 30;
9
+
10
+ /**
11
+ * Parse a shorthand number: "1M" → 1000000, "500K" → 500000, "1B" → 1000000000.
12
+ * @param {string} val
13
+ * @returns {number}
14
+ */
15
+ function parseShorthand(val) {
16
+ if (!val) return NaN;
17
+ const str = String(val).trim().toUpperCase();
18
+ const multipliers = { K: 1e3, M: 1e6, B: 1e9, T: 1e12 };
19
+ const match = str.match(/^([\d.]+)\s*([KMBT])?$/);
20
+ if (!match) return parseFloat(str);
21
+ const num = parseFloat(match[1]);
22
+ const suffix = match[2];
23
+ return suffix ? num * multipliers[suffix] : num;
24
+ }
25
+
26
+ /**
27
+ * Format a number with commas: 1234567 → "1,234,567".
28
+ */
29
+ function formatNum(n) {
30
+ return n.toLocaleString('en-US');
31
+ }
32
+
33
+ /**
34
+ * Format dollars: 0.50 → "$0.50", 1234.56 → "$1,234.56".
35
+ */
36
+ function formatDollars(n) {
37
+ if (n < 0.01 && n > 0) return `$${n.toFixed(4)}`;
38
+ if (n < 1) return `$${n.toFixed(2)}`;
39
+ return '$' + n.toLocaleString('en-US', { minimumFractionDigits: 2, maximumFractionDigits: 2 });
40
+ }
41
+
42
+ /**
43
+ * Format a large number in short form: 1000000 → "1M".
44
+ */
45
+ function shortNum(n) {
46
+ if (n >= 1e9) return (n / 1e9).toFixed(n % 1e9 === 0 ? 0 : 1) + 'B';
47
+ if (n >= 1e6) return (n / 1e6).toFixed(n % 1e6 === 0 ? 0 : 1) + 'M';
48
+ if (n >= 1e3) return (n / 1e3).toFixed(n % 1e3 === 0 ? 0 : 1) + 'K';
49
+ return String(n);
50
+ }
51
+
52
+ /**
53
+ * Register the estimate command on a Commander program.
54
+ * @param {import('commander').Command} program
55
+ */
56
+ function registerEstimate(program) {
57
+ program
58
+ .command('estimate')
59
+ .description('Estimate embedding costs — symmetric vs asymmetric strategies')
60
+ .option('--docs <n>', 'Number of documents to embed (supports K/M/B shorthand)', '100K')
61
+ .option('--queries <n>', 'Number of queries per month (supports K/M/B shorthand)', '1M')
62
+ .option('--doc-tokens <n>', 'Average tokens per document', String(DEFAULT_DOC_TOKENS))
63
+ .option('--query-tokens <n>', 'Average tokens per query', String(DEFAULT_QUERY_TOKENS))
64
+ .option('--doc-model <model>', 'Model for document embedding (asymmetric)', 'voyage-4-large')
65
+ .option('--query-model <model>', 'Model for query embedding (asymmetric)', 'voyage-4-lite')
66
+ .option('--months <n>', 'Months to project', '12')
67
+ .option('--json', 'Machine-readable JSON output')
68
+ .option('-q, --quiet', 'Suppress non-essential output')
69
+ .action((opts) => {
70
+ const numDocs = parseShorthand(opts.docs);
71
+ const numQueries = parseShorthand(opts.queries);
72
+ const docTokens = parseInt(opts.docTokens, 10) || DEFAULT_DOC_TOKENS;
73
+ const queryTokens = parseInt(opts.queryTokens, 10) || DEFAULT_QUERY_TOKENS;
74
+ const months = parseInt(opts.months, 10) || 12;
75
+
76
+ if (isNaN(numDocs) || isNaN(numQueries)) {
77
+ console.error(ui.error('Invalid --docs or --queries value. Use numbers or shorthand (e.g., 1M, 500K).'));
78
+ process.exit(1);
79
+ }
80
+
81
+ // Get model prices
82
+ const v4Models = MODEL_CATALOG.filter(m => m.sharedSpace === 'voyage-4' && m.pricePerMToken != null);
83
+ const docModel = MODEL_CATALOG.find(m => m.name === opts.docModel);
84
+ const queryModel = MODEL_CATALOG.find(m => m.name === opts.queryModel);
85
+
86
+ if (!docModel || docModel.pricePerMToken == null) {
87
+ console.error(ui.error(`Unknown or unpriced model: ${opts.docModel}`));
88
+ process.exit(1);
89
+ }
90
+ if (!queryModel || queryModel.pricePerMToken == null) {
91
+ console.error(ui.error(`Unknown or unpriced model: ${opts.queryModel}`));
92
+ process.exit(1);
93
+ }
94
+
95
+ const docTotalTokens = numDocs * docTokens;
96
+ const queryTotalTokensPerMonth = numQueries * queryTokens;
97
+
98
+ // Calculate costs for different strategies
99
+ const strategies = [];
100
+
101
+ // Strategy 1: Symmetric with each V4 model
102
+ for (const model of v4Models) {
103
+ if (model.pricePerMToken === 0) continue; // skip free models for symmetric
104
+ const docCost = (docTotalTokens / 1e6) * model.pricePerMToken;
105
+ const queryCostPerMonth = (queryTotalTokensPerMonth / 1e6) * model.pricePerMToken;
106
+ const totalCost = docCost + (queryCostPerMonth * months);
107
+ strategies.push({
108
+ name: `Symmetric: ${model.name}`,
109
+ type: 'symmetric',
110
+ docModel: model.name,
111
+ queryModel: model.name,
112
+ docCost,
113
+ queryCostPerMonth,
114
+ totalCost,
115
+ months,
116
+ });
117
+ }
118
+
119
+ // Strategy 2: Asymmetric — user-specified doc+query combo
120
+ const asymDocCost = (docTotalTokens / 1e6) * docModel.pricePerMToken;
121
+ const asymQueryCostPerMonth = (queryTotalTokensPerMonth / 1e6) * queryModel.pricePerMToken;
122
+ const asymTotalCost = asymDocCost + (asymQueryCostPerMonth * months);
123
+ strategies.push({
124
+ name: `Asymmetric: ${docModel.name} docs + ${queryModel.name} queries`,
125
+ type: 'asymmetric',
126
+ docModel: docModel.name,
127
+ queryModel: queryModel.name,
128
+ docCost: asymDocCost,
129
+ queryCostPerMonth: asymQueryCostPerMonth,
130
+ totalCost: asymTotalCost,
131
+ months,
132
+ recommended: true,
133
+ });
134
+
135
+ // Strategy 3: Asymmetric with nano queries (if doc model isn't nano)
136
+ if (opts.queryModel !== 'voyage-4-nano') {
137
+ const nanoModel = MODEL_CATALOG.find(m => m.name === 'voyage-4-nano');
138
+ if (nanoModel) {
139
+ strategies.push({
140
+ name: `Asymmetric: ${docModel.name} docs + voyage-4-nano queries (local)`,
141
+ type: 'asymmetric-local',
142
+ docModel: docModel.name,
143
+ queryModel: 'voyage-4-nano',
144
+ docCost: asymDocCost,
145
+ queryCostPerMonth: 0,
146
+ totalCost: asymDocCost,
147
+ months,
148
+ });
149
+ }
150
+ }
151
+
152
+ // Sort by total cost
153
+ strategies.sort((a, b) => a.totalCost - b.totalCost);
154
+
155
+ if (opts.json) {
156
+ console.log(JSON.stringify({
157
+ params: { docs: numDocs, queries: numQueries, docTokens, queryTokens, months },
158
+ strategies,
159
+ }, null, 2));
160
+ return;
161
+ }
162
+
163
+ // Find the most expensive for savings comparison
164
+ const maxCost = Math.max(...strategies.map(s => s.totalCost));
165
+
166
+ if (!opts.quiet) {
167
+ console.log(ui.bold('💰 Voyage AI Cost Estimator'));
168
+ console.log('');
169
+ console.log(ui.label('Documents', `${shortNum(numDocs)} × ${formatNum(docTokens)} tokens = ${shortNum(docTotalTokens)} tokens (one-time)`));
170
+ console.log(ui.label('Queries', `${shortNum(numQueries)}/mo × ${formatNum(queryTokens)} tokens = ${shortNum(queryTotalTokensPerMonth)} tokens/mo`));
171
+ console.log(ui.label('Projection', `${months} months`));
172
+ console.log('');
173
+ }
174
+
175
+ console.log(ui.bold('Strategy Comparison:'));
176
+ console.log('');
177
+
178
+ for (const s of strategies) {
179
+ const savings = maxCost > 0 ? ((1 - s.totalCost / maxCost) * 100) : 0;
180
+ const savingsStr = savings > 0 ? ui.green(` (${savings.toFixed(0)}% savings)`) : '';
181
+ const marker = s.recommended ? ui.cyan(' ★ recommended') : '';
182
+ const localNote = s.type === 'asymmetric-local' ? ui.dim(' (query cost = $0, runs locally)') : '';
183
+
184
+ console.log(` ${s.recommended ? ui.cyan('►') : ' '} ${ui.bold(s.name)}${marker}`);
185
+ console.log(` Doc embedding: ${formatDollars(s.docCost)} ${ui.dim('(one-time)')}`);
186
+ console.log(` Query cost: ${formatDollars(s.queryCostPerMonth)}/mo${localNote}`);
187
+ console.log(` ${months}-mo total: ${ui.bold(formatDollars(s.totalCost))}${savingsStr}`);
188
+ console.log('');
189
+ }
190
+
191
+ // Show the asymmetric advantage
192
+ const symmetricLarge = strategies.find(s => s.type === 'symmetric' && s.docModel === 'voyage-4-large');
193
+ const asymmetric = strategies.find(s => s.recommended);
194
+ if (symmetricLarge && asymmetric && symmetricLarge.totalCost > asymmetric.totalCost) {
195
+ const saved = symmetricLarge.totalCost - asymmetric.totalCost;
196
+ const pct = ((saved / symmetricLarge.totalCost) * 100).toFixed(0);
197
+ console.log(ui.success(`Asymmetric retrieval saves ${formatDollars(saved)} (${pct}%) over symmetric voyage-4-large`));
198
+ console.log(ui.dim(' Same document quality — lower query costs. Shared embedding space makes this possible.'));
199
+ console.log('');
200
+ }
201
+
202
+ if (!opts.quiet) {
203
+ console.log(ui.dim('Tip: Use --doc-model and --query-model to compare any combination.'));
204
+ console.log(ui.dim(' Use "vai explain shared-space" to learn about asymmetric retrieval.'));
205
+ }
206
+ });
207
+ }
208
+
209
+ module.exports = { registerEstimate };
package/src/commands/models.js CHANGED (file header missing in diff; inferred from `registerModels` context)
@@ -1,6 +1,6 @@
1
1
  'use strict';
2
2
 
3
- const { MODEL_CATALOG } = require('../lib/catalog');
3
+ const { MODEL_CATALOG, BENCHMARK_SCORES } = require('../lib/catalog');
4
4
  const { getApiBase } = require('../lib/api');
5
5
  const { formatTable } = require('../lib/format');
6
6
  const ui = require('../lib/ui');
@@ -42,6 +42,7 @@ function registerModels(program) {
42
42
  .option('-t, --type <type>', 'Filter by type: embedding, reranking, or all', 'all')
43
43
  .option('-a, --all', 'Show all models including legacy')
44
44
  .option('-w, --wide', 'Wide output (show all columns untruncated)')
45
+ .option('-b, --benchmarks', 'Show RTEB benchmark scores')
45
46
  .option('--json', 'Machine-readable JSON output')
46
47
  .option('-q, --quiet', 'Suppress non-essential output')
47
48
  .action((opts) => {
@@ -86,7 +87,9 @@ function registerModels(program) {
86
87
  const name = ui.cyan(m.name);
87
88
  const type = m.type.startsWith('embedding') ? ui.green(m.type) : ui.yellow(m.type);
88
89
  const price = ui.dim(m.price);
89
- return [name, type, m.context, m.dimensions, price, m.bestFor];
90
+ const arch = m.architecture ? (m.architecture === 'moe' ? ui.cyan('MoE') : m.architecture) : '—';
91
+ const space = m.sharedSpace ? ui.green('✓ ' + m.sharedSpace) : '—';
92
+ return [name, type, m.context, m.dimensions, arch, space, price, m.bestFor];
90
93
  };
91
94
 
92
95
  const formatCompactRow = (m) => {
@@ -98,7 +101,7 @@ function registerModels(program) {
98
101
  };
99
102
 
100
103
  if (opts.wide) {
101
- const headers = ['Model', 'Type', 'Context', 'Dimensions', 'Price', 'Best For'];
104
+ const headers = ['Model', 'Type', 'Context', 'Dimensions', 'Arch', 'Space', 'Price', 'Best For'];
102
105
  const boldHeaders = headers.map(h => ui.bold(h));
103
106
  const rows = displayCurrent.map(formatWideRow);
104
107
  console.log(formatTable(boldHeaders, rows));
@@ -123,6 +126,29 @@ function registerModels(program) {
123
126
  }
124
127
  }
125
128
 
129
+ // Show benchmark scores if requested
130
+ if (opts.benchmarks) {
131
+ console.log('');
132
+ console.log(ui.bold('RTEB Benchmark Scores (NDCG@10, avg 29 datasets)'));
133
+ console.log(ui.dim('Source: Voyage AI, January 2026'));
134
+ console.log('');
135
+
136
+ const maxScore = Math.max(...BENCHMARK_SCORES.map(b => b.score));
137
+ const barWidth = 30;
138
+
139
+ for (const b of BENCHMARK_SCORES) {
140
+ const barLen = Math.round((b.score / maxScore) * barWidth);
141
+ const bar = '█'.repeat(barLen) + '░'.repeat(barWidth - barLen);
142
+ const isVoyage = b.provider === 'Voyage AI';
143
+ const name = isVoyage ? ui.cyan(b.model.padEnd(22)) : ui.dim(b.model.padEnd(22));
144
+ const score = isVoyage ? ui.bold(b.score.toFixed(2)) : b.score.toFixed(2);
145
+ const colorBar = isVoyage ? ui.cyan(bar) : ui.dim(bar);
146
+ console.log(` ${name} ${colorBar} ${score}`);
147
+ }
148
+ console.log('');
149
+ console.log(ui.dim(' Run "vai explain rteb" for details.'));
150
+ }
151
+
126
152
  if (!opts.quiet) {
127
153
  console.log('');
128
154
  if (!opts.wide) {
@@ -130,7 +156,9 @@ function registerModels(program) {
130
156
  }
131
157
  console.log(ui.dim('Free tier: 200M tokens (most models), 50M (domain-specific)'));
132
158
  console.log(ui.dim('All 4-series models share the same embedding space.'));
133
- if (!opts.wide) {
159
+ if (!opts.wide && !opts.benchmarks) {
160
+ console.log(ui.dim('Use --wide for full details, --benchmarks for RTEB scores.'));
161
+ } else if (!opts.wide) {
134
162
  console.log(ui.dim('Use --wide for full details.'));
135
163
  }
136
164
  }
package/src/lib/catalog.js CHANGED (file header missing in diff; inferred — defines `MODEL_CATALOG`, required as '../lib/catalog')
@@ -24,29 +24,51 @@ function getDefaultDimensions() {
24
24
 
25
25
  // The model catalog: like a wine list (I don't drink :-P), except every choice
26
26
  // leads to vectors instead of regret.
27
- /** @type {Array<{name: string, type: string, context: string, dimensions: string, price: string, bestFor: string}>} */
27
+ /** @type {Array<{name: string, type: string, context: string, dimensions: string, price: string, bestFor: string, family?: string, architecture?: string, sharedSpace?: string, huggingface?: string, pricePerMToken?: number, rtebScore?: number}>} */
28
28
  const MODEL_CATALOG = [
29
- { name: 'voyage-4-large', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.12/1M tokens', bestFor: 'Best quality, multilingual', shortFor: 'Best quality' },
30
- { name: 'voyage-4', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.06/1M tokens', bestFor: 'Balanced quality/perf', shortFor: 'Balanced' },
31
- { name: 'voyage-4-lite', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.02/1M tokens', bestFor: 'Lowest cost', shortFor: 'Budget' },
32
- { name: 'voyage-code-3', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.18/1M tokens', bestFor: 'Code retrieval', shortFor: 'Code' },
33
- { name: 'voyage-finance-2', type: 'embedding', context: '32K', dimensions: '1024', price: '$0.12/1M tokens', bestFor: 'Finance', shortFor: 'Finance' },
34
- { name: 'voyage-law-2', type: 'embedding', context: '16K', dimensions: '1024', price: '$0.12/1M tokens', bestFor: 'Legal', shortFor: 'Legal' },
35
- { name: 'voyage-context-3', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.18/1M tokens', bestFor: 'Contextualized chunks', shortFor: 'Context chunks', unreleased: true },
29
+ { name: 'voyage-4-large', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.12/1M tokens', pricePerMToken: 0.12, bestFor: 'Best quality, multilingual, MoE', shortFor: 'Best quality', family: 'voyage-4', architecture: 'moe', sharedSpace: 'voyage-4', rtebScore: 71.41 },
30
+ { name: 'voyage-4', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.06/1M tokens', pricePerMToken: 0.06, bestFor: 'Balanced quality/perf', shortFor: 'Balanced', family: 'voyage-4', architecture: 'dense', sharedSpace: 'voyage-4', rtebScore: 70.07 },
31
+ { name: 'voyage-4-lite', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.02/1M tokens', pricePerMToken: 0.02, bestFor: 'Lowest cost', shortFor: 'Budget', family: 'voyage-4', architecture: 'dense', sharedSpace: 'voyage-4', rtebScore: 68.10 },
32
+ { name: 'voyage-code-3', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.18/1M tokens', pricePerMToken: 0.18, bestFor: 'Code retrieval', shortFor: 'Code' },
33
+ { name: 'voyage-finance-2', type: 'embedding', context: '32K', dimensions: '1024', price: '$0.12/1M tokens', pricePerMToken: 0.12, bestFor: 'Finance', shortFor: 'Finance' },
34
+ { name: 'voyage-law-2', type: 'embedding', context: '16K', dimensions: '1024', price: '$0.12/1M tokens', pricePerMToken: 0.12, bestFor: 'Legal', shortFor: 'Legal' },
35
+ { name: 'voyage-context-3', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.18/1M tokens', pricePerMToken: 0.18, bestFor: 'Contextualized chunks', shortFor: 'Context chunks', unreleased: true },
36
36
  { name: 'voyage-multimodal-3.5', type: 'embedding-multimodal', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.12/M + $0.60/B px', bestFor: 'Text + images + video', shortFor: 'Multimodal', multimodal: true },
37
- { name: 'rerank-2.5', type: 'reranking', context: '32K', dimensions: '—', price: '$0.05/1M tokens', bestFor: 'Best quality reranking', shortFor: 'Best reranker' },
38
- { name: 'rerank-2.5-lite', type: 'reranking', context: '32K', dimensions: '—', price: '$0.02/1M tokens', bestFor: 'Fast reranking', shortFor: 'Fast reranker' },
39
- { name: 'voyage-4-nano', type: 'embedding', context: '32K', dimensions: '512 (default), 128, 256', price: 'Open-weight', bestFor: 'Open-weight / edge', shortFor: 'Open / edge', local: true },
37
+ { name: 'rerank-2.5', type: 'reranking', context: '32K', dimensions: '—', price: '$0.05/1M tokens', pricePerMToken: 0.05, bestFor: 'Best quality reranking', shortFor: 'Best reranker' },
38
+ { name: 'rerank-2.5-lite', type: 'reranking', context: '32K', dimensions: '—', price: '$0.02/1M tokens', pricePerMToken: 0.02, bestFor: 'Fast reranking', shortFor: 'Fast reranker' },
39
+ { name: 'voyage-4-nano', type: 'embedding', context: '32K', dimensions: '512 (default), 128, 256', price: 'Open-weight (free)', pricePerMToken: 0, bestFor: 'Open-weight / edge / local', shortFor: 'Open / edge', local: true, family: 'voyage-4', architecture: 'dense', sharedSpace: 'voyage-4', huggingface: 'https://huggingface.co/voyageai/voyage-4-nano', rtebScore: null },
40
40
  // Legacy models
41
- { name: 'voyage-3-large', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.18/1M tokens', bestFor: 'Previous gen quality', shortFor: 'Previous gen quality', legacy: true },
42
- { name: 'voyage-3.5', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.06/1M tokens', bestFor: 'Previous gen balanced', shortFor: 'Previous gen balanced', legacy: true },
43
- { name: 'voyage-3.5-lite', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.02/1M tokens', bestFor: 'Previous gen budget', shortFor: 'Previous gen budget', legacy: true },
44
- { name: 'voyage-code-2', type: 'embedding', context: '16K', dimensions: '1536', price: '$0.12/1M tokens', bestFor: 'Legacy code', shortFor: 'Legacy code', legacy: true },
45
- { name: 'voyage-multimodal-3', type: 'embedding-multimodal', context: '32K', dimensions: '1024', price: '$0.12/1M tokens', bestFor: 'Legacy multimodal', shortFor: 'Legacy multimodal', legacy: true, multimodal: true },
46
- { name: 'rerank-2', type: 'reranking', context: '16K', dimensions: '—', price: '$0.05/1M tokens', bestFor: 'Legacy reranker', shortFor: 'Legacy reranker', legacy: true },
47
- { name: 'rerank-2-lite', type: 'reranking', context: '8K', dimensions: '—', price: '$0.02/1M tokens', bestFor: 'Legacy fast reranker', shortFor: 'Legacy fast reranker', legacy: true },
41
+ { name: 'voyage-3-large', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.18/1M tokens', pricePerMToken: 0.18, bestFor: 'Previous gen quality', shortFor: 'Previous gen quality', legacy: true, rtebScore: null },
42
+ { name: 'voyage-3.5', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.06/1M tokens', pricePerMToken: 0.06, bestFor: 'Previous gen balanced', shortFor: 'Previous gen balanced', legacy: true, rtebScore: null },
43
+ { name: 'voyage-3.5-lite', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.02/1M tokens', pricePerMToken: 0.02, bestFor: 'Previous gen budget', shortFor: 'Previous gen budget', legacy: true, rtebScore: null },
44
+ { name: 'voyage-code-2', type: 'embedding', context: '16K', dimensions: '1536', price: '$0.12/1M tokens', pricePerMToken: 0.12, bestFor: 'Legacy code', shortFor: 'Legacy code', legacy: true },
45
+ { name: 'voyage-multimodal-3', type: 'embedding-multimodal', context: '32K', dimensions: '1024', price: '$0.12/1M tokens', pricePerMToken: 0.12, bestFor: 'Legacy multimodal', shortFor: 'Legacy multimodal', legacy: true, multimodal: true },
46
+ { name: 'rerank-2', type: 'reranking', context: '16K', dimensions: '—', price: '$0.05/1M tokens', pricePerMToken: 0.05, bestFor: 'Legacy reranker', shortFor: 'Legacy reranker', legacy: true },
47
+ { name: 'rerank-2-lite', type: 'reranking', context: '8K', dimensions: '—', price: '$0.02/1M tokens', pricePerMToken: 0.02, bestFor: 'Legacy fast reranker', shortFor: 'Legacy fast reranker', legacy: true },
48
48
  ];
49
49
 
50
+ /**
51
+ * RTEB benchmark scores for competitive models (NDCG@10 average across 29 datasets).
52
+ * Source: Voyage AI blog, January 15 2026.
53
+ */
54
+ const BENCHMARK_SCORES = [
55
+ { model: 'voyage-4-large', provider: 'Voyage AI', score: 71.41 },
56
+ { model: 'voyage-4', provider: 'Voyage AI', score: 70.07 },
57
+ { model: 'voyage-4-lite', provider: 'Voyage AI', score: 68.10 },
58
+ { model: 'Gemini Embedding 001', provider: 'Google', score: 68.66 },
59
+ { model: 'Cohere Embed v4', provider: 'Cohere', score: 65.75 },
60
+ { model: 'OpenAI v3 Large', provider: 'OpenAI', score: 62.57 },
61
+ ];
62
+
63
+ /**
64
+ * Get models that share an embedding space.
65
+ * @param {string} space - e.g. 'voyage-4'
66
+ * @returns {Array}
67
+ */
68
+ function getSharedSpaceModels(space) {
69
+ return MODEL_CATALOG.filter(m => m.sharedSpace === space);
70
+ }
71
+
50
72
  module.exports = {
51
73
  DEFAULT_EMBED_MODEL,
52
74
  DEFAULT_RERANK_MODEL,
@@ -54,4 +76,6 @@ module.exports = {
54
76
  getDefaultModel,
55
77
  getDefaultDimensions,
56
78
  MODEL_CATALOG,
79
+ BENCHMARK_SCORES,
80
+ getSharedSpaceModels,
57
81
  };
@@ -513,6 +513,169 @@ const concepts = {
513
513
  'vai benchmark similarity --query "your search query" --file your-docs.txt',
514
514
  ],
515
515
  },
516
+ 'mixture-of-experts': {
517
+ title: 'Mixture-of-Experts (MoE) Architecture',
518
+ summary: 'How voyage-4-large achieves SOTA quality at 40% lower cost',
519
+ content: [
520
+ `${pc.cyan('Mixture-of-Experts (MoE)')} is a neural network architecture where multiple`,
521
+ `specialized sub-networks ("experts") share a single model. A learned ${pc.cyan('router')}`,
522
+ `selects which experts activate for each input — typically 2-4 out of 8-64 total.`,
523
+ ``,
524
+ `${pc.bold('Why MoE matters for embeddings:')}`,
525
+ ` ${pc.dim('•')} ${pc.cyan('Higher capacity, lower cost')} — the model has more total parameters`,
526
+ ` (knowledge) but only activates a fraction per input, keeping inference fast`,
527
+ ` ${pc.dim('•')} ${pc.cyan('Specialization')} — different experts learn different domains (code,`,
528
+ ` legal, medical) without interfering with each other`,
529
+ ` ${pc.dim('•')} ${pc.cyan('State-of-the-art quality')} — voyage-4-large beats all competitors on`,
530
+ ` RTEB benchmarks while costing 40% less than comparable dense models`,
531
+ ``,
532
+ `${pc.bold('voyage-4-large')} is the ${pc.cyan('first production-grade embedding model')} to use MoE.`,
533
+ `Previous MoE successes (Mixtral, Switch Transformer) were language models —`,
534
+ `applying MoE to embedding models required solving alignment across the shared`,
535
+ `embedding space, which is what makes the Voyage 4 family unique.`,
536
+ ``,
537
+ `${pc.bold('Dense vs MoE:')}`,
538
+ ` ${pc.dim('Dense (voyage-4, voyage-4-lite):')} Every parameter is used for every input.`,
539
+ ` Simpler, predictable latency, lower total parameter count.`,
540
+ ` ${pc.dim('MoE (voyage-4-large):')} Sparse activation — more total parameters, but each`,
541
+ ` input only uses a subset. Higher quality ceiling, similar serving cost.`,
542
+ ``,
543
+ `${pc.bold('In practice:')} You don't need to do anything special to use MoE — the API`,
544
+ `interface is identical. The architecture difference shows up in quality and cost:`,
545
+ ` ${pc.dim('•')} voyage-4-large: $0.12/1M tokens — better quality than voyage-3-large ($0.18/1M)`,
546
+ ` ${pc.dim('•')} 40% cheaper than comparable dense models at the same quality tier`,
547
+ ].join('\n'),
548
+ links: [
549
+ 'https://blog.voyageai.com/2026/01/15/voyage-4-model-family/',
550
+ 'https://www.mongodb.com/docs/voyageai/models/text-embeddings/',
551
+ ],
552
+ tryIt: [
553
+ 'vai embed "test MoE quality" --model voyage-4-large',
554
+ 'vai benchmark embed --models voyage-4-large,voyage-4,voyage-4-lite',
555
+ 'vai models --wide',
556
+ ],
557
+ },
558
+
559
+ 'shared-embedding-space': {
560
+ title: 'Shared Embedding Space',
561
+ summary: 'How Voyage 4 models produce compatible, interchangeable embeddings',
562
+ content: [
563
+ `The Voyage 4 series introduces an ${pc.cyan('industry-first capability')}: all four models`,
564
+ `(voyage-4-large, voyage-4, voyage-4-lite, voyage-4-nano) produce embeddings in`,
565
+ `the ${pc.cyan('same vector space')}. Embeddings from different models are directly comparable.`,
566
+ ``,
567
+ `${pc.bold('What this means:')}`,
568
+ ` ${pc.dim('•')} Embed documents with ${pc.cyan('voyage-4-large')} (best quality, one-time cost)`,
569
+ ` ${pc.dim('•')} Query with ${pc.cyan('voyage-4-lite')} or ${pc.cyan('voyage-4-nano')} (low cost, high volume)`,
570
+ ` ${pc.dim('•')} Cosine similarity works across model boundaries`,
571
+ ` ${pc.dim('•')} Upgrade query model later ${pc.cyan('without re-vectorizing documents')}`,
572
+ ``,
573
+ `${pc.bold('Why this is new:')} Previously, embeddings from different models lived in`,
574
+ `incompatible vector spaces. Switching models meant re-embedding your entire`,
575
+ `corpus — expensive and slow. The shared space eliminates this constraint.`,
576
+ ``,
577
+ `${pc.bold('Recommended workflow:')}`,
578
+ ` ${pc.dim('1.')} Vectorize your document corpus once with ${pc.cyan('voyage-4-large')}`,
579
+ ` ${pc.dim('2.')} Start with ${pc.cyan('voyage-4-lite')} for queries in development / early production`,
580
+ ` ${pc.dim('3.')} Upgrade to ${pc.cyan('voyage-4')} or ${pc.cyan('voyage-4-large')} as accuracy needs grow`,
581
+ ` ${pc.dim('4.')} No re-vectorization needed at any step`,
582
+ ``,
583
+ `${pc.bold('Validate it yourself:')} Use ${pc.cyan('vai benchmark space')} to embed identical text`,
584
+ `with all Voyage 4 models and see the cross-model cosine similarities.`,
585
+ ].join('\n'),
586
+ links: [
587
+ 'https://blog.voyageai.com/2026/01/15/voyage-4-model-family/',
588
+ ],
589
+ tryIt: [
590
+ 'vai benchmark space',
591
+ 'vai benchmark asymmetric --query "your search" --file corpus.txt',
592
+ 'vai estimate --docs 1M --queries 10M',
593
+ ],
594
+ },
595
+
596
+ 'rteb-benchmarks': {
597
+ title: 'RTEB Benchmark Scores',
598
+ summary: 'Retrieval quality scores across embedding providers',
599
+ content: [
600
+ `The ${pc.cyan('Retrieval Embedding Benchmark (RTEB)')} evaluates general-purpose retrieval`,
601
+ `quality across 29 diverse datasets. Scores are ${pc.cyan('NDCG@10')} (normalized discounted`,
602
+ `cumulative gain at top 10 results) — higher is better.`,
603
+ ``,
604
+ `${pc.bold('Current standings (Jan 2026):')}`,
605
+ ` ${pc.cyan('voyage-4-large')} ${pc.bold('71.41')} ${pc.dim('— SOTA, MoE architecture')}`,
606
+ ` ${pc.cyan('voyage-4')} ${pc.bold('70.07')} ${pc.dim('— near voyage-3-large quality')}`,
607
+ ` ${pc.cyan('Gemini Embedding 001')} ${pc.bold('68.66')} ${pc.dim('— Google')}`,
608
+ ` ${pc.cyan('voyage-4-lite')} ${pc.bold('68.10')} ${pc.dim('— near voyage-3.5 quality')}`,
609
+ ` ${pc.cyan('Cohere Embed v4')} ${pc.bold('65.75')} ${pc.dim('— Cohere')}`,
610
+ ` ${pc.cyan('OpenAI v3 Large')} ${pc.bold('62.57')} ${pc.dim('— OpenAI')}`,
611
+ ``,
612
+ `${pc.bold('What the numbers mean:')}`,
613
+ ` ${pc.dim('•')} voyage-4-large beats Gemini by ${pc.cyan('3.87%')}, Cohere by ${pc.cyan('8.20%')}, OpenAI by ${pc.cyan('14.05%')}`,
614
+ ` ${pc.dim('•')} voyage-4 (mid-tier pricing) outperforms all non-Voyage models`,
615
+ ` ${pc.dim('•')} Even voyage-4-lite ($0.02/1M) is competitive with Gemini Embedding`,
616
+ ``,
617
+ `${pc.bold('Asymmetric retrieval bonus:')} When documents are embedded with voyage-4-large`,
618
+ `and queries with a smaller Voyage 4 model, retrieval quality ${pc.cyan('improves')} over`,
619
+ `using the smaller model alone — you get the benefit of the larger model's`,
620
+ `document representations.`,
621
+ ``,
622
+ `${pc.bold('Note:')} These scores are from Voyage AI's evaluation. Independent benchmarks`,
623
+ `may differ. Always test on your own data with ${pc.cyan('vai benchmark similarity')}.`,
624
+ ].join('\n'),
625
+ links: [
626
+ 'https://blog.voyageai.com/2026/01/15/voyage-4-model-family/',
627
+ 'https://docs.google.com/spreadsheets/d/1GfPkqCAjPKaGS9f66IDhMRxVpd2bMuqL2wXjj-kNS7E/',
628
+ ],
629
+ tryIt: [
630
+ 'vai models --benchmarks',
631
+ 'vai benchmark similarity --query "your query" --file your-docs.txt',
632
+ 'vai estimate --docs 1M --queries 10M',
633
+ ],
634
+ },
635
+
636
+ 'voyage-4-nano': {
637
+ title: 'voyage-4-nano — Open-Weight Local Model',
638
+ summary: 'Free, local-first embeddings with shared space compatibility',
639
+ content: [
640
+ `${pc.cyan('voyage-4-nano')} is Voyage AI's first ${pc.cyan('open-weight')} embedding model, freely`,
641
+ `available on Hugging Face under the ${pc.bold('Apache 2.0')} license.`,
642
+ ``,
643
+ `${pc.bold('Key specs:')}`,
644
+ ` ${pc.dim('•')} Dimensions: 512 (default), 128, 256`,
645
+ ` ${pc.dim('•')} Context: 32K tokens`,
646
+ ` ${pc.dim('•')} License: Apache 2.0 (fully open)`,
647
+ ` ${pc.dim('•')} Shared space: Compatible with voyage-4-large/4/4-lite embeddings`,
648
+ ``,
649
+ `${pc.bold('Use cases:')}`,
650
+ ` ${pc.dim('•')} ${pc.cyan('Local development')} — no API key, no network, no cost`,
651
+ ` ${pc.dim('•')} ${pc.cyan('Prototyping')} — fast iteration before committing to API models`,
652
+ ` ${pc.dim('•')} ${pc.cyan('Edge/on-device')} — run inference on your own hardware`,
653
+ ` ${pc.dim('•')} ${pc.cyan('Asymmetric queries')} — use nano for queries against voyage-4-large docs`,
654
+ ``,
655
+ `${pc.bold('Getting started with Hugging Face:')}`,
656
+ ` ${pc.dim('pip install sentence-transformers')}`,
657
+ ` ${pc.dim('from sentence_transformers import SentenceTransformer')}`,
658
+ ` ${pc.dim('model = SentenceTransformer("voyageai/voyage-4-nano")')}`,
659
+ ` ${pc.dim('embeddings = model.encode(["your text here"])')}`,
660
+ ``,
661
+ `${pc.bold('With the Voyage API:')} voyage-4-nano is also available via the standard API`,
662
+ `endpoint, so you can use ${pc.cyan('vai embed --model voyage-4-nano')} for testing before`,
663
+ `switching to local inference.`,
664
+ ``,
665
+ `${pc.bold('Shared space advantage:')} Since nano shares the same embedding space as the`,
666
+ `larger Voyage 4 models, you can prototype locally with nano, then seamlessly`,
667
+ `use the same document embeddings with voyage-4 or voyage-4-large in production.`,
668
+ ].join('\n'),
669
+ links: [
670
+ 'https://huggingface.co/voyageai/voyage-4-nano',
671
+ 'https://blog.voyageai.com/2026/01/15/voyage-4-model-family/',
672
+ ],
673
+ tryIt: [
674
+ 'vai embed "test nano" --model voyage-4-nano',
675
+ 'vai benchmark space',
676
+ 'vai benchmark asymmetric --doc-model voyage-4-large --query-models voyage-4-nano',
677
+ ],
678
+ },
516
679
  };
517
680
 
518
681
  /**
@@ -567,6 +730,26 @@ const aliases = {
567
730
  'model-selection': 'benchmarking',
568
731
  choosing: 'benchmarking',
569
732
  compare: 'benchmarking',
733
+ moe: 'mixture-of-experts',
734
+ 'mixture-of-experts': 'mixture-of-experts',
735
+ 'moe-architecture': 'mixture-of-experts',
736
+ experts: 'mixture-of-experts',
737
+ sparse: 'mixture-of-experts',
738
+ 'shared-space': 'shared-embedding-space',
739
+ 'shared-embedding-space': 'shared-embedding-space',
740
+ 'embedding-space': 'shared-embedding-space',
741
+ interchangeable: 'shared-embedding-space',
742
+ compatible: 'shared-embedding-space',
743
+ rteb: 'rteb-benchmarks',
744
+ 'rteb-benchmarks': 'rteb-benchmarks',
745
+ ndcg: 'rteb-benchmarks',
746
+ scores: 'rteb-benchmarks',
747
+ leaderboard: 'rteb-benchmarks',
748
+ nano: 'voyage-4-nano',
749
+ 'voyage-4-nano': 'voyage-4-nano',
750
+ 'open-weight': 'voyage-4-nano',
751
+ huggingface: 'voyage-4-nano',
752
+ local: 'voyage-4-nano',
570
753
  };
571
754
 
572
755
  /**