voyageai-cli 1.7.0 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +1 -0
- package/package.json +2 -3
- package/src/cli.js +2 -0
- package/src/commands/benchmark.js +799 -0
- package/src/commands/playground.js +49 -1
- package/src/lib/catalog.js +1 -1
- package/src/lib/explanations.js +47 -0
- package/src/playground/index.html +835 -0
- package/test/commands/benchmark.test.js +252 -0
- package/test/commands/ping.test.js +7 -3
- package/test/lib/explanations.test.js +1 -0
|
@@ -84,7 +84,7 @@ function createPlaygroundServer() {
|
|
|
84
84
|
|
|
85
85
|
// API: Models
|
|
86
86
|
if (req.method === 'GET' && req.url === '/api/models') {
|
|
87
|
-
const models = MODEL_CATALOG.filter(m => !m.legacy);
|
|
87
|
+
const models = MODEL_CATALOG.filter(m => !m.legacy && !m.local);
|
|
88
88
|
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
89
89
|
res.end(JSON.stringify({ models }));
|
|
90
90
|
return;
|
|
@@ -152,6 +152,54 @@ function createPlaygroundServer() {
|
|
|
152
152
|
return;
|
|
153
153
|
}
|
|
154
154
|
|
|
155
|
+
// API: Benchmark (single model, single round — UI calls this per model)
|
|
156
|
+
if (req.url === '/api/benchmark/embed') {
|
|
157
|
+
const { texts, model, inputType, dimensions } = parsed;
|
|
158
|
+
if (!texts || !Array.isArray(texts) || texts.length === 0) {
|
|
159
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
160
|
+
res.end(JSON.stringify({ error: 'texts must be a non-empty array' }));
|
|
161
|
+
return;
|
|
162
|
+
}
|
|
163
|
+
const opts = { model: model || undefined };
|
|
164
|
+
if (inputType) opts.inputType = inputType;
|
|
165
|
+
if (dimensions) opts.dimensions = dimensions;
|
|
166
|
+
const start = performance.now();
|
|
167
|
+
const result = await generateEmbeddings(texts, opts);
|
|
168
|
+
const elapsed = performance.now() - start;
|
|
169
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
170
|
+
res.end(JSON.stringify({
|
|
171
|
+
model: result.model,
|
|
172
|
+
elapsed,
|
|
173
|
+
tokens: result.usage?.total_tokens || 0,
|
|
174
|
+
dimensions: result.data?.[0]?.embedding?.length || 0,
|
|
175
|
+
embeddings: result.data?.map(d => d.embedding),
|
|
176
|
+
}));
|
|
177
|
+
return;
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
if (req.url === '/api/benchmark/rerank') {
|
|
181
|
+
const { query, documents, model, topK } = parsed;
|
|
182
|
+
if (!query || !documents || !Array.isArray(documents)) {
|
|
183
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
184
|
+
res.end(JSON.stringify({ error: 'query and documents are required' }));
|
|
185
|
+
return;
|
|
186
|
+
}
|
|
187
|
+
const { apiRequest } = require('../lib/api');
|
|
188
|
+
const body = { query, documents, model: model || 'rerank-2.5' };
|
|
189
|
+
if (topK) body.top_k = topK;
|
|
190
|
+
const start = performance.now();
|
|
191
|
+
const result = await apiRequest('/rerank', body);
|
|
192
|
+
const elapsed = performance.now() - start;
|
|
193
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
194
|
+
res.end(JSON.stringify({
|
|
195
|
+
model: result.model,
|
|
196
|
+
elapsed,
|
|
197
|
+
tokens: result.usage?.total_tokens || 0,
|
|
198
|
+
results: result.data || [],
|
|
199
|
+
}));
|
|
200
|
+
return;
|
|
201
|
+
}
|
|
202
|
+
|
|
155
203
|
// API: Similarity
|
|
156
204
|
if (req.url === '/api/similarity') {
|
|
157
205
|
const { texts, model } = parsed;
|
package/src/lib/catalog.js
CHANGED
|
@@ -34,7 +34,7 @@ const MODEL_CATALOG = [
|
|
|
34
34
|
{ name: 'voyage-multimodal-3.5', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.12/M + $0.60/B px', bestFor: 'Text + images + video', shortFor: 'Multimodal' },
|
|
35
35
|
{ name: 'rerank-2.5', type: 'reranking', context: '32K', dimensions: '—', price: '$0.05/1M tokens', bestFor: 'Best quality reranking', shortFor: 'Best reranker' },
|
|
36
36
|
{ name: 'rerank-2.5-lite', type: 'reranking', context: '32K', dimensions: '—', price: '$0.02/1M tokens', bestFor: 'Fast reranking', shortFor: 'Fast reranker' },
|
|
37
|
-
{ name: 'voyage-4-nano', type: 'embedding', context: '32K', dimensions: '512 (default), 128, 256', price: 'Open-weight', bestFor: 'Open-weight / edge', shortFor: 'Open / edge' },
|
|
37
|
+
{ name: 'voyage-4-nano', type: 'embedding', context: '32K', dimensions: '512 (default), 128, 256', price: 'Open-weight', bestFor: 'Open-weight / edge', shortFor: 'Open / edge', local: true },
|
|
38
38
|
// Legacy models
|
|
39
39
|
{ name: 'voyage-3-large', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.18/1M tokens', bestFor: 'Previous gen quality', shortFor: 'Previous gen quality', legacy: true },
|
|
40
40
|
{ name: 'voyage-3.5', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.06/1M tokens', bestFor: 'Previous gen balanced', shortFor: 'Previous gen balanced', legacy: true },
|
package/src/lib/explanations.js
CHANGED
|
@@ -406,6 +406,48 @@ const concepts = {
|
|
|
406
406
|
'vai embed --file document.txt --input-type document',
|
|
407
407
|
],
|
|
408
408
|
},
|
|
409
|
+
benchmarking: {
|
|
410
|
+
title: 'Benchmarking & Model Selection',
|
|
411
|
+
summary: 'How to choose the right model for your use case',
|
|
412
|
+
content: [
|
|
413
|
+
`Choosing the right embedding or reranking model depends on your priorities:`,
|
|
414
|
+
`${pc.cyan('latency')}, ${pc.cyan('accuracy')}, ${pc.cyan('cost')}, or a balance of all three.`,
|
|
415
|
+
``,
|
|
416
|
+
`${pc.bold('vai benchmark embed')} — Compare embedding models head-to-head:`,
|
|
417
|
+
` Measures avg/p50/p95 latency, token usage, and cost per model.`,
|
|
418
|
+
` ${pc.dim('vai benchmark embed --models voyage-4-large,voyage-4,voyage-4-lite --rounds 5')}`,
|
|
419
|
+
``,
|
|
420
|
+
`${pc.bold('vai benchmark similarity')} — Test ranking quality on your data:`,
|
|
421
|
+
` Embeds a query + corpus with each model, shows side-by-side top-K rankings.`,
|
|
422
|
+
` If models agree on the top results, the cheaper one is likely sufficient.`,
|
|
423
|
+
` ${pc.dim('vai benchmark similarity --query "your query" --file corpus.txt')}`,
|
|
424
|
+
``,
|
|
425
|
+
`${pc.bold('vai benchmark rerank')} — Compare reranking models:`,
|
|
426
|
+
` Measures latency and shows how models order the same documents.`,
|
|
427
|
+
` ${pc.dim('vai benchmark rerank --query "your query" --documents-file docs.json')}`,
|
|
428
|
+
``,
|
|
429
|
+
`${pc.bold('vai benchmark cost')} — Project monthly costs at scale:`,
|
|
430
|
+
` Shows estimated cost for each model at different daily query volumes.`,
|
|
431
|
+
` ${pc.dim('vai benchmark cost --tokens 500 --volumes 100,1000,10000,100000')}`,
|
|
432
|
+
``,
|
|
433
|
+
`${pc.bold('vai benchmark batch')} — Find optimal batch size for ingestion:`,
|
|
434
|
+
` Measures throughput (texts/sec) at different batch sizes.`,
|
|
435
|
+
` ${pc.dim('vai benchmark batch --batch-sizes 1,5,10,25,50 --rounds 3')}`,
|
|
436
|
+
``,
|
|
437
|
+
`${pc.bold('Decision framework:')}`,
|
|
438
|
+
` 1. Run ${pc.cyan('benchmark cost')} to eliminate models outside your budget`,
|
|
439
|
+
` 2. Run ${pc.cyan('benchmark embed')} to compare latency of affordable models`,
|
|
440
|
+
` 3. Run ${pc.cyan('benchmark similarity')} with your actual data to compare quality`,
|
|
441
|
+
` 4. If quality is similar, pick the cheaper/faster model`,
|
|
442
|
+
` 5. Use ${pc.cyan('--save')} to track results over time as your data evolves`,
|
|
443
|
+
].join('\n'),
|
|
444
|
+
links: ['https://www.mongodb.com/docs/voyageai/models/text-embeddings/'],
|
|
445
|
+
tryIt: [
|
|
446
|
+
'vai benchmark embed --rounds 3',
|
|
447
|
+
'vai benchmark cost',
|
|
448
|
+
'vai benchmark similarity --query "your search query" --file your-docs.txt',
|
|
449
|
+
],
|
|
450
|
+
},
|
|
409
451
|
};
|
|
410
452
|
|
|
411
453
|
/**
|
|
@@ -446,6 +488,11 @@ const aliases = {
|
|
|
446
488
|
batch: 'batch-processing',
|
|
447
489
|
'batch-processing': 'batch-processing',
|
|
448
490
|
batching: 'batch-processing',
|
|
491
|
+
benchmark: 'benchmarking',
|
|
492
|
+
benchmarking: 'benchmarking',
|
|
493
|
+
'model-selection': 'benchmarking',
|
|
494
|
+
choosing: 'benchmarking',
|
|
495
|
+
compare: 'benchmarking',
|
|
449
496
|
};
|
|
450
497
|
|
|
451
498
|
/**
|