voyageai-cli 1.11.0 → 1.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -0
- package/package.json +4 -1
- package/src/commands/benchmark.js +416 -0
- package/src/commands/embed.js +5 -0
- package/src/commands/models.js +3 -3
- package/src/commands/ping.js +32 -2
- package/src/commands/playground.js +8 -4
- package/src/commands/search.js +2 -2
- package/src/commands/store.js +14 -5
- package/src/lib/api.js +22 -14
- package/src/lib/catalog.js +3 -3
- package/src/lib/explanations.js +76 -2
- package/src/playground/index.html +411 -1
- package/test/commands/benchmark.test.js +67 -0
- package/test/commands/embed.test.js +10 -0
- package/test/lib/api.test.js +1 -2
- package/test/lib/explanations.test.js +6 -0
package/src/lib/api.js
CHANGED
|
@@ -96,25 +96,29 @@ async function apiRequest(endpoint, body) {
|
|
|
96
96
|
} catch {
|
|
97
97
|
errorDetail = await response.text();
|
|
98
98
|
}
|
|
99
|
-
|
|
99
|
+
const errMsg = `API Error (${response.status}): ${errorDetail}`;
|
|
100
100
|
|
|
101
101
|
// Help users diagnose endpoint mismatch
|
|
102
|
+
let hint = '';
|
|
102
103
|
if (response.status === 403 && base === ATLAS_API_BASE) {
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
console.error(' vai config set base-url https://api.voyageai.com/v1/');
|
|
108
|
-
console.error('');
|
|
109
|
-
console.error('Or set VOYAGE_API_BASE=https://api.voyageai.com/v1/ in your environment.');
|
|
104
|
+
hint = '\n\nHint: 403 on ai.mongodb.com often means your key is for the Voyage AI' +
|
|
105
|
+
'\nplatform, not MongoDB Atlas. Try switching the base URL:' +
|
|
106
|
+
'\n\n vai config set base-url https://api.voyageai.com/v1/' +
|
|
107
|
+
'\n\nOr set VOYAGE_API_BASE=https://api.voyageai.com/v1/ in your environment.';
|
|
110
108
|
} else if (response.status === 401 && base === VOYAGE_API_BASE) {
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
console.error('');
|
|
115
|
-
console.error(' vai config set base-url https://ai.mongodb.com/v1/');
|
|
109
|
+
hint = '\n\nHint: 401 on api.voyageai.com may mean your key is an Atlas AI key.' +
|
|
110
|
+
'\nTry switching back:' +
|
|
111
|
+
'\n\n vai config set base-url https://ai.mongodb.com/v1/';
|
|
116
112
|
}
|
|
117
|
-
|
|
113
|
+
|
|
114
|
+
// Log the error + hint to stderr for CLI users
|
|
115
|
+
console.error(errMsg);
|
|
116
|
+
if (hint) console.error(hint);
|
|
117
|
+
|
|
118
|
+
// Throw instead of process.exit so callers (like playground) can catch gracefully
|
|
119
|
+
const err = new Error(errMsg);
|
|
120
|
+
err.statusCode = response.status;
|
|
121
|
+
throw err;
|
|
118
122
|
}
|
|
119
123
|
|
|
120
124
|
return response.json();
|
|
@@ -129,6 +133,7 @@ async function apiRequest(endpoint, body) {
|
|
|
129
133
|
* @param {string} [options.inputType] - Input type (query|document)
|
|
130
134
|
* @param {number} [options.dimensions] - Output dimensions
|
|
131
135
|
* @param {boolean} [options.truncation] - Enable/disable truncation
|
|
136
|
+
* @param {string} [options.outputDtype] - Output data type: float, int8, uint8, binary, ubinary
|
|
132
137
|
* @returns {Promise<object>} API response with embeddings
|
|
133
138
|
*/
|
|
134
139
|
async function generateEmbeddings(texts, options = {}) {
|
|
@@ -148,6 +153,9 @@ async function generateEmbeddings(texts, options = {}) {
|
|
|
148
153
|
if (options.truncation !== undefined) {
|
|
149
154
|
body.truncation = options.truncation;
|
|
150
155
|
}
|
|
156
|
+
if (options.outputDtype && options.outputDtype !== 'float') {
|
|
157
|
+
body.output_dtype = options.outputDtype;
|
|
158
|
+
}
|
|
151
159
|
|
|
152
160
|
return apiRequest('/embeddings', body);
|
|
153
161
|
}
|
package/src/lib/catalog.js
CHANGED
|
@@ -32,8 +32,8 @@ const MODEL_CATALOG = [
|
|
|
32
32
|
{ name: 'voyage-code-3', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.18/1M tokens', bestFor: 'Code retrieval', shortFor: 'Code' },
|
|
33
33
|
{ name: 'voyage-finance-2', type: 'embedding', context: '32K', dimensions: '1024', price: '$0.12/1M tokens', bestFor: 'Finance', shortFor: 'Finance' },
|
|
34
34
|
{ name: 'voyage-law-2', type: 'embedding', context: '16K', dimensions: '1024', price: '$0.12/1M tokens', bestFor: 'Legal', shortFor: 'Legal' },
|
|
35
|
-
{ name: 'voyage-context-3', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.18/1M tokens', bestFor: 'Contextualized chunks', shortFor: 'Context chunks' },
|
|
36
|
-
{ name: 'voyage-multimodal-3.5', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.12/M + $0.60/B px', bestFor: 'Text + images + video', shortFor: 'Multimodal' },
|
|
35
|
+
{ name: 'voyage-context-3', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.18/1M tokens', bestFor: 'Contextualized chunks', shortFor: 'Context chunks', unreleased: true },
|
|
36
|
+
{ name: 'voyage-multimodal-3.5', type: 'embedding-multimodal', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.12/M + $0.60/B px', bestFor: 'Text + images + video', shortFor: 'Multimodal', multimodal: true },
|
|
37
37
|
{ name: 'rerank-2.5', type: 'reranking', context: '32K', dimensions: '—', price: '$0.05/1M tokens', bestFor: 'Best quality reranking', shortFor: 'Best reranker' },
|
|
38
38
|
{ name: 'rerank-2.5-lite', type: 'reranking', context: '32K', dimensions: '—', price: '$0.02/1M tokens', bestFor: 'Fast reranking', shortFor: 'Fast reranker' },
|
|
39
39
|
{ name: 'voyage-4-nano', type: 'embedding', context: '32K', dimensions: '512 (default), 128, 256', price: 'Open-weight', bestFor: 'Open-weight / edge', shortFor: 'Open / edge', local: true },
|
|
@@ -42,7 +42,7 @@ const MODEL_CATALOG = [
|
|
|
42
42
|
{ name: 'voyage-3.5', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.06/1M tokens', bestFor: 'Previous gen balanced', shortFor: 'Previous gen balanced', legacy: true },
|
|
43
43
|
{ name: 'voyage-3.5-lite', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.02/1M tokens', bestFor: 'Previous gen budget', shortFor: 'Previous gen budget', legacy: true },
|
|
44
44
|
{ name: 'voyage-code-2', type: 'embedding', context: '16K', dimensions: '1536', price: '$0.12/1M tokens', bestFor: 'Legacy code', shortFor: 'Legacy code', legacy: true },
|
|
45
|
-
{ name: 'voyage-multimodal-3', type: 'embedding', context: '32K', dimensions: '1024', price: '$0.12/1M tokens', bestFor: 'Legacy multimodal', shortFor: 'Legacy multimodal', legacy: true },
|
|
45
|
+
{ name: 'voyage-multimodal-3', type: 'embedding-multimodal', context: '32K', dimensions: '1024', price: '$0.12/1M tokens', bestFor: 'Legacy multimodal', shortFor: 'Legacy multimodal', legacy: true, multimodal: true },
|
|
46
46
|
{ name: 'rerank-2', type: 'reranking', context: '16K', dimensions: '—', price: '$0.05/1M tokens', bestFor: 'Legacy reranker', shortFor: 'Legacy reranker', legacy: true },
|
|
47
47
|
{ name: 'rerank-2-lite', type: 'reranking', context: '8K', dimensions: '—', price: '$0.02/1M tokens', bestFor: 'Legacy fast reranker', shortFor: 'Legacy fast reranker', legacy: true },
|
|
48
48
|
];
|
package/src/lib/explanations.js
CHANGED
|
@@ -406,6 +406,65 @@ const concepts = {
|
|
|
406
406
|
'vai embed --file document.txt --input-type document',
|
|
407
407
|
],
|
|
408
408
|
},
|
|
409
|
+
quantization: {
|
|
410
|
+
title: 'Quantization & Flexible Dimensions',
|
|
411
|
+
summary: 'Reduce storage costs with lower-precision embeddings',
|
|
412
|
+
content: [
|
|
413
|
+
`${pc.cyan('Quantization')} reduces embedding precision from 32-bit floats to smaller`,
|
|
414
|
+
`representations, dramatically cutting storage and search costs with minimal`,
|
|
415
|
+
`quality loss. Combined with ${pc.cyan('Matryoshka dimensions')}, you can shrink vectors`,
|
|
416
|
+
`by up to ${pc.bold('128×')} (32× from binary × 4× from fewer dimensions).`,
|
|
417
|
+
``,
|
|
418
|
+
`${pc.bold('Output data types (--output-dtype):')}`,
|
|
419
|
+
``,
|
|
420
|
+
` ${pc.cyan('float')} 32 bits/dim 4 bytes/dim Baseline (default)`,
|
|
421
|
+
` ${pc.cyan('int8')} 8 bits/dim 1 byte/dim ${pc.green('4× smaller')} Signed: -128 to 127`,
|
|
422
|
+
` ${pc.cyan('uint8')} 8 bits/dim 1 byte/dim ${pc.green('4× smaller')} Unsigned: 0 to 255`,
|
|
423
|
+
` ${pc.cyan('binary')} 1 bit/dim 1/8 byte/dim ${pc.green('32× smaller')} Bit-packed int8 (offset binary)`,
|
|
424
|
+
` ${pc.cyan('ubinary')} 1 bit/dim 1/8 byte/dim ${pc.green('32× smaller')} Bit-packed uint8`,
|
|
425
|
+
``,
|
|
426
|
+
`${pc.bold('Storage math for 1M documents at 1024 dims:')}`,
|
|
427
|
+
` float: ${pc.dim('1M × 1024 × 4B')} = ${pc.cyan('4.0 GB')}`,
|
|
428
|
+
` int8: ${pc.dim('1M × 1024 × 1B')} = ${pc.cyan('1.0 GB')} (4× savings)`,
|
|
429
|
+
` binary: ${pc.dim('1M × 1024 / 8B')} = ${pc.cyan('128 MB')} (32× savings)`,
|
|
430
|
+
` ${pc.dim('+ reduced dimensions:')} 256-dim binary = ${pc.cyan('32 MB')} (128× savings)`,
|
|
431
|
+
``,
|
|
432
|
+
`${pc.bold('How binary quantization works:')} Each float value is converted to a single bit:`,
|
|
433
|
+
`positive values become 1, zero/negative become 0. Eight bits are packed into`,
|
|
434
|
+
`one byte. ${pc.cyan('binary')} uses offset binary (subtract 128) for signed int8 output;`,
|
|
435
|
+
`${pc.cyan('ubinary')} stores the raw unsigned uint8 value.`,
|
|
436
|
+
``,
|
|
437
|
+
`${pc.bold('Quality impact:')} Quantization-aware training minimizes degradation:`,
|
|
438
|
+
` ${pc.dim('•')} ${pc.cyan('int8/uint8')} — Typically <1% retrieval quality loss vs float`,
|
|
439
|
+
` ${pc.dim('•')} ${pc.cyan('binary/ubinary')} — ~2-5% quality loss; best paired with a reranker`,
|
|
440
|
+
` ${pc.dim('•')} Combining lower dimensions + quantization compounds the quality loss`,
|
|
441
|
+
``,
|
|
442
|
+
`${pc.bold('Matryoshka dimensions:')} Voyage 4 models produce ${pc.cyan('nested embeddings')} — the`,
|
|
443
|
+
`first 256 entries of a 1024-dim vector are themselves a valid 256-dim embedding.`,
|
|
444
|
+
`You can embed once at full dimension and truncate later without re-embedding.`,
|
|
445
|
+
`Supported values: 256, 512, 1024 (default), 2048.`,
|
|
446
|
+
``,
|
|
447
|
+
`${pc.bold('Which vector databases support quantized storage?')}`,
|
|
448
|
+
` ${pc.dim('•')} MongoDB Atlas Vector Search — float and int8`,
|
|
449
|
+
` ${pc.dim('•')} Milvus, Qdrant, Weaviate, Elasticsearch, Vespa — float, int8, binary`,
|
|
450
|
+
``,
|
|
451
|
+
`${pc.bold('Decision framework:')}`,
|
|
452
|
+
` 1. Start with ${pc.cyan('float')} at default dimensions — measure your baseline`,
|
|
453
|
+
` 2. Try ${pc.cyan('int8')} — if quality holds, you get 4× storage savings for free`,
|
|
454
|
+
` 3. If storage is critical, try ${pc.cyan('binary')} + reranker for 32× savings`,
|
|
455
|
+
` 4. Reduce dimensions (1024→256) for another 4× on top of quantization`,
|
|
456
|
+
` 5. Use ${pc.cyan('vai benchmark quantization')} to measure the tradeoffs on your data`,
|
|
457
|
+
].join('\n'),
|
|
458
|
+
links: [
|
|
459
|
+
'https://docs.voyageai.com/docs/flexible-dimensions-and-quantization',
|
|
460
|
+
'https://www.mongodb.com/docs/voyageai/models/text-embeddings/',
|
|
461
|
+
],
|
|
462
|
+
tryIt: [
|
|
463
|
+
'vai embed "hello world" --output-dtype int8',
|
|
464
|
+
'vai embed "hello world" --output-dtype binary --dimensions 256',
|
|
465
|
+
'vai benchmark quantization --model voyage-4-large',
|
|
466
|
+
],
|
|
467
|
+
},
|
|
409
468
|
benchmarking: {
|
|
410
469
|
title: 'Benchmarking & Model Selection',
|
|
411
470
|
summary: 'How to choose the right model for your use case',
|
|
@@ -434,12 +493,18 @@ const concepts = {
|
|
|
434
493
|
` Measures throughput (texts/sec) at different batch sizes.`,
|
|
435
494
|
` ${pc.dim('vai benchmark batch --batch-sizes 1,5,10,25,50 --rounds 3')}`,
|
|
436
495
|
``,
|
|
496
|
+
`${pc.bold('vai benchmark quantization')} — Compare output dtypes for storage savings:`,
|
|
497
|
+
` Embeds the same corpus with float, int8, and binary, measures ranking quality`,
|
|
498
|
+
` degradation vs storage savings. Helps you decide if quantization works for your data.`,
|
|
499
|
+
` ${pc.dim('vai benchmark quantization --model voyage-4-large --dtypes float,int8,ubinary')}`,
|
|
500
|
+
``,
|
|
437
501
|
`${pc.bold('Decision framework:')}`,
|
|
438
502
|
` 1. Run ${pc.cyan('benchmark cost')} to eliminate models outside your budget`,
|
|
439
503
|
` 2. Run ${pc.cyan('benchmark embed')} to compare latency of affordable models`,
|
|
440
504
|
` 3. Run ${pc.cyan('benchmark similarity')} with your actual data to compare quality`,
|
|
441
|
-
` 4.
|
|
442
|
-
` 5.
|
|
505
|
+
` 4. Run ${pc.cyan('benchmark quantization')} to see if int8/binary preserves your ranking`,
|
|
506
|
+
` 5. If quality is similar, pick the cheaper/faster model + smallest viable dtype`,
|
|
507
|
+
` 6. Use ${pc.cyan('--save')} to track results over time as your data evolves`,
|
|
443
508
|
].join('\n'),
|
|
444
509
|
links: ['https://www.mongodb.com/docs/voyageai/models/text-embeddings/'],
|
|
445
510
|
tryIt: [
|
|
@@ -488,6 +553,15 @@ const aliases = {
|
|
|
488
553
|
batch: 'batch-processing',
|
|
489
554
|
'batch-processing': 'batch-processing',
|
|
490
555
|
batching: 'batch-processing',
|
|
556
|
+
quantization: 'quantization',
|
|
557
|
+
quantize: 'quantization',
|
|
558
|
+
'output-dtype': 'quantization',
|
|
559
|
+
dtype: 'quantization',
|
|
560
|
+
int8: 'quantization',
|
|
561
|
+
binary: 'quantization',
|
|
562
|
+
ubinary: 'quantization',
|
|
563
|
+
matryoshka: 'quantization',
|
|
564
|
+
'flexible-dimensions': 'quantization',
|
|
491
565
|
benchmark: 'benchmarking',
|
|
492
566
|
benchmarking: 'benchmarking',
|
|
493
567
|
'model-selection': 'benchmarking',
|