voyageai-cli 1.11.0 → 1.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -0
- package/package.json +4 -1
- package/src/commands/benchmark.js +416 -0
- package/src/commands/embed.js +5 -0
- package/src/commands/models.js +3 -3
- package/src/commands/ping.js +32 -2
- package/src/commands/playground.js +8 -4
- package/src/commands/search.js +2 -2
- package/src/commands/store.js +14 -5
- package/src/lib/api.js +22 -14
- package/src/lib/catalog.js +3 -3
- package/src/lib/explanations.js +76 -2
- package/src/playground/index.html +411 -1
- package/test/commands/benchmark.test.js +67 -0
- package/test/commands/embed.test.js +10 -0
- package/test/lib/api.test.js +1 -2
- package/test/lib/explanations.test.js +6 -0
package/README.md
CHANGED
|
@@ -312,6 +312,29 @@ All commands support:
|
|
|
312
312
|
|
|
313
313
|
Free tier: 200M tokens for most models. All Voyage 4 series models share the same embedding space.
|
|
314
314
|
|
|
315
|
+
## Benchmarks: vai vs. Voyage AI's Published Results
|
|
316
|
+
|
|
317
|
+
Voyage AI publishes [retrieval quality benchmarks](https://blog.voyageai.com/2026/01/15/voyage-4/) — NDCG@10 scores across 29 RTEB datasets measuring how *accurate* each model's embeddings are. Their results show voyage-4-large outperforms Gemini Embedding 001 by 3.87%, Cohere Embed v4 by 8.20%, and OpenAI v3 Large by 14.05%.
|
|
318
|
+
|
|
319
|
+
**`vai benchmark` measures something different:** real-world latency, cost, and whether models agree on ranking *your specific data*. The two are complementary:
|
|
320
|
+
|
|
321
|
+
| | Voyage AI Benchmarks | vai benchmark |
|
|
322
|
+
|---|---|---|
|
|
323
|
+
| **Measures** | Retrieval quality (NDCG@10) | Latency, cost, ranking agreement |
|
|
324
|
+
| **Data** | 29 standardized datasets | Your actual data |
|
|
325
|
+
| **Answers** | "Which model produces the best embeddings?" | "For my data and budget, which model should I use?" |
|
|
326
|
+
|
|
327
|
+
Voyage AI's key insight — [asymmetric retrieval](https://blog.voyageai.com/2026/01/15/voyage-4/) (embed docs with voyage-4-large, query with voyage-4-lite) — is directly testable with `vai`:
|
|
328
|
+
|
|
329
|
+
```bash
|
|
330
|
+
# Does the cheap query model find the same results as the expensive one?
|
|
331
|
+
vai benchmark asymmetric --doc-model voyage-4-large \
|
|
332
|
+
--query-models voyage-4-large,voyage-4,voyage-4-lite \
|
|
333
|
+
--file your-corpus.txt --query "your actual query"
|
|
334
|
+
```
|
|
335
|
+
|
|
336
|
+
If rankings agree, you can embed documents once with voyage-4-large and query with voyage-4-lite — **6x cheaper** at query time with no re-indexing.
|
|
337
|
+
|
|
315
338
|
## Requirements
|
|
316
339
|
|
|
317
340
|
- Node.js 18+
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "voyageai-cli",
|
|
3
|
-
"version": "1.11.0",
|
|
3
|
+
"version": "1.12.1",
|
|
4
4
|
"description": "CLI for Voyage AI embeddings, reranking, and MongoDB Atlas Vector Search",
|
|
5
5
|
"bin": {
|
|
6
6
|
"vai": "./src/cli.js"
|
|
@@ -40,5 +40,8 @@
|
|
|
40
40
|
"ora": "^9.1.0",
|
|
41
41
|
"picocolors": "^1.1.1",
|
|
42
42
|
"update-notifier": "^7.3.1"
|
|
43
|
+
},
|
|
44
|
+
"devDependencies": {
|
|
45
|
+
"playwright": "^1.58.1"
|
|
43
46
|
}
|
|
44
47
|
}
|
|
@@ -719,6 +719,393 @@ async function benchmarkBatch(opts) {
|
|
|
719
719
|
console.log('');
|
|
720
720
|
}
|
|
721
721
|
|
|
722
|
+
/**
 * benchmark asymmetric — Test Voyage 4's asymmetric retrieval
 * (embed docs with one model, query with another).
 *
 * Embeds the corpus once with the document model, embeds the query once per
 * query model, ranks the corpus by cosine similarity for each query model and
 * reports how closely every ranking agrees with the first successful one.
 *
 * @param {object} opts - Parsed CLI options.
 * @param {string} [opts.docModel] - Model used for document embeddings (default 'voyage-4-large').
 * @param {string} [opts.queryModels] - Query model list, handed to parseModels().
 * @param {string} [opts.query] - Query text (default SAMPLE_QUERY).
 * @param {string} [opts.file] - Corpus file, read through loadTexts(); SAMPLE_RERANK_DOCS otherwise.
 * @param {string|number} [opts.topK] - Number of top results to compare (default 5).
 * @param {boolean} [opts.json] - Print a JSON document instead of the tables.
 * @param {boolean} [opts.quiet] - Suppress the header, spinners and latency table.
 * @returns {Promise<void>} Calls process.exit(1) when the documents cannot be
 *   embedded or (outside JSON mode) when no query model succeeds.
 */
async function benchmarkAsymmetric(opts) {
  const docModel = opts.docModel || 'voyage-4-large';
  const queryModels = opts.queryModels
    ? parseModels(opts.queryModels)
    : ['voyage-4-large', 'voyage-4', 'voyage-4-lite'];
  const query = opts.query || SAMPLE_QUERY;

  // A missing, non-numeric or non-positive --top-k falls back to 5 instead of
  // becoming NaN/0, which would silently print nothing.
  const requestedK = Number.parseInt(opts.topK, 10);
  const showK = Number.isInteger(requestedK) && requestedK > 0 ? requestedK : 5;

  let corpus;
  if (opts.file) {
    corpus = loadTexts(opts.file);
  } else {
    corpus = SAMPLE_RERANK_DOCS;
  }

  // Number of positions that can actually be compared. With a corpus smaller
  // than showK, using showK as the denominator could never report a full match.
  const compareK = Math.min(showK, corpus.length);

  if (!opts.json && !opts.quiet) {
    console.log('');
    console.log(ui.bold(' Asymmetric Retrieval Benchmark'));
    console.log(ui.dim(` Documents embedded with: ${docModel}`));
    console.log(ui.dim(` Query models: ${queryModels.join(', ')}`));
    console.log(ui.dim(` Query: "${query.substring(0, 60)}${query.length > 60 ? '...' : ''}"`));
    console.log(ui.dim(` ${corpus.length} documents`));
    console.log('');
  }

  // Step 1: Embed documents with the doc model
  const spin1 = (!opts.json && !opts.quiet) ? ui.spinner(` Embedding ${corpus.length} docs with ${docModel}...`) : null;
  if (spin1) spin1.start();

  let docEmbeddings;
  try {
    const docResult = await generateEmbeddings(corpus, { model: docModel, inputType: 'document' });
    docEmbeddings = docResult.data.map(d => d.embedding);
    if (spin1) spin1.stop();
  } catch (err) {
    if (spin1) spin1.stop();
    console.error(ui.error(`Failed to embed documents with ${docModel}: ${err.message}`));
    process.exit(1);
  }

  // Step 2: For each query model, embed the query and rank
  const allResults = [];

  for (const qModel of queryModels) {
    const spin = (!opts.json && !opts.quiet) ? ui.spinner(` Querying with ${qModel}...`) : null;
    if (spin) spin.start();

    try {
      // Only the query-embedding request is timed; ranking happens locally.
      const start = performance.now();
      const qResult = await generateEmbeddings([query], { model: qModel, inputType: 'query' });
      const elapsed = performance.now() - start;
      const queryEmbed = qResult.data[0].embedding;

      const ranked = corpus.map((text, i) => ({
        index: i,
        text,
        similarity: cosineSimilarity(queryEmbed, docEmbeddings[i]),
      })).sort((a, b) => b.similarity - a.similarity);

      allResults.push({
        queryModel: qModel,
        docModel,
        latency: elapsed,
        tokens: qResult.usage?.total_tokens || 0,
        ranked,
      });

      if (spin) spin.stop();
    } catch (err) {
      // A failing query model is reported and skipped; the others still run.
      if (spin) spin.stop();
      console.error(ui.warn(` ${qModel}: ${err.message} — skipping`));
    }
  }

  if (opts.json) {
    console.log(JSON.stringify({ benchmark: 'asymmetric', docModel, query, corpus: corpus.length, results: allResults }, null, 2));
    return;
  }

  if (allResults.length === 0) {
    console.error(ui.error('No query models completed successfully.'));
    process.exit(1);
  }

  // Show latency comparison
  if (!opts.quiet) {
    console.log(ui.dim(` ${rpad('Query Model', 22)} ${lpad('Latency', 8)} ${lpad('Tokens', 7)}`));
    console.log(ui.dim(' ' + '─'.repeat(40)));
    const minLat = Math.min(...allResults.map(r => r.latency));
    for (const r of allResults) {
      const badge = r.latency === minLat ? ui.green(' ⚡') : ' ';
      console.log(` ${rpad(r.queryModel, 22)} ${lpad(fmtMs(r.latency), 8)} ${lpad(String(r.tokens), 7)}${badge}`);
    }
    console.log('');
  }

  // Show ranking comparison
  console.log(ui.bold(` Top ${showK} results (docs embedded with ${ui.cyan(docModel)})`));
  console.log('');

  // The first query model that completed is the baseline every other model
  // is compared against.
  const baseline = allResults[0];

  for (let rank = 0; rank < compareK; rank++) {
    console.log(ui.dim(` #${rank + 1}`));
    for (const r of allResults) {
      const item = r.ranked[rank];
      const preview = item.text.substring(0, 50) + (item.text.length > 50 ? '...' : '');
      const match = baseline.ranked[rank].index === item.index ? ui.green('=') : ui.yellow('≠');
      console.log(` ${match} ${ui.cyan(rpad(r.queryModel, 20))} ${ui.score(item.similarity)} [${item.index}] ${ui.dim(preview)}`);
    }
  }

  console.log('');

  // Agreement analysis
  const baseOrder = baseline.ranked.slice(0, compareK).map(x => x.index);
  for (const r of allResults.slice(1)) {
    const rOrder = r.ranked.slice(0, compareK).map(x => x.index);
    const overlap = baseOrder.filter(idx => rOrder.includes(idx)).length;
    const exactMatch = baseOrder.filter((idx, i) => rOrder[i] === idx).length;
    const overlapPct = ((overlap / compareK) * 100).toFixed(0);
    const exactPct = ((exactMatch / compareK) * 100).toFixed(0);

    const price = getPrice(r.queryModel);
    const basePrice = getPrice(baseline.queryModel);
    const savings = (price && basePrice && price < basePrice)
      ? ` (${((1 - price / basePrice) * 100).toFixed(0)}% cheaper)`
      : '';

    if (exactMatch === compareK) {
      // Name the model the ranking was actually compared with (the baseline
      // query model), which is not necessarily the document model.
      console.log(ui.success(`${r.queryModel}: Identical ranking to ${baseline.queryModel}${savings} — asymmetric retrieval works perfectly.`));
    } else if (overlap === compareK) {
      console.log(ui.info(`${r.queryModel}: Same ${compareK} docs, ${exactPct}% exact order match${savings}.`));
    } else {
      console.log(ui.warn(`${r.queryModel}: ${overlapPct}% overlap in top-${compareK}${savings}.`));
    }
  }
  console.log('');
}
|
|
866
|
+
|
|
867
|
+
/**
 * benchmark quantization — Compare output dtypes for quality vs storage tradeoff.
 *
 * For every requested dtype the query and the corpus are embedded in a single
 * request, the corpus is ranked against the query, and the per-vector storage
 * size is derived from the returned embedding length. Rankings are compared
 * with a baseline: the float result when present, otherwise the first dtype
 * that completed.
 *
 * @param {object} opts - Parsed CLI options.
 * @param {string} [opts.model] - Embedding model (default getDefaultModel()).
 * @param {string} [opts.dtypes] - Comma-separated dtypes (default float,int8,ubinary).
 * @param {string} [opts.query] - Query text (default SAMPLE_QUERY).
 * @param {string} [opts.file] - Corpus file, read through loadTexts(); SAMPLE_RERANK_DOCS otherwise.
 * @param {string|number} [opts.dimensions] - Requested output dimensions.
 * @param {string|number} [opts.topK] - Number of top results to compare (default 5).
 * @param {boolean} [opts.json] - Print a JSON document instead of the tables.
 * @param {boolean} [opts.quiet] - Suppress the header and spinners.
 * @param {boolean|string} [opts.save] - Write results to a JSON file; a string is used as the path.
 * @returns {Promise<void>} Calls process.exit(1) (outside JSON mode) when no
 *   dtype completes.
 */
async function benchmarkQuantization(opts) {
  const model = opts.model || getDefaultModel();
  // Drop empty entries so a stray comma ("float,,int8") is not sent to the API
  // as an empty dtype.
  const dtypes = opts.dtypes
    ? opts.dtypes.split(',').map(d => d.trim()).filter(Boolean)
    : ['float', 'int8', 'ubinary'];
  const query = opts.query || SAMPLE_QUERY;
  const dimensions = opts.dimensions ? parseInt(opts.dimensions, 10) : undefined;

  // A missing, non-numeric or non-positive --top-k falls back to 5 instead of
  // becoming NaN/0, which would silently print nothing.
  const requestedK = Number.parseInt(opts.topK, 10);
  const showK = Number.isInteger(requestedK) && requestedK > 0 ? requestedK : 5;

  let corpus;
  if (opts.file) {
    corpus = loadTexts(opts.file);
  } else {
    corpus = SAMPLE_RERANK_DOCS;
  }

  // Number of positions that can actually be compared. With a corpus smaller
  // than showK, using showK as the denominator could never report a full match.
  const compareK = Math.min(showK, corpus.length);

  if (!opts.json && !opts.quiet) {
    console.log('');
    console.log(ui.bold(' Quantization Benchmark'));
    console.log(ui.dim(` Model: ${model}`));
    console.log(ui.dim(` Data types: ${dtypes.join(', ')}`));
    console.log(ui.dim(` ${corpus.length} documents, top-${showK} comparison`));
    if (dimensions) console.log(ui.dim(` Dimensions: ${dimensions}`));
    console.log('');
  }

  // Step 1: Embed query + corpus once per dtype (index 0 is the query)
  const allTexts = [query, ...corpus];
  const resultsByDtype = {};

  for (const dtype of dtypes) {
    const spin = (!opts.json && !opts.quiet) ? ui.spinner(` Embedding with ${dtype}...`) : null;
    if (spin) spin.start();

    try {
      const embedOpts = { model, inputType: 'document' };
      if (dimensions) embedOpts.dimensions = dimensions;
      // float is the default, so outputDtype is only sent for other dtypes.
      if (dtype !== 'float') embedOpts.outputDtype = dtype;

      const start = performance.now();
      const result = await generateEmbeddings(allTexts, embedOpts);
      const elapsed = performance.now() - start;

      if (spin) spin.stop();

      const embeddings = result.data.map(d => d.embedding);
      const queryEmbed = embeddings[0];
      const dims = embeddings[0].length;

      // For binary/ubinary, we can't directly cosine-similarity the packed ints
      // against float embeddings meaningfully. Instead we compare the ranking
      // each dtype produces independently.
      const ranked = corpus.map((text, i) => {
        const docEmbed = embeddings[i + 1];
        let sim;
        if (dtype === 'binary' || dtype === 'ubinary') {
          // Hamming similarity over the packed bytes
          // (higher = more bits agree = more similar)
          sim = hammingSimilarity(queryEmbed, docEmbed);
        } else {
          sim = cosineSimilarity(queryEmbed, docEmbed);
        }
        return { index: i, text, similarity: sim };
      }).sort((a, b) => b.similarity - a.similarity);

      // Calculate storage per vector
      let bytesPerVec;
      const actualDims = (dtype === 'binary' || dtype === 'ubinary') ? dims * 8 : dims;
      if (dtype === 'float') {
        bytesPerVec = dims * 4;
      } else if (dtype === 'int8' || dtype === 'uint8') {
        bytesPerVec = dims * 1;
      } else {
        // binary/ubinary: dims is already 1/8th of actual dimensions
        bytesPerVec = dims;
      }

      resultsByDtype[dtype] = {
        dtype,
        latency: elapsed,
        dims,
        actualDims,
        bytesPerVec,
        tokens: result.usage?.total_tokens || 0,
        ranked,
      };
    } catch (err) {
      // A failing dtype is reported and skipped; the others still run.
      if (spin) spin.stop();
      console.error(ui.warn(` ${dtype}: ${err.message} — skipping`));
    }
  }

  const completed = Object.values(resultsByDtype);

  if (opts.json) {
    const jsonResults = completed.map(r => ({
      dtype: r.dtype,
      latency: r.latency,
      dimensions: r.actualDims,
      bytesPerVector: r.bytesPerVec,
      ranking: r.ranked.slice(0, showK).map(x => ({ index: x.index, similarity: x.similarity })),
    }));
    console.log(JSON.stringify({ benchmark: 'quantization', model, results: jsonResults }, null, 2));
    return;
  }

  if (completed.length === 0) {
    console.error(ui.error('No data types completed successfully.'));
    process.exit(1);
  }

  // Storage comparison table
  console.log(ui.bold(' Storage Comparison'));
  console.log('');

  const sHeader = ` ${rpad('dtype', 10)} ${lpad('Dims', 8)} ${lpad('Bytes/vec', 12)} ${lpad('1M docs', 10)} ${lpad('Savings', 10)} ${lpad('Latency', 10)}`;
  console.log(ui.dim(sHeader));
  console.log(ui.dim(' ' + '─'.repeat(stripAnsi(sHeader).length - 2)));

  // float is the preferred baseline; when it was not requested or failed, the
  // first completed dtype stands in for it.
  const baseline = completed.find(r => r.dtype === 'float') || completed[0];
  const baselineBytes = baseline.bytesPerVec;

  for (const r of completed) {
    const savings = r.bytesPerVec < baselineBytes
      ? ui.green(`${(baselineBytes / r.bytesPerVec).toFixed(0)}×`)
      : ui.dim('baseline');

    const totalMB = (r.bytesPerVec * 1_000_000) / (1024 * 1024);
    let sizeStr;
    if (totalMB >= 1024) sizeStr = `${(totalMB / 1024).toFixed(1)} GB`;
    else sizeStr = `${totalMB.toFixed(0)} MB`;

    console.log(
      ` ${rpad(r.dtype, 10)} ${lpad(String(r.actualDims), 8)} ${lpad(formatBytes(r.bytesPerVec), 12)} ${lpad(sizeStr, 10)} ${lpad(savings, 10)} ${lpad(fmtMs(r.latency), 10)}`
    );
  }

  console.log('');

  // Ranking comparison
  console.log(ui.bold(` Ranking Comparison (top ${showK})`));
  console.log('');

  const baselineRanked = baseline.ranked;
  const baselineOrder = baselineRanked.slice(0, compareK).map(x => x.index);

  for (let rank = 0; rank < compareK; rank++) {
    console.log(ui.dim(` #${rank + 1}`));
    for (const r of completed) {
      const item = r.ranked[rank];
      const preview = item.text.substring(0, 45) + (item.text.length > 45 ? '...' : '');
      const matchesBaseline = (r === baseline) ? ' ' :
        (item.index === baselineRanked[rank].index ? ui.green('=') : ui.yellow('≠'));
      // Hamming similarity is a bit-agreement fraction, shown as a percentage.
      const simStr = (r.dtype === 'binary' || r.dtype === 'ubinary')
        ? `${(item.similarity * 100).toFixed(1)}%`
        : item.similarity.toFixed(4);
      console.log(` ${matchesBaseline} ${ui.cyan(rpad(r.dtype, 10))} ${lpad(simStr, 8)} [${item.index}] ${ui.dim(preview)}`);
    }
  }

  console.log('');

  // Agreement summary
  if (completed.length > 1) {
    for (const r of completed) {
      if (r === baseline) continue;
      const rOrder = r.ranked.slice(0, compareK).map(x => x.index);
      const overlap = baselineOrder.filter(idx => rOrder.includes(idx)).length;
      const exactMatch = baselineOrder.filter((idx, i) => rOrder[i] === idx).length;
      const overlapPct = ((overlap / compareK) * 100).toFixed(0);
      const exactPct = ((exactMatch / compareK) * 100).toFixed(0);
      const savingsX = (baselineBytes / r.bytesPerVec).toFixed(0);

      if (exactMatch === compareK) {
        // Name the dtype actually used as baseline, which is only "float"
        // when float was requested and succeeded.
        console.log(ui.success(`${r.dtype}: Identical ranking to ${baseline.dtype} — ${savingsX}× storage savings with zero quality loss.`));
      } else if (overlap === compareK) {
        console.log(ui.info(`${r.dtype}: Same top-${compareK} docs, ${exactPct}% exact order — ${savingsX}× smaller.`));
      } else {
        console.log(ui.warn(`${r.dtype}: ${overlapPct}% overlap in top-${compareK} — ${savingsX}× smaller. Consider using a reranker.`));
      }
    }
    console.log('');
  }

  // Save results
  if (opts.save) {
    const outData = {
      benchmark: 'quantization',
      timestamp: new Date().toISOString(),
      model,
      results: completed.map(r => ({
        dtype: r.dtype,
        latency: r.latency,
        dimensions: r.actualDims,
        bytesPerVector: r.bytesPerVec,
        topRanking: r.ranked.slice(0, showK),
      })),
    };
    // `--save` without a value arrives as boolean true; generate a file name then.
    const outPath = typeof opts.save === 'string' ? opts.save : `benchmark-quantization-${Date.now()}.json`;
    fs.writeFileSync(outPath, JSON.stringify(outData, null, 2));
    console.log(ui.info(`Results saved to ${outPath}`));
    console.log('');
  }
}
|
|
1074
|
+
|
|
1075
|
+
/**
 * Compute Hamming similarity between two packed binary vectors.
 *
 * Only the low 8 bits of each element are compared, and only the first
 * min(a.length, b.length) elements are considered.
 *
 * @param {ArrayLike<number>} a - First packed vector (one integer per byte).
 * @param {ArrayLike<number>} b - Second packed vector (one integer per byte).
 * @returns {number} Fraction of compared bits that agree, between 0 and 1.
 *   Returns 0 when there is nothing to compare, so callers that sort by the
 *   result never receive NaN.
 */
function hammingSimilarity(a, b) {
  const len = Math.min(a.length, b.length);
  // 0 / 0 would be NaN, which breaks numeric sort comparators.
  if (len === 0) return 0;

  let agreeBits = 0;
  for (let i = 0; i < len; i++) {
    // XOR leaves a 1 for every differing bit; the rest of the byte agrees.
    const differing = (a[i] & 0xFF) ^ (b[i] & 0xFF);
    agreeBits += 8 - popcount8(differing);
  }
  return agreeBits / (len * 8);
}
|
|
1091
|
+
|
|
1092
|
+
/**
 * Return the number of 1-bits in an 8-bit value.
 *
 * Works by summing bits in parallel: first within 2-bit pairs, then within
 * 4-bit nibbles, and finally across the two nibbles.
 */
function popcount8(v) {
  const pairSums = v - ((v >> 1) & 0x55);
  const nibbleSums = (pairSums & 0x33) + ((pairSums >> 2) & 0x33);
  const combined = nibbleSums + (nibbleSums >> 4);
  return combined & 0x0F;
}
|
|
1100
|
+
|
|
1101
|
+
/**
 * Render a byte count for display: values of 1024 and above are shown in KB
 * with one decimal place, anything smaller as a plain byte count.
 */
function formatBytes(bytes) {
  const BYTES_PER_KB = 1024;
  return bytes >= BYTES_PER_KB
    ? `${(bytes / BYTES_PER_KB).toFixed(1)} KB`
    : `${bytes} B`;
}
|
|
1108
|
+
|
|
722
1109
|
// ── Registration ──
|
|
723
1110
|
|
|
724
1111
|
/**
|
|
@@ -796,6 +1183,35 @@ function registerBenchmark(program) {
|
|
|
796
1183
|
.option('--json', 'Machine-readable JSON output')
|
|
797
1184
|
.option('-q, --quiet', 'Suppress non-essential output')
|
|
798
1185
|
.action(benchmarkBatch);
|
|
1186
|
+
|
|
1187
|
+
// ── benchmark quantization ──
|
|
1188
|
+
bench
|
|
1189
|
+
.command('quantization')
|
|
1190
|
+
.alias('quant')
|
|
1191
|
+
.description('Compare output dtypes (float/int8/binary) for quality vs storage')
|
|
1192
|
+
.option('-m, --model <model>', 'Embedding model to benchmark')
|
|
1193
|
+
.option('--dtypes <types>', 'Comma-separated output dtypes', 'float,int8,ubinary')
|
|
1194
|
+
.option('--query <text>', 'Search query')
|
|
1195
|
+
.option('-f, --file <path>', 'Corpus file (JSON array or newline-delimited)')
|
|
1196
|
+
.option('-k, --top-k <n>', 'Show top K results', '5')
|
|
1197
|
+
.option('-d, --dimensions <n>', 'Output dimensions')
|
|
1198
|
+
.option('--json', 'Machine-readable JSON output')
|
|
1199
|
+
.option('-q, --quiet', 'Suppress non-essential output')
|
|
1200
|
+
.option('-s, --save [path]', 'Save results to JSON file')
|
|
1201
|
+
.action(benchmarkQuantization);
|
|
1202
|
+
|
|
1203
|
+
// ── benchmark asymmetric ──
|
|
1204
|
+
bench
|
|
1205
|
+
.command('asymmetric')
|
|
1206
|
+
.description('Test asymmetric retrieval (docs with large model, queries with smaller)')
|
|
1207
|
+
.option('--doc-model <model>', 'Model to embed documents with', 'voyage-4-large')
|
|
1208
|
+
.option('--query-models <models>', 'Comma-separated query models', 'voyage-4-large,voyage-4,voyage-4-lite')
|
|
1209
|
+
.option('--query <text>', 'Search query')
|
|
1210
|
+
.option('-f, --file <path>', 'Corpus file (JSON array or newline-delimited)')
|
|
1211
|
+
.option('-k, --top-k <n>', 'Show top K results', '5')
|
|
1212
|
+
.option('--json', 'Machine-readable JSON output')
|
|
1213
|
+
.option('-q, --quiet', 'Suppress non-essential output')
|
|
1214
|
+
.action(benchmarkAsymmetric);
|
|
799
1215
|
}
|
|
800
1216
|
|
|
801
1217
|
module.exports = { registerBenchmark };
|
package/src/commands/embed.js
CHANGED
|
@@ -19,6 +19,7 @@ function registerEmbed(program) {
|
|
|
19
19
|
.option('-f, --file <path>', 'Read text from file')
|
|
20
20
|
.option('--truncation', 'Enable truncation for long inputs')
|
|
21
21
|
.option('--no-truncation', 'Disable truncation')
|
|
22
|
+
.option('--output-dtype <type>', 'Output data type: float, int8, uint8, binary, ubinary', 'float')
|
|
22
23
|
.option('-o, --output-format <format>', 'Output format: json or array', 'json')
|
|
23
24
|
.option('--json', 'Machine-readable JSON output')
|
|
24
25
|
.option('-q, --quiet', 'Suppress non-essential output')
|
|
@@ -49,6 +50,10 @@ function registerEmbed(program) {
|
|
|
49
50
|
if (opts.truncation !== undefined) {
|
|
50
51
|
embedOpts.truncation = opts.truncation;
|
|
51
52
|
}
|
|
53
|
+
// Only pass output_dtype when not the default float
|
|
54
|
+
if (opts.outputDtype && opts.outputDtype !== 'float') {
|
|
55
|
+
embedOpts.outputDtype = opts.outputDtype;
|
|
56
|
+
}
|
|
52
57
|
|
|
53
58
|
const result = await generateEmbeddings(texts, embedOpts);
|
|
54
59
|
|
package/src/commands/models.js
CHANGED
|
@@ -53,7 +53,7 @@ function registerModels(program) {
|
|
|
53
53
|
const legacyModels = models.filter(m => m.legacy);
|
|
54
54
|
|
|
55
55
|
if (opts.type !== 'all') {
|
|
56
|
-
models = models.filter(m => m.type === opts.type);
|
|
56
|
+
models = models.filter(m => opts.type === 'embedding' ? m.type.startsWith('embedding') : m.type === opts.type);
|
|
57
57
|
}
|
|
58
58
|
|
|
59
59
|
if (!showLegacy) {
|
|
@@ -84,14 +84,14 @@ function registerModels(program) {
|
|
|
84
84
|
|
|
85
85
|
// Build one row of the wide models table. Embedding-family types are shown
// in green, every other type in yellow.
const formatWideRow = (m) => {
  const nameCell = ui.cyan(m.name);
  let typeCell;
  if (m.type.startsWith('embedding')) {
    typeCell = ui.green(m.type);
  } else {
    typeCell = ui.yellow(m.type);
  }
  const priceCell = ui.dim(m.price);
  return [nameCell, typeCell, m.context, m.dimensions, priceCell, m.bestFor];
};
|
|
91
91
|
|
|
92
92
|
const formatCompactRow = (m) => {
|
|
93
93
|
const name = ui.cyan(m.name);
|
|
94
|
-
const type = m.type
|
|
94
|
+
const type = m.type.startsWith('embedding') ? ui.green(m.multimodal ? 'multi' : 'embed') : ui.yellow('rerank');
|
|
95
95
|
const dims = compactDimensions(m.dimensions);
|
|
96
96
|
const price = ui.dim(compactPrice(m.price));
|
|
97
97
|
return [name, type, dims, price, m.shortFor || m.bestFor];
|
package/src/commands/ping.js
CHANGED
|
@@ -13,7 +13,12 @@ function registerPing(program) {
|
|
|
13
13
|
.description('Test connectivity to Voyage AI API (and optionally MongoDB)')
|
|
14
14
|
.option('--json', 'Machine-readable JSON output')
|
|
15
15
|
.option('-q, --quiet', 'Suppress non-essential output')
|
|
16
|
+
.option('--mask', 'Mask sensitive info (cluster hostnames, endpoints) in output. Also enabled by VAI_MASK=1 env var.')
|
|
16
17
|
.action(async (opts) => {
|
|
18
|
+
// Support env var so all recordings are masked without remembering the flag
|
|
19
|
+
if (process.env.VAI_MASK === '1' || process.env.VAI_MASK === 'true') {
|
|
20
|
+
opts.mask = true;
|
|
21
|
+
}
|
|
17
22
|
const results = {};
|
|
18
23
|
|
|
19
24
|
// ── Voyage AI ping ──
|
|
@@ -28,6 +33,31 @@ function registerPing(program) {
|
|
|
28
33
|
const useColor = !opts.json;
|
|
29
34
|
const useSpinner = useColor && !opts.quiet;
|
|
30
35
|
|
|
36
|
+
// Hosts listed here are returned unmasked.
const PUBLIC_HOSTS = ['ai.mongodb.com', 'api.voyageai.com'];
// Masking helper: keeps at most the first five characters of the leading
// label and appends asterisks; the remaining labels are left untouched, e.g.
// "performance.zbcul.mongodb.net" → "perfo*****.zbcul.mongodb.net".
// Hosts with fewer than three labels are truncated as a whole.
const maskHost = (host) => {
  const maskingActive = Boolean(opts.mask) && Boolean(host);
  if (!maskingActive || PUBLIC_HOSTS.includes(host)) return host;

  const STARS = '*****';
  const [firstLabel, ...otherLabels] = host.split('.');
  if (otherLabels.length < 2) {
    return host.slice(0, 5) + STARS;
  }
  return [firstLabel.slice(0, 5) + STARS, ...otherLabels].join('.');
};
|
|
49
|
+
|
|
50
|
+
// Mask the hostname inside a URL string via maskHost and drop a single
// trailing slash from the result. Input that cannot be parsed as a URL is
// returned unchanged, as is everything when masking is off.
const maskUrl = (url) => {
  if (!(opts.mask && url)) return url;
  try {
    const parsed = new URL(url);
    parsed.hostname = maskHost(parsed.hostname);
    const rendered = parsed.toString();
    return rendered.replace(/\/$/, '');
  } catch {
    return url;
  }
};
|
|
60
|
+
|
|
31
61
|
const apiBase = getApiBase();
|
|
32
62
|
const model = 'voyage-4-lite';
|
|
33
63
|
const startTime = Date.now();
|
|
@@ -94,7 +124,7 @@ function registerPing(program) {
|
|
|
94
124
|
console.log(`ok ${elapsed}ms`);
|
|
95
125
|
} else {
|
|
96
126
|
console.log(ui.success(`Connected to Voyage AI API ${ui.dim('(' + elapsed + 'ms)')}`));
|
|
97
|
-
console.log(ui.label('Endpoint', apiBase));
|
|
127
|
+
console.log(ui.label('Endpoint', maskUrl(apiBase)));
|
|
98
128
|
console.log(ui.label('Model', model));
|
|
99
129
|
console.log(ui.label('Dimensions', String(dims)));
|
|
100
130
|
console.log(ui.label('Tokens', String(tokens)));
|
|
@@ -145,7 +175,7 @@ function registerPing(program) {
|
|
|
145
175
|
if (!opts.json && !opts.quiet) {
|
|
146
176
|
console.log('');
|
|
147
177
|
console.log(ui.success(`Connected to MongoDB Atlas ${ui.dim('(' + mongoElapsed + 'ms)')}`));
|
|
148
|
-
console.log(ui.label('Cluster', cluster));
|
|
178
|
+
console.log(ui.label('Cluster', maskHost(cluster)));
|
|
149
179
|
}
|
|
150
180
|
|
|
151
181
|
await client.close();
|
|
@@ -84,7 +84,7 @@ function createPlaygroundServer() {
|
|
|
84
84
|
|
|
85
85
|
// API: Models
|
|
86
86
|
if (req.method === 'GET' && req.url === '/api/models') {
|
|
87
|
-
const models = MODEL_CATALOG.filter(m => !m.legacy && !m.local);
|
|
87
|
+
const models = MODEL_CATALOG.filter(m => !m.legacy && !m.local && !m.unreleased);
|
|
88
88
|
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
89
89
|
res.end(JSON.stringify({ models }));
|
|
90
90
|
return;
|
|
@@ -137,17 +137,21 @@ function createPlaygroundServer() {
|
|
|
137
137
|
|
|
138
138
|
// API: Embed
|
|
139
139
|
if (req.url === '/api/embed') {
|
|
140
|
-
const { texts, model, inputType, dimensions } = parsed;
|
|
140
|
+
const { texts, model, inputType, dimensions, output_dtype } = parsed;
|
|
141
141
|
if (!texts || !Array.isArray(texts) || texts.length === 0) {
|
|
142
142
|
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
143
143
|
res.end(JSON.stringify({ error: 'texts must be a non-empty array' }));
|
|
144
144
|
return;
|
|
145
145
|
}
|
|
146
|
-
const
|
|
146
|
+
const embedOpts = {
|
|
147
147
|
model: model || undefined,
|
|
148
148
|
inputType: inputType || undefined,
|
|
149
149
|
dimensions: dimensions || undefined,
|
|
150
|
-
}
|
|
150
|
+
};
|
|
151
|
+
if (output_dtype && output_dtype !== 'float') {
|
|
152
|
+
embedOpts.outputDtype = output_dtype;
|
|
153
|
+
}
|
|
154
|
+
const result = await generateEmbeddings(texts, embedOpts);
|
|
151
155
|
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
152
156
|
res.end(JSON.stringify(result));
|
|
153
157
|
return;
|
package/src/commands/search.js
CHANGED
|
@@ -16,8 +16,8 @@ function registerSearch(program) {
|
|
|
16
16
|
.requiredOption('--query <text>', 'Search query text')
|
|
17
17
|
.requiredOption('--db <database>', 'Database name')
|
|
18
18
|
.requiredOption('--collection <name>', 'Collection name')
|
|
19
|
-
.
|
|
20
|
-
.
|
|
19
|
+
.option('--index <name>', 'Vector search index name', 'vector_index')
|
|
20
|
+
.option('--field <name>', 'Embedding field name', 'embedding')
|
|
21
21
|
.option('-m, --model <model>', 'Embedding model', getDefaultModel())
|
|
22
22
|
.option('--input-type <type>', 'Input type for query embedding', 'query')
|
|
23
23
|
.option('-d, --dimensions <n>', 'Output dimensions', (v) => parseInt(v, 10))
|
package/src/commands/store.js
CHANGED
|
@@ -17,12 +17,13 @@ function registerStore(program) {
|
|
|
17
17
|
.description('Embed text and store in MongoDB Atlas')
|
|
18
18
|
.requiredOption('--db <database>', 'Database name')
|
|
19
19
|
.requiredOption('--collection <name>', 'Collection name')
|
|
20
|
-
.
|
|
20
|
+
.option('--field <name>', 'Embedding field name', 'embedding')
|
|
21
21
|
.option('--text <text>', 'Text to embed and store')
|
|
22
22
|
.option('-f, --file <path>', 'File to embed and store (text file or .jsonl for batch mode)')
|
|
23
23
|
.option('-m, --model <model>', 'Embedding model', getDefaultModel())
|
|
24
24
|
.option('--input-type <type>', 'Input type: query or document', 'document')
|
|
25
25
|
.option('-d, --dimensions <n>', 'Output dimensions', (v) => parseInt(v, 10))
|
|
26
|
+
.option('--output-dtype <type>', 'Output data type: float, int8, uint8, binary, ubinary', 'float')
|
|
26
27
|
.option('--metadata <json>', 'Additional metadata as JSON')
|
|
27
28
|
.option('--json', 'Machine-readable JSON output')
|
|
28
29
|
.option('-q, --quiet', 'Suppress non-essential output')
|
|
@@ -46,11 +47,15 @@ function registerStore(program) {
|
|
|
46
47
|
spin.start();
|
|
47
48
|
}
|
|
48
49
|
|
|
49
|
-
const
|
|
50
|
+
const embedOpts = {
|
|
50
51
|
model: opts.model,
|
|
51
52
|
inputType: opts.inputType,
|
|
52
53
|
dimensions: opts.dimensions,
|
|
53
|
-
}
|
|
54
|
+
};
|
|
55
|
+
if (opts.outputDtype && opts.outputDtype !== 'float') {
|
|
56
|
+
embedOpts.outputDtype = opts.outputDtype;
|
|
57
|
+
}
|
|
58
|
+
const embedResult = await generateEmbeddings([textContent], embedOpts);
|
|
54
59
|
|
|
55
60
|
const embedding = embedResult.data[0].embedding;
|
|
56
61
|
|
|
@@ -147,11 +152,15 @@ async function handleBatchStore(opts) {
|
|
|
147
152
|
spin.start();
|
|
148
153
|
}
|
|
149
154
|
|
|
150
|
-
const
|
|
155
|
+
const batchEmbedOpts = {
|
|
151
156
|
model: opts.model,
|
|
152
157
|
inputType: opts.inputType,
|
|
153
158
|
dimensions: opts.dimensions,
|
|
154
|
-
}
|
|
159
|
+
};
|
|
160
|
+
if (opts.outputDtype && opts.outputDtype !== 'float') {
|
|
161
|
+
batchEmbedOpts.outputDtype = opts.outputDtype;
|
|
162
|
+
}
|
|
163
|
+
const embedResult = await generateEmbeddings(texts, batchEmbedOpts);
|
|
155
164
|
|
|
156
165
|
const docs = records.map((record, i) => {
|
|
157
166
|
const embedding = embedResult.data[i].embedding;
|