voyageai-cli 1.10.0 → 1.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -312,6 +312,29 @@ All commands support:
312
312
 
313
313
  Free tier: 200M tokens for most models. All Voyage 4 series models share the same embedding space.
314
314
 
315
+ ## Benchmarks: vai vs. Voyage AI's Published Results
316
+
317
+ Voyage AI publishes [retrieval quality benchmarks](https://blog.voyageai.com/2026/01/15/voyage-4/) — NDCG@10 scores across 29 RTEB datasets measuring how *accurate* each model's embeddings are. Their results show voyage-4-large outperforms Gemini Embedding 001 by 3.87%, Cohere Embed v4 by 8.20%, and OpenAI v3 Large by 14.05%.
318
+
319
+ **`vai benchmark` measures something different:** real-world latency, cost, and whether models agree on ranking *your specific data*. The two are complementary:
320
+
321
+ | | Voyage AI Benchmarks | vai benchmark |
322
+ |---|---|---|
323
+ | **Measures** | Retrieval quality (NDCG@10) | Latency, cost, ranking agreement |
324
+ | **Data** | 29 standardized datasets | Your actual data |
325
+ | **Answers** | "Which model produces the best embeddings?" | "For my data and budget, which model should I use?" |
326
+
327
+ Voyage AI's key insight — [asymmetric retrieval](https://blog.voyageai.com/2026/01/15/voyage-4/) (embed docs with voyage-4-large, query with voyage-4-lite) — is directly testable with `vai`:
328
+
329
+ ```bash
330
+ # Does the cheap query model find the same results as the expensive one?
331
+ vai benchmark asymmetric --doc-model voyage-4-large \
332
+ --query-models voyage-4-large,voyage-4,voyage-4-lite \
333
+ --file your-corpus.txt --query "your actual query"
334
+ ```
335
+
336
+ If rankings agree, you can embed documents once with voyage-4-large and query with voyage-4-lite — **6x cheaper** at query time with no re-indexing.
337
+
315
338
  ## Requirements
316
339
 
317
340
  - Node.js 18+
package/demo.gif CHANGED
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "voyageai-cli",
3
- "version": "1.10.0",
3
+ "version": "1.12.0",
4
4
  "description": "CLI for Voyage AI embeddings, reranking, and MongoDB Atlas Vector Search",
5
5
  "bin": {
6
6
  "vai": "./src/cli.js"
package/src/cli.js CHANGED
@@ -20,6 +20,7 @@ const { registerIngest } = require('./commands/ingest');
20
20
  const { registerCompletions } = require('./commands/completions');
21
21
  const { registerPlayground } = require('./commands/playground');
22
22
  const { registerBenchmark } = require('./commands/benchmark');
23
+ const { registerAbout } = require('./commands/about');
23
24
  const { showBanner, showQuickStart, getVersion } = require('./lib/banner');
24
25
 
25
26
  const version = getVersion();
@@ -44,6 +45,7 @@ registerIngest(program);
44
45
  registerCompletions(program);
45
46
  registerPlayground(program);
46
47
  registerBenchmark(program);
48
+ registerAbout(program);
47
49
 
48
50
  // Append disclaimer to all help output
49
51
  program.addHelpText('after', `
@@ -0,0 +1,85 @@
1
+ 'use strict';
2
+
3
+ const pc = require('picocolors');
4
+
5
+ /**
6
+ * Register the about command on a Commander program.
7
+ * @param {import('commander').Command} program
8
+ */
9
+ function registerAbout(program) {
10
+ program
11
+ .command('about')
12
+ .description('About this tool and its author')
13
+ .option('--json', 'Machine-readable JSON output')
14
+ .action((opts) => {
15
+ if (opts.json) {
16
+ console.log(JSON.stringify({
17
+ tool: 'voyageai-cli',
18
+ binary: 'vai',
19
+ author: {
20
+ name: 'Michael Lynn',
21
+ role: 'Principal Staff Developer Advocate, MongoDB',
22
+ github: 'https://github.com/mrlynn',
23
+ website: 'https://mlynn.org',
24
+ },
25
+ links: {
26
+ npm: 'https://www.npmjs.com/package/voyageai-cli',
27
+ github: 'https://github.com/mrlynn/voyageai-cli',
28
+ docs: 'https://www.mongodb.com/docs/voyageai/',
29
+ },
30
+ disclaimer: 'Community tool — not an official MongoDB or Voyage AI product.',
31
+ }, null, 2));
32
+ return;
33
+ }
34
+
35
+ console.log('');
36
+ console.log(` ${pc.bold(pc.cyan('voyageai-cli'))} ${pc.dim('(vai)')}`);
37
+ console.log(` ${pc.dim('Voyage AI embeddings, reranking & Atlas Vector Search CLI')}`);
38
+ console.log('');
39
+
40
+ // Author
41
+ console.log(` ${pc.bold('Author')}`);
42
+ console.log(` Michael Lynn`);
43
+ console.log(` ${pc.dim('Principal Staff Developer Advocate · MongoDB')}`);
44
+ console.log(` ${pc.dim('25+ years enterprise infrastructure · 10+ years at MongoDB')}`);
45
+ console.log('');
46
+
47
+ // About
48
+ console.log(` ${pc.bold('About This Project')}`);
49
+ console.log(` A community-built CLI for working with Voyage AI embeddings,`);
50
+ console.log(` reranking, and MongoDB Atlas Vector Search. Created to help`);
51
+ console.log(` developers explore, benchmark, and integrate Voyage AI models`);
52
+ console.log(` into their applications — right from the terminal.`);
53
+ console.log('');
54
+
55
+ // Features
56
+ console.log(` ${pc.bold('What You Can Do')}`);
57
+ console.log(` ${pc.cyan('vai embed')} Generate vector embeddings for text`);
58
+ console.log(` ${pc.cyan('vai similarity')} Compare texts with cosine similarity`);
59
+ console.log(` ${pc.cyan('vai rerank')} Rerank documents against a query`);
60
+ console.log(` ${pc.cyan('vai search')} Vector search against Atlas collections`);
61
+ console.log(` ${pc.cyan('vai store')} Embed and store documents in Atlas`);
62
+ console.log(` ${pc.cyan('vai benchmark')} Compare model latency, ranking & costs`);
63
+ console.log(` ${pc.cyan('vai explain')} Learn about embeddings, vector search & more`);
64
+ console.log(` ${pc.cyan('vai playground')} Launch interactive web playground`);
65
+ console.log('');
66
+
67
+ // Links
68
+ console.log(` ${pc.bold('Links')}`);
69
+ console.log(` ${pc.dim('npm:')} https://www.npmjs.com/package/voyageai-cli`);
70
+ console.log(` ${pc.dim('GitHub:')} https://github.com/mrlynn/voyageai-cli`);
71
+ console.log(` ${pc.dim('Docs:')} https://www.mongodb.com/docs/voyageai/`);
72
+ console.log(` ${pc.dim('Author:')} https://mlynn.org`);
73
+ console.log('');
74
+
75
+ // Disclaimer
76
+ console.log(` ${pc.yellow('⚠ Community Tool Disclaimer')}`);
77
+ console.log(` ${pc.dim('This tool is not an official product of MongoDB, Inc. or')}`);
78
+ console.log(` ${pc.dim('Voyage AI. It is independently built and maintained by')}`);
79
+ console.log(` ${pc.dim('Michael Lynn as a community resource. Not supported,')}`);
80
+ console.log(` ${pc.dim('endorsed, or guaranteed by either company.')}`);
81
+ console.log('');
82
+ });
83
+ }
84
+
85
+ module.exports = { registerAbout };
@@ -21,6 +21,8 @@ const SAMPLE_TEXTS = [
21
21
  'GraphQL provides a flexible query language that lets clients request exactly the data they need.',
22
22
  ];
23
23
 
24
+ // Default query used by benchmark commands when no --query is supplied.
25
+ // Kept representative of a typical semantic-search question over the sample corpus.
24
26
  const SAMPLE_QUERY = 'How do I search for similar documents using embeddings?';
25
27
 
26
28
  const SAMPLE_RERANK_DOCS = [
@@ -717,6 +719,393 @@ async function benchmarkBatch(opts) {
717
719
  console.log('');
718
720
  }
719
721
 
722
+ /**
723
+ * benchmark asymmetric — Test Voyage 4's asymmetric retrieval
724
+ * (embed docs with one model, query with another).
725
+ */
726
+ async function benchmarkAsymmetric(opts) {
727
+ const docModel = opts.docModel || 'voyage-4-large';
728
+ const queryModels = opts.queryModels
729
+ ? parseModels(opts.queryModels)
730
+ : ['voyage-4-large', 'voyage-4', 'voyage-4-lite'];
731
+ const query = opts.query || SAMPLE_QUERY;
732
+ const showK = opts.topK ? parseInt(opts.topK, 10) : 5;
733
+
734
+ let corpus;
735
+ if (opts.file) {
736
+ corpus = loadTexts(opts.file);
737
+ } else {
738
+ corpus = SAMPLE_RERANK_DOCS;
739
+ }
740
+
741
+ if (!opts.json && !opts.quiet) {
742
+ console.log('');
743
+ console.log(ui.bold(' Asymmetric Retrieval Benchmark'));
744
+ console.log(ui.dim(` Documents embedded with: ${docModel}`));
745
+ console.log(ui.dim(` Query models: ${queryModels.join(', ')}`));
746
+ console.log(ui.dim(` Query: "${query.substring(0, 60)}${query.length > 60 ? '...' : ''}"`));
747
+ console.log(ui.dim(` ${corpus.length} documents`));
748
+ console.log('');
749
+ }
750
+
751
+ // Step 1: Embed documents with the doc model
752
+ const spin1 = (!opts.json && !opts.quiet) ? ui.spinner(` Embedding ${corpus.length} docs with ${docModel}...`) : null;
753
+ if (spin1) spin1.start();
754
+
755
+ let docEmbeddings;
756
+ try {
757
+ const docResult = await generateEmbeddings(corpus, { model: docModel, inputType: 'document' });
758
+ docEmbeddings = docResult.data.map(d => d.embedding);
759
+ if (spin1) spin1.stop();
760
+ } catch (err) {
761
+ if (spin1) spin1.stop();
762
+ console.error(ui.error(`Failed to embed documents with ${docModel}: ${err.message}`));
763
+ process.exit(1);
764
+ }
765
+
766
+ // Step 2: For each query model, embed the query and rank
767
+ const allResults = [];
768
+
769
+ for (const qModel of queryModels) {
770
+ const spin = (!opts.json && !opts.quiet) ? ui.spinner(` Querying with ${qModel}...`) : null;
771
+ if (spin) spin.start();
772
+
773
+ try {
774
+ const start = performance.now();
775
+ const qResult = await generateEmbeddings([query], { model: qModel, inputType: 'query' });
776
+ const elapsed = performance.now() - start;
777
+ const queryEmbed = qResult.data[0].embedding;
778
+
779
+ const ranked = corpus.map((text, i) => ({
780
+ index: i,
781
+ text,
782
+ similarity: cosineSimilarity(queryEmbed, docEmbeddings[i]),
783
+ })).sort((a, b) => b.similarity - a.similarity);
784
+
785
+ allResults.push({
786
+ queryModel: qModel,
787
+ docModel,
788
+ latency: elapsed,
789
+ tokens: qResult.usage?.total_tokens || 0,
790
+ ranked,
791
+ });
792
+
793
+ if (spin) spin.stop();
794
+ } catch (err) {
795
+ if (spin) spin.stop();
796
+ console.error(ui.warn(` ${qModel}: ${err.message} — skipping`));
797
+ }
798
+ }
799
+
800
+ if (opts.json) {
801
+ console.log(JSON.stringify({ benchmark: 'asymmetric', docModel, query, corpus: corpus.length, results: allResults }, null, 2));
802
+ return;
803
+ }
804
+
805
+ if (allResults.length === 0) {
806
+ console.error(ui.error('No query models completed successfully.'));
807
+ process.exit(1);
808
+ }
809
+
810
+ // Show latency comparison
811
+ if (!opts.quiet) {
812
+ console.log(ui.dim(` ${rpad('Query Model', 22)} ${lpad('Latency', 8)} ${lpad('Tokens', 7)}`));
813
+ console.log(ui.dim(' ' + '─'.repeat(40)));
814
+ const minLat = Math.min(...allResults.map(r => r.latency));
815
+ for (const r of allResults) {
816
+ const badge = r.latency === minLat ? ui.green(' ⚡') : ' ';
817
+ console.log(` ${rpad(r.queryModel, 22)} ${lpad(fmtMs(r.latency), 8)} ${lpad(String(r.tokens), 7)}${badge}`);
818
+ }
819
+ console.log('');
820
+ }
821
+
822
+ // Show ranking comparison
823
+ console.log(ui.bold(` Top ${showK} results (docs embedded with ${ui.cyan(docModel)})`));
824
+ console.log('');
825
+
826
+ // Use the full-model result as baseline
827
+ const baseline = allResults[0];
828
+
829
+ for (let rank = 0; rank < showK && rank < corpus.length; rank++) {
830
+ console.log(ui.dim(` #${rank + 1}`));
831
+ for (const r of allResults) {
832
+ const item = r.ranked[rank];
833
+ const preview = item.text.substring(0, 50) + (item.text.length > 50 ? '...' : '');
834
+ const match = baseline.ranked[rank].index === item.index ? ui.green('=') : ui.yellow('≠');
835
+ console.log(` ${match} ${ui.cyan(rpad(r.queryModel, 20))} ${ui.score(item.similarity)} [${item.index}] ${ui.dim(preview)}`);
836
+ }
837
+ }
838
+
839
+ console.log('');
840
+
841
+ // Agreement analysis
842
+ const baseOrder = baseline.ranked.slice(0, showK).map(x => x.index);
843
+ for (const r of allResults.slice(1)) {
844
+ const rOrder = r.ranked.slice(0, showK).map(x => x.index);
845
+ const overlap = baseOrder.filter(idx => rOrder.includes(idx)).length;
846
+ const exactMatch = baseOrder.filter((idx, i) => rOrder[i] === idx).length;
847
+ const overlapPct = ((overlap / showK) * 100).toFixed(0);
848
+ const exactPct = ((exactMatch / showK) * 100).toFixed(0);
849
+
850
+ const price = getPrice(r.queryModel);
851
+ const basePrice = getPrice(baseline.queryModel);
852
+ const savings = (price && basePrice && price < basePrice)
853
+ ? ` (${((1 - price / basePrice) * 100).toFixed(0)}% cheaper)`
854
+ : '';
855
+
856
+ if (exactMatch === showK) {
857
+ console.log(ui.success(`${r.queryModel}: Identical ranking to ${docModel}${savings} — asymmetric retrieval works perfectly.`));
858
+ } else if (overlap === showK) {
859
+ console.log(ui.info(`${r.queryModel}: Same ${showK} docs, ${exactPct}% exact order match${savings}.`));
860
+ } else {
861
+ console.log(ui.warn(`${r.queryModel}: ${overlapPct}% overlap in top-${showK}${savings}.`));
862
+ }
863
+ }
864
+ console.log('');
865
+ }
866
+
867
+ /**
868
+ * benchmark quantization — Compare output dtypes for quality vs storage tradeoff.
869
+ */
870
+ async function benchmarkQuantization(opts) {
871
+ const model = opts.model || getDefaultModel();
872
+ const dtypes = opts.dtypes
873
+ ? opts.dtypes.split(',').map(d => d.trim())
874
+ : ['float', 'int8', 'ubinary'];
875
+ const query = opts.query || SAMPLE_QUERY;
876
+ const dimensions = opts.dimensions ? parseInt(opts.dimensions, 10) : undefined;
877
+ const showK = opts.topK ? parseInt(opts.topK, 10) : 5;
878
+
879
+ let corpus;
880
+ if (opts.file) {
881
+ corpus = loadTexts(opts.file);
882
+ } else {
883
+ corpus = SAMPLE_RERANK_DOCS;
884
+ }
885
+
886
+ if (!opts.json && !opts.quiet) {
887
+ console.log('');
888
+ console.log(ui.bold(' Quantization Benchmark'));
889
+ console.log(ui.dim(` Model: ${model}`));
890
+ console.log(ui.dim(` Data types: ${dtypes.join(', ')}`));
891
+ console.log(ui.dim(` ${corpus.length} documents, top-${showK} comparison`));
892
+ if (dimensions) console.log(ui.dim(` Dimensions: ${dimensions}`));
893
+ console.log('');
894
+ }
895
+
896
+ // Step 1: Get float baseline embeddings (query + corpus)
897
+ const allTexts = [query, ...corpus];
898
+ const resultsByDtype = {};
899
+
900
+ for (const dtype of dtypes) {
901
+ const spin = (!opts.json && !opts.quiet) ? ui.spinner(` Embedding with ${dtype}...`) : null;
902
+ if (spin) spin.start();
903
+
904
+ try {
905
+ const embedOpts = { model, inputType: 'document' };
906
+ if (dimensions) embedOpts.dimensions = dimensions;
907
+ if (dtype !== 'float') embedOpts.outputDtype = dtype;
908
+
909
+ const start = performance.now();
910
+ const result = await generateEmbeddings(allTexts, embedOpts);
911
+ const elapsed = performance.now() - start;
912
+
913
+ if (spin) spin.stop();
914
+
915
+ const embeddings = result.data.map(d => d.embedding);
916
+ const queryEmbed = embeddings[0];
917
+ const dims = embeddings[0].length;
918
+
919
+ // For binary/ubinary, we can't directly cosine-similarity the packed ints
920
+ // against float embeddings meaningfully. Instead we compare the ranking
921
+ // each dtype produces independently.
922
+ const ranked = corpus.map((text, i) => {
923
+ const docEmbed = embeddings[i + 1];
924
+ let sim;
925
+ if (dtype === 'binary' || dtype === 'ubinary') {
926
+ // Hamming-style: compute dot product of packed int arrays
927
+ // (higher = more bits agree = more similar)
928
+ sim = hammingSimilarity(queryEmbed, docEmbed);
929
+ } else {
930
+ sim = cosineSimilarity(queryEmbed, docEmbed);
931
+ }
932
+ return { index: i, text, similarity: sim };
933
+ }).sort((a, b) => b.similarity - a.similarity);
934
+
935
+ // Calculate storage per vector
936
+ let bytesPerVec;
937
+ const actualDims = (dtype === 'binary' || dtype === 'ubinary') ? dims * 8 : dims;
938
+ if (dtype === 'float') {
939
+ bytesPerVec = dims * 4;
940
+ } else if (dtype === 'int8' || dtype === 'uint8') {
941
+ bytesPerVec = dims * 1;
942
+ } else {
943
+ // binary/ubinary: dims is already 1/8th of actual dimensions
944
+ bytesPerVec = dims;
945
+ }
946
+
947
+ resultsByDtype[dtype] = {
948
+ dtype,
949
+ latency: elapsed,
950
+ dims,
951
+ actualDims,
952
+ bytesPerVec,
953
+ tokens: result.usage?.total_tokens || 0,
954
+ ranked,
955
+ };
956
+ } catch (err) {
957
+ if (spin) spin.stop();
958
+ console.error(ui.warn(` ${dtype}: ${err.message} — skipping`));
959
+ }
960
+ }
961
+
962
+ const completed = Object.values(resultsByDtype);
963
+
964
+ if (opts.json) {
965
+ const jsonResults = completed.map(r => ({
966
+ dtype: r.dtype,
967
+ latency: r.latency,
968
+ dimensions: r.actualDims,
969
+ bytesPerVector: r.bytesPerVec,
970
+ ranking: r.ranked.slice(0, showK).map(x => ({ index: x.index, similarity: x.similarity })),
971
+ }));
972
+ console.log(JSON.stringify({ benchmark: 'quantization', model, results: jsonResults }, null, 2));
973
+ return;
974
+ }
975
+
976
+ if (completed.length === 0) {
977
+ console.error(ui.error('No data types completed successfully.'));
978
+ process.exit(1);
979
+ }
980
+
981
+ // Storage comparison table
982
+ console.log(ui.bold(' Storage Comparison'));
983
+ console.log('');
984
+
985
+ const sHeader = ` ${rpad('dtype', 10)} ${lpad('Dims', 8)} ${lpad('Bytes/vec', 12)} ${lpad('1M docs', 10)} ${lpad('Savings', 10)} ${lpad('Latency', 10)}`;
986
+ console.log(ui.dim(sHeader));
987
+ console.log(ui.dim(' ' + '─'.repeat(stripAnsi(sHeader).length - 2)));
988
+
989
+ const baseline = completed.find(r => r.dtype === 'float') || completed[0];
990
+ const baselineBytes = baseline.bytesPerVec;
991
+
992
+ for (const r of completed) {
993
+ const savings = r.bytesPerVec < baselineBytes
994
+ ? ui.green(`${(baselineBytes / r.bytesPerVec).toFixed(0)}×`)
995
+ : ui.dim('baseline');
996
+
997
+ const totalMB = (r.bytesPerVec * 1_000_000) / (1024 * 1024);
998
+ let sizeStr;
999
+ if (totalMB >= 1024) sizeStr = `${(totalMB / 1024).toFixed(1)} GB`;
1000
+ else sizeStr = `${totalMB.toFixed(0)} MB`;
1001
+
1002
+ console.log(
1003
+ ` ${rpad(r.dtype, 10)} ${lpad(String(r.actualDims), 8)} ${lpad(formatBytes(r.bytesPerVec), 12)} ${lpad(sizeStr, 10)} ${lpad(savings, 10)} ${lpad(fmtMs(r.latency), 10)}`
1004
+ );
1005
+ }
1006
+
1007
+ console.log('');
1008
+
1009
+ // Ranking comparison
1010
+ console.log(ui.bold(` Ranking Comparison (top ${showK})`));
1011
+ console.log('');
1012
+
1013
+ const baselineRanked = baseline.ranked;
1014
+ const baselineOrder = baselineRanked.slice(0, showK).map(x => x.index);
1015
+
1016
+ for (let rank = 0; rank < showK && rank < corpus.length; rank++) {
1017
+ console.log(ui.dim(` #${rank + 1}`));
1018
+ for (const r of completed) {
1019
+ const item = r.ranked[rank];
1020
+ const preview = item.text.substring(0, 45) + (item.text.length > 45 ? '...' : '');
1021
+ const matchesBaseline = (r === baseline) ? ' ' :
1022
+ (item.index === baselineRanked[rank].index ? ui.green('=') : ui.yellow('≠'));
1023
+ const simStr = (r.dtype === 'binary' || r.dtype === 'ubinary')
1024
+ ? `${(item.similarity * 100).toFixed(1)}%`
1025
+ : item.similarity.toFixed(4);
1026
+ console.log(` ${matchesBaseline} ${ui.cyan(rpad(r.dtype, 10))} ${lpad(simStr, 8)} [${item.index}] ${ui.dim(preview)}`);
1027
+ }
1028
+ }
1029
+
1030
+ console.log('');
1031
+
1032
+ // Agreement summary
1033
+ if (completed.length > 1) {
1034
+ for (const r of completed) {
1035
+ if (r === baseline) continue;
1036
+ const rOrder = r.ranked.slice(0, showK).map(x => x.index);
1037
+ const overlap = baselineOrder.filter(idx => rOrder.includes(idx)).length;
1038
+ const exactMatch = baselineOrder.filter((idx, i) => rOrder[i] === idx).length;
1039
+ const overlapPct = ((overlap / showK) * 100).toFixed(0);
1040
+ const exactPct = ((exactMatch / showK) * 100).toFixed(0);
1041
+ const savingsX = (baselineBytes / r.bytesPerVec).toFixed(0);
1042
+
1043
+ if (exactMatch === showK) {
1044
+ console.log(ui.success(`${r.dtype}: Identical ranking to float — ${savingsX}× storage savings with zero quality loss.`));
1045
+ } else if (overlap === showK) {
1046
+ console.log(ui.info(`${r.dtype}: Same top-${showK} docs, ${exactPct}% exact order — ${savingsX}× smaller.`));
1047
+ } else {
1048
+ console.log(ui.warn(`${r.dtype}: ${overlapPct}% overlap in top-${showK} — ${savingsX}× smaller. Consider using a reranker.`));
1049
+ }
1050
+ }
1051
+ console.log('');
1052
+ }
1053
+
1054
+ // Save results
1055
+ if (opts.save) {
1056
+ const outData = {
1057
+ benchmark: 'quantization',
1058
+ timestamp: new Date().toISOString(),
1059
+ model,
1060
+ results: completed.map(r => ({
1061
+ dtype: r.dtype,
1062
+ latency: r.latency,
1063
+ dimensions: r.actualDims,
1064
+ bytesPerVector: r.bytesPerVec,
1065
+ topRanking: r.ranked.slice(0, showK),
1066
+ })),
1067
+ };
1068
+ const outPath = typeof opts.save === 'string' ? opts.save : `benchmark-quantization-${Date.now()}.json`;
1069
+ fs.writeFileSync(outPath, JSON.stringify(outData, null, 2));
1070
+ console.log(ui.info(`Results saved to ${outPath}`));
1071
+ console.log('');
1072
+ }
1073
+ }
1074
+
1075
+ /**
1076
+ * Compute Hamming similarity between two packed binary vectors.
1077
+ * Returns a value between 0 and 1 (fraction of bits that agree).
1078
+ */
1079
+ function hammingSimilarity(a, b) {
1080
+ const len = Math.min(a.length, b.length);
1081
+ let agreeBits = 0;
1082
+ const totalBits = len * 8;
1083
+ for (let i = 0; i < len; i++) {
1084
+ // XOR to find differing bits, then count matching bits
1085
+ const xor = (a[i] & 0xFF) ^ (b[i] & 0xFF);
1086
+ // popcount via bit tricks
1087
+ agreeBits += 8 - popcount8(xor);
1088
+ }
1089
+ return agreeBits / totalBits;
1090
+ }
1091
+
1092
+ /**
1093
+ * Count set bits in an 8-bit value.
1094
+ */
1095
+ function popcount8(v) {
1096
+ v = v - ((v >> 1) & 0x55);
1097
+ v = (v & 0x33) + ((v >> 2) & 0x33);
1098
+ return (v + (v >> 4)) & 0x0F;
1099
+ }
1100
+
1101
+ /**
1102
+ * Format bytes into a human-readable string.
1103
+ */
1104
+ function formatBytes(bytes) {
1105
+ if (bytes >= 1024) return `${(bytes / 1024).toFixed(1)} KB`;
1106
+ return `${bytes} B`;
1107
+ }
1108
+
720
1109
  // ── Registration ──
721
1110
 
722
1111
  /**
@@ -794,6 +1183,35 @@ function registerBenchmark(program) {
794
1183
  .option('--json', 'Machine-readable JSON output')
795
1184
  .option('-q, --quiet', 'Suppress non-essential output')
796
1185
  .action(benchmarkBatch);
1186
+
1187
+ // ── benchmark quantization ──
1188
+ bench
1189
+ .command('quantization')
1190
+ .alias('quant')
1191
+ .description('Compare output dtypes (float/int8/binary) for quality vs storage')
1192
+ .option('-m, --model <model>', 'Embedding model to benchmark')
1193
+ .option('--dtypes <types>', 'Comma-separated output dtypes', 'float,int8,ubinary')
1194
+ .option('--query <text>', 'Search query')
1195
+ .option('-f, --file <path>', 'Corpus file (JSON array or newline-delimited)')
1196
+ .option('-k, --top-k <n>', 'Show top K results', '5')
1197
+ .option('-d, --dimensions <n>', 'Output dimensions')
1198
+ .option('--json', 'Machine-readable JSON output')
1199
+ .option('-q, --quiet', 'Suppress non-essential output')
1200
+ .option('-s, --save [path]', 'Save results to JSON file')
1201
+ .action(benchmarkQuantization);
1202
+
1203
+ // ── benchmark asymmetric ──
1204
+ bench
1205
+ .command('asymmetric')
1206
+ .description('Test asymmetric retrieval (docs with large model, queries with smaller)')
1207
+ .option('--doc-model <model>', 'Model to embed documents with', 'voyage-4-large')
1208
+ .option('--query-models <models>', 'Comma-separated query models', 'voyage-4-large,voyage-4,voyage-4-lite')
1209
+ .option('--query <text>', 'Search query')
1210
+ .option('-f, --file <path>', 'Corpus file (JSON array or newline-delimited)')
1211
+ .option('-k, --top-k <n>', 'Show top K results', '5')
1212
+ .option('--json', 'Machine-readable JSON output')
1213
+ .option('-q, --quiet', 'Suppress non-essential output')
1214
+ .action(benchmarkAsymmetric);
797
1215
  }
798
1216
 
799
1217
  module.exports = { registerBenchmark };
@@ -19,6 +19,7 @@ function registerEmbed(program) {
19
19
  .option('-f, --file <path>', 'Read text from file')
20
20
  .option('--truncation', 'Enable truncation for long inputs')
21
21
  .option('--no-truncation', 'Disable truncation')
22
+ .option('--output-dtype <type>', 'Output data type: float, int8, uint8, binary, ubinary', 'float')
22
23
  .option('-o, --output-format <format>', 'Output format: json or array', 'json')
23
24
  .option('--json', 'Machine-readable JSON output')
24
25
  .option('-q, --quiet', 'Suppress non-essential output')
@@ -49,6 +50,10 @@ function registerEmbed(program) {
49
50
  if (opts.truncation !== undefined) {
50
51
  embedOpts.truncation = opts.truncation;
51
52
  }
53
+ // Only pass output_dtype when not the default float
54
+ if (opts.outputDtype && opts.outputDtype !== 'float') {
55
+ embedOpts.outputDtype = opts.outputDtype;
56
+ }
52
57
 
53
58
  const result = await generateEmbeddings(texts, embedOpts);
54
59
 
@@ -137,17 +137,21 @@ function createPlaygroundServer() {
137
137
 
138
138
  // API: Embed
139
139
  if (req.url === '/api/embed') {
140
- const { texts, model, inputType, dimensions } = parsed;
140
+ const { texts, model, inputType, dimensions, output_dtype } = parsed;
141
141
  if (!texts || !Array.isArray(texts) || texts.length === 0) {
142
142
  res.writeHead(400, { 'Content-Type': 'application/json' });
143
143
  res.end(JSON.stringify({ error: 'texts must be a non-empty array' }));
144
144
  return;
145
145
  }
146
- const result = await generateEmbeddings(texts, {
146
+ const embedOpts = {
147
147
  model: model || undefined,
148
148
  inputType: inputType || undefined,
149
149
  dimensions: dimensions || undefined,
150
- });
150
+ };
151
+ if (output_dtype && output_dtype !== 'float') {
152
+ embedOpts.outputDtype = output_dtype;
153
+ }
154
+ const result = await generateEmbeddings(texts, embedOpts);
151
155
  res.writeHead(200, { 'Content-Type': 'application/json' });
152
156
  res.end(JSON.stringify(result));
153
157
  return;
@@ -23,6 +23,7 @@ function registerStore(program) {
23
23
  .option('-m, --model <model>', 'Embedding model', getDefaultModel())
24
24
  .option('--input-type <type>', 'Input type: query or document', 'document')
25
25
  .option('-d, --dimensions <n>', 'Output dimensions', (v) => parseInt(v, 10))
26
+ .option('--output-dtype <type>', 'Output data type: float, int8, uint8, binary, ubinary', 'float')
26
27
  .option('--metadata <json>', 'Additional metadata as JSON')
27
28
  .option('--json', 'Machine-readable JSON output')
28
29
  .option('-q, --quiet', 'Suppress non-essential output')
@@ -46,11 +47,15 @@ function registerStore(program) {
46
47
  spin.start();
47
48
  }
48
49
 
49
- const embedResult = await generateEmbeddings([textContent], {
50
+ const embedOpts = {
50
51
  model: opts.model,
51
52
  inputType: opts.inputType,
52
53
  dimensions: opts.dimensions,
53
- });
54
+ };
55
+ if (opts.outputDtype && opts.outputDtype !== 'float') {
56
+ embedOpts.outputDtype = opts.outputDtype;
57
+ }
58
+ const embedResult = await generateEmbeddings([textContent], embedOpts);
54
59
 
55
60
  const embedding = embedResult.data[0].embedding;
56
61
 
@@ -147,11 +152,15 @@ async function handleBatchStore(opts) {
147
152
  spin.start();
148
153
  }
149
154
 
150
- const embedResult = await generateEmbeddings(texts, {
155
+ const batchEmbedOpts = {
151
156
  model: opts.model,
152
157
  inputType: opts.inputType,
153
158
  dimensions: opts.dimensions,
154
- });
159
+ };
160
+ if (opts.outputDtype && opts.outputDtype !== 'float') {
161
+ batchEmbedOpts.outputDtype = opts.outputDtype;
162
+ }
163
+ const embedResult = await generateEmbeddings(texts, batchEmbedOpts);
155
164
 
156
165
  const docs = records.map((record, i) => {
157
166
  const embedding = embedResult.data[i].embedding;
@@ -170,6 +179,8 @@ async function handleBatchStore(opts) {
170
179
 
171
180
  const { client: c, collection } = await getMongoCollection(opts.db, opts.collection);
172
181
  client = c;
182
+ // Insert all embedded documents in a single bulk operation — one round trip
183
+ // to the server instead of one insert per document.
173
184
  const result = await collection.insertMany(docs);
174
185
 
175
186
  if (spin) spin.stop();