voyageai-cli 1.11.0 → 1.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -0
- package/package.json +1 -1
- package/src/commands/benchmark.js +416 -0
- package/src/commands/embed.js +5 -0
- package/src/commands/playground.js +7 -3
- package/src/commands/store.js +13 -4
- package/src/lib/api.js +4 -0
- package/src/lib/explanations.js +76 -2
- package/src/playground/index.html +411 -1
- package/test/commands/benchmark.test.js +67 -0
- package/test/commands/embed.test.js +10 -0
- package/test/lib/explanations.test.js +6 -0
package/README.md
CHANGED
|
@@ -312,6 +312,29 @@ All commands support:
|
|
|
312
312
|
|
|
313
313
|
Free tier: 200M tokens for most models. All Voyage 4 series models share the same embedding space.
|
|
314
314
|
|
|
315
|
+
## Benchmarks: vai vs. Voyage AI's Published Results
|
|
316
|
+
|
|
317
|
+
Voyage AI publishes [retrieval quality benchmarks](https://blog.voyageai.com/2026/01/15/voyage-4/) — NDCG@10 scores across 29 RTEB datasets measuring how *accurate* each model's embeddings are. Their results show voyage-4-large outperforms Gemini Embedding 001 by 3.87%, Cohere Embed v4 by 8.20%, and OpenAI v3 Large by 14.05%.
|
|
318
|
+
|
|
319
|
+
**`vai benchmark` measures something different:** real-world latency, cost, and whether models agree on ranking *your specific data*. The two are complementary:
|
|
320
|
+
|
|
321
|
+
| | Voyage AI Benchmarks | vai benchmark |
|
|
322
|
+
|---|---|---|
|
|
323
|
+
| **Measures** | Retrieval quality (NDCG@10) | Latency, cost, ranking agreement |
|
|
324
|
+
| **Data** | 29 standardized datasets | Your actual data |
|
|
325
|
+
| **Answers** | "Which model produces the best embeddings?" | "For my data and budget, which model should I use?" |
|
|
326
|
+
|
|
327
|
+
Voyage AI's key insight — [asymmetric retrieval](https://blog.voyageai.com/2026/01/15/voyage-4/) (embed docs with voyage-4-large, query with voyage-4-lite) — is directly testable with `vai`:
|
|
328
|
+
|
|
329
|
+
```bash
|
|
330
|
+
# Does the cheap query model find the same results as the expensive one?
|
|
331
|
+
vai benchmark asymmetric --doc-model voyage-4-large \
|
|
332
|
+
--query-models voyage-4-large,voyage-4,voyage-4-lite \
|
|
333
|
+
--file your-corpus.txt --query "your actual query"
|
|
334
|
+
```
|
|
335
|
+
|
|
336
|
+
If rankings agree, you can embed documents once with voyage-4-large and query with voyage-4-lite — **6x cheaper** at query time with no re-indexing.
|
|
337
|
+
|
|
315
338
|
## Requirements
|
|
316
339
|
|
|
317
340
|
- Node.js 18+
|
package/package.json
CHANGED
|
@@ -719,6 +719,393 @@ async function benchmarkBatch(opts) {
|
|
|
719
719
|
console.log('');
|
|
720
720
|
}
|
|
721
721
|
|
|
722
|
+
/**
 * benchmark asymmetric — Test Voyage 4's asymmetric retrieval
 * (embed docs with one model, query with another).
 *
 * Embeds the corpus once with the document model, embeds the query once per
 * query model, ranks the corpus by cosine similarity for each query model and
 * reports how closely each ranking agrees with the baseline (the first query
 * model that completed successfully).
 *
 * Exits the process with code 1 when the document embedding call fails or,
 * outside JSON mode, when no query model completes.
 *
 * @param {object} opts - Parsed CLI options.
 * @param {string} [opts.docModel] - Model used to embed documents (default 'voyage-4-large').
 * @param {string} [opts.queryModels] - Query models, passed to parseModels().
 * @param {string} [opts.query] - Search query (defaults to SAMPLE_QUERY).
 * @param {string} [opts.file] - Corpus file; SAMPLE_RERANK_DOCS is used when omitted.
 * @param {string|number} [opts.topK] - How many top results to compare (default 5).
 * @param {boolean} [opts.json] - Print machine-readable JSON and return.
 * @param {boolean} [opts.quiet] - Suppress header, spinners and the latency table.
 * @returns {Promise<void>}
 */
async function benchmarkAsymmetric(opts) {
  const docModel = opts.docModel || 'voyage-4-large';
  const queryModels = opts.queryModels
    ? parseModels(opts.queryModels)
    : ['voyage-4-large', 'voyage-4', 'voyage-4-lite'];
  const query = opts.query || SAMPLE_QUERY;

  // A non-numeric or non-positive --top-k used to become NaN and silently
  // print no rankings; fall back to the documented default instead.
  const requestedK = Number.parseInt(opts.topK, 10);
  const showK = Number.isInteger(requestedK) && requestedK > 0 ? requestedK : 5;

  const corpus = opts.file ? loadTexts(opts.file) : SAMPLE_RERANK_DOCS;

  // Number of ranks that can actually be compared. Using showK directly made
  // "identical ranking" unreachable whenever the corpus had fewer than showK docs.
  const compareK = Math.min(showK, corpus.length);
  const verbose = !opts.json && !opts.quiet;

  if (verbose) {
    console.log('');
    console.log(ui.bold(' Asymmetric Retrieval Benchmark'));
    console.log(ui.dim(` Documents embedded with: ${docModel}`));
    console.log(ui.dim(` Query models: ${queryModels.join(', ')}`));
    console.log(ui.dim(` Query: "${query.substring(0, 60)}${query.length > 60 ? '...' : ''}"`));
    console.log(ui.dim(` ${corpus.length} documents`));
    console.log('');
  }

  // Step 1: Embed documents with the doc model
  const spin1 = verbose ? ui.spinner(` Embedding ${corpus.length} docs with ${docModel}...`) : null;
  if (spin1) spin1.start();

  let docEmbeddings;
  try {
    const docResult = await generateEmbeddings(corpus, { model: docModel, inputType: 'document' });
    docEmbeddings = docResult.data.map(d => d.embedding);
    if (spin1) spin1.stop();
  } catch (err) {
    if (spin1) spin1.stop();
    console.error(ui.error(`Failed to embed documents with ${docModel}: ${err.message}`));
    process.exit(1);
  }

  // Step 2: For each query model, embed the query and rank
  const allResults = [];

  for (const qModel of queryModels) {
    const spin = verbose ? ui.spinner(` Querying with ${qModel}...`) : null;
    if (spin) spin.start();

    try {
      // Only the query embedding call is timed; ranking is local work.
      const start = performance.now();
      const qResult = await generateEmbeddings([query], { model: qModel, inputType: 'query' });
      const elapsed = performance.now() - start;
      const queryEmbed = qResult.data[0].embedding;

      const ranked = corpus.map((text, i) => ({
        index: i,
        text,
        similarity: cosineSimilarity(queryEmbed, docEmbeddings[i]),
      })).sort((a, b) => b.similarity - a.similarity);

      allResults.push({
        queryModel: qModel,
        docModel,
        latency: elapsed,
        tokens: qResult.usage?.total_tokens || 0,
        ranked,
      });

      if (spin) spin.stop();
    } catch (err) {
      // A failing query model is skipped so the remaining ones still run.
      if (spin) spin.stop();
      console.error(ui.warn(` ${qModel}: ${err.message} — skipping`));
    }
  }

  if (opts.json) {
    console.log(JSON.stringify({ benchmark: 'asymmetric', docModel, query, corpus: corpus.length, results: allResults }, null, 2));
    return;
  }

  if (allResults.length === 0) {
    console.error(ui.error('No query models completed successfully.'));
    process.exit(1);
  }

  // Show latency comparison
  if (!opts.quiet) {
    console.log(ui.dim(` ${rpad('Query Model', 22)} ${lpad('Latency', 8)} ${lpad('Tokens', 7)}`));
    console.log(ui.dim(' ' + '─'.repeat(40)));
    const minLat = Math.min(...allResults.map(r => r.latency));
    for (const r of allResults) {
      const badge = r.latency === minLat ? ui.green(' ⚡') : ' ';
      console.log(` ${rpad(r.queryModel, 22)} ${lpad(fmtMs(r.latency), 8)} ${lpad(String(r.tokens), 7)}${badge}`);
    }
    console.log('');
  }

  // Show ranking comparison
  console.log(ui.bold(` Top ${compareK} results (docs embedded with ${ui.cyan(docModel)})`));
  console.log('');

  // Baseline is the first query model that completed (normally the full model).
  const baseline = allResults[0];

  for (let rank = 0; rank < compareK; rank++) {
    console.log(ui.dim(` #${rank + 1}`));
    for (const r of allResults) {
      const item = r.ranked[rank];
      const preview = item.text.substring(0, 50) + (item.text.length > 50 ? '...' : '');
      const match = baseline.ranked[rank].index === item.index ? ui.green('=') : ui.yellow('≠');
      console.log(` ${match} ${ui.cyan(rpad(r.queryModel, 20))} ${ui.score(item.similarity)} [${item.index}] ${ui.dim(preview)}`);
    }
  }

  console.log('');

  // Agreement analysis (skipped when there is nothing to compare)
  if (compareK > 0) {
    const baseOrder = baseline.ranked.slice(0, compareK).map(x => x.index);
    for (const r of allResults.slice(1)) {
      const rOrder = r.ranked.slice(0, compareK).map(x => x.index);
      const overlap = baseOrder.filter(idx => rOrder.includes(idx)).length;
      const exactMatch = baseOrder.filter((idx, i) => rOrder[i] === idx).length;
      const overlapPct = ((overlap / compareK) * 100).toFixed(0);
      const exactPct = ((exactMatch / compareK) * 100).toFixed(0);

      const price = getPrice(r.queryModel);
      const basePrice = getPrice(baseline.queryModel);
      const savings = (price && basePrice && price < basePrice)
        ? ` (${((1 - price / basePrice) * 100).toFixed(0)}% cheaper)`
        : '';

      if (exactMatch === compareK) {
        // Name the model the ranking was actually compared against.
        console.log(ui.success(`${r.queryModel}: Identical ranking to ${baseline.queryModel}${savings} — asymmetric retrieval works perfectly.`));
      } else if (overlap === compareK) {
        console.log(ui.info(`${r.queryModel}: Same ${compareK} docs, ${exactPct}% exact order match${savings}.`));
      } else {
        console.log(ui.warn(`${r.queryModel}: ${overlapPct}% overlap in top-${compareK}${savings}.`));
      }
    }
  }
  console.log('');
}
|
|
866
|
+
|
|
867
|
+
/**
 * benchmark quantization — Compare output dtypes for quality vs storage tradeoff.
 *
 * For every requested dtype the query and corpus are embedded in a single
 * call, the corpus is ranked against the query (Hamming similarity for
 * binary/ubinary, cosine similarity otherwise), and storage size, latency and
 * ranking agreement with the baseline are reported. The baseline is the
 * float result when available, otherwise the first dtype that completed.
 *
 * Outside JSON mode the process exits with code 1 when no dtype completes.
 *
 * @param {object} opts - Parsed CLI options.
 * @param {string} [opts.model] - Embedding model (defaults to getDefaultModel()).
 * @param {string} [opts.dtypes] - Comma-separated output dtypes (default float,int8,ubinary).
 * @param {string} [opts.query] - Search query (defaults to SAMPLE_QUERY).
 * @param {string} [opts.file] - Corpus file; SAMPLE_RERANK_DOCS is used when omitted.
 * @param {string|number} [opts.topK] - How many top results to compare (default 5).
 * @param {string|number} [opts.dimensions] - Output dimensions to request.
 * @param {boolean} [opts.json] - Print machine-readable JSON and return.
 * @param {boolean} [opts.quiet] - Suppress header and spinners.
 * @param {boolean|string} [opts.save] - Save results; a string is used as the output path.
 * @returns {Promise<void>}
 */
async function benchmarkQuantization(opts) {
  const model = opts.model || getDefaultModel();
  // Drop empty entries (e.g. from a trailing comma) so they are not sent as a dtype.
  const dtypes = opts.dtypes
    ? opts.dtypes.split(',').map(d => d.trim()).filter(Boolean)
    : ['float', 'int8', 'ubinary'];
  const query = opts.query || SAMPLE_QUERY;
  const dimensions = opts.dimensions ? parseInt(opts.dimensions, 10) : undefined;

  // A non-numeric or non-positive --top-k used to become NaN and silently
  // print no rankings; fall back to the documented default instead.
  const requestedK = Number.parseInt(opts.topK, 10);
  const showK = Number.isInteger(requestedK) && requestedK > 0 ? requestedK : 5;

  const corpus = opts.file ? loadTexts(opts.file) : SAMPLE_RERANK_DOCS;

  // Number of ranks that can actually be compared. Using showK directly made
  // "identical ranking" unreachable whenever the corpus had fewer than showK docs.
  const compareK = Math.min(showK, corpus.length);
  const verbose = !opts.json && !opts.quiet;

  if (verbose) {
    console.log('');
    console.log(ui.bold(' Quantization Benchmark'));
    console.log(ui.dim(` Model: ${model}`));
    console.log(ui.dim(` Data types: ${dtypes.join(', ')}`));
    console.log(ui.dim(` ${corpus.length} documents, top-${showK} comparison`));
    if (dimensions) console.log(ui.dim(` Dimensions: ${dimensions}`));
    console.log('');
  }

  // Step 1: Embed query + corpus once per dtype (query is element 0)
  const allTexts = [query, ...corpus];
  const resultsByDtype = {};

  for (const dtype of dtypes) {
    const spin = verbose ? ui.spinner(` Embedding with ${dtype}...`) : null;
    if (spin) spin.start();

    try {
      const embedOpts = { model, inputType: 'document' };
      if (dimensions) embedOpts.dimensions = dimensions;
      if (dtype !== 'float') embedOpts.outputDtype = dtype;

      const start = performance.now();
      const result = await generateEmbeddings(allTexts, embedOpts);
      const elapsed = performance.now() - start;

      if (spin) spin.stop();

      const embeddings = result.data.map(d => d.embedding);
      const queryEmbed = embeddings[0];
      const dims = embeddings[0].length;
      const isBinary = dtype === 'binary' || dtype === 'ubinary';

      // For binary/ubinary, we can't directly cosine-similarity the packed ints
      // against float embeddings meaningfully. Instead we compare the ranking
      // each dtype produces independently.
      const ranked = corpus.map((text, i) => {
        const docEmbed = embeddings[i + 1];
        // Packed bit vectors are scored by the fraction of agreeing bits.
        const sim = isBinary
          ? hammingSimilarity(queryEmbed, docEmbed)
          : cosineSimilarity(queryEmbed, docEmbed);
        return { index: i, text, similarity: sim };
      }).sort((a, b) => b.similarity - a.similarity);

      // Calculate storage per vector
      let bytesPerVec;
      const actualDims = isBinary ? dims * 8 : dims;
      if (dtype === 'float') {
        bytesPerVec = dims * 4;
      } else if (dtype === 'int8' || dtype === 'uint8') {
        bytesPerVec = dims * 1;
      } else {
        // binary/ubinary: dims is already 1/8th of actual dimensions
        bytesPerVec = dims;
      }

      resultsByDtype[dtype] = {
        dtype,
        latency: elapsed,
        dims,
        actualDims,
        bytesPerVec,
        tokens: result.usage?.total_tokens || 0,
        ranked,
      };
    } catch (err) {
      // A failing dtype is skipped so the remaining ones still run.
      if (spin) spin.stop();
      console.error(ui.warn(` ${dtype}: ${err.message} — skipping`));
    }
  }

  const completed = Object.values(resultsByDtype);

  if (opts.json) {
    const jsonResults = completed.map(r => ({
      dtype: r.dtype,
      latency: r.latency,
      dimensions: r.actualDims,
      bytesPerVector: r.bytesPerVec,
      ranking: r.ranked.slice(0, showK).map(x => ({ index: x.index, similarity: x.similarity })),
    }));
    console.log(JSON.stringify({ benchmark: 'quantization', model, results: jsonResults }, null, 2));
    return;
  }

  if (completed.length === 0) {
    console.error(ui.error('No data types completed successfully.'));
    process.exit(1);
  }

  // Storage comparison table
  console.log(ui.bold(' Storage Comparison'));
  console.log('');

  const sHeader = ` ${rpad('dtype', 10)} ${lpad('Dims', 8)} ${lpad('Bytes/vec', 12)} ${lpad('1M docs', 10)} ${lpad('Savings', 10)} ${lpad('Latency', 10)}`;
  console.log(ui.dim(sHeader));
  console.log(ui.dim(' ' + '─'.repeat(stripAnsi(sHeader).length - 2)));

  const baseline = completed.find(r => r.dtype === 'float') || completed[0];
  const baselineBytes = baseline.bytesPerVec;

  for (const r of completed) {
    const savings = r.bytesPerVec < baselineBytes
      ? ui.green(`${(baselineBytes / r.bytesPerVec).toFixed(0)}×`)
      : ui.dim('baseline');

    // Projected footprint for one million vectors of this dtype.
    const totalMB = (r.bytesPerVec * 1_000_000) / (1024 * 1024);
    let sizeStr;
    if (totalMB >= 1024) sizeStr = `${(totalMB / 1024).toFixed(1)} GB`;
    else sizeStr = `${totalMB.toFixed(0)} MB`;

    console.log(
      ` ${rpad(r.dtype, 10)} ${lpad(String(r.actualDims), 8)} ${lpad(formatBytes(r.bytesPerVec), 12)} ${lpad(sizeStr, 10)} ${lpad(savings, 10)} ${lpad(fmtMs(r.latency), 10)}`
    );
  }

  console.log('');

  // Ranking comparison
  console.log(ui.bold(` Ranking Comparison (top ${compareK})`));
  console.log('');

  const baselineRanked = baseline.ranked;
  const baselineOrder = baselineRanked.slice(0, compareK).map(x => x.index);

  for (let rank = 0; rank < compareK; rank++) {
    console.log(ui.dim(` #${rank + 1}`));
    for (const r of completed) {
      const item = r.ranked[rank];
      const preview = item.text.substring(0, 45) + (item.text.length > 45 ? '...' : '');
      const matchesBaseline = (r === baseline) ? ' ' :
        (item.index === baselineRanked[rank].index ? ui.green('=') : ui.yellow('≠'));
      // Hamming similarity is a 0..1 fraction, shown as a percentage.
      const simStr = (r.dtype === 'binary' || r.dtype === 'ubinary')
        ? `${(item.similarity * 100).toFixed(1)}%`
        : item.similarity.toFixed(4);
      console.log(` ${matchesBaseline} ${ui.cyan(rpad(r.dtype, 10))} ${lpad(simStr, 8)} [${item.index}] ${ui.dim(preview)}`);
    }
  }

  console.log('');

  // Agreement summary (skipped when there is nothing to compare)
  if (completed.length > 1 && compareK > 0) {
    for (const r of completed) {
      if (r === baseline) continue;
      const rOrder = r.ranked.slice(0, compareK).map(x => x.index);
      const overlap = baselineOrder.filter(idx => rOrder.includes(idx)).length;
      const exactMatch = baselineOrder.filter((idx, i) => rOrder[i] === idx).length;
      const overlapPct = ((overlap / compareK) * 100).toFixed(0);
      const exactPct = ((exactMatch / compareK) * 100).toFixed(0);
      const savingsX = (baselineBytes / r.bytesPerVec).toFixed(0);

      if (exactMatch === compareK) {
        // Name the real baseline: it is not float when float failed or was not requested.
        console.log(ui.success(`${r.dtype}: Identical ranking to ${baseline.dtype} — ${savingsX}× storage savings with zero quality loss.`));
      } else if (overlap === compareK) {
        console.log(ui.info(`${r.dtype}: Same top-${compareK} docs, ${exactPct}% exact order — ${savingsX}× smaller.`));
      } else {
        console.log(ui.warn(`${r.dtype}: ${overlapPct}% overlap in top-${compareK} — ${savingsX}× smaller. Consider using a reranker.`));
      }
    }
    console.log('');
  }

  // Save results
  if (opts.save) {
    const outData = {
      benchmark: 'quantization',
      timestamp: new Date().toISOString(),
      model,
      results: completed.map(r => ({
        dtype: r.dtype,
        latency: r.latency,
        dimensions: r.actualDims,
        bytesPerVector: r.bytesPerVec,
        topRanking: r.ranked.slice(0, showK),
      })),
    };
    // --save without a value arrives as `true`; generate a timestamped file name then.
    const outPath = typeof opts.save === 'string' ? opts.save : `benchmark-quantization-${Date.now()}.json`;
    fs.writeFileSync(outPath, JSON.stringify(outData, null, 2));
    console.log(ui.info(`Results saved to ${outPath}`));
    console.log('');
  }
}
|
|
1074
|
+
|
|
1075
|
+
/**
 * Compute Hamming similarity between two packed binary vectors.
 * Returns a value between 0 and 1 (fraction of bits that agree).
 *
 * Each element is treated as one byte; negative (signed int8) values are
 * masked to their low 8 bits before comparison. When the vectors differ in
 * length only the common prefix is compared. Empty input returns 0 rather
 * than NaN (0/0), so callers can safely sort by the result.
 *
 * @param {ArrayLike<number>} a - First packed vector.
 * @param {ArrayLike<number>} b - Second packed vector.
 * @returns {number} Fraction of agreeing bits in [0, 1].
 */
function hammingSimilarity(a, b) {
  const len = Math.min(a.length, b.length);
  // Nothing to compare: avoid 0/0 = NaN, which breaks numeric sort comparators.
  if (len === 0) return 0;

  let agreeBits = 0;
  for (let i = 0; i < len; i++) {
    // XOR leaves a 1 for every differing bit; the rest of the byte agrees.
    const xor = (a[i] & 0xFF) ^ (b[i] & 0xFF);
    agreeBits += 8 - popcount8(xor);
  }
  return agreeBits / (len * 8);
}

/**
 * Count set bits in an 8-bit value.
 *
 * Sums bits in parallel: first within 2-bit pairs, then within 4-bit
 * nibbles, then adds the two nibbles together.
 *
 * @param {number} v - Integer in the range 0..255.
 * @returns {number} Number of 1 bits (0..8).
 */
function popcount8(v) {
  v = v - ((v >> 1) & 0x55);
  v = (v & 0x33) + ((v >> 2) & 0x33);
  return (v + (v >> 4)) & 0x0F;
}
|
|
1100
|
+
|
|
1101
|
+
/**
 * Render a byte count as a short human-readable label.
 *
 * Values of 1024 and above are shown in KB with one decimal place;
 * anything smaller is shown as a plain byte count.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} Label such as "512 B" or "4.0 KB".
 */
function formatBytes(bytes) {
  const bytesPerKb = 1024;
  return bytes >= bytesPerKb
    ? `${(bytes / bytesPerKb).toFixed(1)} KB`
    : `${bytes} B`;
}
|
|
1108
|
+
|
|
722
1109
|
// ── Registration ──
|
|
723
1110
|
|
|
724
1111
|
/**
|
|
@@ -796,6 +1183,35 @@ function registerBenchmark(program) {
|
|
|
796
1183
|
.option('--json', 'Machine-readable JSON output')
|
|
797
1184
|
.option('-q, --quiet', 'Suppress non-essential output')
|
|
798
1185
|
.action(benchmarkBatch);
|
|
1186
|
+
|
|
1187
|
+
// ── benchmark quantization ──
|
|
1188
|
+
bench
|
|
1189
|
+
.command('quantization')
|
|
1190
|
+
.alias('quant')
|
|
1191
|
+
.description('Compare output dtypes (float/int8/binary) for quality vs storage')
|
|
1192
|
+
.option('-m, --model <model>', 'Embedding model to benchmark')
|
|
1193
|
+
.option('--dtypes <types>', 'Comma-separated output dtypes', 'float,int8,ubinary')
|
|
1194
|
+
.option('--query <text>', 'Search query')
|
|
1195
|
+
.option('-f, --file <path>', 'Corpus file (JSON array or newline-delimited)')
|
|
1196
|
+
.option('-k, --top-k <n>', 'Show top K results', '5')
|
|
1197
|
+
.option('-d, --dimensions <n>', 'Output dimensions')
|
|
1198
|
+
.option('--json', 'Machine-readable JSON output')
|
|
1199
|
+
.option('-q, --quiet', 'Suppress non-essential output')
|
|
1200
|
+
.option('-s, --save [path]', 'Save results to JSON file')
|
|
1201
|
+
.action(benchmarkQuantization);
|
|
1202
|
+
|
|
1203
|
+
// ── benchmark asymmetric ──
|
|
1204
|
+
bench
|
|
1205
|
+
.command('asymmetric')
|
|
1206
|
+
.description('Test asymmetric retrieval (docs with large model, queries with smaller)')
|
|
1207
|
+
.option('--doc-model <model>', 'Model to embed documents with', 'voyage-4-large')
|
|
1208
|
+
.option('--query-models <models>', 'Comma-separated query models', 'voyage-4-large,voyage-4,voyage-4-lite')
|
|
1209
|
+
.option('--query <text>', 'Search query')
|
|
1210
|
+
.option('-f, --file <path>', 'Corpus file (JSON array or newline-delimited)')
|
|
1211
|
+
.option('-k, --top-k <n>', 'Show top K results', '5')
|
|
1212
|
+
.option('--json', 'Machine-readable JSON output')
|
|
1213
|
+
.option('-q, --quiet', 'Suppress non-essential output')
|
|
1214
|
+
.action(benchmarkAsymmetric);
|
|
799
1215
|
}
|
|
800
1216
|
|
|
801
1217
|
module.exports = { registerBenchmark };
|
package/src/commands/embed.js
CHANGED
|
@@ -19,6 +19,7 @@ function registerEmbed(program) {
|
|
|
19
19
|
.option('-f, --file <path>', 'Read text from file')
|
|
20
20
|
.option('--truncation', 'Enable truncation for long inputs')
|
|
21
21
|
.option('--no-truncation', 'Disable truncation')
|
|
22
|
+
.option('--output-dtype <type>', 'Output data type: float, int8, uint8, binary, ubinary', 'float')
|
|
22
23
|
.option('-o, --output-format <format>', 'Output format: json or array', 'json')
|
|
23
24
|
.option('--json', 'Machine-readable JSON output')
|
|
24
25
|
.option('-q, --quiet', 'Suppress non-essential output')
|
|
@@ -49,6 +50,10 @@ function registerEmbed(program) {
|
|
|
49
50
|
if (opts.truncation !== undefined) {
|
|
50
51
|
embedOpts.truncation = opts.truncation;
|
|
51
52
|
}
|
|
53
|
+
// Only pass output_dtype when not the default float
|
|
54
|
+
if (opts.outputDtype && opts.outputDtype !== 'float') {
|
|
55
|
+
embedOpts.outputDtype = opts.outputDtype;
|
|
56
|
+
}
|
|
52
57
|
|
|
53
58
|
const result = await generateEmbeddings(texts, embedOpts);
|
|
54
59
|
|
|
@@ -137,17 +137,21 @@ function createPlaygroundServer() {
|
|
|
137
137
|
|
|
138
138
|
// API: Embed
|
|
139
139
|
if (req.url === '/api/embed') {
|
|
140
|
-
const { texts, model, inputType, dimensions } = parsed;
|
|
140
|
+
const { texts, model, inputType, dimensions, output_dtype } = parsed;
|
|
141
141
|
if (!texts || !Array.isArray(texts) || texts.length === 0) {
|
|
142
142
|
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
143
143
|
res.end(JSON.stringify({ error: 'texts must be a non-empty array' }));
|
|
144
144
|
return;
|
|
145
145
|
}
|
|
146
|
-
const
|
|
146
|
+
const embedOpts = {
|
|
147
147
|
model: model || undefined,
|
|
148
148
|
inputType: inputType || undefined,
|
|
149
149
|
dimensions: dimensions || undefined,
|
|
150
|
-
}
|
|
150
|
+
};
|
|
151
|
+
if (output_dtype && output_dtype !== 'float') {
|
|
152
|
+
embedOpts.outputDtype = output_dtype;
|
|
153
|
+
}
|
|
154
|
+
const result = await generateEmbeddings(texts, embedOpts);
|
|
151
155
|
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
152
156
|
res.end(JSON.stringify(result));
|
|
153
157
|
return;
|
package/src/commands/store.js
CHANGED
|
@@ -23,6 +23,7 @@ function registerStore(program) {
|
|
|
23
23
|
.option('-m, --model <model>', 'Embedding model', getDefaultModel())
|
|
24
24
|
.option('--input-type <type>', 'Input type: query or document', 'document')
|
|
25
25
|
.option('-d, --dimensions <n>', 'Output dimensions', (v) => parseInt(v, 10))
|
|
26
|
+
.option('--output-dtype <type>', 'Output data type: float, int8, uint8, binary, ubinary', 'float')
|
|
26
27
|
.option('--metadata <json>', 'Additional metadata as JSON')
|
|
27
28
|
.option('--json', 'Machine-readable JSON output')
|
|
28
29
|
.option('-q, --quiet', 'Suppress non-essential output')
|
|
@@ -46,11 +47,15 @@ function registerStore(program) {
|
|
|
46
47
|
spin.start();
|
|
47
48
|
}
|
|
48
49
|
|
|
49
|
-
const
|
|
50
|
+
const embedOpts = {
|
|
50
51
|
model: opts.model,
|
|
51
52
|
inputType: opts.inputType,
|
|
52
53
|
dimensions: opts.dimensions,
|
|
53
|
-
}
|
|
54
|
+
};
|
|
55
|
+
if (opts.outputDtype && opts.outputDtype !== 'float') {
|
|
56
|
+
embedOpts.outputDtype = opts.outputDtype;
|
|
57
|
+
}
|
|
58
|
+
const embedResult = await generateEmbeddings([textContent], embedOpts);
|
|
54
59
|
|
|
55
60
|
const embedding = embedResult.data[0].embedding;
|
|
56
61
|
|
|
@@ -147,11 +152,15 @@ async function handleBatchStore(opts) {
|
|
|
147
152
|
spin.start();
|
|
148
153
|
}
|
|
149
154
|
|
|
150
|
-
const
|
|
155
|
+
const batchEmbedOpts = {
|
|
151
156
|
model: opts.model,
|
|
152
157
|
inputType: opts.inputType,
|
|
153
158
|
dimensions: opts.dimensions,
|
|
154
|
-
}
|
|
159
|
+
};
|
|
160
|
+
if (opts.outputDtype && opts.outputDtype !== 'float') {
|
|
161
|
+
batchEmbedOpts.outputDtype = opts.outputDtype;
|
|
162
|
+
}
|
|
163
|
+
const embedResult = await generateEmbeddings(texts, batchEmbedOpts);
|
|
155
164
|
|
|
156
165
|
const docs = records.map((record, i) => {
|
|
157
166
|
const embedding = embedResult.data[i].embedding;
|
package/src/lib/api.js
CHANGED
|
@@ -129,6 +129,7 @@ async function apiRequest(endpoint, body) {
|
|
|
129
129
|
* @param {string} [options.inputType] - Input type (query|document)
|
|
130
130
|
* @param {number} [options.dimensions] - Output dimensions
|
|
131
131
|
* @param {boolean} [options.truncation] - Enable/disable truncation
|
|
132
|
+
* @param {string} [options.outputDtype] - Output data type: float, int8, uint8, binary, ubinary
|
|
132
133
|
* @returns {Promise<object>} API response with embeddings
|
|
133
134
|
*/
|
|
134
135
|
async function generateEmbeddings(texts, options = {}) {
|
|
@@ -148,6 +149,9 @@ async function generateEmbeddings(texts, options = {}) {
|
|
|
148
149
|
if (options.truncation !== undefined) {
|
|
149
150
|
body.truncation = options.truncation;
|
|
150
151
|
}
|
|
152
|
+
if (options.outputDtype && options.outputDtype !== 'float') {
|
|
153
|
+
body.output_dtype = options.outputDtype;
|
|
154
|
+
}
|
|
151
155
|
|
|
152
156
|
return apiRequest('/embeddings', body);
|
|
153
157
|
}
|
package/src/lib/explanations.js
CHANGED
|
@@ -406,6 +406,65 @@ const concepts = {
|
|
|
406
406
|
'vai embed --file document.txt --input-type document',
|
|
407
407
|
],
|
|
408
408
|
},
|
|
409
|
+
quantization: {
|
|
410
|
+
title: 'Quantization & Flexible Dimensions',
|
|
411
|
+
summary: 'Reduce storage costs with lower-precision embeddings',
|
|
412
|
+
content: [
|
|
413
|
+
`${pc.cyan('Quantization')} reduces embedding precision from 32-bit floats to smaller`,
|
|
414
|
+
`representations, dramatically cutting storage and search costs with minimal`,
|
|
415
|
+
`quality loss. Combined with ${pc.cyan('Matryoshka dimensions')}, you can shrink vectors`,
|
|
416
|
+
`by up to ${pc.bold('128×')} (32× from binary × 4× from fewer dimensions).`,
|
|
417
|
+
``,
|
|
418
|
+
`${pc.bold('Output data types (--output-dtype):')}`,
|
|
419
|
+
``,
|
|
420
|
+
` ${pc.cyan('float')} 32 bits/dim 4 bytes/dim Baseline (default)`,
|
|
421
|
+
` ${pc.cyan('int8')} 8 bits/dim 1 byte/dim ${pc.green('4× smaller')} Signed: -128 to 127`,
|
|
422
|
+
` ${pc.cyan('uint8')} 8 bits/dim 1 byte/dim ${pc.green('4× smaller')} Unsigned: 0 to 255`,
|
|
423
|
+
` ${pc.cyan('binary')} 1 bit/dim 1/8 byte/dim ${pc.green('32× smaller')} Bit-packed int8 (offset binary)`,
|
|
424
|
+
` ${pc.cyan('ubinary')} 1 bit/dim 1/8 byte/dim ${pc.green('32× smaller')} Bit-packed uint8`,
|
|
425
|
+
``,
|
|
426
|
+
`${pc.bold('Storage math for 1M documents at 1024 dims:')}`,
|
|
427
|
+
` float: ${pc.dim('1M × 1024 × 4B')} = ${pc.cyan('4.0 GB')}`,
|
|
428
|
+
` int8: ${pc.dim('1M × 1024 × 1B')} = ${pc.cyan('1.0 GB')} (4× savings)`,
|
|
429
|
+
` binary: ${pc.dim('1M × 1024 / 8B')} = ${pc.cyan('128 MB')} (32× savings)`,
|
|
430
|
+
` ${pc.dim('+ reduced dimensions:')} 256-dim binary = ${pc.cyan('32 MB')} (128× savings)`,
|
|
431
|
+
``,
|
|
432
|
+
`${pc.bold('How binary quantization works:')} Each float value is converted to a single bit:`,
|
|
433
|
+
`positive values become 1, zero/negative become 0. Eight bits are packed into`,
|
|
434
|
+
`one byte. ${pc.cyan('binary')} uses offset binary (subtract 128) for signed int8 output;`,
|
|
435
|
+
`${pc.cyan('ubinary')} stores the raw unsigned uint8 value.`,
|
|
436
|
+
``,
|
|
437
|
+
`${pc.bold('Quality impact:')} Quantization-aware training minimizes degradation:`,
|
|
438
|
+
` ${pc.dim('•')} ${pc.cyan('int8/uint8')} — Typically <1% retrieval quality loss vs float`,
|
|
439
|
+
` ${pc.dim('•')} ${pc.cyan('binary/ubinary')} — ~2-5% quality loss; best paired with a reranker`,
|
|
440
|
+
` ${pc.dim('•')} Combining lower dimensions + quantization compounds the quality loss`,
|
|
441
|
+
``,
|
|
442
|
+
`${pc.bold('Matryoshka dimensions:')} Voyage 4 models produce ${pc.cyan('nested embeddings')} — the`,
|
|
443
|
+
`first 256 entries of a 1024-dim vector are themselves a valid 256-dim embedding.`,
|
|
444
|
+
`You can embed once at full dimension and truncate later without re-embedding.`,
|
|
445
|
+
`Supported values: 256, 512, 1024 (default), 2048.`,
|
|
446
|
+
``,
|
|
447
|
+
`${pc.bold('Which vector databases support quantized storage?')}`,
|
|
448
|
+
` ${pc.dim('•')} MongoDB Atlas Vector Search — float and int8`,
|
|
449
|
+
` ${pc.dim('•')} Milvus, Qdrant, Weaviate, Elasticsearch, Vespa — float, int8, binary`,
|
|
450
|
+
``,
|
|
451
|
+
`${pc.bold('Decision framework:')}`,
|
|
452
|
+
` 1. Start with ${pc.cyan('float')} at default dimensions — measure your baseline`,
|
|
453
|
+
` 2. Try ${pc.cyan('int8')} — if quality holds, you get 4× storage savings for free`,
|
|
454
|
+
` 3. If storage is critical, try ${pc.cyan('binary')} + reranker for 32× savings`,
|
|
455
|
+
` 4. Reduce dimensions (1024→256) for another 4× on top of quantization`,
|
|
456
|
+
` 5. Use ${pc.cyan('vai benchmark quantization')} to measure the tradeoffs on your data`,
|
|
457
|
+
].join('\n'),
|
|
458
|
+
links: [
|
|
459
|
+
'https://docs.voyageai.com/docs/flexible-dimensions-and-quantization',
|
|
460
|
+
'https://www.mongodb.com/docs/voyageai/models/text-embeddings/',
|
|
461
|
+
],
|
|
462
|
+
tryIt: [
|
|
463
|
+
'vai embed "hello world" --output-dtype int8',
|
|
464
|
+
'vai embed "hello world" --output-dtype binary --dimensions 256',
|
|
465
|
+
'vai benchmark quantization --model voyage-4-large',
|
|
466
|
+
],
|
|
467
|
+
},
|
|
409
468
|
benchmarking: {
|
|
410
469
|
title: 'Benchmarking & Model Selection',
|
|
411
470
|
summary: 'How to choose the right model for your use case',
|
|
@@ -434,12 +493,18 @@ const concepts = {
|
|
|
434
493
|
` Measures throughput (texts/sec) at different batch sizes.`,
|
|
435
494
|
` ${pc.dim('vai benchmark batch --batch-sizes 1,5,10,25,50 --rounds 3')}`,
|
|
436
495
|
``,
|
|
496
|
+
`${pc.bold('vai benchmark quantization')} — Compare output dtypes for storage savings:`,
|
|
497
|
+
` Embeds the same corpus with float, int8, and binary, measures ranking quality`,
|
|
498
|
+
` degradation vs storage savings. Helps you decide if quantization works for your data.`,
|
|
499
|
+
` ${pc.dim('vai benchmark quantization --model voyage-4-large --dtypes float,int8,ubinary')}`,
|
|
500
|
+
``,
|
|
437
501
|
`${pc.bold('Decision framework:')}`,
|
|
438
502
|
` 1. Run ${pc.cyan('benchmark cost')} to eliminate models outside your budget`,
|
|
439
503
|
` 2. Run ${pc.cyan('benchmark embed')} to compare latency of affordable models`,
|
|
440
504
|
` 3. Run ${pc.cyan('benchmark similarity')} with your actual data to compare quality`,
|
|
441
|
-
` 4.
|
|
442
|
-
` 5.
|
|
505
|
+
` 4. Run ${pc.cyan('benchmark quantization')} to see if int8/binary preserves your ranking`,
|
|
506
|
+
` 5. If quality is similar, pick the cheaper/faster model + smallest viable dtype`,
|
|
507
|
+
` 6. Use ${pc.cyan('--save')} to track results over time as your data evolves`,
|
|
443
508
|
].join('\n'),
|
|
444
509
|
links: ['https://www.mongodb.com/docs/voyageai/models/text-embeddings/'],
|
|
445
510
|
tryIt: [
|
|
@@ -488,6 +553,15 @@ const aliases = {
|
|
|
488
553
|
batch: 'batch-processing',
|
|
489
554
|
'batch-processing': 'batch-processing',
|
|
490
555
|
batching: 'batch-processing',
|
|
556
|
+
quantization: 'quantization',
|
|
557
|
+
quantize: 'quantization',
|
|
558
|
+
'output-dtype': 'quantization',
|
|
559
|
+
dtype: 'quantization',
|
|
560
|
+
int8: 'quantization',
|
|
561
|
+
binary: 'quantization',
|
|
562
|
+
ubinary: 'quantization',
|
|
563
|
+
matryoshka: 'quantization',
|
|
564
|
+
'flexible-dimensions': 'quantization',
|
|
491
565
|
benchmark: 'benchmarking',
|
|
492
566
|
benchmarking: 'benchmarking',
|
|
493
567
|
'model-selection': 'benchmarking',
|
|
@@ -551,6 +551,83 @@ select:focus { outline: none; border-color: var(--accent); }
|
|
|
551
551
|
.rank-differ { border-left-color: var(--yellow); }
|
|
552
552
|
.rank-arrow { text-align: center; color: var(--text-muted); font-size: 18px; padding-top: 4px; }
|
|
553
553
|
|
|
554
|
+
/* Quantization charts */

/* Two chart cards side by side; stacked in one column on narrow screens. */
.quant-charts {
  display: grid;
  grid-template-columns: 1fr 1fr;
  gap: 16px;
  margin-bottom: 16px;
}
@media (max-width: 768px) {
  .quant-charts {
    grid-template-columns: 1fr;
  }
}

/* Horizontal bar rows: a label line above a rounded track with a fill. */
.quant-bar-group {
  margin-bottom: 14px;
}
.quant-bar-label {
  display: flex;
  justify-content: space-between;
  align-items: baseline;
  margin-bottom: 4px;
  font-size: 13px;
}
.quant-bar-label .dtype-name {
  color: var(--accent);
  font-weight: 600;
  font-family: var(--mono);
}
.quant-bar-label .dtype-value {
  color: var(--text-dim);
  font-family: var(--mono);
  font-size: 12px;
}
.quant-bar-track {
  height: 32px;
  background: var(--bg-input);
  border-radius: 6px;
  overflow: hidden;
  position: relative;
}
/* min-width: fit-content keeps the in-bar caption readable on short bars. */
.quant-bar-fill {
  height: 100%;
  border-radius: 6px;
  transition: width 0.8s cubic-bezier(0.22, 1, 0.36, 1);
  display: flex;
  align-items: center;
  padding: 0 10px;
  font-family: var(--mono);
  font-size: 12px;
  font-weight: 600;
  color: #0a0a1a;
  white-space: nowrap;
  min-width: fit-content;
}
.quant-bar-fill.storage {
  background: linear-gradient(90deg, #00d4aa, #4ecdc4);
}
.quant-bar-fill.latency {
  background: linear-gradient(90deg, #45b7d1, #82aaff);
}
/* Badge pinned to the right edge of a track, vertically centred. */
.quant-bar-badge {
  position: absolute;
  right: 10px;
  top: 50%;
  transform: translateY(-50%);
  font-size: 12px;
  color: var(--text-dim);
  font-family: var(--mono);
}

/* Ranking-quality meters: header (dtype + verdict badge), thin track, detail line. */
.quant-quality-meter {
  margin-bottom: 14px;
}
.quant-meter-header {
  display: flex;
  justify-content: space-between;
  align-items: center;
  margin-bottom: 6px;
}
.quant-meter-header .dtype-name {
  color: var(--accent);
  font-weight: 600;
  font-family: var(--mono);
  font-size: 13px;
}
.quant-meter-header .verdict-badge {
  font-size: 12px;
  padding: 2px 8px;
  border-radius: 10px;
  font-weight: 600;
}
/* Verdict colours: perfect = green, good = yellow, degraded = red. */
.quant-meter-header .verdict-badge.perfect {
  background: rgba(0,212,170,0.15);
  color: var(--green);
}
.quant-meter-header .verdict-badge.good {
  background: rgba(255,217,61,0.15);
  color: var(--yellow);
}
.quant-meter-header .verdict-badge.degraded {
  background: rgba(255,107,107,0.15);
  color: var(--red);
}
.quant-meter-track {
  height: 10px;
  background: var(--bg-input);
  border-radius: 5px;
  overflow: hidden;
}
.quant-meter-fill {
  height: 100%;
  border-radius: 5px;
  transition: width 0.8s cubic-bezier(0.22, 1, 0.36, 1);
}
.quant-meter-fill.perfect {
  background: linear-gradient(90deg, #00d4aa, #00e4ba);
}
.quant-meter-fill.good {
  background: linear-gradient(90deg, #ffd93d, #ffe066);
}
.quant-meter-fill.degraded {
  background: linear-gradient(90deg, #ff6b6b, #ff8e8e);
}
.quant-meter-detail {
  font-size: 11px;
  color: var(--text-muted);
  margin-top: 4px;
  font-family: var(--mono);
}

/* Side-by-side ranking columns; the column template is not set here. */
.quant-rank-cols {
  display: grid;
  gap: 12px;
}
.quant-rank-col-header {
  font-weight: 600;
  color: var(--accent);
  font-size: 13px;
  font-family: var(--mono);
  margin-bottom: 8px;
  padding-bottom: 6px;
  border-bottom: 1px solid var(--border);
}
.quant-rank-item {
  padding: 8px 10px;
  margin-bottom: 4px;
  border-radius: 6px;
  font-size: 12px;
  position: relative;
  border-left: 3px solid transparent;
  transition: background 0.2s;
}
.quant-rank-item:hover {
  background: rgba(255,255,255,0.03);
}
/* Left border colour encodes the item's state: match, differ or baseline. */
.quant-rank-item.match {
  border-left-color: var(--green);
  background: rgba(0,212,170,0.06);
}
.quant-rank-item.differ {
  border-left-color: var(--red);
  background: rgba(255,107,107,0.06);
}
.quant-rank-item.baseline {
  border-left-color: var(--border);
  background: var(--bg-input);
}
/* Circular rank-position chip. */
.quant-rank-pos {
  display: inline-block;
  width: 22px;
  height: 22px;
  line-height: 22px;
  text-align: center;
  border-radius: 50%;
  background: var(--bg-surface);
  color: var(--accent);
  font-weight: 700;
  font-size: 11px;
  font-family: var(--mono);
  margin-right: 8px;
}
.quant-rank-score {
  color: var(--text-muted);
  font-size: 11px;
  font-family: var(--mono);
  margin-top: 3px;
}
|
|
630
|
+
|
|
554
631
|
/* Cost calculator */
|
|
555
632
|
.cost-slider-row {
|
|
556
633
|
display: flex;
|
|
@@ -797,6 +874,16 @@ select:focus { outline: none; border-color: var(--accent); }
|
|
|
797
874
|
<option value="2048">2048</option>
|
|
798
875
|
</select>
|
|
799
876
|
</div>
|
|
877
|
+
<div class="option-group">
|
|
878
|
+
<span class="option-label">Output Type</span>
|
|
879
|
+
<select id="embedOutputDtype">
|
|
880
|
+
<option value="float">float (32-bit)</option>
|
|
881
|
+
<option value="int8">int8 (4× smaller)</option>
|
|
882
|
+
<option value="uint8">uint8 (4× smaller)</option>
|
|
883
|
+
<option value="binary">binary (32× smaller)</option>
|
|
884
|
+
<option value="ubinary">ubinary (32× smaller)</option>
|
|
885
|
+
</select>
|
|
886
|
+
</div>
|
|
800
887
|
<button class="btn" id="embedBtn" onclick="doEmbed()">⚡ Embed</button>
|
|
801
888
|
</div>
|
|
802
889
|
|
|
@@ -916,6 +1003,7 @@ Semantic search understands meaning beyond keyword matching</textarea>
|
|
|
916
1003
|
<div class="bench-panels">
|
|
917
1004
|
<button class="bench-panel-btn active" data-bench="latency">⚡ Latency</button>
|
|
918
1005
|
<button class="bench-panel-btn" data-bench="ranking">🏆 Ranking</button>
|
|
1006
|
+
<button class="bench-panel-btn" data-bench="quantization">⚗️ Quantization</button>
|
|
919
1007
|
<button class="bench-panel-btn" data-bench="cost">💰 Cost</button>
|
|
920
1008
|
<button class="bench-panel-btn" data-bench="history">📊 History</button>
|
|
921
1009
|
</div>
|
|
@@ -1008,6 +1096,91 @@ Reranking models rescore initial search results to improve relevance ordering.</
|
|
|
1008
1096
|
</div>
|
|
1009
1097
|
</div>
|
|
1010
1098
|
|
|
1099
|
+
<!-- ── Quantization Panel ── -->
|
|
1100
|
+
<div class="bench-view" id="bench-quantization">
|
|
1101
|
+
<div class="card">
|
|
1102
|
+
<div class="card-title">Quantization Benchmark</div>
|
|
1103
|
+
<p style="color:var(--text-dim);font-size:13px;margin-bottom:12px;">
|
|
1104
|
+
Compare how different output data types (float, int8, binary) affect storage size and ranking quality.
|
|
1105
|
+
Embeds the same corpus with each dtype and measures the tradeoff.
|
|
1106
|
+
</p>
|
|
1107
|
+
<div class="options-row" style="flex-wrap:wrap;">
|
|
1108
|
+
<div class="option-group">
|
|
1109
|
+
<span class="option-label">Model</span>
|
|
1110
|
+
<select id="quantModel"></select>
|
|
1111
|
+
</div>
|
|
1112
|
+
<div class="option-group">
|
|
1113
|
+
<span class="option-label">Dimensions</span>
|
|
1114
|
+
<select id="quantDimensions">
|
|
1115
|
+
<option value="">Default</option>
|
|
1116
|
+
<option value="256">256</option>
|
|
1117
|
+
<option value="512">512</option>
|
|
1118
|
+
<option value="1024">1024</option>
|
|
1119
|
+
<option value="2048">2048</option>
|
|
1120
|
+
</select>
|
|
1121
|
+
</div>
|
|
1122
|
+
<div class="option-group">
|
|
1123
|
+
<span class="option-label">Data Types</span>
|
|
1124
|
+
<div id="quantDtypeChecks" style="display:flex;gap:8px;flex-wrap:wrap;">
|
|
1125
|
+
<label style="display:flex;align-items:center;gap:4px;font-size:13px;cursor:pointer;color:var(--text);">
|
|
1126
|
+
<input type="checkbox" value="float" checked style="accent-color:var(--accent);">float
|
|
1127
|
+
</label>
|
|
1128
|
+
<label style="display:flex;align-items:center;gap:4px;font-size:13px;cursor:pointer;color:var(--text);">
|
|
1129
|
+
<input type="checkbox" value="int8" checked style="accent-color:var(--accent);">int8
|
|
1130
|
+
</label>
|
|
1131
|
+
<label style="display:flex;align-items:center;gap:4px;font-size:13px;cursor:pointer;color:var(--text);">
|
|
1132
|
+
<input type="checkbox" value="uint8" style="accent-color:var(--accent);">uint8
|
|
1133
|
+
</label>
|
|
1134
|
+
<label style="display:flex;align-items:center;gap:4px;font-size:13px;cursor:pointer;color:var(--text);">
|
|
1135
|
+
<input type="checkbox" value="ubinary" checked style="accent-color:var(--accent);">ubinary
|
|
1136
|
+
</label>
|
|
1137
|
+
<label style="display:flex;align-items:center;gap:4px;font-size:13px;cursor:pointer;color:var(--text);">
|
|
1138
|
+
<input type="checkbox" value="binary" style="accent-color:var(--accent);">binary
|
|
1139
|
+
</label>
|
|
1140
|
+
</div>
|
|
1141
|
+
</div>
|
|
1142
|
+
</div>
|
|
1143
|
+
<div style="margin-top:12px;">
|
|
1144
|
+
<span class="option-label">Query</span>
|
|
1145
|
+
<input type="text" id="quantQuery" placeholder="Search query..." value="How do I search for similar documents using embeddings?" style="width:100%;margin-bottom:8px;">
|
|
1146
|
+
</div>
|
|
1147
|
+
<div>
|
|
1148
|
+
<span class="option-label">Corpus (one document per line)</span>
|
|
1149
|
+
<textarea id="quantCorpus" rows="5" placeholder="Documents to embed...">Vector search finds documents by computing similarity between embedding vectors in high-dimensional space.
|
|
1150
|
+
MongoDB Atlas Vector Search lets you index and query vector embeddings alongside your operational data.
|
|
1151
|
+
Traditional full-text search uses inverted indexes to match keyword terms in documents.
|
|
1152
|
+
Cosine similarity measures the angle between two vectors, commonly used for semantic search.
|
|
1153
|
+
Database sharding distributes data across multiple servers for horizontal scalability.
|
|
1154
|
+
Embedding models convert text into dense numerical vectors that capture meaning.
|
|
1155
|
+
Approximate nearest neighbor algorithms like HNSW enable fast similarity search at scale.
|
|
1156
|
+
Reranking models rescore initial search results to improve relevance ordering.</textarea>
|
|
1157
|
+
</div>
|
|
1158
|
+
<div style="margin-top:12px;">
|
|
1159
|
+
<button class="btn" id="quantBtn" onclick="doBenchQuantization()">⚗️ Run Quantization Benchmark</button>
|
|
1160
|
+
</div>
|
|
1161
|
+
</div>
|
|
1162
|
+
|
|
1163
|
+
<div class="error-msg" id="quantError"></div>
|
|
1164
|
+
|
|
1165
|
+
<div class="result-section" id="quantResult">
|
|
1166
|
+
<div class="quant-charts">
|
|
1167
|
+
<div class="card">
|
|
1168
|
+
<div class="card-title">📦 Storage per Vector</div>
|
|
1169
|
+
<div id="quantStorageChart"></div>
|
|
1170
|
+
</div>
|
|
1171
|
+
<div class="card">
|
|
1172
|
+
<div class="card-title">⏱ API Latency</div>
|
|
1173
|
+
<div id="quantLatencyChart"></div>
|
|
1174
|
+
</div>
|
|
1175
|
+
</div>
|
|
1176
|
+
<div class="card">
|
|
1177
|
+
<div class="card-title">🎯 Ranking Quality vs Float Baseline</div>
|
|
1178
|
+
<div id="quantQualityMeters" style="margin-bottom:16px;"></div>
|
|
1179
|
+
<div id="quantRankGrid"></div>
|
|
1180
|
+
</div>
|
|
1181
|
+
</div>
|
|
1182
|
+
</div>
|
|
1183
|
+
|
|
1011
1184
|
<!-- ── Cost Panel ── -->
|
|
1012
1185
|
<div class="bench-view" id="bench-cost">
|
|
1013
1186
|
<div class="card">
|
|
@@ -1238,6 +1411,12 @@ function populateModelSelects() {
|
|
|
1238
1411
|
}
|
|
1239
1412
|
|
|
1240
1413
|
// ── API Helpers ──
|
|
1414
|
+
/**
 * Format a byte count as a short human-readable string for the playground UI.
 *
 * Uses binary multiples (1 KB = 1024 B, 1 MB = 1024 KB) with one decimal
 * place; anything below 1 KB is printed unchanged with a " B" suffix.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} For example "512 B", "1.5 KB" or "2.0 MB".
 */
function formatBytesUI(bytes) {
  // Largest unit first so the first threshold that fits wins.
  const units = [
    [1024 * 1024, ' MB'],
    [1024, ' KB'],
  ];
  for (const [unitSize, suffix] of units) {
    if (bytes >= unitSize) {
      return (bytes / unitSize).toFixed(1) + suffix;
    }
  }
  return `${bytes} B`;
}
|
|
1419
|
+
|
|
1241
1420
|
async function apiPost(url, body) {
|
|
1242
1421
|
const res = await fetch(url, {
|
|
1243
1422
|
method: 'POST',
|
|
@@ -1284,16 +1463,29 @@ window.doEmbed = async function() {
|
|
|
1284
1463
|
const dims = document.getElementById('embedDimensions').value;
|
|
1285
1464
|
const dimensions = dims ? parseInt(dims, 10) : undefined;
|
|
1286
1465
|
|
|
1287
|
-
const
|
|
1466
|
+
const outputDtype = document.getElementById('embedOutputDtype').value;
|
|
1467
|
+
const body = { texts: [text], model, inputType, dimensions };
|
|
1468
|
+
if (outputDtype && outputDtype !== 'float') body.output_dtype = outputDtype;
|
|
1469
|
+
|
|
1470
|
+
const data = await apiPost('/api/embed', body);
|
|
1288
1471
|
const emb = data.data[0].embedding;
|
|
1289
1472
|
lastEmbedding = emb;
|
|
1290
1473
|
|
|
1291
1474
|
// Stats
|
|
1475
|
+
const dtype = outputDtype || 'float';
|
|
1476
|
+
const bytesPerDim = (dtype === 'binary' || dtype === 'ubinary') ? 0.125 : (dtype === 'int8' || dtype === 'uint8') ? 1 : 4;
|
|
1477
|
+
const totalBytes = emb.length * bytesPerDim;
|
|
1478
|
+
const storageLine = dtype !== 'float'
|
|
1479
|
+
? `<br><span style="color:var(--success)">📦 ${dtype}: ${formatBytesUI(totalBytes)}/vector (${(4 * emb.length / totalBytes).toFixed(0)}× smaller than float)</span>`
|
|
1480
|
+
: '';
|
|
1481
|
+
|
|
1292
1482
|
const statsEl = document.getElementById('embedStats');
|
|
1293
1483
|
statsEl.innerHTML = `
|
|
1294
1484
|
<span class="stat"><span class="stat-label">Model</span><span class="stat-value">${data.model}</span></span>
|
|
1295
1485
|
<span class="stat"><span class="stat-label">Dimensions</span><span class="stat-value">${emb.length}</span></span>
|
|
1296
1486
|
<span class="stat"><span class="stat-label">Tokens</span><span class="stat-value">${data.usage?.total_tokens || '—'}</span></span>
|
|
1487
|
+
<span class="stat"><span class="stat-label">Type</span><span class="stat-value">${dtype}</span></span>
|
|
1488
|
+
${storageLine}
|
|
1297
1489
|
`;
|
|
1298
1490
|
|
|
1299
1491
|
// Vector preview
|
|
@@ -1529,6 +1721,7 @@ const CONCEPT_META = {
|
|
|
1529
1721
|
'api-access': { icon: '🌐', tab: 'embed' },
|
|
1530
1722
|
'batch-processing': { icon: '📦', tab: 'embed' },
|
|
1531
1723
|
benchmarking: { icon: '⏱', tab: 'benchmark' },
|
|
1724
|
+
quantization: { icon: '⚗️', tab: 'benchmark' },
|
|
1532
1725
|
};
|
|
1533
1726
|
|
|
1534
1727
|
let exploreConcepts = {};
|
|
@@ -1916,6 +2109,222 @@ function renderRankComparison(modelA, modelB, rankedA, rankedB, topK) {
|
|
|
1916
2109
|
}
|
|
1917
2110
|
}
|
|
1918
2111
|
|
|
2112
|
+
// ── Benchmark: Quantization ──
|
|
2113
|
+
/**
 * Fill the quantization panel's model dropdown (`#quantModel`).
 *
 * Clears the select, adds one option per entry of the `embedModels` list
 * (value and label are both the model name), then pre-selects
 * `voyage-4-large` when that model is in the list.
 */
function populateQuantModelSelect() {
  const modelSelect = document.getElementById('quantModel');
  modelSelect.innerHTML = '';

  for (const { name } of embedModels) {
    const option = document.createElement('option');
    option.value = name;
    option.textContent = name;
    modelSelect.appendChild(option);
  }

  // Leave the current selection untouched when the preferred model is absent.
  const preferredName = 'voyage-4-large';
  const hasPreferred = embedModels.some(({ name }) => name === preferredName);
  if (hasPreferred) {
    modelSelect.value = preferredName;
  }
}
|
|
2126
|
+
|
|
2127
|
+
/**
 * Hamming similarity between two bit-packed binary embeddings.
 *
 * Each array element is treated as one byte holding 8 embedding bits. The
 * result is the fraction of bit positions on which the two vectors agree:
 * 1 means identical, 0 means every bit differs. Higher is more similar, so
 * the value can be sorted the same way as a cosine similarity.
 *
 * A plain dot product of the packed bytes is NOT a valid similarity here:
 * two identical all-zero bytes would score 0 while two identical 0xFF bytes
 * would score 65025, so rankings would favour large byte values instead of
 * bit agreement.
 *
 * Only the low 8 bits of each element are compared, so signed (`binary`) and
 * unsigned (`ubinary`) byte values both work, provided both vectors use the
 * same dtype.
 *
 * @param {ArrayLike<number>} a - Packed bytes of the first embedding.
 * @param {ArrayLike<number>} b - Packed bytes of the second embedding.
 * @returns {number} Fraction of matching bits in [0, 1]; 0 for empty input.
 */
function hammingSimUI(a, b) {
  const sharedBytes = Math.min(a.length, b.length);
  // Bytes present in only one vector count as fully mismatched.
  const totalBits = Math.max(a.length, b.length) * 8;
  if (totalBits === 0) return 0;

  let matchingBits = 0;
  for (let i = 0; i < sharedBytes; i++) {
    // XOR leaves a 1 wherever the two bytes disagree.
    let diff = (a[i] ^ b[i]) & 0xff;
    let differingBits = 0;
    while (diff !== 0) {
      diff &= diff - 1; // clear the lowest set bit
      differingBits++;
    }
    matchingBits += 8 - differingBits;
  }
  return matchingBits / totalBits;
}
|
|
2133
|
+
|
|
2134
|
+
/**
 * Run the playground quantization benchmark and render its results.
 *
 * Reads the model, dimensions, query, corpus (one document per line) and the
 * checked data types from the quantization panel. For each selected dtype it
 * posts the query plus corpus to `/api/embed`, times the call, and ranks the
 * corpus against the query (packed binary/ubinary via `hammingSimUI`,
 * everything else via `cosineSim`). It then renders:
 *   - a storage-per-vector bar chart (`#quantStorageChart`),
 *   - an API latency bar chart (`#quantLatencyChart`),
 *   - per-dtype ranking-quality meters against the baseline dtype
 *     (`#quantQualityMeters`), and
 *   - side-by-side top-K ranking columns (`#quantRankGrid`).
 *
 * The baseline is `float` when selected, otherwise the first completed dtype.
 * Validation and API errors are shown in `#quantError`; the run button's
 * loading state is always cleared on exit.
 *
 * @returns {Promise<void>}
 */
window.doBenchQuantization = async function() {
  // Corpus text is user input and is interpolated into innerHTML below, so it
  // must be escaped for both element content and double-quoted attributes.
  const escapeHTML = (value) => String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');

  hideError('quantError');
  const model = document.getElementById('quantModel').value;
  const dimsVal = document.getElementById('quantDimensions').value;
  const dimensions = dimsVal ? parseInt(dimsVal, 10) : undefined;
  const query = document.getElementById('quantQuery').value.trim();
  const corpusText = document.getElementById('quantCorpus').value.trim();

  if (!query) { showError('quantError', 'Enter a query'); return; }
  if (!corpusText) { showError('quantError', 'Enter at least 2 documents'); return; }

  const corpus = corpusText.split('\n').map(d => d.trim()).filter(Boolean);
  if (corpus.length < 2) { showError('quantError', 'Enter at least 2 documents'); return; }

  const checks = document.querySelectorAll('#quantDtypeChecks input:checked');
  const dtypes = Array.from(checks).map(c => c.value);
  if (dtypes.length === 0) { showError('quantError', 'Select at least one data type'); return; }

  setLoading('quantBtn', true);

  try {
    const allTexts = [query, ...corpus];
    const resultsByDtype = {};

    for (const dtype of dtypes) {
      // NOTE(review): the query is embedded in the same request as the corpus,
      // so it also gets inputType 'document'. Confirm whether a separate
      // 'query' embedding is wanted; it would cost one extra call per dtype.
      const body = { texts: allTexts, model, inputType: 'document' };
      if (dimensions) body.dimensions = dimensions;
      if (dtype !== 'float') body.output_dtype = dtype;

      const start = performance.now();
      const data = await apiPost('/api/embed', body);
      const elapsed = performance.now() - start;

      const embeddings = data.data.map(d => d.embedding);
      if (embeddings.length !== allTexts.length) {
        // Fail with a readable message instead of crashing inside the ranking step.
        throw new Error(`Expected ${allTexts.length} embeddings for ${dtype}, got ${embeddings.length}`);
      }
      const queryEmbed = embeddings[0];
      const dims = queryEmbed.length;
      const isBinary = (dtype === 'binary' || dtype === 'ubinary');

      // Rank corpus documents by similarity to the query (index 0 is the query).
      const ranked = corpus.map((text, i) => {
        const docEmbed = embeddings[i + 1];
        const similarity = isBinary
          ? hammingSimUI(queryEmbed, docEmbed)
          : cosineSim(queryEmbed, docEmbed);
        return { index: i, text, similarity };
      }).sort((a, b) => b.similarity - a.similarity);

      // Storage: binary/ubinary arrays are already packed 8 bits per element,
      // so their element count is the byte count and 1/8th of the dimensions.
      const actualDims = isBinary ? dims * 8 : dims;
      const bytesPerVec = dtype === 'float' ? dims * 4 : dims;

      resultsByDtype[dtype] = {
        dtype, latency: elapsed, dims, actualDims, bytesPerVec,
        tokens: data.usage?.total_tokens || 0, ranked,
      };
    }

    const completed = Object.values(resultsByDtype);
    if (completed.length === 0) {
      showError('quantError', 'No data types completed successfully');
      return;
    }

    // ── Render Charts ──
    const baseline = completed.find(r => r.dtype === 'float') || completed[0];
    const maxBytes = Math.max(...completed.map(r => r.bytesPerVec));
    const maxLatency = Math.max(...completed.map(r => r.latency));
    const minLatency = Math.min(...completed.map(r => r.latency));
    const DTYPE_COLORS = { float: '#00d4aa', int8: '#4ecdc4', uint8: '#45b7d1', ubinary: '#ffd93d', binary: '#ff6b6b' };

    // ── Storage Bar Chart ──
    let storageHTML = '';
    for (const r of completed) {
      // Floor of 8% keeps very small bars visible.
      const pct = Math.max(8, (r.bytesPerVec / maxBytes) * 100);
      const totalMB = (r.bytesPerVec * 1_000_000) / (1024 * 1024);
      const sizeStr = totalMB >= 1024 ? `${(totalMB / 1024).toFixed(1)} GB` : `${totalMB.toFixed(0)} MB`;
      let savings;
      if (r === baseline) {
        savings = 'baseline';
      } else if (r.bytesPerVec < baseline.bytesPerVec) {
        savings = `${(baseline.bytesPerVec / r.bytesPerVec).toFixed(0)}× smaller`;
      } else if (r.bytesPerVec > baseline.bytesPerVec) {
        savings = `${(r.bytesPerVec / baseline.bytesPerVec).toFixed(0)}× larger`;
      } else {
        savings = 'same size';
      }
      const color = DTYPE_COLORS[r.dtype] || '#82aaff';
      storageHTML += `<div class="quant-bar-group">
        <div class="quant-bar-label">
          <span class="dtype-name">${r.dtype}</span>
          <span class="dtype-value">${formatBytesUI(r.bytesPerVec)}/vec · ${sizeStr} @ 1M</span>
        </div>
        <div class="quant-bar-track">
          <div class="quant-bar-fill storage" style="width:${pct}%;background:linear-gradient(90deg, ${color}, ${color}cc);">${savings}</div>
        </div>
      </div>`;
    }
    document.getElementById('quantStorageChart').innerHTML = storageHTML;

    // ── Latency Bar Chart ──
    let latencyHTML = '';
    for (const r of completed) {
      const pct = Math.max(8, (r.latency / maxLatency) * 100);
      const color = DTYPE_COLORS[r.dtype] || '#82aaff';
      const badge = r.latency === minLatency ? ' ⚡' : '';
      latencyHTML += `<div class="quant-bar-group">
        <div class="quant-bar-label">
          <span class="dtype-name">${r.dtype}</span>
          <span class="dtype-value">${r.latency.toFixed(0)}ms${badge}</span>
        </div>
        <div class="quant-bar-track">
          <div class="quant-bar-fill latency" style="width:${pct}%;background:linear-gradient(90deg, ${color}, ${color}cc);">${r.latency.toFixed(0)}ms</div>
        </div>
      </div>`;
    }
    document.getElementById('quantLatencyChart').innerHTML = latencyHTML;

    // ── Quality Meters + Ranking Grid ──
    const topK = Math.min(5, corpus.length);
    const metersEl = document.getElementById('quantQualityMeters');
    const gridEl = document.getElementById('quantRankGrid');
    gridEl.innerHTML = '';
    metersEl.innerHTML = '';

    if (completed.length >= 2 && baseline) {
      const baselineRanking = baseline.ranked.slice(0, topK).map(r => r.index);

      // Quality meters for each non-baseline dtype
      let metersHTML = '';
      for (const r of completed) {
        if (r.dtype === baseline.dtype) continue;
        const otherRanking = r.ranked.slice(0, topK).map(x => x.index);
        const otherSet = new Set(otherRanking);
        const overlap = baselineRanking.filter(idx => otherSet.has(idx)).length;
        const overlapPct = (overlap / topK) * 100;
        const positionMatches = baselineRanking.filter((idx, pos) => otherRanking[pos] === idx).length;
        const posMatchPct = (positionMatches / topK) * 100;
        const exactMatch = positionMatches === topK;

        let grade, gradeLabel, detail;
        if (exactMatch) {
          grade = 'perfect'; gradeLabel = '✓ Perfect';
          detail = `Identical ranking — all ${topK} positions match ${baseline.dtype} baseline`;
        } else if (overlap === topK) {
          grade = 'good'; gradeLabel = '≈ Reordered';
          detail = `Same ${topK} documents, ${positionMatches}/${topK} in same position`;
        } else {
          grade = overlap >= topK * 0.6 ? 'good' : 'degraded';
          gradeLabel = `${overlapPct.toFixed(0)}% overlap`;
          detail = `${overlap}/${topK} documents match, ${positionMatches}/${topK} positions match`;
        }

        metersHTML += `<div class="quant-quality-meter">
          <div class="quant-meter-header">
            <span class="dtype-name">${r.dtype}</span>
            <span class="verdict-badge ${grade}">${gradeLabel}</span>
          </div>
          <div class="quant-meter-track">
            <div class="quant-meter-fill ${grade}" style="width:${posMatchPct}%"></div>
          </div>
          <div class="quant-meter-detail">${detail}</div>
        </div>`;
      }
      metersEl.innerHTML = metersHTML;

      // Side-by-side ranking columns
      let rankHTML = `<div class="quant-rank-cols" style="grid-template-columns:repeat(${completed.length},1fr);">`;
      for (const r of completed) {
        rankHTML += `<div><div class="quant-rank-col-header">${r.dtype}${r === baseline ? ' (baseline)' : ''}</div>`;
        r.ranked.slice(0, topK).forEach((item, pos) => {
          // Truncate first, then escape, so an entity is never cut in half.
          const trunc = item.text.length > 55 ? item.text.slice(0, 52) + '…' : item.text;
          let cls = 'baseline';
          if (r !== baseline) {
            cls = (baseline.ranked[pos] && item.index === baseline.ranked[pos].index) ? 'match' : 'differ';
          }
          rankHTML += `<div class="quant-rank-item ${cls}" title="${escapeHTML(item.text)}">
            <span class="quant-rank-pos">${pos + 1}</span>${escapeHTML(trunc)}
            <div class="quant-rank-score">${item.similarity.toFixed(4)} · doc ${item.index}</div>
          </div>`;
        });
        rankHTML += '</div>';
      }
      rankHTML += '</div>';
      gridEl.innerHTML = rankHTML;
    } else {
      metersEl.innerHTML = '<span style="color:var(--text-dim)">Select multiple data types (including float) to compare rankings.</span>';
    }

    document.getElementById('quantResult').classList.add('visible');
  } catch (err) {
    showError('quantError', err.message);
  } finally {
    setLoading('quantBtn', false);
  }
};
|
|
2327
|
+
|
|
1919
2328
|
// ── Benchmark: Cost Calculator ──
|
|
1920
2329
|
function initCostCalculator() {
|
|
1921
2330
|
const tokSlider = document.getElementById('costTokens');
|
|
@@ -2055,6 +2464,7 @@ init = async function() {
|
|
|
2055
2464
|
await _origInit();
|
|
2056
2465
|
buildModelCheckboxes();
|
|
2057
2466
|
populateBenchRankSelects();
|
|
2467
|
+
populateQuantModelSelect();
|
|
2058
2468
|
initCostCalculator();
|
|
2059
2469
|
renderHistory();
|
|
2060
2470
|
};
|
|
@@ -241,6 +241,73 @@ describe('benchmark command', () => {
|
|
|
241
241
|
assert.ok(optionNames.includes('--save'), 'should have --save option');
|
|
242
242
|
});
|
|
243
243
|
|
|
244
|
+
// The benchmark command must expose an `asymmetric` subcommand.
it('has asymmetric subcommand', () => {
  const cli = new Command();
  registerBenchmark(cli);
  const bench = cli.commands.find((cmd) => cmd.name() === 'benchmark');
  const asymmetric = bench.commands.find((sub) => sub.name() === 'asymmetric');
  assert.ok(asymmetric, 'asymmetric subcommand should be registered');
});
|
|
251
|
+
|
|
252
|
+
// `benchmark asymmetric` must accept both model-selection flags.
it('asymmetric has --doc-model and --query-models options', () => {
  const cli = new Command();
  registerBenchmark(cli);
  const bench = cli.commands.find((cmd) => cmd.name() === 'benchmark');
  const asymmetric = bench.commands.find((sub) => sub.name() === 'asymmetric');
  const longFlags = asymmetric.options.map((option) => option.long);
  assert.ok(longFlags.includes('--doc-model'), 'should have --doc-model');
  assert.ok(longFlags.includes('--query-models'), 'should have --query-models');
});
|
|
261
|
+
|
|
262
|
+
// The document model falls back to voyage-4-large when --doc-model is omitted.
it('asymmetric defaults doc-model to voyage-4-large', () => {
  const cli = new Command();
  registerBenchmark(cli);
  const bench = cli.commands.find((cmd) => cmd.name() === 'benchmark');
  const asymmetric = bench.commands.find((sub) => sub.name() === 'asymmetric');
  const docModelOption = asymmetric.options.find((option) => option.long === '--doc-model');
  assert.equal(docModelOption.defaultValue, 'voyage-4-large');
});
|
|
270
|
+
|
|
271
|
+
// `benchmark quantization` must exist and also answer to the short `quant` alias.
it('has quantization subcommand with quant alias', () => {
  const cli = new Command();
  registerBenchmark(cli);
  const bench = cli.commands.find((cmd) => cmd.name() === 'benchmark');
  const quantization = bench.commands.find((sub) => sub.name() === 'quantization');
  assert.ok(quantization, 'quantization subcommand should be registered');
  const aliasList = quantization.aliases();
  assert.ok(aliasList.includes('quant'), 'should have "quant" alias');
});
|
|
279
|
+
|
|
280
|
+
// Core flags of `benchmark quantization`.
it('quantization has --model, --dtypes, --query options', () => {
  const cli = new Command();
  registerBenchmark(cli);
  const bench = cli.commands.find((cmd) => cmd.name() === 'benchmark');
  const quantization = bench.commands.find((sub) => sub.name() === 'quantization');
  const longFlags = quantization.options.map((option) => option.long);
  assert.ok(longFlags.includes('--model'), 'should have --model');
  assert.ok(longFlags.includes('--dtypes'), 'should have --dtypes');
  assert.ok(longFlags.includes('--query'), 'should have --query');
});
|
|
290
|
+
|
|
291
|
+
// Without --dtypes the benchmark compares float, int8 and ubinary.
it('quantization defaults dtypes to float,int8,ubinary', () => {
  const cli = new Command();
  registerBenchmark(cli);
  const bench = cli.commands.find((cmd) => cmd.name() === 'benchmark');
  const quantization = bench.commands.find((sub) => sub.name() === 'quantization');
  const { defaultValue } = quantization.options.find((option) => option.long === '--dtypes');
  assert.equal(defaultValue, 'float,int8,ubinary');
});
|
|
299
|
+
|
|
300
|
+
// Secondary flags of `benchmark quantization`.
it('quantization has --dimensions, --save, --file options', () => {
  const cli = new Command();
  registerBenchmark(cli);
  const bench = cli.commands.find((cmd) => cmd.name() === 'benchmark');
  const quantization = bench.commands.find((sub) => sub.name() === 'quantization');
  const longFlags = quantization.options.map((option) => option.long);
  assert.ok(longFlags.includes('--dimensions'), 'should have --dimensions');
  assert.ok(longFlags.includes('--save'), 'should have --save');
  assert.ok(longFlags.includes('--file'), 'should have --file');
});
|
|
310
|
+
|
|
244
311
|
it('batch defaults batch-sizes to 1,5,10,25,50', () => {
|
|
245
312
|
const program = new Command();
|
|
246
313
|
registerBenchmark(program);
|
|
@@ -29,4 +29,14 @@ describe('embed command', () => {
|
|
|
29
29
|
const optionNames = embedCmd.options.map(o => o.long);
|
|
30
30
|
assert.ok(optionNames.includes('--input-type'), 'should have --input-type option');
|
|
31
31
|
});
|
|
32
|
+
|
|
33
|
+
// `embed` must expose --output-dtype and default it to float.
it('has --output-dtype flag with float default', () => {
  const cli = new Command();
  registerEmbed(cli);
  const embed = cli.commands.find((cmd) => cmd.name() === 'embed');
  const longFlags = embed.options.map((option) => option.long);
  assert.ok(longFlags.includes('--output-dtype'), 'should have --output-dtype option');
  const dtypeOption = embed.options.find((option) => option.long === '--output-dtype');
  assert.equal(dtypeOption.defaultValue, 'float');
});
|
|
32
42
|
});
|
|
@@ -17,6 +17,7 @@ describe('explanations', () => {
|
|
|
17
17
|
'api-keys',
|
|
18
18
|
'api-access',
|
|
19
19
|
'batch-processing',
|
|
20
|
+
'quantization',
|
|
20
21
|
'benchmarking',
|
|
21
22
|
];
|
|
22
23
|
|
|
@@ -90,6 +91,11 @@ describe('explanations', () => {
|
|
|
90
91
|
batch: 'batch-processing',
|
|
91
92
|
model: 'models',
|
|
92
93
|
batching: 'batch-processing',
|
|
94
|
+
quantize: 'quantization',
|
|
95
|
+
int8: 'quantization',
|
|
96
|
+
binary: 'quantization',
|
|
97
|
+
matryoshka: 'quantization',
|
|
98
|
+
dtype: 'quantization',
|
|
93
99
|
};
|
|
94
100
|
|
|
95
101
|
it('alias map covers expected aliases', () => {
|