voyageai-cli 1.11.0 → 1.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -551,6 +551,83 @@ select:focus { outline: none; border-color: var(--accent); }
551
551
  .rank-differ { border-left-color: var(--yellow); }
552
552
  .rank-arrow { text-align: center; color: var(--text-muted); font-size: 18px; padding-top: 4px; }
553
553
 
554
+ /* Quantization charts */
555
+ .quant-charts { display: grid; grid-template-columns: 1fr 1fr; gap: 16px; margin-bottom: 16px; }
556
+ @media (max-width: 768px) { .quant-charts { grid-template-columns: 1fr; } }
557
+
558
+ .quant-bar-group { margin-bottom: 14px; }
559
+ .quant-bar-label {
560
+ display: flex; justify-content: space-between; align-items: baseline;
561
+ margin-bottom: 4px; font-size: 13px;
562
+ }
563
+ .quant-bar-label .dtype-name { color: var(--accent); font-weight: 600; font-family: var(--mono); }
564
+ .quant-bar-label .dtype-value { color: var(--text-dim); font-family: var(--mono); font-size: 12px; }
565
+ .quant-bar-track {
566
+ height: 32px; background: var(--bg-input); border-radius: 6px;
567
+ overflow: hidden; position: relative;
568
+ }
569
+ .quant-bar-fill {
570
+ height: 100%; border-radius: 6px;
571
+ transition: width 0.8s cubic-bezier(0.22, 1, 0.36, 1);
572
+ display: flex; align-items: center; padding: 0 10px;
573
+ font-family: var(--mono); font-size: 12px; font-weight: 600;
574
+ color: #0a0a1a; white-space: nowrap; min-width: fit-content;
575
+ }
576
+ .quant-bar-fill.storage { background: linear-gradient(90deg, #00d4aa, #4ecdc4); }
577
+ .quant-bar-fill.latency { background: linear-gradient(90deg, #45b7d1, #82aaff); }
578
+ .quant-bar-badge {
579
+ position: absolute; right: 10px; top: 50%; transform: translateY(-50%);
580
+ font-size: 12px; color: var(--text-dim); font-family: var(--mono);
581
+ }
582
+
583
+ .quant-quality-meter { margin-bottom: 14px; }
584
+ .quant-meter-header {
585
+ display: flex; justify-content: space-between; align-items: center;
586
+ margin-bottom: 6px;
587
+ }
588
+ .quant-meter-header .dtype-name { color: var(--accent); font-weight: 600; font-family: var(--mono); font-size: 13px; }
589
+ .quant-meter-header .verdict-badge {
590
+ font-size: 12px; padding: 2px 8px; border-radius: 10px; font-weight: 600;
591
+ }
592
+ .quant-meter-header .verdict-badge.perfect { background: rgba(0,212,170,0.15); color: var(--green); }
593
+ .quant-meter-header .verdict-badge.good { background: rgba(255,217,61,0.15); color: var(--yellow); }
594
+ .quant-meter-header .verdict-badge.degraded { background: rgba(255,107,107,0.15); color: var(--red); }
595
+ .quant-meter-track {
596
+ height: 10px; background: var(--bg-input); border-radius: 5px; overflow: hidden;
597
+ }
598
+ .quant-meter-fill {
599
+ height: 100%; border-radius: 5px;
600
+ transition: width 0.8s cubic-bezier(0.22, 1, 0.36, 1);
601
+ }
602
+ .quant-meter-fill.perfect { background: linear-gradient(90deg, #00d4aa, #00e4ba); }
603
+ .quant-meter-fill.good { background: linear-gradient(90deg, #ffd93d, #ffe066); }
604
+ .quant-meter-fill.degraded { background: linear-gradient(90deg, #ff6b6b, #ff8e8e); }
605
+ .quant-meter-detail { font-size: 11px; color: var(--text-muted); margin-top: 4px; font-family: var(--mono); }
606
+
607
+ .quant-rank-cols {
608
+ display: grid; gap: 12px;
609
+ }
610
+ .quant-rank-col-header {
611
+ font-weight: 600; color: var(--accent); font-size: 13px; font-family: var(--mono);
612
+ margin-bottom: 8px; padding-bottom: 6px; border-bottom: 1px solid var(--border);
613
+ }
614
+ .quant-rank-item {
615
+ padding: 8px 10px; margin-bottom: 4px; border-radius: 6px;
616
+ font-size: 12px; position: relative; border-left: 3px solid transparent;
617
+ transition: background 0.2s;
618
+ }
619
+ .quant-rank-item:hover { background: rgba(255,255,255,0.03); }
620
+ .quant-rank-item.match { border-left-color: var(--green); background: rgba(0,212,170,0.06); }
621
+ .quant-rank-item.differ { border-left-color: var(--red); background: rgba(255,107,107,0.06); }
622
+ .quant-rank-item.baseline { border-left-color: var(--border); background: var(--bg-input); }
623
+ .quant-rank-pos {
624
+ display: inline-block; width: 22px; height: 22px; line-height: 22px;
625
+ text-align: center; border-radius: 50%; background: var(--bg-surface);
626
+ color: var(--accent); font-weight: 700; font-size: 11px; font-family: var(--mono);
627
+ margin-right: 8px;
628
+ }
629
+ .quant-rank-score { color: var(--text-muted); font-size: 11px; font-family: var(--mono); margin-top: 3px; }
630
+
554
631
  /* Cost calculator */
555
632
  .cost-slider-row {
556
633
  display: flex;
@@ -797,6 +874,16 @@ select:focus { outline: none; border-color: var(--accent); }
797
874
  <option value="2048">2048</option>
798
875
  </select>
799
876
  </div>
877
+ <div class="option-group">
878
+ <span class="option-label">Output Type</span>
879
+ <select id="embedOutputDtype">
880
+ <option value="float">float (32-bit)</option>
881
+ <option value="int8">int8 (4× smaller)</option>
882
+ <option value="uint8">uint8 (4× smaller)</option>
883
+ <option value="binary">binary (32× smaller)</option>
884
+ <option value="ubinary">ubinary (32× smaller)</option>
885
+ </select>
886
+ </div>
800
887
  <button class="btn" id="embedBtn" onclick="doEmbed()">⚡ Embed</button>
801
888
  </div>
802
889
 
@@ -916,6 +1003,7 @@ Semantic search understands meaning beyond keyword matching</textarea>
916
1003
  <div class="bench-panels">
917
1004
  <button class="bench-panel-btn active" data-bench="latency">⚡ Latency</button>
918
1005
  <button class="bench-panel-btn" data-bench="ranking">🏆 Ranking</button>
1006
+ <button class="bench-panel-btn" data-bench="quantization">⚗️ Quantization</button>
919
1007
  <button class="bench-panel-btn" data-bench="cost">💰 Cost</button>
920
1008
  <button class="bench-panel-btn" data-bench="history">📊 History</button>
921
1009
  </div>
@@ -1008,6 +1096,91 @@ Reranking models rescore initial search results to improve relevance ordering.</
1008
1096
  </div>
1009
1097
  </div>
1010
1098
 
1099
+ <!-- ── Quantization Panel ── -->
1100
+ <div class="bench-view" id="bench-quantization">
1101
+ <div class="card">
1102
+ <div class="card-title">Quantization Benchmark</div>
1103
+ <p style="color:var(--text-dim);font-size:13px;margin-bottom:12px;">
1104
+ Compare how different output data types (float, int8, binary) affect storage size and ranking quality.
1105
+ Embeds the same corpus with each dtype and measures the tradeoff.
1106
+ </p>
1107
+ <div class="options-row" style="flex-wrap:wrap;">
1108
+ <div class="option-group">
1109
+ <span class="option-label">Model</span>
1110
+ <select id="quantModel"></select>
1111
+ </div>
1112
+ <div class="option-group">
1113
+ <span class="option-label">Dimensions</span>
1114
+ <select id="quantDimensions">
1115
+ <option value="">Default</option>
1116
+ <option value="256">256</option>
1117
+ <option value="512">512</option>
1118
+ <option value="1024">1024</option>
1119
+ <option value="2048">2048</option>
1120
+ </select>
1121
+ </div>
1122
+ <div class="option-group">
1123
+ <span class="option-label">Data Types</span>
1124
+ <div id="quantDtypeChecks" style="display:flex;gap:8px;flex-wrap:wrap;">
1125
+ <label style="display:flex;align-items:center;gap:4px;font-size:13px;cursor:pointer;color:var(--text);">
1126
+ <input type="checkbox" value="float" checked style="accent-color:var(--accent);">float
1127
+ </label>
1128
+ <label style="display:flex;align-items:center;gap:4px;font-size:13px;cursor:pointer;color:var(--text);">
1129
+ <input type="checkbox" value="int8" checked style="accent-color:var(--accent);">int8
1130
+ </label>
1131
+ <label style="display:flex;align-items:center;gap:4px;font-size:13px;cursor:pointer;color:var(--text);">
1132
+ <input type="checkbox" value="uint8" style="accent-color:var(--accent);">uint8
1133
+ </label>
1134
+ <label style="display:flex;align-items:center;gap:4px;font-size:13px;cursor:pointer;color:var(--text);">
1135
+ <input type="checkbox" value="ubinary" checked style="accent-color:var(--accent);">ubinary
1136
+ </label>
1137
+ <label style="display:flex;align-items:center;gap:4px;font-size:13px;cursor:pointer;color:var(--text);">
1138
+ <input type="checkbox" value="binary" style="accent-color:var(--accent);">binary
1139
+ </label>
1140
+ </div>
1141
+ </div>
1142
+ </div>
1143
+ <div style="margin-top:12px;">
1144
+ <span class="option-label">Query</span>
1145
+ <input type="text" id="quantQuery" placeholder="Search query..." value="How do I search for similar documents using embeddings?" style="width:100%;margin-bottom:8px;">
1146
+ </div>
1147
+ <div>
1148
+ <span class="option-label">Corpus (one document per line)</span>
1149
+ <textarea id="quantCorpus" rows="5" placeholder="Documents to embed...">Vector search finds documents by computing similarity between embedding vectors in high-dimensional space.
1150
+ MongoDB Atlas Vector Search lets you index and query vector embeddings alongside your operational data.
1151
+ Traditional full-text search uses inverted indexes to match keyword terms in documents.
1152
+ Cosine similarity measures the angle between two vectors, commonly used for semantic search.
1153
+ Database sharding distributes data across multiple servers for horizontal scalability.
1154
+ Embedding models convert text into dense numerical vectors that capture meaning.
1155
+ Approximate nearest neighbor algorithms like HNSW enable fast similarity search at scale.
1156
+ Reranking models rescore initial search results to improve relevance ordering.</textarea>
1157
+ </div>
1158
+ <div style="margin-top:12px;">
1159
+ <button class="btn" id="quantBtn" onclick="doBenchQuantization()">⚗️ Run Quantization Benchmark</button>
1160
+ </div>
1161
+ </div>
1162
+
1163
+ <div class="error-msg" id="quantError"></div>
1164
+
1165
+ <div class="result-section" id="quantResult">
1166
+ <div class="quant-charts">
1167
+ <div class="card">
1168
+ <div class="card-title">📦 Storage per Vector</div>
1169
+ <div id="quantStorageChart"></div>
1170
+ </div>
1171
+ <div class="card">
1172
+ <div class="card-title">⏱ API Latency</div>
1173
+ <div id="quantLatencyChart"></div>
1174
+ </div>
1175
+ </div>
1176
+ <div class="card">
1177
+ <div class="card-title">🎯 Ranking Quality vs Float Baseline</div>
1178
+ <div id="quantQualityMeters" style="margin-bottom:16px;"></div>
1179
+ <div id="quantRankGrid"></div>
1180
+ </div>
1181
+ </div>
1182
+ </div>
1183
+
1011
1184
  <!-- ── Cost Panel ── -->
1012
1185
  <div class="bench-view" id="bench-cost">
1013
1186
  <div class="card">
@@ -1238,6 +1411,12 @@ function populateModelSelects() {
1238
1411
  }
1239
1412
 
1240
1413
  // ── API Helpers ──
1414
/**
 * Format a byte count as a short human-readable string for the UI.
 *
 * Uses binary units (1 KB = 1024 B, 1 MB = 1024 KB) with one decimal place
 * for KB/MB; values below 1 KB are printed as-is with a " B" suffix.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} e.g. "512 B", "4.0 KB", "1.5 MB".
 */
function formatBytesUI(bytes) {
  const KB = 1024;
  const MB = KB * KB;
  if (bytes >= MB) return `${(bytes / MB).toFixed(1)} MB`;
  if (bytes >= KB) {
    const kb = (bytes / KB).toFixed(1);
    // Values just below 1 MB round up to "1024.0"; promote them to the next
    // unit instead of printing the nonsensical "1024.0 KB".
    return kb === '1024.0' ? '1.0 MB' : `${kb} KB`;
  }
  return `${bytes} B`;
}
1419
+
1241
1420
  async function apiPost(url, body) {
1242
1421
  const res = await fetch(url, {
1243
1422
  method: 'POST',
@@ -1284,16 +1463,29 @@ window.doEmbed = async function() {
1284
1463
  const dims = document.getElementById('embedDimensions').value;
1285
1464
  const dimensions = dims ? parseInt(dims, 10) : undefined;
1286
1465
 
1287
- const data = await apiPost('/api/embed', { texts: [text], model, inputType, dimensions });
1466
+ const outputDtype = document.getElementById('embedOutputDtype').value;
1467
+ const body = { texts: [text], model, inputType, dimensions };
1468
+ if (outputDtype && outputDtype !== 'float') body.output_dtype = outputDtype;
1469
+
1470
+ const data = await apiPost('/api/embed', body);
1288
1471
  const emb = data.data[0].embedding;
1289
1472
  lastEmbedding = emb;
1290
1473
 
1291
1474
  // Stats
1475
+ const dtype = outputDtype || 'float';
1476
+ const bytesPerDim = (dtype === 'binary' || dtype === 'ubinary') ? 0.125 : (dtype === 'int8' || dtype === 'uint8') ? 1 : 4;
1477
+ const totalBytes = emb.length * bytesPerDim;
1478
+ const storageLine = dtype !== 'float'
1479
+ ? `<br><span style="color:var(--success)">📦 ${dtype}: ${formatBytesUI(totalBytes)}/vector (${(4 * emb.length / totalBytes).toFixed(0)}× smaller than float)</span>`
1480
+ : '';
1481
+
1292
1482
  const statsEl = document.getElementById('embedStats');
1293
1483
  statsEl.innerHTML = `
1294
1484
  <span class="stat"><span class="stat-label">Model</span><span class="stat-value">${data.model}</span></span>
1295
1485
  <span class="stat"><span class="stat-label">Dimensions</span><span class="stat-value">${emb.length}</span></span>
1296
1486
  <span class="stat"><span class="stat-label">Tokens</span><span class="stat-value">${data.usage?.total_tokens || '—'}</span></span>
1487
+ <span class="stat"><span class="stat-label">Type</span><span class="stat-value">${dtype}</span></span>
1488
+ ${storageLine}
1297
1489
  `;
1298
1490
 
1299
1491
  // Vector preview
@@ -1529,6 +1721,7 @@ const CONCEPT_META = {
1529
1721
  'api-access': { icon: '🌐', tab: 'embed' },
1530
1722
  'batch-processing': { icon: '📦', tab: 'embed' },
1531
1723
  benchmarking: { icon: '⏱', tab: 'benchmark' },
1724
+ quantization: { icon: '⚗️', tab: 'benchmark' },
1532
1725
  };
1533
1726
 
1534
1727
  let exploreConcepts = {};
@@ -1916,6 +2109,222 @@ function renderRankComparison(modelA, modelB, rankedA, rankedB, topK) {
1916
2109
  }
1917
2110
  }
1918
2111
 
2112
+ // ── Benchmark: Quantization ──
2113
/**
 * Rebuild the quantization panel's model dropdown (#quantModel) from the
 * `embedModels` list, one <option> per model name, and pre-select
 * voyage-4-large when that model is present in the list.
 */
function populateQuantModelSelect() {
  const PREFERRED_MODEL = 'voyage-4-large';
  const dropdown = document.getElementById('quantModel');

  // Drop any previously rendered options before repopulating.
  dropdown.innerHTML = '';

  for (const { name } of embedModels) {
    const option = document.createElement('option');
    option.value = name;
    option.textContent = name;
    dropdown.appendChild(option);
  }

  // Default to voyage-4-large if available
  if (embedModels.some(({ name }) => name === PREFERRED_MODEL)) {
    dropdown.value = PREFERRED_MODEL;
  }
}
2126
+
2127
/**
 * Hamming similarity between two bit-packed binary embeddings.
 *
 * Each array element is one byte holding 8 quantized dimensions. The previous
 * implementation took an integer dot product of the packed bytes, which
 * weights bytes by magnitude (and, for signed values, by sign) rather than
 * counting agreeing bits, so it did not rank by bit agreement.
 *
 * XOR-ing the low 8 bits works for both packings: unsigned bytes directly,
 * and offset-binary signed bytes (value = unsigned - 128) because the offset
 * only flips the top bit of both operands, which cancels under XOR.
 *
 * @param {ArrayLike<number>} a - Packed bytes of the first embedding.
 * @param {ArrayLike<number>} b - Packed bytes of the second embedding.
 * @returns {number} Count of bit positions where a and b agree
 *   (0 … 8 × length); higher means more similar.
 */
function hammingSimUI(a, b) {
  const len = Math.min(a.length, b.length);
  let differingBits = 0;
  for (let i = 0; i < len; i++) {
    let diff = (a[i] ^ b[i]) & 0xff;
    // Kernighan popcount: each iteration clears the lowest set bit.
    while (diff !== 0) {
      diff &= diff - 1;
      differingBits++;
    }
  }
  return len * 8 - differingBits;
}
2133
+
2134
/**
 * Run the quantization benchmark for the Benchmark → Quantization panel.
 *
 * Reads model, dimensions, query, corpus and the checked data types from the
 * panel, embeds [query, ...corpus] once per selected dtype via /api/embed,
 * ranks the corpus against the query for each dtype, and renders:
 *   - a storage-per-vector bar chart,
 *   - an API latency bar chart,
 *   - ranking-quality meters and side-by-side top-K columns against a baseline
 *     (float when selected, otherwise the first selected dtype).
 *
 * Validation and request errors are shown in #quantError; the run button's
 * loading state is always cleared when the run ends.
 *
 * @returns {Promise<void>}
 */
window.doBenchQuantization = async function() {
  // Escape text before interpolating it into innerHTML or attribute values;
  // corpus lines are free-form user input.
  const esc = (value) => String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');

  hideError('quantError');
  const model = document.getElementById('quantModel').value;
  const dimsVal = document.getElementById('quantDimensions').value;
  const dimensions = dimsVal ? parseInt(dimsVal, 10) : undefined;
  const query = document.getElementById('quantQuery').value.trim();
  const corpusText = document.getElementById('quantCorpus').value.trim();

  if (!query) { showError('quantError', 'Enter a query'); return; }
  if (!corpusText) { showError('quantError', 'Enter at least 2 documents'); return; }

  const corpus = corpusText.split('\n').map(d => d.trim()).filter(Boolean);
  if (corpus.length < 2) { showError('quantError', 'Enter at least 2 documents'); return; }

  const checks = document.querySelectorAll('#quantDtypeChecks input:checked');
  const dtypes = Array.from(checks).map(c => c.value);
  if (dtypes.length === 0) { showError('quantError', 'Select at least one data type'); return; }

  setLoading('quantBtn', true);

  try {
    const allTexts = [query, ...corpus];
    const resultsByDtype = {};

    // Requests run one at a time so each dtype's latency is measured in isolation.
    for (const dtype of dtypes) {
      // NOTE(review): the query shares the request with the corpus, so it is
      // embedded with inputType 'document' too — confirm this is intended.
      const body = { texts: allTexts, model, inputType: 'document' };
      if (dimensions) body.dimensions = dimensions;
      if (dtype !== 'float') body.output_dtype = dtype;

      const start = performance.now();
      const data = await apiPost('/api/embed', body);
      const elapsed = performance.now() - start;

      const embeddings = data.data.map(d => d.embedding);
      if (embeddings.length !== allTexts.length) {
        throw new Error(`Expected ${allTexts.length} embeddings for ${dtype}, got ${embeddings.length}`);
      }
      const queryEmbed = embeddings[0];
      const dims = queryEmbed.length;
      const isBinary = (dtype === 'binary' || dtype === 'ubinary');

      // Rank corpus documents by similarity (embeddings[0] is the query).
      const ranked = corpus.map((text, i) => {
        const docEmbed = embeddings[i + 1];
        const similarity = isBinary
          ? hammingSimUI(queryEmbed, docEmbed)
          : cosineSim(queryEmbed, docEmbed);
        return { index: i, text, similarity };
      }).sort((a, b) => b.similarity - a.similarity);

      // Storage: binary/ubinary arrays are bit-packed, so their length is
      // already 1/8th of the logical dimension count and equals the byte count.
      const actualDims = isBinary ? dims * 8 : dims;
      const bytesPerVec = dtype === 'float' ? dims * 4 : dims;

      resultsByDtype[dtype] = {
        dtype, latency: elapsed, dims, actualDims, bytesPerVec,
        tokens: data.usage?.total_tokens || 0, ranked,
      };
    }

    const completed = Object.values(resultsByDtype);
    if (completed.length === 0) {
      showError('quantError', 'No data types completed successfully');
      return;
    }

    // ── Render Charts ──
    const baseline = completed.find(r => r.dtype === 'float') || completed[0];
    const maxBytes = Math.max(...completed.map(r => r.bytesPerVec));
    const maxLatency = Math.max(...completed.map(r => r.latency));
    const minLatency = Math.min(...completed.map(r => r.latency));
    const DTYPE_COLORS = { float: '#00d4aa', int8: '#4ecdc4', uint8: '#45b7d1', ubinary: '#ffd93d', binary: '#ff6b6b' };

    // ── Storage Bar Chart ──
    let storageHTML = '';
    for (const r of completed) {
      // Keep at least 8% width so the in-bar label stays readable.
      const pct = Math.max(8, (r.bytesPerVec / maxBytes) * 100);
      const totalMB = (r.bytesPerVec * 1_000_000) / (1024 * 1024);
      const sizeStr = totalMB >= 1024 ? `${(totalMB / 1024).toFixed(1)} GB` : `${totalMB.toFixed(0)} MB`;
      // Only the actual baseline row is labelled "baseline"; other rows are
      // described relative to it.
      let savings = 'baseline';
      if (r !== baseline) {
        if (r.bytesPerVec < baseline.bytesPerVec) {
          savings = `${(baseline.bytesPerVec / r.bytesPerVec).toFixed(0)}× smaller`;
        } else if (r.bytesPerVec > baseline.bytesPerVec) {
          savings = `${(r.bytesPerVec / baseline.bytesPerVec).toFixed(0)}× larger`;
        } else {
          savings = 'same size';
        }
      }
      const color = DTYPE_COLORS[r.dtype] || '#82aaff';
      storageHTML += `<div class="quant-bar-group">
        <div class="quant-bar-label">
          <span class="dtype-name">${esc(r.dtype)}</span>
          <span class="dtype-value">${formatBytesUI(r.bytesPerVec)}/vec · ${sizeStr} @ 1M</span>
        </div>
        <div class="quant-bar-track">
          <div class="quant-bar-fill storage" style="width:${pct}%;background:linear-gradient(90deg, ${color}, ${color}cc);">${savings}</div>
        </div>
      </div>`;
    }
    document.getElementById('quantStorageChart').innerHTML = storageHTML;

    // ── Latency Bar Chart ──
    let latencyHTML = '';
    for (const r of completed) {
      const pct = Math.max(8, (r.latency / maxLatency) * 100);
      const color = DTYPE_COLORS[r.dtype] || '#82aaff';
      const badge = r.latency === minLatency ? ' ⚡' : '';
      latencyHTML += `<div class="quant-bar-group">
        <div class="quant-bar-label">
          <span class="dtype-name">${esc(r.dtype)}</span>
          <span class="dtype-value">${r.latency.toFixed(0)}ms${badge}</span>
        </div>
        <div class="quant-bar-track">
          <div class="quant-bar-fill latency" style="width:${pct}%;background:linear-gradient(90deg, ${color}, ${color}cc);">${r.latency.toFixed(0)}ms</div>
        </div>
      </div>`;
    }
    document.getElementById('quantLatencyChart').innerHTML = latencyHTML;

    // ── Quality Meters + Ranking Grid ──
    const topK = Math.min(5, corpus.length);
    const metersEl = document.getElementById('quantQualityMeters');
    const gridEl = document.getElementById('quantRankGrid');
    gridEl.innerHTML = '';
    metersEl.innerHTML = '';

    if (completed.length >= 2 && baseline) {
      const baselineRanking = baseline.ranked.slice(0, topK).map(r => r.index);

      // Quality meters for each non-baseline dtype
      let metersHTML = '';
      for (const r of completed) {
        if (r.dtype === baseline.dtype) continue;
        const otherRanking = r.ranked.slice(0, topK).map(x => x.index);
        const overlap = baselineRanking.filter(idx => otherRanking.includes(idx)).length;
        const overlapPct = (overlap / topK) * 100;
        const positionMatches = baselineRanking.filter((idx, pos) => otherRanking[pos] === idx).length;
        const exactMatch = positionMatches === baselineRanking.length;
        const posMatchPct = (positionMatches / topK) * 100;

        let grade, gradeLabel, detail;
        if (exactMatch) {
          grade = 'perfect'; gradeLabel = '✓ Perfect';
          // Name the real baseline: it is not float when float was not selected.
          detail = `Identical ranking — all ${topK} positions match ${esc(baseline.dtype)} baseline`;
        } else if (overlap === topK) {
          grade = 'good'; gradeLabel = '≈ Reordered';
          detail = `Same ${topK} documents, ${positionMatches}/${topK} in same position`;
        } else {
          grade = overlap >= topK * 0.6 ? 'good' : 'degraded';
          gradeLabel = `${overlapPct.toFixed(0)}% overlap`;
          detail = `${overlap}/${topK} documents match, ${positionMatches}/${topK} positions match`;
        }

        metersHTML += `<div class="quant-quality-meter">
          <div class="quant-meter-header">
            <span class="dtype-name">${esc(r.dtype)}</span>
            <span class="verdict-badge ${grade}">${gradeLabel}</span>
          </div>
          <div class="quant-meter-track">
            <div class="quant-meter-fill ${grade}" style="width:${exactMatch ? 100 : posMatchPct}%"></div>
          </div>
          <div class="quant-meter-detail">${detail}</div>
        </div>`;
      }
      metersEl.innerHTML = metersHTML;

      // Side-by-side ranking columns
      let rankHTML = `<div class="quant-rank-cols" style="grid-template-columns:repeat(${completed.length},1fr);">`;
      for (const r of completed) {
        rankHTML += `<div><div class="quant-rank-col-header">${esc(r.dtype)}${r === baseline ? ' (baseline)' : ''}</div>`;
        r.ranked.slice(0, topK).forEach((item, pos) => {
          // Truncate the raw text first, then escape, so entities are never cut in half.
          const trunc = item.text.length > 55 ? item.text.slice(0, 52) + '…' : item.text;
          let cls = 'baseline';
          if (r !== baseline) {
            cls = (baseline.ranked[pos] && item.index === baseline.ranked[pos].index) ? 'match' : 'differ';
          }
          rankHTML += `<div class="quant-rank-item ${cls}" title="${esc(item.text)}">
            <span class="quant-rank-pos">${pos + 1}</span>${esc(trunc)}
            <div class="quant-rank-score">${item.similarity.toFixed(4)} · doc ${item.index}</div>
          </div>`;
        });
        rankHTML += '</div>';
      }
      rankHTML += '</div>';
      gridEl.innerHTML = rankHTML;
    } else {
      metersEl.innerHTML = '<span style="color:var(--text-dim)">Select multiple data types (including float) to compare rankings.</span>';
    }

    document.getElementById('quantResult').classList.add('visible');
  } catch (err) {
    showError('quantError', err.message);
  } finally {
    setLoading('quantBtn', false);
  }
};
2327
+
1919
2328
  // ── Benchmark: Cost Calculator ──
1920
2329
  function initCostCalculator() {
1921
2330
  const tokSlider = document.getElementById('costTokens');
@@ -2055,6 +2464,7 @@ init = async function() {
2055
2464
  await _origInit();
2056
2465
  buildModelCheckboxes();
2057
2466
  populateBenchRankSelects();
2467
+ populateQuantModelSelect();
2058
2468
  initCostCalculator();
2059
2469
  renderHistory();
2060
2470
  };
@@ -241,6 +241,73 @@ describe('benchmark command', () => {
241
241
  assert.ok(optionNames.includes('--save'), 'should have --save option');
242
242
  });
243
243
 
244
// Build a fresh program, register the benchmark command, and return the named
// benchmark subcommand. Asserts along the way so a missing command fails with
// a readable message instead of a TypeError on `undefined`.
const getBenchSubcommand = (name) => {
  const program = new Command();
  registerBenchmark(program);
  const benchCmd = program.commands.find(c => c.name() === 'benchmark');
  assert.ok(benchCmd, 'benchmark command should be registered');
  const sub = benchCmd.commands.find(c => c.name() === name);
  assert.ok(sub, `${name} subcommand should be registered`);
  return sub;
};

// Long option flags (e.g. '--model') declared on a benchmark subcommand.
const getBenchOptionNames = (name) => getBenchSubcommand(name).options.map(o => o.long);

it('has asymmetric subcommand', () => {
  // Existence is asserted inside the helper.
  getBenchSubcommand('asymmetric');
});

it('asymmetric has --doc-model and --query-models options', () => {
  const optionNames = getBenchOptionNames('asymmetric');
  assert.ok(optionNames.includes('--doc-model'), 'should have --doc-model');
  assert.ok(optionNames.includes('--query-models'), 'should have --query-models');
});

it('asymmetric defaults doc-model to voyage-4-large', () => {
  const opt = getBenchSubcommand('asymmetric').options.find(o => o.long === '--doc-model');
  assert.equal(opt.defaultValue, 'voyage-4-large');
});

it('has quantization subcommand with quant alias', () => {
  const quantSub = getBenchSubcommand('quantization');
  assert.ok(quantSub.aliases().includes('quant'), 'should have "quant" alias');
});

it('quantization has --model, --dtypes, --query options', () => {
  const optionNames = getBenchOptionNames('quantization');
  assert.ok(optionNames.includes('--model'), 'should have --model');
  assert.ok(optionNames.includes('--dtypes'), 'should have --dtypes');
  assert.ok(optionNames.includes('--query'), 'should have --query');
});

it('quantization defaults dtypes to float,int8,ubinary', () => {
  const dtypesOpt = getBenchSubcommand('quantization').options.find(o => o.long === '--dtypes');
  assert.equal(dtypesOpt.defaultValue, 'float,int8,ubinary');
});

it('quantization has --dimensions, --save, --file options', () => {
  const optionNames = getBenchOptionNames('quantization');
  assert.ok(optionNames.includes('--dimensions'), 'should have --dimensions');
  assert.ok(optionNames.includes('--save'), 'should have --save');
  assert.ok(optionNames.includes('--file'), 'should have --file');
});
310
+
244
311
  it('batch defaults batch-sizes to 1,5,10,25,50', () => {
245
312
  const program = new Command();
246
313
  registerBenchmark(program);
@@ -29,4 +29,14 @@ describe('embed command', () => {
29
29
  const optionNames = embedCmd.options.map(o => o.long);
30
30
  assert.ok(optionNames.includes('--input-type'), 'should have --input-type option');
31
31
  });
32
+
33
// The embed command must expose --output-dtype and default it to 'float'.
it('has --output-dtype flag with float default', () => {
  const program = new Command();
  registerEmbed(program);

  const { options } = program.commands.find((cmd) => cmd.name() === 'embed');
  const dtypeOption = options.find(({ long }) => long === '--output-dtype');

  assert.ok(dtypeOption !== undefined, 'should have --output-dtype option');
  assert.equal(dtypeOption.defaultValue, 'float');
});
32
42
  });
@@ -97,9 +97,8 @@ describe('api', () => {
97
97
 
98
98
  await assert.rejects(
99
99
  () => apiRequest('/embeddings', { input: ['test'], model: 'voyage-4-lite' }),
100
- /process\.exit called/
100
+ /API Error \(400\)/
101
101
  );
102
- assert.equal(exitCode, 1);
103
102
  });
104
103
 
105
104
  it('retries on 429', async () => {
@@ -17,6 +17,7 @@ describe('explanations', () => {
17
17
  'api-keys',
18
18
  'api-access',
19
19
  'batch-processing',
20
+ 'quantization',
20
21
  'benchmarking',
21
22
  ];
22
23
 
@@ -90,6 +91,11 @@ describe('explanations', () => {
90
91
  batch: 'batch-processing',
91
92
  model: 'models',
92
93
  batching: 'batch-processing',
94
+ quantize: 'quantization',
95
+ int8: 'quantization',
96
+ binary: 'quantization',
97
+ matryoshka: 'quantization',
98
+ dtype: 'quantization',
93
99
  };
94
100
 
95
101
  it('alias map covers expected aliases', () => {