voyageai-cli 1.7.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -438,6 +438,248 @@ select:focus { outline: none; border-color: var(--accent); }
438
438
  }
439
439
  .explore-card.expanded .explore-card-actions { display: flex; }
440
440
 
441
+ /* Benchmark tab */
442
+ .bench-panels { display: flex; gap: 8px; margin-bottom: 16px; }
443
+ .bench-panel-btn {
444
+ background: var(--bg-input);
445
+ border: 1px solid var(--border);
446
+ color: var(--text-dim);
447
+ padding: 8px 18px;
448
+ border-radius: var(--radius);
449
+ font-size: 13px;
450
+ font-family: var(--font);
451
+ cursor: pointer;
452
+ transition: all 0.2s;
453
+ }
454
+ .bench-panel-btn:hover { color: var(--text); border-color: var(--text-dim); }
455
+ .bench-panel-btn.active { color: var(--accent); border-color: var(--accent); background: var(--accent-glow); }
456
+
457
+ .bench-view { display: none; }
458
+ .bench-view.active { display: block; }
459
+
460
+ .latency-chart { margin-top: 16px; }
461
+ .latency-row {
462
+ display: flex;
463
+ align-items: center;
464
+ gap: 12px;
465
+ margin-bottom: 12px;
466
+ }
467
+ .latency-model {
468
+ font-family: var(--mono);
469
+ font-size: 13px;
470
+ min-width: 170px;
471
+ color: var(--text);
472
+ white-space: nowrap;
473
+ overflow: hidden;
474
+ text-overflow: ellipsis;
475
+ }
476
+ .latency-bar-outer {
477
+ flex: 1;
478
+ height: 28px;
479
+ background: var(--bg-input);
480
+ border-radius: 4px;
481
+ overflow: hidden;
482
+ position: relative;
483
+ }
484
+ .latency-bar-inner {
485
+ height: 100%;
486
+ border-radius: 4px;
487
+ transition: width 0.8s cubic-bezier(0.22, 1, 0.36, 1);
488
+ display: flex;
489
+ align-items: center;
490
+ padding: 0 10px;
491
+ font-family: var(--mono);
492
+ font-size: 12px;
493
+ color: #0a0a1a;
494
+ font-weight: 600;
495
+ white-space: nowrap;
496
+ }
497
+ .latency-bar-inner.running {
498
+ background: var(--border) !important;
499
+ width: 100% !important;
500
+ animation: pulse-bar 1.2s ease-in-out infinite;
501
+ }
502
+ @keyframes pulse-bar {
503
+ 0%, 100% { opacity: 0.3; }
504
+ 50% { opacity: 0.6; }
505
+ }
506
+ .latency-stats {
507
+ min-width: 90px;
508
+ font-family: var(--mono);
509
+ font-size: 12px;
510
+ color: var(--text-dim);
511
+ text-align: right;
512
+ }
513
+ .latency-badge {
514
+ font-size: 14px;
515
+ min-width: 20px;
516
+ text-align: center;
517
+ }
518
+
519
+ /* Ranking diff */
520
+ .rank-comparison { margin-top: 16px; }
521
+ .rank-row {
522
+ display: grid;
523
+ grid-template-columns: 30px 1fr 40px 1fr;
524
+ gap: 8px;
525
+ align-items: start;
526
+ margin-bottom: 8px;
527
+ padding: 8px;
528
+ background: var(--bg-input);
529
+ border-radius: var(--radius);
530
+ }
531
+ .rank-num {
532
+ font-size: 16px;
533
+ font-weight: 700;
534
+ color: var(--accent);
535
+ font-family: var(--mono);
536
+ text-align: center;
537
+ }
538
+ .rank-item {
539
+ font-size: 13px;
540
+ padding: 6px 10px;
541
+ border-radius: 4px;
542
+ border-left: 3px solid var(--border);
543
+ }
544
+ .rank-item .rank-score {
545
+ font-family: var(--mono);
546
+ font-size: 11px;
547
+ color: var(--text-dim);
548
+ margin-top: 2px;
549
+ }
550
+ .rank-match { border-left-color: var(--green); }
551
+ .rank-differ { border-left-color: var(--yellow); }
552
+ .rank-arrow { text-align: center; color: var(--text-muted); font-size: 18px; padding-top: 4px; }
553
+
554
+ /* Cost calculator */
555
+ .cost-slider-row {
556
+ display: flex;
557
+ align-items: center;
558
+ gap: 16px;
559
+ margin-bottom: 16px;
560
+ }
561
+ .cost-slider-label {
562
+ font-size: 13px;
563
+ color: var(--text-dim);
564
+ min-width: 130px;
565
+ }
566
+ .cost-slider {
567
+ flex: 1;
568
+ -webkit-appearance: none;
569
+ appearance: none;
570
+ height: 6px;
571
+ background: var(--bg-input);
572
+ border-radius: 3px;
573
+ outline: none;
574
+ }
575
+ .cost-slider::-webkit-slider-thumb {
576
+ -webkit-appearance: none;
577
+ width: 18px; height: 18px;
578
+ border-radius: 50%;
579
+ background: var(--accent);
580
+ cursor: pointer;
581
+ box-shadow: 0 0 8px var(--accent-glow);
582
+ }
583
+ .cost-slider-value {
584
+ font-family: var(--mono);
585
+ font-size: 14px;
586
+ color: var(--accent);
587
+ min-width: 80px;
588
+ text-align: right;
589
+ font-weight: 600;
590
+ }
591
+ .cost-table {
592
+ width: 100%;
593
+ border-collapse: collapse;
594
+ font-size: 13px;
595
+ margin-top: 12px;
596
+ }
597
+ .cost-table th {
598
+ text-align: left;
599
+ padding: 8px 12px;
600
+ color: var(--text-dim);
601
+ font-weight: 500;
602
+ border-bottom: 1px solid var(--border);
603
+ font-size: 12px;
604
+ text-transform: uppercase;
605
+ letter-spacing: 0.5px;
606
+ }
607
+ .cost-table td {
608
+ padding: 8px 12px;
609
+ border-bottom: 1px solid rgba(42, 53, 80, 0.3);
610
+ font-family: var(--mono);
611
+ }
612
+ .cost-table tr:hover { background: rgba(0, 212, 170, 0.03); }
613
+ .cost-highlight {
614
+ color: var(--accent);
615
+ font-weight: 600;
616
+ }
617
+ .cost-bar-cell { position: relative; }
618
+ .cost-bar {
619
+ position: absolute;
620
+ left: 0; top: 50%;
621
+ transform: translateY(-50%);
622
+ height: 20px;
623
+ background: var(--accent-glow);
624
+ border-radius: 3px;
625
+ transition: width 0.4s ease;
626
+ }
627
+
628
+ /* History chart */
629
+ .history-empty {
630
+ text-align: center;
631
+ padding: 40px;
632
+ color: var(--text-muted);
633
+ font-size: 14px;
634
+ }
635
+ .history-chart {
636
+ display: flex;
637
+ align-items: flex-end;
638
+ gap: 4px;
639
+ height: 120px;
640
+ padding: 8px 0;
641
+ border-bottom: 1px solid var(--border);
642
+ }
643
+ .history-bar-group {
644
+ flex: 1;
645
+ display: flex;
646
+ gap: 2px;
647
+ align-items: flex-end;
648
+ height: 100%;
649
+ min-width: 0;
650
+ }
651
+ .history-bar {
652
+ flex: 1;
653
+ border-radius: 3px 3px 0 0;
654
+ min-width: 4px;
655
+ transition: height 0.4s ease;
656
+ cursor: pointer;
657
+ position: relative;
658
+ }
659
+ .history-bar:hover { opacity: 0.8; }
660
+ .history-legend {
661
+ display: flex;
662
+ gap: 16px;
663
+ justify-content: center;
664
+ margin-top: 8px;
665
+ font-size: 12px;
666
+ color: var(--text-dim);
667
+ }
668
+ .history-legend-dot {
669
+ display: inline-block;
670
+ width: 10px; height: 10px;
671
+ border-radius: 2px;
672
+ margin-right: 4px;
673
+ vertical-align: middle;
674
+ }
675
+ .history-labels {
676
+ display: flex;
677
+ justify-content: space-between;
678
+ font-size: 10px;
679
+ color: var(--text-muted);
680
+ margin-top: 4px;
681
+ }
682
+
441
683
  @media (max-width: 768px) {
442
684
  .compare-grid, .search-results { grid-template-columns: 1fr; }
443
685
  .nav { padding: 0 12px; }
@@ -467,6 +709,7 @@ select:focus { outline: none; border-color: var(--accent); }
467
709
  <button class="tab-btn active" data-tab="embed">⚡ Embed</button>
468
710
  <button class="tab-btn" data-tab="compare">⚖️ Compare</button>
469
711
  <button class="tab-btn" data-tab="search">🔍 Search</button>
712
+ <button class="tab-btn" data-tab="benchmark">⏱ Benchmark</button>
470
713
  <button class="tab-btn" data-tab="explore">📚 Explore</button>
471
714
  </div>
472
715
 
@@ -614,6 +857,150 @@ Semantic search understands meaning beyond keyword matching</textarea>
614
857
  </div>
615
858
  </div>
616
859
 
860
+ <!-- ========== BENCHMARK TAB ========== -->
861
+ <div class="tab-panel" id="tab-benchmark">
862
+
863
+ <!-- Sub-panel switcher -->
864
+ <div class="bench-panels">
865
+ <button class="bench-panel-btn active" data-bench="latency">⚡ Latency</button>
866
+ <button class="bench-panel-btn" data-bench="ranking">🏆 Ranking</button>
867
+ <button class="bench-panel-btn" data-bench="cost">💰 Cost</button>
868
+ <button class="bench-panel-btn" data-bench="history">📊 History</button>
869
+ </div>
870
+
871
+ <!-- ── Latency Panel ── -->
872
+ <div class="bench-view active" id="bench-latency">
873
+ <div class="card">
874
+ <div class="card-title">Embedding Latency Benchmark</div>
875
+ <textarea id="benchLatencyInput" rows="3" placeholder="Enter text to benchmark (or leave for built-in sample)..."></textarea>
876
+ <div class="options-row">
877
+ <div class="option-group">
878
+ <span class="option-label">Models</span>
879
+ <div id="benchModelChecks" style="display:flex;gap:8px;flex-wrap:wrap;"></div>
880
+ </div>
881
+ <div class="option-group">
882
+ <span class="option-label">Rounds</span>
883
+ <select id="benchRounds">
884
+ <option value="1">1</option>
885
+ <option value="3" selected>3</option>
886
+ <option value="5">5</option>
887
+ <option value="10">10</option>
888
+ </select>
889
+ </div>
890
+ <button class="btn" id="benchLatencyBtn" onclick="doBenchLatency()">⚡ Run Benchmark</button>
891
+ </div>
892
+ </div>
893
+
894
+ <div class="error-msg" id="benchLatencyError"></div>
895
+
896
+ <div class="result-section" id="benchLatencyResult">
897
+ <div class="card">
898
+ <div class="card-title">Results</div>
899
+ <div id="benchLatencyStats"></div>
900
+ <div class="latency-chart" id="benchLatencyChart"></div>
901
+ </div>
902
+ </div>
903
+ </div>
904
+
905
+ <!-- ── Ranking Panel ── -->
906
+ <div class="bench-view" id="bench-ranking">
907
+ <div class="card">
908
+ <div class="card-title">Model Ranking Comparison</div>
909
+ <div style="margin-bottom:12px;">
910
+ <input type="text" id="benchRankQuery" placeholder="Search query..." value="How do I search for similar documents using embeddings?">
911
+ </div>
912
+ <textarea id="benchRankDocs" rows="6" placeholder="Documents (one per line)...">Vector search finds documents by computing similarity between embedding vectors in high-dimensional space.
913
+ MongoDB Atlas Vector Search lets you index and query vector embeddings alongside your operational data.
914
+ Traditional full-text search uses inverted indexes to match keyword terms in documents.
915
+ Cosine similarity measures the angle between two vectors, commonly used for semantic search.
916
+ Database sharding distributes data across multiple servers for horizontal scalability.
917
+ Embedding models convert text into dense numerical vectors that capture meaning.
918
+ Approximate nearest neighbor algorithms like HNSW enable fast similarity search at scale.
919
+ Reranking models rescore initial search results to improve relevance ordering.</textarea>
920
+ <div class="options-row">
921
+ <div class="option-group">
922
+ <span class="option-label">Model A</span>
923
+ <select id="benchRankModelA"></select>
924
+ </div>
925
+ <div class="option-group">
926
+ <span class="option-label">Model B</span>
927
+ <select id="benchRankModelB"></select>
928
+ </div>
929
+ <div class="option-group">
930
+ <span class="option-label">Mode</span>
931
+ <select id="benchRankMode">
932
+ <option value="embed">Embedding Similarity</option>
933
+ <option value="rerank">Reranking</option>
934
+ </select>
935
+ </div>
936
+ <div class="option-group">
937
+ <span class="option-label">Top K</span>
938
+ <select id="benchRankTopK">
939
+ <option value="3">3</option>
940
+ <option value="5" selected>5</option>
941
+ <option value="8">8</option>
942
+ </select>
943
+ </div>
944
+ <button class="btn" id="benchRankBtn" onclick="doBenchRanking()">🏆 Compare Rankings</button>
945
+ </div>
946
+ </div>
947
+
948
+ <div class="error-msg" id="benchRankError"></div>
949
+
950
+ <div class="result-section" id="benchRankResult">
951
+ <div class="card">
952
+ <div class="card-title">Ranking Comparison</div>
953
+ <div id="benchRankVerdict" style="margin-bottom:12px;font-size:14px;"></div>
954
+ <div class="rank-comparison" id="benchRankGrid"></div>
955
+ </div>
956
+ </div>
957
+ </div>
958
+
959
+ <!-- ── Cost Panel ── -->
960
+ <div class="bench-view" id="bench-cost">
961
+ <div class="card">
962
+ <div class="card-title">Cost Calculator</div>
963
+ <div class="cost-slider-row">
964
+ <span class="cost-slider-label">Tokens per query</span>
965
+ <input type="range" class="cost-slider" id="costTokens" min="50" max="5000" value="500" step="50">
966
+ <span class="cost-slider-value" id="costTokensValue">500</span>
967
+ </div>
968
+ <div class="cost-slider-row">
969
+ <span class="cost-slider-label">Queries per day</span>
970
+ <input type="range" class="cost-slider" id="costQueries" min="10" max="500000" value="1000" step="10">
971
+ <span class="cost-slider-value" id="costQueriesValue">1,000</span>
972
+ </div>
973
+ <table class="cost-table" id="costTable">
974
+ <thead>
975
+ <tr>
976
+ <th>Model</th>
977
+ <th>Type</th>
978
+ <th>$/1M tokens</th>
979
+ <th>Daily Cost</th>
980
+ <th>Monthly Cost</th>
981
+ <th style="width:30%">Relative</th>
982
+ </tr>
983
+ </thead>
984
+ <tbody id="costTableBody"></tbody>
985
+ </table>
986
+ </div>
987
+ </div>
988
+
989
+ <!-- ── History Panel ── -->
990
+ <div class="bench-view" id="bench-history">
991
+ <div class="card">
992
+ <div class="card-title">Benchmark History</div>
993
+ <div id="benchHistoryContent">
994
+ <div class="history-empty">No benchmarks recorded yet. Run a latency benchmark to start tracking.</div>
995
+ </div>
996
+ <div style="margin-top:12px;text-align:right;">
997
+ <button class="btn btn-secondary btn-small" onclick="clearHistory()">🗑 Clear History</button>
998
+ </div>
999
+ </div>
1000
+ </div>
1001
+
1002
+ </div>
1003
+
617
1004
  <!-- ========== EXPLORE TAB ========== -->
618
1005
  <div class="tab-panel" id="tab-explore">
619
1006
  <div class="explore-grid" id="exploreGrid"></div>
@@ -1103,6 +1490,454 @@ window.collapseTopic = function(btn) {
1103
1490
  btn.closest('.explore-card').classList.remove('expanded');
1104
1491
  };
1105
1492
 
1493
// ── Benchmark: Sub-panel switching ──
// Clicking a switcher button marks it active and shows the view whose id is
// "bench-" + the button's data-bench attribute; all other buttons and views
// lose their "active" class.
for (const panelBtn of document.querySelectorAll('.bench-panel-btn')) {
  panelBtn.addEventListener('click', () => {
    // Queried again on every click, exactly as the buttons/views exist then.
    for (const otherBtn of document.querySelectorAll('.bench-panel-btn')) {
      otherBtn.classList.remove('active');
    }
    for (const view of document.querySelectorAll('.bench-view')) {
      view.classList.remove('active');
    }
    panelBtn.classList.add('active');
    const targetView = document.getElementById('bench-' + panelBtn.dataset.bench);
    targetView.classList.add('active');
  });
}
1502
+
1503
+ // ── Benchmark: Model checkboxes ──
1504
/**
 * Rebuild the model picker in #benchModelChecks: one labelled checkbox per
 * entry of `embedModels`. The three voyage-4 variants named below start
 * out checked; every other model starts unchecked.
 */
function buildModelCheckboxes() {
  const host = document.getElementById('benchModelChecks');
  host.innerHTML = '';
  const preselected = new Set(['voyage-4-large', 'voyage-4', 'voyage-4-lite']);

  for (const { name } of embedModels) {
    const wrapper = document.createElement('label');
    wrapper.style.cssText = 'display:flex;align-items:center;gap:4px;font-size:13px;cursor:pointer;color:var(--text);';

    const checkbox = Object.assign(document.createElement('input'), {
      type: 'checkbox',
      value: name,
      checked: preselected.has(name),
    });
    checkbox.style.accentColor = 'var(--accent)';

    wrapper.appendChild(checkbox);
    // Text node (not innerHTML) so the model name is never parsed as markup.
    wrapper.appendChild(document.createTextNode(name));
    host.appendChild(wrapper);
  }
}
1521
+
1522
/**
 * Fill the "Model A" and "Model B" dropdowns of the ranking panel with every
 * name from `embedModels` followed by every name from `rerankModels`.
 * When at least two embedding models exist, A defaults to the first one and
 * B to the third (or to the second when there are exactly two).
 */
function populateBenchRankSelects() {
  const selectA = document.getElementById('benchRankModelA');
  const selectB = document.getElementById('benchRankModelB');

  for (const select of [selectA, selectB]) {
    select.innerHTML = '';
    for (const { name } of [...embedModels, ...rerankModels]) {
      const option = document.createElement('option');
      option.value = name;
      option.textContent = name;
      select.appendChild(option);
    }
  }

  // Sensible defaults
  if (embedModels.length < 2) return;
  const secondChoice = embedModels.length > 2 ? 2 : 1;
  selectA.value = embedModels[0].name;
  selectB.value = embedModels[secondChoice].name;
}
1540
+
1541
// ── Benchmark: Latency ──

// Built-in inputs sent to the latency benchmark when the user leaves the
// custom text box empty.
const BENCH_SAMPLE_TEXTS = [
  'MongoDB Atlas provides a fully managed cloud database service with built-in vector search capabilities.',
  'Kubernetes orchestrates containerized applications across clusters of machines for high availability.',
  'Machine learning models transform raw data into embeddings that capture semantic meaning.',
  'RESTful APIs use HTTP methods like GET, POST, PUT, and DELETE to manage resources.',
  'Natural language processing enables computers to understand and generate human language.',
];

// Bar and legend colours; consumers index this with `i % MODEL_COLORS.length`
// so the palette repeats when there are more models than colours.
const MODEL_COLORS = [
  '#00d4aa',
  '#4ecdc4',
  '#45b7d1',
  '#96ceb4',
  '#ffd93d',
  '#ff6b6b',
  '#c792ea',
  '#f78c6c',
  '#82aaff',
  '#c3e88d',
];
1554
+
1555
/**
 * Run the embedding latency benchmark for every checked model and render it.
 *
 * Inputs are read from the page: checked boxes in #benchModelChecks, the round
 * count from #benchRounds, and optional custom text from #benchLatencyInput
 * (BENCH_SAMPLE_TEXTS is used when the box is empty). Each model is POSTed to
 * /api/benchmark/embed `rounds` times, with every request awaited before the
 * next one starts. A model's rounds stop at its first failed request.
 *
 * On completion the bars are sized relative to the slowest average, the
 * fastest model(s) get a ⚡ badge, summary stats are appended, and the run is
 * stored through saveBenchHistory()/renderHistory().
 *
 * The loading state is always cleared in `finally`, and any unexpected error
 * is shown in #benchLatencyError instead of leaving the button stuck.
 *
 * @returns {Promise<void>}
 */
window.doBenchLatency = async function() {
  hideError('benchLatencyError');
  const checks = document.querySelectorAll('#benchModelChecks input:checked');
  const models = Array.from(checks).map(c => c.value);
  if (models.length === 0) { showError('benchLatencyError', 'Select at least one model'); return; }

  const rounds = parseInt(document.getElementById('benchRounds').value, 10);
  const customText = document.getElementById('benchLatencyInput').value.trim();
  const texts = customText ? [customText] : BENCH_SAMPLE_TEXTS;

  // Values below are interpolated into innerHTML, so HTML-escape them first.
  const HTML_ENTITIES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const esc = value => String(value).replace(/[&<>"']/g, ch => HTML_ENTITIES[ch]);

  setLoading('benchLatencyBtn', true);
  try {
    document.getElementById('benchLatencyResult').classList.add('visible');

    const chart = document.getElementById('benchLatencyChart');
    const statsEl = document.getElementById('benchLatencyStats');

    // Build placeholder bars
    chart.innerHTML = '';
    models.forEach((model, i) => {
      const row = document.createElement('div');
      row.className = 'latency-row';
      row.innerHTML = `
        <span class="latency-model">${esc(model)}</span>
        <div class="latency-bar-outer">
          <div class="latency-bar-inner running" id="bench-bar-${i}" style="background:${MODEL_COLORS[i % MODEL_COLORS.length]}"></div>
        </div>
        <span class="latency-stats" id="bench-stats-${i}">Running...</span>
        <span class="latency-badge" id="bench-badge-${i}"></span>
      `;
      chart.appendChild(row);
    });
    statsEl.innerHTML = `<span class="stat"><span class="stat-label">Texts</span><span class="stat-value">${texts.length}</span></span>
      <span class="stat"><span class="stat-label">Rounds</span><span class="stat-value">${rounds}</span></span>`;

    const results = [];

    for (let mi = 0; mi < models.length; mi++) {
      const model = models[mi];
      const latencies = [];
      let tokens = 0, dims = 0;

      for (let r = 0; r < rounds; r++) {
        try {
          const data = await apiPost('/api/benchmark/embed', { texts, model, inputType: 'document' });
          latencies.push(data.elapsed);
          tokens = data.tokens;
          dims = data.dimensions;
        } catch (err) {
          const failedStats = document.getElementById(`bench-stats-${mi}`);
          const failedBar = document.getElementById(`bench-bar-${mi}`);
          failedStats.textContent = 'Error';
          // Keep the reason reachable (hover) instead of discarding it.
          failedStats.title = err && err.message ? err.message : '';
          failedBar.classList.remove('running');
          failedBar.style.width = '0%';
          break;
        }
      }

      if (latencies.length > 0) {
        const avg = latencies.reduce((a, b) => a + b, 0) / latencies.length;
        const sorted = [...latencies].sort((a, b) => a - b);
        const p50 = sorted[Math.floor(sorted.length / 2)];
        results.push({ model, avg, p50, min: sorted[0], max: sorted[sorted.length - 1], tokens, dims, idx: mi });
      }
    }

    // Animate bars to final widths
    if (results.length > 0) {
      const maxAvg = Math.max(...results.map(r => r.avg));
      const minAvg = Math.min(...results.map(r => r.avg));

      results.forEach(r => {
        const bar = document.getElementById(`bench-bar-${r.idx}`);
        const stats = document.getElementById(`bench-stats-${r.idx}`);
        const badge = document.getElementById(`bench-badge-${r.idx}`);
        bar.classList.remove('running');
        // 15% floor keeps the label readable; the guard avoids NaN% when every
        // average is 0.
        const pct = maxAvg > 0 ? Math.max(15, (r.avg / maxAvg) * 100) : 15;
        bar.style.width = pct + '%';
        bar.textContent = r.avg.toFixed(0) + 'ms';
        stats.textContent = `p50: ${r.p50.toFixed(0)}ms`;
        stats.title = `min: ${r.min.toFixed(0)}ms, max: ${r.max.toFixed(0)}ms`;
        if (r.avg === minAvg) badge.textContent = '⚡';
      });

      // Update stats
      const fastest = results.reduce((a, b) => a.avg < b.avg ? a : b);
      const price = getModelPrice(fastest.model);
      statsEl.innerHTML += `
        <span class="stat"><span class="stat-label">Fastest</span><span class="stat-value">${esc(fastest.model)}</span></span>
        <span class="stat"><span class="stat-label">Avg</span><span class="stat-value">${fastest.avg.toFixed(0)}ms</span></span>
        <span class="stat"><span class="stat-label">Dims</span><span class="stat-value">${esc(fastest.dims)}</span></span>
        ${price ? `<span class="stat"><span class="stat-label">Price</span><span class="stat-value">${esc(price)}</span></span>` : ''}
      `;

      // Save to history
      saveBenchHistory(results, texts.length, rounds);
      renderHistory();
    }
  } catch (err) {
    showError('benchLatencyError', err.message);
  } finally {
    setLoading('benchLatencyBtn', false);
  }
};
1653
+
1654
/**
 * Look up the `price` field of the model called `name` in `allModels`.
 *
 * @param {string} name - Model name to search for.
 * @returns {*} The first matching model's `price`, or null when no model has
 *   that name.
 */
function getModelPrice(name) {
  for (const model of allModels) {
    if (model.name === name) {
      return model.price;
    }
  }
  return null;
}
1658
+
1659
/**
 * Extract the numeric dollars-per-million-tokens figure from a model's price.
 *
 * The price is expected to contain text of the form "$<number>/1M".
 *
 * @param {string} name - Model name to look up in `allModels`.
 * @returns {number|null} The parsed number, or null when the model is
 *   unknown, has no string `price`, or the price does not contain a parsable
 *   "$<number>/1M" figure.
 */
function getModelPriceNum(name) {
  const model = allModels.find(m => m.name === name);
  // A missing or non-string price is treated as "no price", not a crash.
  if (!model || typeof model.price !== 'string') return null;
  const match = model.price.match(/\$([0-9.]+)\/1M/);
  if (!match) return null;
  const value = parseFloat(match[1]);
  // "[0-9.]+" also matches strings such as "." that parse to NaN.
  return Number.isNaN(value) ? null : value;
}
1665
+
1666
+ // ── Benchmark: Ranking Comparison ──
1667
/**
 * Compare how two models rank the same documents for one query.
 *
 * Reads the query, the documents (one per line), both model names, the mode
 * and top-K from the ranking panel. In "embed" mode each model embeds
 * [query, ...documents] via /api/benchmark/embed and the documents are ranked
 * with rankBySimilarity(); otherwise each model is sent to
 * /api/benchmark/rerank and the first top-K results are used as returned.
 * Both requests of a mode run in parallel. Failures are shown in
 * #benchRankError and the button's loading state is always cleared.
 *
 * @returns {Promise<void>}
 */
window.doBenchRanking = async function() {
  hideError('benchRankError');
  const readField = (id) => document.getElementById(id).value;

  const query = readField('benchRankQuery').trim();
  const docsText = readField('benchRankDocs').trim();
  if (!(query && docsText)) {
    showError('benchRankError', 'Enter a query and documents');
    return;
  }

  const documents = docsText
    .split('\n')
    .map((line) => line.trim())
    .filter(Boolean);
  if (documents.length < 2) {
    showError('benchRankError', 'Enter at least 2 documents');
    return;
  }

  const modelA = readField('benchRankModelA');
  const modelB = readField('benchRankModelB');
  const mode = readField('benchRankMode');
  const topK = parseInt(readField('benchRankTopK'), 10);

  setLoading('benchRankBtn', true);

  try {
    const contenders = [modelA, modelB];
    let rankedA;
    let rankedB;

    if (mode === 'embed') {
      // Embedding similarity mode
      const responses = await Promise.all(
        contenders.map((model) =>
          apiPost('/api/benchmark/embed', { texts: [query, ...documents], model, inputType: 'document' })),
      );
      rankedA = rankBySimilarity(responses[0].embeddings, documents, topK);
      rankedB = rankBySimilarity(responses[1].embeddings, documents, topK);
    } else {
      // Rerank mode
      const responses = await Promise.all(
        contenders.map((model) =>
          apiPost('/api/benchmark/rerank', { query, documents, model, topK })),
      );
      // Normalise a rerank response to the { index, text, score } shape.
      const toRanked = (response) =>
        response.results.slice(0, topK).map((hit) => ({
          index: hit.index,
          text: documents[hit.index],
          score: hit.relevance_score,
        }));
      rankedA = toRanked(responses[0]);
      rankedB = toRanked(responses[1]);
    }

    // Render comparison
    renderRankComparison(modelA, modelB, rankedA, rankedB, topK);
    document.getElementById('benchRankResult').classList.add('visible');
  } catch (err) {
    showError('benchRankError', err.message);
  } finally {
    setLoading('benchRankBtn', false);
  }
};
1723
+
1724
/**
 * Rank documents by similarity to the query.
 *
 * @param {Array} embeddings - Query vector first, then one vector per document.
 * @param {string[]} documents - Document texts, parallel to embeddings[1..].
 * @param {number} topK - Maximum number of entries to return.
 * @returns {{index: number, text: string, score: number}[]} Up to `topK`
 *   entries sorted by descending cosineSim(query, document) score; `index` is
 *   the document's position in `documents`.
 */
function rankBySimilarity(embeddings, documents, topK) {
  const [queryVec, ...docVecs] = embeddings;
  return docVecs
    .map((docVec, position) => ({
      index: position,
      text: documents[position],
      score: cosineSim(queryVec, docVec),
    }))
    .sort((left, right) => right.score - left.score)
    .slice(0, topK);
}
1735
+
1736
/**
 * Render the side-by-side ranking comparison into #benchRankGrid and a
 * one-line verdict into #benchRankVerdict.
 *
 * Rows are built as HTML strings, so model names and document text (typed by
 * the user) are HTML-escaped before interpolation.
 *
 * @param {string} modelA - Name shown above the left column.
 * @param {string} modelB - Name shown above the right column.
 * @param {{index: number, text: string, score: number}[]} rankedA - Model A's ranking, best first.
 * @param {{index: number, text: string, score: number}[]} rankedB - Model B's ranking, best first.
 * @param {number} topK - Maximum number of positions to compare.
 */
function renderRankComparison(modelA, modelB, rankedA, rankedB, topK) {
  const grid = document.getElementById('benchRankGrid');
  const verdict = document.getElementById('benchRankVerdict');
  grid.innerHTML = '';

  const HTML_ENTITIES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const esc = value => String(value).replace(/[&<>"']/g, ch => HTML_ENTITIES[ch]);
  // Long documents are cut to 60 characters; the full text stays in the tooltip.
  const truncate = text => (text.length > 60 ? text.slice(0, 57) + '...' : text);

  // Header
  const header = document.createElement('div');
  header.className = 'rank-row';
  header.style.background = 'none';
  header.style.fontWeight = '600';
  header.style.fontSize = '13px';
  header.style.color = 'var(--accent)';
  header.innerHTML = `<div></div><div>${esc(modelA)}</div><div></div><div>${esc(modelB)}</div>`;
  grid.appendChild(header);

  const k = Math.min(topK, rankedA.length, rankedB.length);

  // Nothing to compare: avoid dividing by zero and claiming agreement.
  if (!(k > 0)) {
    verdict.innerHTML = '<span style="color:var(--text-dim)">No results to compare.</span>';
    return;
  }

  let matches = 0;
  for (let i = 0; i < k; i++) {
    const a = rankedA[i];
    const b = rankedB[i];
    const same = a.index === b.index;
    if (same) matches++;

    const textA = String(a.text ?? '');
    const textB = String(b.text ?? '');
    const itemClass = same ? 'rank-match' : 'rank-differ';

    const row = document.createElement('div');
    row.className = 'rank-row';
    row.innerHTML = `
      <div class="rank-num">${i + 1}</div>
      <div class="rank-item ${itemClass}">
        <div title="${esc(textA)}">${esc(truncate(textA))}</div>
        <div class="rank-score">${a.score.toFixed(4)} [doc ${esc(a.index)}]</div>
      </div>
      <div class="rank-arrow">${same ? '=' : '≠'}</div>
      <div class="rank-item ${itemClass}">
        <div title="${esc(textB)}">${esc(truncate(textB))}</div>
        <div class="rank-score">${b.score.toFixed(4)} [doc ${esc(b.index)}]</div>
      </div>
    `;
    grid.appendChild(row);
  }

  // Compute overlap: how many documents appear in both top-k lists,
  // regardless of position.
  const setA = new Set(rankedA.slice(0, k).map(r => r.index));
  const setB = new Set(rankedB.slice(0, k).map(r => r.index));
  const overlap = [...setA].filter(x => setB.has(x)).length;
  const overlapPct = ((overlap / k) * 100).toFixed(0);
  const agreePct = ((matches / k) * 100).toFixed(0);

  if (matches === k) {
    verdict.innerHTML = `<span style="color:var(--green)">✓ Models agree on all ${k} positions — the cheaper model is likely sufficient.</span>`;
  } else if (overlap === k) {
    verdict.innerHTML = `<span style="color:var(--yellow)">⚠ Same ${k} documents in top-${k}, but in different order (${agreePct}% exact match).</span>`;
  } else {
    verdict.innerHTML = `<span style="color:var(--warning)">⚠ ${overlapPct}% overlap in top-${k} results — models see different relevance signals.</span>`;
  }
}
1797
+
1798
+ // ── Benchmark: Cost Calculator ──
1799
/**
 * Wire up the cost calculator: both sliders re-render the cost table and
 * their numeric read-outs on every "input" event, and the table is rendered
 * once immediately from the sliders' current values.
 */
function initCostCalculator() {
  const byId = (id) => document.getElementById(id);
  const tokenSlider = byId('costTokens');
  const querySlider = byId('costQueries');
  const tokenReadout = byId('costTokensValue');
  const queryReadout = byId('costQueriesValue');

  // Reads both sliders, refreshes the locale-formatted labels, redraws the table.
  const refresh = () => {
    const tokens = parseInt(tokenSlider.value, 10);
    const queries = parseInt(querySlider.value, 10);
    tokenReadout.textContent = tokens.toLocaleString();
    queryReadout.textContent = queries.toLocaleString();
    renderCostTable(tokens, queries);
  };

  for (const slider of [tokenSlider, querySlider]) {
    slider.addEventListener('input', refresh);
  }

  // Initialize
  refresh();
}
1819
+
1820
/**
 * Rebuild the cost table body (#costTableBody) for the given workload.
 *
 * Every non-legacy entry of `allModels` whose `price` contains
 * "$<number>/1M" gets one row; models without a parsable price are skipped.
 * Rows are sorted by monthly cost (daily cost × 30), cheapest first, and the
 * "Relative" bar is scaled against the most expensive row.
 *
 * @param {number} tokensPerQuery - Tokens consumed by one query.
 * @param {number} queriesPerDay - Number of queries per day.
 */
function renderCostTable(tokensPerQuery, queriesPerDay) {
  const tbody = document.getElementById('costTableBody');
  tbody.innerHTML = '';

  const HTML_ENTITIES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const esc = value => String(value).replace(/[&<>"']/g, ch => HTML_ENTITIES[ch]);

  // Same for every model, so computed once.
  const dailyTokens = tokensPerQuery * queriesPerDay;
  const rows = [];

  allModels.filter(m => !m.legacy).forEach(m => {
    // A model without a price string is skipped rather than crashing the table.
    const match = typeof m.price === 'string' ? m.price.match(/\$([0-9.]+)\/1M/) : null;
    if (!match) return;
    const pricePerM = parseFloat(match[1]);
    if (!Number.isFinite(pricePerM)) return;
    const dailyCost = (dailyTokens / 1_000_000) * pricePerM;
    const monthlyCost = dailyCost * 30;
    rows.push({ name: m.name, type: m.type === 'embedding' ? 'embed' : 'rerank', pricePerM, dailyCost, monthlyCost });
  });

  rows.sort((a, b) => a.monthlyCost - b.monthlyCost);
  // 0.01 floor keeps the bar scale finite when every cost is 0.
  const maxMonthly = Math.max(...rows.map(r => r.monthlyCost), 0.01);

  // Fewer decimals as the amount grows: $0.75, $1.5, $150.
  const formatMonthly = cost => {
    if (cost < 0.01) return '<$0.01';
    if (cost < 1) return '$' + cost.toFixed(2);
    if (cost < 100) return '$' + cost.toFixed(1);
    return '$' + cost.toFixed(0);
  };
  const formatDaily = cost => (cost < 0.01 ? '<$0.01' : '$' + cost.toFixed(2));

  rows.forEach(r => {
    const tr = document.createElement('tr');
    // 2% floor so even the cheapest model shows a visible bar.
    const barPct = Math.max(2, (r.monthlyCost / maxMonthly) * 100);
    const monthlyStr = esc(formatMonthly(r.monthlyCost));
    const dailyStr = esc(formatDaily(r.dailyCost));

    tr.innerHTML = `
      <td style="color:var(--text)">${esc(r.name)}</td>
      <td style="color:var(--text-dim)">${r.type}</td>
      <td>$${r.pricePerM.toFixed(2)}</td>
      <td>${dailyStr}</td>
      <td class="cost-highlight">${monthlyStr}</td>
      <td class="cost-bar-cell" style="position:relative;padding-left:8px;">
        <div class="cost-bar" style="width:${barPct}%;"></div>
        <span style="position:relative;z-index:1;font-size:12px;color:var(--text-dim);">${monthlyStr}</span>
      </td>
    `;
    tbody.appendChild(tr);
  });
}
1860
+
1861
+ // ── Benchmark: History ──
1862
// localStorage key under which benchmark runs are stored as a JSON array.
const HISTORY_KEY = 'vai-bench-history';

/**
 * Append one benchmark run to the history kept in localStorage.
 *
 * Only `model`, `avg`, `p50` and `dims` of each result are stored, together
 * with a timestamp, and only the 20 most recent runs are kept. History is
 * best-effort: unreadable or non-array stored data is replaced by a fresh
 * list, and a failing write (e.g. storage full or disabled) is logged
 * instead of thrown so it cannot break the benchmark that just finished.
 *
 * @param {{model: string, avg: number, p50: number, dims: number}[]} results - Per-model results of the run.
 * @param {number} textCount - Number of texts embedded per request.
 * @param {number} rounds - Number of rounds per model.
 */
function saveBenchHistory(results, textCount, rounds) {
  let history = [];
  try {
    const parsed = JSON.parse(localStorage.getItem(HISTORY_KEY) || '[]');
    if (Array.isArray(parsed)) history = parsed;
  } catch (err) {
    // Corrupt or inaccessible history: start over with an empty list.
    console.warn('Benchmark history could not be read; starting fresh.', err);
  }

  history.push({
    timestamp: Date.now(),
    textCount,
    rounds,
    results: results.map(r => ({ model: r.model, avg: r.avg, p50: r.p50, dims: r.dims })),
  });
  // Keep last 20
  if (history.length > 20) history.splice(0, history.length - 20);

  try {
    localStorage.setItem(HISTORY_KEY, JSON.stringify(history));
  } catch (err) {
    console.warn('Benchmark history could not be saved.', err);
  }
}
1876
+
1877
/**
 * Draw the benchmark history stored under HISTORY_KEY into
 * #benchHistoryContent: one bar group per stored run, one bar per model
 * (height relative to the largest average across all runs), a colour
 * legend, and first/last timestamps when there is more than one run.
 *
 * Stored data is treated as untrusted: unreadable JSON or entries without a
 * `results` array show the empty state, non-numeric averages are drawn as
 * missing bars, and model names are HTML-escaped before being placed into
 * markup or title attributes.
 */
function renderHistory() {
  const container = document.getElementById('benchHistoryContent');

  const HTML_ENTITIES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const esc = value => String(value).replace(/[&<>"']/g, ch => HTML_ENTITIES[ch]);

  let stored = [];
  try {
    const parsed = JSON.parse(localStorage.getItem(HISTORY_KEY) || '[]');
    if (Array.isArray(parsed)) stored = parsed;
  } catch (err) {
    // Corrupt or inaccessible history is shown as "no history".
    console.warn('Benchmark history could not be read.', err);
  }
  const history = stored.filter(h => h && Array.isArray(h.results));

  if (history.length === 0) {
    container.innerHTML = '<div class="history-empty">No benchmarks recorded yet. Run a latency benchmark to start tracking.</div>';
    return;
  }

  // Collect all models that appear
  const modelSet = new Set();
  history.forEach(h => h.results.forEach(r => modelSet.add(r.model)));
  const models = [...modelSet];
  const colorMap = new Map(models.map((m, i) => [m, MODEL_COLORS[i % MODEL_COLORS.length]]));

  // Find global max for scale; fall back to 1 so heights never become NaN.
  const averages = history.flatMap(h => h.results.map(r => r.avg)).filter(Number.isFinite);
  const maxAvg = Math.max(...averages, 0);
  const scale = maxAvg > 0 ? maxAvg : 1;

  let html = '<div class="history-chart">';
  history.forEach(h => {
    html += '<div class="history-bar-group" title="' + esc(new Date(h.timestamp).toLocaleString()) + '">';
    models.forEach(m => {
      const r = h.results.find(entry => entry.model === m);
      const hasBar = Boolean(r) && Number.isFinite(r.avg);
      // 4% floor keeps very fast runs visible; models absent from a run get 0.
      const height = hasBar ? Math.max(4, (r.avg / scale) * 100) : 0;
      const label = hasBar ? `${m}: ${r.avg.toFixed(0)}ms` : '';
      html += `<div class="history-bar" style="height:${height}%;background:${colorMap.get(m)};" title="${esc(label)}"></div>`;
    });
    html += '</div>';
  });
  html += '</div>';

  // Legend
  html += '<div class="history-legend">';
  models.forEach(m => {
    html += `<span><span class="history-legend-dot" style="background:${colorMap.get(m)}"></span>${esc(m)}</span>`;
  });
  html += '</div>';

  // Time labels
  if (history.length > 1) {
    const stamp = ts => {
      const d = new Date(ts);
      return `${d.toLocaleDateString()} ${d.toLocaleTimeString([], {hour:'2-digit',minute:'2-digit'})}`;
    };
    const first = stamp(history[0].timestamp);
    const last = stamp(history[history.length - 1].timestamp);
    html += `<div class="history-labels"><span>${esc(first)}</span><span>${esc(last)}</span></div>`;
  }

  container.innerHTML = html;
}
1925
+
1926
/**
 * Handler for the "Clear History" button: deletes the stored benchmark
 * history and redraws the (now empty) history panel.
 */
window.clearHistory = () => {
  localStorage.removeItem(HISTORY_KEY);
  renderHistory();
};
1930
+
1931
+ // ── Patch init to include benchmark setup ──
1932
// Wrap the existing init so that, once it has finished, the benchmark tab's
// widgets are built as well. The original is kept in _origInit and awaited
// first; the benchmark setup steps then run in the order listed.
const _origInit = init;
init = async () => {
  await _origInit();
  const benchmarkSetupSteps = [
    buildModelCheckboxes,
    populateBenchRankSelects,
    initCostCalculator,
    renderHistory,
  ];
  for (const runStep of benchmarkSetupSteps) {
    runStep();
  }
};
1940
+
1106
1941
  // ── Start ──
1107
1942
  init();
1108
1943
  })();