voyageai-cli 1.8.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,7 @@ on:
5
5
  branches: [main]
6
6
  pull_request:
7
7
  branches: [main]
8
+ workflow_dispatch:
8
9
 
9
10
  jobs:
10
11
  test:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "voyageai-cli",
3
- "version": "1.8.0",
3
+ "version": "1.10.0",
4
4
  "description": "CLI for Voyage AI embeddings, reranking, and MongoDB Atlas Vector Search",
5
5
  "bin": {
6
6
  "vai": "./src/cli.js"
@@ -84,12 +84,34 @@ function createPlaygroundServer() {
84
84
 
85
85
  // API: Models
86
86
  if (req.method === 'GET' && req.url === '/api/models') {
87
- const models = MODEL_CATALOG.filter(m => !m.legacy);
87
+ const models = MODEL_CATALOG.filter(m => !m.legacy && !m.local);
88
88
  res.writeHead(200, { 'Content-Type': 'application/json' });
89
89
  res.end(JSON.stringify({ models }));
90
90
  return;
91
91
  }
92
92
 
93
+ // API: Concepts (from vai explain)
94
+ if (req.method === 'GET' && req.url === '/api/concepts') {
95
+ const { concepts } = require('../lib/explanations');
96
+ // Strip picocolors ANSI from content for web display
97
+ // eslint-disable-next-line no-control-regex
98
+ const ANSI_RE = /\x1b\[[0-9;]*m/g;
99
+ const stripped = {};
100
+ for (const [key, concept] of Object.entries(concepts)) {
101
+ stripped[key] = {
102
+ title: concept.title,
103
+ summary: concept.summary,
104
+ content: (typeof concept.content === 'string' ? concept.content : concept.content).replace(ANSI_RE, ''),
105
+ links: concept.links || [],
106
+ tryIt: concept.tryIt || [],
107
+ keyPoints: concept.keyPoints || [],
108
+ };
109
+ }
110
+ res.writeHead(200, { 'Content-Type': 'application/json' });
111
+ res.end(JSON.stringify({ concepts: stripped }));
112
+ return;
113
+ }
114
+
93
115
  // API: Config
94
116
  if (req.method === 'GET' && req.url === '/api/config') {
95
117
  const key = process.env.VOYAGE_API_KEY || getConfigValue('apiKey');
@@ -152,6 +174,54 @@ function createPlaygroundServer() {
152
174
  return;
153
175
  }
154
176
 
177
+ // API: Benchmark (single model, single round — UI calls this per model)
178
+ if (req.url === '/api/benchmark/embed') {
179
+ const { texts, model, inputType, dimensions } = parsed;
180
+ if (!texts || !Array.isArray(texts) || texts.length === 0) {
181
+ res.writeHead(400, { 'Content-Type': 'application/json' });
182
+ res.end(JSON.stringify({ error: 'texts must be a non-empty array' }));
183
+ return;
184
+ }
185
+ const opts = { model: model || undefined };
186
+ if (inputType) opts.inputType = inputType;
187
+ if (dimensions) opts.dimensions = dimensions;
188
+ const start = performance.now();
189
+ const result = await generateEmbeddings(texts, opts);
190
+ const elapsed = performance.now() - start;
191
+ res.writeHead(200, { 'Content-Type': 'application/json' });
192
+ res.end(JSON.stringify({
193
+ model: result.model,
194
+ elapsed,
195
+ tokens: result.usage?.total_tokens || 0,
196
+ dimensions: result.data?.[0]?.embedding?.length || 0,
197
+ embeddings: result.data?.map(d => d.embedding),
198
+ }));
199
+ return;
200
+ }
201
+
202
+ if (req.url === '/api/benchmark/rerank') {
203
+ const { query, documents, model, topK } = parsed;
204
+ if (!query || !documents || !Array.isArray(documents)) {
205
+ res.writeHead(400, { 'Content-Type': 'application/json' });
206
+ res.end(JSON.stringify({ error: 'query and documents are required' }));
207
+ return;
208
+ }
209
+ const { apiRequest } = require('../lib/api');
210
+ const body = { query, documents, model: model || 'rerank-2.5' };
211
+ if (topK) body.top_k = topK;
212
+ const start = performance.now();
213
+ const result = await apiRequest('/rerank', body);
214
+ const elapsed = performance.now() - start;
215
+ res.writeHead(200, { 'Content-Type': 'application/json' });
216
+ res.end(JSON.stringify({
217
+ model: result.model,
218
+ elapsed,
219
+ tokens: result.usage?.total_tokens || 0,
220
+ results: result.data || [],
221
+ }));
222
+ return;
223
+ }
224
+
155
225
  // API: Similarity
156
226
  if (req.url === '/api/similarity') {
157
227
  const { texts, model } = parsed;
@@ -34,7 +34,7 @@ const MODEL_CATALOG = [
34
34
  { name: 'voyage-multimodal-3.5', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.12/M + $0.60/B px', bestFor: 'Text + images + video', shortFor: 'Multimodal' },
35
35
  { name: 'rerank-2.5', type: 'reranking', context: '32K', dimensions: '—', price: '$0.05/1M tokens', bestFor: 'Best quality reranking', shortFor: 'Best reranker' },
36
36
  { name: 'rerank-2.5-lite', type: 'reranking', context: '32K', dimensions: '—', price: '$0.02/1M tokens', bestFor: 'Fast reranking', shortFor: 'Fast reranker' },
37
- { name: 'voyage-4-nano', type: 'embedding', context: '32K', dimensions: '512 (default), 128, 256', price: 'Open-weight', bestFor: 'Open-weight / edge', shortFor: 'Open / edge' },
37
+ { name: 'voyage-4-nano', type: 'embedding', context: '32K', dimensions: '512 (default), 128, 256', price: 'Open-weight', bestFor: 'Open-weight / edge', shortFor: 'Open / edge', local: true },
38
38
  // Legacy models
39
39
  { name: 'voyage-3-large', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.18/1M tokens', bestFor: 'Previous gen quality', shortFor: 'Previous gen quality', legacy: true },
40
40
  { name: 'voyage-3.5', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.06/1M tokens', bestFor: 'Previous gen balanced', shortFor: 'Previous gen balanced', legacy: true },
@@ -438,6 +438,248 @@ select:focus { outline: none; border-color: var(--accent); }
438
438
  }
439
439
  .explore-card.expanded .explore-card-actions { display: flex; }
440
440
 
441
+ /* Benchmark tab */
442
+ .bench-panels { display: flex; gap: 8px; margin-bottom: 16px; }
443
+ .bench-panel-btn {
444
+ background: var(--bg-input);
445
+ border: 1px solid var(--border);
446
+ color: var(--text-dim);
447
+ padding: 8px 18px;
448
+ border-radius: var(--radius);
449
+ font-size: 13px;
450
+ font-family: var(--font);
451
+ cursor: pointer;
452
+ transition: all 0.2s;
453
+ }
454
+ .bench-panel-btn:hover { color: var(--text); border-color: var(--text-dim); }
455
+ .bench-panel-btn.active { color: var(--accent); border-color: var(--accent); background: var(--accent-glow); }
456
+
457
+ .bench-view { display: none; }
458
+ .bench-view.active { display: block; }
459
+
460
+ .latency-chart { margin-top: 16px; }
461
+ .latency-row {
462
+ display: flex;
463
+ align-items: center;
464
+ gap: 12px;
465
+ margin-bottom: 12px;
466
+ }
467
+ .latency-model {
468
+ font-family: var(--mono);
469
+ font-size: 13px;
470
+ min-width: 170px;
471
+ color: var(--text);
472
+ white-space: nowrap;
473
+ overflow: hidden;
474
+ text-overflow: ellipsis;
475
+ }
476
+ .latency-bar-outer {
477
+ flex: 1;
478
+ height: 28px;
479
+ background: var(--bg-input);
480
+ border-radius: 4px;
481
+ overflow: hidden;
482
+ position: relative;
483
+ }
484
+ .latency-bar-inner {
485
+ height: 100%;
486
+ border-radius: 4px;
487
+ transition: width 0.8s cubic-bezier(0.22, 1, 0.36, 1);
488
+ display: flex;
489
+ align-items: center;
490
+ padding: 0 10px;
491
+ font-family: var(--mono);
492
+ font-size: 12px;
493
+ color: #0a0a1a;
494
+ font-weight: 600;
495
+ white-space: nowrap;
496
+ }
497
+ .latency-bar-inner.running {
498
+ background: var(--border) !important;
499
+ width: 100% !important;
500
+ animation: pulse-bar 1.2s ease-in-out infinite;
501
+ }
502
+ @keyframes pulse-bar {
503
+ 0%, 100% { opacity: 0.3; }
504
+ 50% { opacity: 0.6; }
505
+ }
506
+ .latency-stats {
507
+ min-width: 90px;
508
+ font-family: var(--mono);
509
+ font-size: 12px;
510
+ color: var(--text-dim);
511
+ text-align: right;
512
+ }
513
+ .latency-badge {
514
+ font-size: 14px;
515
+ min-width: 20px;
516
+ text-align: center;
517
+ }
518
+
519
+ /* Ranking diff */
520
+ .rank-comparison { margin-top: 16px; }
521
+ .rank-row {
522
+ display: grid;
523
+ grid-template-columns: 30px 1fr 40px 1fr;
524
+ gap: 8px;
525
+ align-items: start;
526
+ margin-bottom: 8px;
527
+ padding: 8px;
528
+ background: var(--bg-input);
529
+ border-radius: var(--radius);
530
+ }
531
+ .rank-num {
532
+ font-size: 16px;
533
+ font-weight: 700;
534
+ color: var(--accent);
535
+ font-family: var(--mono);
536
+ text-align: center;
537
+ }
538
+ .rank-item {
539
+ font-size: 13px;
540
+ padding: 6px 10px;
541
+ border-radius: 4px;
542
+ border-left: 3px solid var(--border);
543
+ }
544
+ .rank-item .rank-score {
545
+ font-family: var(--mono);
546
+ font-size: 11px;
547
+ color: var(--text-dim);
548
+ margin-top: 2px;
549
+ }
550
+ .rank-match { border-left-color: var(--green); }
551
+ .rank-differ { border-left-color: var(--yellow); }
552
+ .rank-arrow { text-align: center; color: var(--text-muted); font-size: 18px; padding-top: 4px; }
553
+
554
+ /* Cost calculator */
555
+ .cost-slider-row {
556
+ display: flex;
557
+ align-items: center;
558
+ gap: 16px;
559
+ margin-bottom: 16px;
560
+ }
561
+ .cost-slider-label {
562
+ font-size: 13px;
563
+ color: var(--text-dim);
564
+ min-width: 130px;
565
+ }
566
+ .cost-slider {
567
+ flex: 1;
568
+ -webkit-appearance: none;
569
+ appearance: none;
570
+ height: 6px;
571
+ background: var(--bg-input);
572
+ border-radius: 3px;
573
+ outline: none;
574
+ }
575
+ .cost-slider::-webkit-slider-thumb {
576
+ -webkit-appearance: none;
577
+ width: 18px; height: 18px;
578
+ border-radius: 50%;
579
+ background: var(--accent);
580
+ cursor: pointer;
581
+ box-shadow: 0 0 8px var(--accent-glow);
582
+ }
583
+ .cost-slider-value {
584
+ font-family: var(--mono);
585
+ font-size: 14px;
586
+ color: var(--accent);
587
+ min-width: 80px;
588
+ text-align: right;
589
+ font-weight: 600;
590
+ }
591
+ .cost-table {
592
+ width: 100%;
593
+ border-collapse: collapse;
594
+ font-size: 13px;
595
+ margin-top: 12px;
596
+ }
597
+ .cost-table th {
598
+ text-align: left;
599
+ padding: 8px 12px;
600
+ color: var(--text-dim);
601
+ font-weight: 500;
602
+ border-bottom: 1px solid var(--border);
603
+ font-size: 12px;
604
+ text-transform: uppercase;
605
+ letter-spacing: 0.5px;
606
+ }
607
+ .cost-table td {
608
+ padding: 8px 12px;
609
+ border-bottom: 1px solid rgba(42, 53, 80, 0.3);
610
+ font-family: var(--mono);
611
+ }
612
+ .cost-table tr:hover { background: rgba(0, 212, 170, 0.03); }
613
+ .cost-highlight {
614
+ color: var(--accent);
615
+ font-weight: 600;
616
+ }
617
+ .cost-bar-cell { position: relative; }
618
+ .cost-bar {
619
+ position: absolute;
620
+ left: 0; top: 50%;
621
+ transform: translateY(-50%);
622
+ height: 20px;
623
+ background: var(--accent-glow);
624
+ border-radius: 3px;
625
+ transition: width 0.4s ease;
626
+ }
627
+
628
+ /* History chart */
629
+ .history-empty {
630
+ text-align: center;
631
+ padding: 40px;
632
+ color: var(--text-muted);
633
+ font-size: 14px;
634
+ }
635
+ .history-chart {
636
+ display: flex;
637
+ align-items: flex-end;
638
+ gap: 4px;
639
+ height: 120px;
640
+ padding: 8px 0;
641
+ border-bottom: 1px solid var(--border);
642
+ }
643
+ .history-bar-group {
644
+ flex: 1;
645
+ display: flex;
646
+ gap: 2px;
647
+ align-items: flex-end;
648
+ height: 100%;
649
+ min-width: 0;
650
+ }
651
+ .history-bar {
652
+ flex: 1;
653
+ border-radius: 3px 3px 0 0;
654
+ min-width: 4px;
655
+ transition: height 0.4s ease;
656
+ cursor: pointer;
657
+ position: relative;
658
+ }
659
+ .history-bar:hover { opacity: 0.8; }
660
+ .history-legend {
661
+ display: flex;
662
+ gap: 16px;
663
+ justify-content: center;
664
+ margin-top: 8px;
665
+ font-size: 12px;
666
+ color: var(--text-dim);
667
+ }
668
+ .history-legend-dot {
669
+ display: inline-block;
670
+ width: 10px; height: 10px;
671
+ border-radius: 2px;
672
+ margin-right: 4px;
673
+ vertical-align: middle;
674
+ }
675
+ .history-labels {
676
+ display: flex;
677
+ justify-content: space-between;
678
+ font-size: 10px;
679
+ color: var(--text-muted);
680
+ margin-top: 4px;
681
+ }
682
+
441
683
  @media (max-width: 768px) {
442
684
  .compare-grid, .search-results { grid-template-columns: 1fr; }
443
685
  .nav { padding: 0 12px; }
@@ -467,6 +709,7 @@ select:focus { outline: none; border-color: var(--accent); }
467
709
  <button class="tab-btn active" data-tab="embed">⚡ Embed</button>
468
710
  <button class="tab-btn" data-tab="compare">⚖️ Compare</button>
469
711
  <button class="tab-btn" data-tab="search">🔍 Search</button>
712
+ <button class="tab-btn" data-tab="benchmark">⏱ Benchmark</button>
470
713
  <button class="tab-btn" data-tab="explore">📚 Explore</button>
471
714
  </div>
472
715
 
@@ -614,8 +857,155 @@ Semantic search understands meaning beyond keyword matching</textarea>
614
857
  </div>
615
858
  </div>
616
859
 
860
+ <!-- ========== BENCHMARK TAB ========== -->
861
+ <div class="tab-panel" id="tab-benchmark">
862
+
863
+ <!-- Sub-panel switcher -->
864
+ <div class="bench-panels">
865
+ <button class="bench-panel-btn active" data-bench="latency">⚡ Latency</button>
866
+ <button class="bench-panel-btn" data-bench="ranking">🏆 Ranking</button>
867
+ <button class="bench-panel-btn" data-bench="cost">💰 Cost</button>
868
+ <button class="bench-panel-btn" data-bench="history">📊 History</button>
869
+ </div>
870
+
871
+ <!-- ── Latency Panel ── -->
872
+ <div class="bench-view active" id="bench-latency">
873
+ <div class="card">
874
+ <div class="card-title">Embedding Latency Benchmark</div>
875
+ <textarea id="benchLatencyInput" rows="3" placeholder="Enter text to benchmark (or leave for built-in sample)..."></textarea>
876
+ <div class="options-row">
877
+ <div class="option-group">
878
+ <span class="option-label">Models</span>
879
+ <div id="benchModelChecks" style="display:flex;gap:8px;flex-wrap:wrap;"></div>
880
+ </div>
881
+ <div class="option-group">
882
+ <span class="option-label">Rounds</span>
883
+ <select id="benchRounds">
884
+ <option value="1">1</option>
885
+ <option value="3" selected>3</option>
886
+ <option value="5">5</option>
887
+ <option value="10">10</option>
888
+ </select>
889
+ </div>
890
+ <button class="btn" id="benchLatencyBtn" onclick="doBenchLatency()">⚡ Run Benchmark</button>
891
+ </div>
892
+ </div>
893
+
894
+ <div class="error-msg" id="benchLatencyError"></div>
895
+
896
+ <div class="result-section" id="benchLatencyResult">
897
+ <div class="card">
898
+ <div class="card-title">Results</div>
899
+ <div id="benchLatencyStats"></div>
900
+ <div class="latency-chart" id="benchLatencyChart"></div>
901
+ </div>
902
+ </div>
903
+ </div>
904
+
905
+ <!-- ── Ranking Panel ── -->
906
+ <div class="bench-view" id="bench-ranking">
907
+ <div class="card">
908
+ <div class="card-title">Model Ranking Comparison</div>
909
+ <div style="margin-bottom:12px;">
910
+ <input type="text" id="benchRankQuery" placeholder="Search query..." value="How do I search for similar documents using embeddings?">
911
+ </div>
912
+ <textarea id="benchRankDocs" rows="6" placeholder="Documents (one per line)...">Vector search finds documents by computing similarity between embedding vectors in high-dimensional space.
913
+ MongoDB Atlas Vector Search lets you index and query vector embeddings alongside your operational data.
914
+ Traditional full-text search uses inverted indexes to match keyword terms in documents.
915
+ Cosine similarity measures the angle between two vectors, commonly used for semantic search.
916
+ Database sharding distributes data across multiple servers for horizontal scalability.
917
+ Embedding models convert text into dense numerical vectors that capture meaning.
918
+ Approximate nearest neighbor algorithms like HNSW enable fast similarity search at scale.
919
+ Reranking models rescore initial search results to improve relevance ordering.</textarea>
920
+ <div class="options-row">
921
+ <div class="option-group">
922
+ <span class="option-label">Model A</span>
923
+ <select id="benchRankModelA"></select>
924
+ </div>
925
+ <div class="option-group">
926
+ <span class="option-label">Model B</span>
927
+ <select id="benchRankModelB"></select>
928
+ </div>
929
+ <div class="option-group">
930
+ <span class="option-label">Mode</span>
931
+ <select id="benchRankMode">
932
+ <option value="embed">Embedding Similarity</option>
933
+ <option value="rerank">Reranking</option>
934
+ </select>
935
+ </div>
936
+ <div class="option-group">
937
+ <span class="option-label">Top K</span>
938
+ <select id="benchRankTopK">
939
+ <option value="3">3</option>
940
+ <option value="5" selected>5</option>
941
+ <option value="8">8</option>
942
+ </select>
943
+ </div>
944
+ <button class="btn" id="benchRankBtn" onclick="doBenchRanking()">🏆 Compare Rankings</button>
945
+ </div>
946
+ </div>
947
+
948
+ <div class="error-msg" id="benchRankError"></div>
949
+
950
+ <div class="result-section" id="benchRankResult">
951
+ <div class="card">
952
+ <div class="card-title">Ranking Comparison</div>
953
+ <div id="benchRankVerdict" style="margin-bottom:12px;font-size:14px;"></div>
954
+ <div class="rank-comparison" id="benchRankGrid"></div>
955
+ </div>
956
+ </div>
957
+ </div>
958
+
959
+ <!-- ── Cost Panel ── -->
960
+ <div class="bench-view" id="bench-cost">
961
+ <div class="card">
962
+ <div class="card-title">Cost Calculator</div>
963
+ <div class="cost-slider-row">
964
+ <span class="cost-slider-label">Tokens per query</span>
965
+ <input type="range" class="cost-slider" id="costTokens" min="50" max="5000" value="500" step="50">
966
+ <span class="cost-slider-value" id="costTokensValue">500</span>
967
+ </div>
968
+ <div class="cost-slider-row">
969
+ <span class="cost-slider-label">Queries per day</span>
970
+ <input type="range" class="cost-slider" id="costQueries" min="10" max="500000" value="1000" step="10">
971
+ <span class="cost-slider-value" id="costQueriesValue">1,000</span>
972
+ </div>
973
+ <table class="cost-table" id="costTable">
974
+ <thead>
975
+ <tr>
976
+ <th>Model</th>
977
+ <th>Type</th>
978
+ <th>$/1M tokens</th>
979
+ <th>Daily Cost</th>
980
+ <th>Monthly Cost</th>
981
+ <th style="width:30%">Relative</th>
982
+ </tr>
983
+ </thead>
984
+ <tbody id="costTableBody"></tbody>
985
+ </table>
986
+ </div>
987
+ </div>
988
+
989
+ <!-- ── History Panel ── -->
990
+ <div class="bench-view" id="bench-history">
991
+ <div class="card">
992
+ <div class="card-title">Benchmark History</div>
993
+ <div id="benchHistoryContent">
994
+ <div class="history-empty">No benchmarks recorded yet. Run a latency benchmark to start tracking.</div>
995
+ </div>
996
+ <div style="margin-top:12px;text-align:right;">
997
+ <button class="btn btn-secondary btn-small" onclick="clearHistory()">🗑 Clear History</button>
998
+ </div>
999
+ </div>
1000
+ </div>
1001
+
1002
+ </div>
1003
+
617
1004
  <!-- ========== EXPLORE TAB ========== -->
618
1005
  <div class="tab-panel" id="tab-explore">
1006
+ <div style="margin-bottom:16px;">
1007
+ <input type="text" id="exploreSearch" placeholder="🔍 Search concepts..." oninput="filterExplore()" style="max-width:400px;">
1008
+ </div>
619
1009
  <div class="explore-grid" id="exploreGrid"></div>
620
1010
  </div>
621
1011
 
@@ -635,7 +1025,7 @@ let lastEmbedding = null;
635
1025
  async function init() {
636
1026
  setupTabs();
637
1027
  await loadConfig();
638
- await loadModels();
1028
+ await Promise.all([loadModels(), loadConcepts()]);
639
1029
  populateModelSelects();
640
1030
  buildExploreCards();
641
1031
  }
@@ -1006,103 +1396,550 @@ function createResultItem(rank, result, maxScore, movement) {
1006
1396
  }
1007
1397
 
1008
1398
  // ── Explore ──
1009
- const exploreTopics = [
1010
- {
1011
- key: 'embeddings', icon: '🧮', title: 'Embeddings',
1012
- summary: 'Numerical representations that capture meaning',
1013
- content: 'Vector embeddings are arrays of floating-point numbers (typically 256–2048 dimensions) that capture the semantic meaning of text. When you embed text, a neural network reads the input and produces a fixed-size vector. Texts with similar meanings end up close together in this high-dimensional space, even if they share no words.\n\nHigher dimensions capture more nuance but cost more to store and search. Voyage 4 models default to 1024 dimensions but support 256–2048 via Matryoshka representation learning — you can truncate embeddings without retraining.',
1014
- tab: 'embed', prefill: () => { document.getElementById('embedInput').value = 'Artificial intelligence is transforming how we build software applications.'; }
1015
- },
1016
- {
1017
- key: 'reranking', icon: '🏆', title: 'Reranking',
1018
- summary: 'Second-stage precision with cross-attention',
1019
- content: 'Reranking re-scores candidate documents against a query using cross-attention — it reads the query and each document together, producing much more accurate relevance scores than embedding similarity alone.\n\nThe two-stage pattern: embedding search retrieves a broad set (high recall), then the reranker re-orders them (high precision). This adds ~50-200ms but dramatically improves result quality.',
1020
- tab: 'search', prefill: () => {
1021
- document.getElementById('searchQuery').value = 'How do I implement semantic search?';
1022
- document.getElementById('searchDocs').value = 'MongoDB Atlas provides vector search capabilities\nThe recipe calls for two cups of flour\nSemantic search uses embeddings to find meaning\nVector databases store high-dimensional data\nThe weather forecast predicts rain tomorrow';
1023
- }
1024
- },
1025
- {
1026
- key: 'vector-search', icon: '🔎', title: 'Vector Search',
1027
- summary: 'Finding documents by meaning, not keywords',
1028
- content: 'Vector search finds documents whose embeddings are closest to a query embedding. Instead of matching keywords, it matches meaning. MongoDB Atlas Vector Search uses $vectorSearch with HNSW (Hierarchical Navigable Small World) graph indexes for fast approximate nearest neighbor search.\n\nSimilarity functions: cosine (direction, ignoring magnitude — best default), dotProduct (magnitude-sensitive), euclidean (straight-line distance).',
1029
- tab: 'search', prefill: () => {}
1030
- },
1031
- {
1032
- key: 'rag', icon: '🤖', title: 'RAG',
1033
- summary: 'Retrieval-Augmented Generation',
1034
- content: 'RAG combines retrieval with LLM generation: instead of relying on the LLM\'s training data alone, you retrieve relevant context from your own data and include it in the prompt.\n\nThe pattern: 1) Embed your corpus and store vectors, 2) Embed the user\'s question and run vector search, 3) Pass retrieved documents + question to an LLM. Adding reranking between steps 2 and 3 dramatically improves answer quality.',
1035
- tab: 'search', prefill: () => {}
1036
- },
1037
- {
1038
- key: 'cosine', icon: '📐', title: 'Cosine Similarity',
1039
- summary: 'Measuring the angle between vectors',
1040
- content: 'Cosine similarity measures the angle between two vectors, ignoring magnitude. Vectors pointing the same direction score 1, perpendicular score 0, opposite score -1.\n\nFor text embeddings (which are typically normalized), cosine similarity and dot product give identical rankings. Cosine is preferred because it\'s intuitive: it measures how similar the direction (meaning) is, regardless of scale.',
1041
- tab: 'compare', prefill: () => {
1042
- document.getElementById('compareA').value = 'The database stores information efficiently';
1043
- document.getElementById('compareB').value = 'Data is saved in an optimized storage system';
1044
- }
1045
- },
1046
- {
1047
- key: 'two-stage', icon: '🎯', title: 'Two-Stage Retrieval',
1048
- summary: 'Embed → Search → Rerank for best results',
1049
- content: 'Two-stage retrieval combines a fast first stage (embedding search for recall) with a precise second stage (reranking for precision).\n\nStage 1: Embed query, run ANN search, retrieve top-100 candidates (fast, milliseconds). Stage 2: Feed query + candidates to a reranker with cross-attention, return top-5-10 (precise, ~100ms extra). This gives you both speed and accuracy.',
1050
- tab: 'search', prefill: () => {}
1051
- },
1052
- {
1053
- key: 'input-types', icon: '🏷️', title: 'Input Types',
1054
- summary: 'Query vs document — why it matters',
1055
- content: 'The input_type parameter tells the model whether text is a search query or a document being indexed. The model internally prepends different prompt prefixes for each, optimizing embeddings for asymmetric retrieval.\n\nAlways use input_type="query" for search queries and input_type="document" for corpus text. Omitting this parameter degrades retrieval accuracy.',
1056
- tab: 'embed', prefill: () => {
1057
- document.getElementById('embedInput').value = 'What is vector search and how does it work?';
1058
- document.getElementById('embedInputType').value = 'query';
1059
- }
1060
- },
1061
- {
1062
- key: 'models', icon: '🧠', title: 'Models',
1063
- summary: 'Choosing the right model for your task',
1064
- content: 'Voyage 4 Series: voyage-4-large (best quality, $0.12/1M tokens), voyage-4 (balanced, $0.06), voyage-4-lite (budget, $0.02). All share the same embedding space — you can mix models.\n\nDomain-specific: voyage-code-3 (code), voyage-finance-2 (financial), voyage-law-2 (legal). Rerankers: rerank-2.5 (best quality), rerank-2.5-lite (faster). Start with voyage-4 for general use.',
1065
- tab: 'embed', prefill: () => {}
1066
- },
1067
- ];
1399
+ // ── Explore: icons and tab mappings per concept ──
1400
// Per-concept presentation metadata, keyed by concept key: the icon shown on
// the Explore card and the playground tab opened by the card's "try" button.
// Every entry needs a non-empty icon: buildExploreCards only falls back to a
// default icon when a key is missing entirely, not when the icon is blank.
const CONCEPT_META = {
  embeddings: { icon: '🧮', tab: 'embed' },
  reranking: { icon: '🏆', tab: 'search' },
  'vector-search': { icon: '🔎', tab: 'search' },
  rag: { icon: '🤖', tab: 'search' },
  'cosine-similarity': { icon: '📐', tab: 'compare' },
  'two-stage-retrieval': { icon: '🎯', tab: 'search' },
  'input-type': { icon: '🏷️', tab: 'embed' },
  models: { icon: '🧠', tab: 'embed' },
  'api-keys': { icon: '🔑', tab: 'embed' },
  'api-access': { icon: '🌐', tab: 'embed' },
  'batch-processing': { icon: '📦', tab: 'embed' },
  // Was an empty string, which rendered a card with no icon; uses the same
  // glyph as the Benchmark tab button.
  benchmarking: { icon: '⏱', tab: 'benchmark' },
};
1414
+
1415
// Concept explanations keyed by concept key; filled in by loadConcepts().
let exploreConcepts = {};

/**
 * Fetches the concept explanations from GET /api/concepts and stores them in
 * `exploreConcepts`.
 *
 * Best-effort: a network failure, a non-2xx response, or an unparsable body is
 * logged (with the underlying error) and leaves `exploreConcepts` unchanged,
 * so the rest of the page still initialises.
 *
 * @returns {Promise<void>}
 */
async function loadConcepts() {
  try {
    const res = await fetch('/api/concepts');
    // fetch() only rejects on network errors; HTTP error statuses must be
    // checked explicitly or an error body would be treated as concept data.
    if (!res.ok) {
      throw new Error(`GET /api/concepts responded with HTTP ${res.status}`);
    }
    const data = await res.json();
    exploreConcepts = data?.concepts || {};
  } catch (err) {
    console.error('Failed to load concepts', err);
  }
}
1426
+
1427
/**
 * Escapes a value for insertion into HTML text or a quoted attribute value.
 *
 * Replaces &, <, >, double quotes and single quotes with character
 * references. `null`/`undefined` yield an empty string and other non-string
 * values are stringified first, so a concept with a missing field cannot
 * throw while the page is being built.
 *
 * Note: this is HTML escaping only. It does not make a value safe inside a
 * JavaScript string in an inline event-handler attribute, because the HTML
 * parser decodes character references before the script is evaluated.
 *
 * @param {*} str - Value to escape.
 * @returns {string} The escaped text.
 */
function escapeHtml(str) {
  return String(str ?? '')
    .replace(/&/g, '&amp;') // must run first so later entities are not double-escaped
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}
1068
1430
 
1069
1431
  function buildExploreCards() {
1070
1432
  const grid = document.getElementById('exploreGrid');
1071
1433
  grid.innerHTML = '';
1072
- exploreTopics.forEach(topic => {
1434
+
1435
+ for (const [key, concept] of Object.entries(exploreConcepts)) {
1436
+ const meta = CONCEPT_META[key] || { icon: '📚', tab: 'embed' };
1073
1437
  const card = document.createElement('div');
1074
1438
  card.className = 'explore-card';
1439
+ card.dataset.key = key;
1440
+
1441
+ // Build links HTML
1442
+ let linksHtml = '';
1443
+ if (concept.links && concept.links.length > 0) {
1444
+ linksHtml = '<div style="margin-top:12px;"><strong style="color:var(--accent);font-size:12px;">LEARN MORE</strong><br>' +
1445
+ concept.links.map(url => `<a href="${escapeHtml(url)}" target="_blank" rel="noopener" style="color:var(--accent);font-size:12px;word-break:break-all;">${escapeHtml(url)}</a>`).join('<br>') +
1446
+ '</div>';
1447
+ }
1448
+
1449
+ // Build try-it HTML
1450
+ let tryItHtml = '';
1451
+ if (concept.tryIt && concept.tryIt.length > 0) {
1452
+ tryItHtml = '<div style="margin-top:12px;"><strong style="color:var(--accent);font-size:12px;">TRY IT</strong>' +
1453
+ concept.tryIt.map(cmd => `<div style="font-family:var(--mono);font-size:12px;color:var(--text-dim);background:var(--bg);padding:4px 8px;border-radius:4px;margin-top:4px;">$ ${escapeHtml(cmd)}</div>`).join('') +
1454
+ '</div>';
1455
+ }
1456
+
1075
1457
  card.innerHTML = `
1076
- <div class="explore-card-icon">${topic.icon}</div>
1077
- <div class="explore-card-title">${topic.title}</div>
1078
- <div class="explore-card-summary">${topic.summary}</div>
1079
- <div class="explore-card-content">${topic.content}</div>
1458
+ <div class="explore-card-icon">${meta.icon}</div>
1459
+ <div class="explore-card-title">${escapeHtml(concept.title)}</div>
1460
+ <div class="explore-card-summary">${escapeHtml(concept.summary)}</div>
1461
+ <div class="explore-card-content">${escapeHtml(concept.content)}${linksHtml}${tryItHtml}</div>
1080
1462
  <div class="explore-card-actions">
1081
- <button class="btn btn-small" onclick="tryTopic('${topic.key}')">Try it →</button>
1463
+ <button class="btn btn-small" onclick="tryTopic('${escapeHtml(key)}')">Try it in playground →</button>
1082
1464
  <button class="btn btn-secondary btn-small" onclick="collapseTopic(this)">Collapse</button>
1083
1465
  </div>
1084
1466
  `;
1085
1467
  card.addEventListener('click', function(e) {
1086
- if (e.target.tagName === 'BUTTON') return;
1468
+ if (e.target.tagName === 'BUTTON' || e.target.tagName === 'A') return;
1087
1469
  if (!this.classList.contains('expanded')) {
1088
1470
  this.classList.add('expanded');
1089
1471
  }
1090
1472
  });
1091
1473
  grid.appendChild(card);
1092
- });
1474
+ }
1093
1475
  }
1094
1476
 
1095
1477
  window.tryTopic = function(key) {
1096
- const topic = exploreTopics.find(t => t.key === key);
1097
- if (!topic) return;
1098
- if (topic.prefill) topic.prefill();
1099
- switchTab(topic.tab);
1478
+ const meta = CONCEPT_META[key];
1479
+ if (meta) switchTab(meta.tab);
1100
1480
  };
1101
1481
 
1102
1482
  window.collapseTopic = function(btn) {
1103
1483
  btn.closest('.explore-card').classList.remove('expanded');
1104
1484
  };
1105
1485
 
1486
/**
 * Filters the Explore cards by the search box text: a card stays visible
 * when the query is empty or when the card's text contains the query
 * (case-insensitive); otherwise it is hidden with display:none.
 */
window.filterExplore = function() {
  const needle = document.getElementById('exploreSearch').value.toLowerCase().trim();
  const cards = document.querySelectorAll('#exploreGrid .explore-card');
  for (const card of cards) {
    const isMatch = needle === '' || card.textContent.toLowerCase().includes(needle);
    card.style.display = isMatch ? '' : 'none';
  }
};
1494
+
1495
+ // ── Benchmark: Sub-panel switching ──
1496
// Clicking a sub-panel button makes it the only active button and shows only
// the view whose id is 'bench-' + the button's data-bench value.
for (const panelButton of document.querySelectorAll('.bench-panel-btn')) {
  panelButton.addEventListener('click', () => {
    for (const otherButton of document.querySelectorAll('.bench-panel-btn')) {
      otherButton.classList.remove('active');
    }
    for (const view of document.querySelectorAll('.bench-view')) {
      view.classList.remove('active');
    }
    panelButton.classList.add('active');
    const targetView = document.getElementById('bench-' + panelButton.dataset.bench);
    targetView.classList.add('active');
  });
}
1504
+
1505
+ // ── Benchmark: Model checkboxes ──
1506
/**
 * Fills #benchModelChecks with one labelled checkbox per entry of
 * `embedModels`; the three models named in `preselected` start checked.
 */
function buildModelCheckboxes() {
  const host = document.getElementById('benchModelChecks');
  host.innerHTML = '';
  const preselected = ['voyage-4-large', 'voyage-4', 'voyage-4-lite'];

  for (const model of embedModels) {
    const checkbox = document.createElement('input');
    checkbox.type = 'checkbox';
    checkbox.value = model.name;
    checkbox.checked = preselected.includes(model.name);
    checkbox.style.accentColor = 'var(--accent)';

    const wrapper = document.createElement('label');
    wrapper.style.cssText = 'display:flex;align-items:center;gap:4px;font-size:13px;cursor:pointer;color:var(--text);';
    wrapper.appendChild(checkbox);
    wrapper.appendChild(document.createTextNode(model.name));

    host.appendChild(wrapper);
  }
}
1523
+
1524
/**
 * Rebuilds the "Model A" / "Model B" selects of the ranking panel with every
 * embedding model followed by every rerank model. When at least two embedding
 * models exist, A defaults to the first one and B to the third (or to the
 * second when there are exactly two).
 */
function populateBenchRankSelects() {
  const selectA = document.getElementById('benchRankModelA');
  const selectB = document.getElementById('benchRankModelB');
  const allModels = [...embedModels, ...rerankModels];

  for (const select of [selectA, selectB]) {
    select.innerHTML = '';
    for (const model of allModels) {
      const option = document.createElement('option');
      option.value = model.name;
      option.textContent = model.name;
      select.appendChild(option);
    }
  }

  // Sensible defaults
  if (embedModels.length < 2) return;
  const secondIndex = embedModels.length > 2 ? 2 : 1;
  selectA.value = embedModels[0].name;
  selectB.value = embedModels[secondIndex].name;
}
1542
+
1543
+ // ── Benchmark: Latency ──
1544
// Built-in sample inputs embedded by the latency benchmark when the user
// does not supply custom text.
const BENCH_SAMPLE_TEXTS = [
  'MongoDB Atlas provides a fully managed cloud database service with built-in vector search capabilities.',
  'Kubernetes orchestrates containerized applications across clusters of machines for high availability.',
  'Machine learning models transform raw data into embeddings that capture semantic meaning.',
  'RESTful APIs use HTTP methods like GET, POST, PUT, and DELETE to manage resources.',
  'Natural language processing enables computers to understand and generate human language.',
];
1551
+
1552
// Colour palette for per-model bars; callers index it modulo its length so
// it cycles when there are more models than colours.
const MODEL_COLORS = [
  '#00d4aa',
  '#4ecdc4',
  '#45b7d1',
  '#96ceb4',
  '#ffd93d',
  '#ff6b6b',
  '#c792ea',
  '#f78c6c',
  '#82aaff',
  '#c3e88d',
];
1556
+
1557
/**
 * Run the embedding latency benchmark for every checked model.
 *
 * Each model is called `rounds` times, sequentially, against
 * `/api/benchmark/embed` with either the user's custom text or the built-in
 * sample texts. Results are drawn as a bar chart, summarised in the stats
 * strip, and appended to the persisted benchmark history.
 *
 * Failure handling:
 *  - an invalid rounds value is reported instead of silently doing nothing;
 *  - a failing model is marked "Error" and its message is surfaced;
 *  - the button's loading state is always cleared, even on unexpected errors.
 */
window.doBenchLatency = async function() {
  hideError('benchLatencyError');
  const checks = document.querySelectorAll('#benchModelChecks input:checked');
  const models = Array.from(checks, c => c.value);
  if (models.length === 0) { showError('benchLatencyError', 'Select at least one model'); return; }

  const rounds = parseInt(document.getElementById('benchRounds').value, 10);
  if (!Number.isInteger(rounds) || rounds < 1) {
    showError('benchLatencyError', 'Rounds must be a positive whole number');
    return;
  }

  const customText = document.getElementById('benchLatencyInput').value.trim();
  const texts = customText ? [customText] : BENCH_SAMPLE_TEXTS;

  setLoading('benchLatencyBtn', true);
  try {
    document.getElementById('benchLatencyResult').classList.add('visible');

    const chart = document.getElementById('benchLatencyChart');
    const statsEl = document.getElementById('benchLatencyStats');

    // Build placeholder bars
    chart.innerHTML = '';
    models.forEach((model, i) => {
      const row = document.createElement('div');
      row.className = 'latency-row';
      row.innerHTML = `
        <span class="latency-model"></span>
        <div class="latency-bar-outer">
          <div class="latency-bar-inner running" id="bench-bar-${i}" style="background:${MODEL_COLORS[i % MODEL_COLORS.length]}"></div>
        </div>
        <span class="latency-stats" id="bench-stats-${i}">Running...</span>
        <span class="latency-badge" id="bench-badge-${i}"></span>
      `;
      // Set the name as text so it can never be parsed as markup.
      row.querySelector('.latency-model').textContent = model;
      chart.appendChild(row);
    });
    statsEl.innerHTML = `<span class="stat"><span class="stat-label">Texts</span><span class="stat-value">${texts.length}</span></span>
      <span class="stat"><span class="stat-label">Rounds</span><span class="stat-value">${rounds}</span></span>`;

    const results = [];
    let firstFailure = null;

    for (let mi = 0; mi < models.length; mi++) {
      const model = models[mi];
      const latencies = [];
      let tokens = 0;
      let dims = 0;

      for (let r = 0; r < rounds; r++) {
        try {
          const data = await apiPost('/api/benchmark/embed', { texts, model, inputType: 'document' });
          latencies.push(data.elapsed);
          tokens = data.tokens;
          dims = data.dimensions;
        } catch (err) {
          const statsCell = document.getElementById(`bench-stats-${mi}`);
          const failedBar = document.getElementById(`bench-bar-${mi}`);
          statsCell.textContent = 'Error';
          statsCell.title = err.message;
          failedBar.classList.remove('running');
          failedBar.style.width = '0%';
          if (!firstFailure) firstFailure = `${model}: ${err.message}`;
          break; // stop benchmarking this model, continue with the next
        }
      }

      if (latencies.length > 0) {
        const avg = latencies.reduce((a, b) => a + b, 0) / latencies.length;
        const sorted = [...latencies].sort((a, b) => a - b);
        const mid = Math.floor(sorted.length / 2);
        // True median: average the two middle samples for even counts.
        const p50 = sorted.length % 2 === 1 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2;
        results.push({ model, avg, p50, min: sorted[0], max: sorted[sorted.length - 1], tokens, dims, idx: mi });
      }
    }

    if (firstFailure) showError('benchLatencyError', firstFailure);

    // Animate bars to final widths
    if (results.length > 0) {
      const maxAvg = Math.max(...results.map(r => r.avg));
      const minAvg = Math.min(...results.map(r => r.avg));

      results.forEach(r => {
        const bar = document.getElementById(`bench-bar-${r.idx}`);
        const stats = document.getElementById(`bench-stats-${r.idx}`);
        const badge = document.getElementById(`bench-badge-${r.idx}`);
        bar.classList.remove('running');
        // Keep a 15% floor so the label inside a very short bar stays readable.
        const pct = Math.max(15, (r.avg / maxAvg) * 100);
        bar.style.width = pct + '%';
        bar.textContent = r.avg.toFixed(0) + 'ms';
        stats.textContent = `p50: ${r.p50.toFixed(0)}ms`;
        stats.title = `min: ${r.min.toFixed(0)}ms, max: ${r.max.toFixed(0)}ms`;
        if (r.avg === minAvg) badge.textContent = '⚡';
      });

      // Update stats
      const fastest = results.reduce((a, b) => a.avg < b.avg ? a : b);
      const price = getModelPrice(fastest.model);
      statsEl.innerHTML += `
        <span class="stat"><span class="stat-label">Fastest</span><span class="stat-value">${fastest.model}</span></span>
        <span class="stat"><span class="stat-label">Avg</span><span class="stat-value">${fastest.avg.toFixed(0)}ms</span></span>
        <span class="stat"><span class="stat-label">Dims</span><span class="stat-value">${fastest.dims}</span></span>
        ${price ? `<span class="stat"><span class="stat-label">Price</span><span class="stat-value">${price}</span></span>` : ''}
      `;

      // Save to history
      saveBenchHistory(results, texts.length, rounds);
      renderHistory();
    }
  } catch (err) {
    // Anything unexpected (missing element, storage failure, ...) is shown
    // to the user rather than leaving the panel stuck mid-run.
    showError('benchLatencyError', err.message);
  } finally {
    setLoading('benchLatencyBtn', false);
  }
};
1655
+
1656
/**
 * Look up a model's price label in `allModels`.
 * @param {string} name - Model name to search for.
 * @returns The matching entry's `price`, or null when no model has that name.
 */
function getModelPrice(name) {
  const entry = allModels.find(candidate => candidate.name === name);
  if (!entry) return null;
  return entry.price;
}
1660
+
1661
/**
 * Extract the numeric per-million-token price of a model from `allModels`.
 *
 * The price label is expected to contain a "$<number>/1M" fragment.
 * @param {string} name - Model name to search for.
 * @returns {number|null} The parsed price, or null when the model is unknown,
 *   has no string price, or the label does not contain a parseable amount.
 */
function getModelPriceNum(name) {
  const entry = allModels.find(candidate => candidate.name === name);
  // Entries without a string price (e.g. free/local models) have no number.
  if (!entry || typeof entry.price !== 'string') return null;

  const match = entry.price.match(/\$([0-9.]+)\/1M/);
  if (!match) return null;

  // "[0-9.]+" also matches strings like "." — never hand NaN to callers.
  const value = parseFloat(match[1]);
  return Number.isNaN(value) ? null : value;
}
1667
+
1668
+ // ── Benchmark: Ranking Comparison ──
1669
/**
 * Compare how two models rank the same documents for one query.
 *
 * In "embed" mode both models embed the query plus documents and the
 * documents are ranked by cosine similarity to the query; in any other mode
 * both models are called through `/api/benchmark/rerank`. The two rankings
 * are then rendered side by side.
 *
 * The Top-K field is sanitised: a missing/invalid value falls back to "all
 * documents", and a value larger than the document count is clamped, so the
 * comparison never runs with NaN or asks for more results than exist.
 */
window.doBenchRanking = async function() {
  hideError('benchRankError');
  const query = document.getElementById('benchRankQuery').value.trim();
  const docsText = document.getElementById('benchRankDocs').value.trim();
  if (!query || !docsText) { showError('benchRankError', 'Enter a query and documents'); return; }

  // One document per non-empty line.
  const documents = docsText.split('\n').map(d => d.trim()).filter(Boolean);
  if (documents.length < 2) { showError('benchRankError', 'Enter at least 2 documents'); return; }

  const modelA = document.getElementById('benchRankModelA').value;
  const modelB = document.getElementById('benchRankModelB').value;
  const mode = document.getElementById('benchRankMode').value;

  const requestedTopK = parseInt(document.getElementById('benchRankTopK').value, 10);
  const topK = Number.isInteger(requestedTopK) && requestedTopK > 0
    ? Math.min(requestedTopK, documents.length)
    : documents.length;

  // Normalise a rerank API response into { index, text, score } entries.
  const toRanked = (data) => data.results.slice(0, topK).map(r => ({
    index: r.index,
    text: documents[r.index],
    score: r.relevance_score,
  }));

  setLoading('benchRankBtn', true);

  try {
    let rankedA;
    let rankedB;

    if (mode === 'embed') {
      // Embedding similarity mode: the query is sent as the first text so
      // rankBySimilarity can split it from the document vectors.
      const [dataA, dataB] = await Promise.all([
        apiPost('/api/benchmark/embed', { texts: [query, ...documents], model: modelA, inputType: 'document' }),
        apiPost('/api/benchmark/embed', { texts: [query, ...documents], model: modelB, inputType: 'document' }),
      ]);

      rankedA = rankBySimilarity(dataA.embeddings, documents, topK);
      rankedB = rankBySimilarity(dataB.embeddings, documents, topK);
    } else {
      // Rerank mode
      const [dataA, dataB] = await Promise.all([
        apiPost('/api/benchmark/rerank', { query, documents, model: modelA, topK }),
        apiPost('/api/benchmark/rerank', { query, documents, model: modelB, topK }),
      ]);

      rankedA = toRanked(dataA);
      rankedB = toRanked(dataB);
    }

    // Render comparison
    renderRankComparison(modelA, modelB, rankedA, rankedB, topK);
    document.getElementById('benchRankResult').classList.add('visible');
  } catch (err) {
    showError('benchRankError', err.message);
  } finally {
    setLoading('benchRankBtn', false);
  }
};
1725
+
1726
/**
 * Rank documents by cosine similarity to the query.
 * @param {Array} embeddings - Query vector first, then one vector per document.
 * @param {Array} documents - Document texts, aligned with embeddings[1..].
 * @param {number} topK - Maximum number of entries to return.
 * @returns {Array<{index:number, text:*, score:number}>} Best matches first.
 */
function rankBySimilarity(embeddings, documents, topK) {
  const [queryVec, ...docVecs] = embeddings;

  const ranked = [];
  for (let i = 0; i < docVecs.length; i++) {
    ranked.push({
      index: i,
      text: documents[i],
      score: cosineSim(queryVec, docVecs[i]),
    });
  }

  ranked.sort((left, right) => right.score - left.score);
  return ranked.slice(0, topK);
}
1737
+
1738
/**
 * Render two rankings side by side and summarise how much they agree.
 *
 * Document text comes straight from the user's textarea, so everything
 * interpolated into markup is HTML-escaped first.
 *
 * @param {string} modelA - Name shown above the left column.
 * @param {string} modelB - Name shown above the right column.
 * @param {Array<{index:number, text:string, score:number}>} rankedA
 * @param {Array<{index:number, text:string, score:number}>} rankedB
 * @param {number} topK - Maximum number of positions to compare.
 */
function renderRankComparison(modelA, modelB, rankedA, rankedB, topK) {
  const escapeHtml = (value) => String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
  const asText = (value) => (value == null ? '' : String(value));
  // Shorten long documents for the cell; the full text goes in the tooltip.
  const truncate = (text) => (text.length > 60 ? text.slice(0, 57) + '...' : text);

  const grid = document.getElementById('benchRankGrid');
  const verdict = document.getElementById('benchRankVerdict');
  grid.innerHTML = '';

  // Header
  const header = document.createElement('div');
  header.className = 'rank-row';
  header.style.background = 'none';
  header.style.fontWeight = '600';
  header.style.fontSize = '13px';
  header.style.color = 'var(--accent)';
  header.innerHTML = `<div></div><div>${escapeHtml(modelA)}</div><div></div><div>${escapeHtml(modelB)}</div>`;
  grid.appendChild(header);

  const k = Math.min(topK, rankedA.length, rankedB.length);
  // Covers 0 and NaN: nothing to compare, and the percentages below would
  // otherwise divide by zero.
  if (!(k > 0)) {
    verdict.innerHTML = '<span style="color:var(--warning)">⚠ No ranked results to compare.</span>';
    return;
  }

  let matches = 0;
  for (let i = 0; i < k; i++) {
    const a = rankedA[i];
    const b = rankedB[i];
    const same = a.index === b.index;
    if (same) matches++;

    const textA = asText(a.text);
    const textB = asText(b.text);
    const cellClass = same ? 'rank-match' : 'rank-differ';

    const row = document.createElement('div');
    row.className = 'rank-row';
    row.innerHTML = `
      <div class="rank-num">${i + 1}</div>
      <div class="rank-item ${cellClass}">
        <div title="${escapeHtml(textA)}">${escapeHtml(truncate(textA))}</div>
        <div class="rank-score">${a.score.toFixed(4)} [doc ${escapeHtml(a.index)}]</div>
      </div>
      <div class="rank-arrow">${same ? '=' : '≠'}</div>
      <div class="rank-item ${cellClass}">
        <div title="${escapeHtml(textB)}">${escapeHtml(truncate(textB))}</div>
        <div class="rank-score">${b.score.toFixed(4)} [doc ${escapeHtml(b.index)}]</div>
      </div>
    `;
    grid.appendChild(row);
  }

  // Compute overlap: how many documents appear in both top-k sets,
  // regardless of position.
  const setA = new Set(rankedA.slice(0, k).map(r => r.index));
  const setB = new Set(rankedB.slice(0, k).map(r => r.index));
  const overlap = [...setA].filter(x => setB.has(x)).length;
  const overlapPct = ((overlap / k) * 100).toFixed(0);
  const agreePct = ((matches / k) * 100).toFixed(0);

  if (matches === k) {
    verdict.innerHTML = `<span style="color:var(--green)">✓ Models agree on all ${k} positions — the cheaper model is likely sufficient.</span>`;
  } else if (overlap === k) {
    verdict.innerHTML = `<span style="color:var(--yellow)">⚠ Same ${k} documents in top-${k}, but in different order (${agreePct}% exact match).</span>`;
  } else {
    verdict.innerHTML = `<span style="color:var(--warning)">⚠ ${overlapPct}% overlap in top-${k} results — models see different relevance signals.</span>`;
  }
}
1799
+
1800
+ // ── Benchmark: Cost Calculator ──
1801
/**
 * Wire up the cost calculator: whenever either slider moves, refresh the
 * two value labels and re-render the cost table. Also renders once
 * immediately so the table is populated on load.
 */
function initCostCalculator() {
  const tokenSlider = document.getElementById('costTokens');
  const querySlider = document.getElementById('costQueries');
  const tokenLabel = document.getElementById('costTokensValue');
  const queryLabel = document.getElementById('costQueriesValue');

  const refresh = () => {
    const tokensPerQuery = parseInt(tokenSlider.value, 10);
    const queriesPerDay = parseInt(querySlider.value, 10);
    tokenLabel.textContent = tokensPerQuery.toLocaleString();
    queryLabel.textContent = queriesPerDay.toLocaleString();
    renderCostTable(tokensPerQuery, queriesPerDay);
  };

  for (const slider of [tokenSlider, querySlider]) {
    slider.addEventListener('input', refresh);
  }

  // Initial render
  refresh();
}
1821
+
1822
/**
 * Rebuild the cost table for the given workload.
 *
 * Every non-legacy model in `allModels` whose price label contains a
 * "$<number>/1M" fragment gets one row, sorted cheapest first. Monthly cost
 * assumes 30 days. Models without a parseable price are skipped rather than
 * aborting the whole render.
 *
 * @param {number} tokensPerQuery - Tokens consumed by one query.
 * @param {number} queriesPerDay - Queries issued per day.
 */
function renderCostTable(tokensPerQuery, queriesPerDay) {
  const tbody = document.getElementById('costTableBody');
  tbody.innerHTML = '';

  // Same for every model, so compute once; treat unparsed slider values as 0
  // instead of letting NaN spread through the table.
  const product = tokensPerQuery * queriesPerDay;
  const dailyTokens = Number.isFinite(product) ? product : 0;

  const rows = [];
  for (const m of allModels) {
    if (m.legacy || typeof m.price !== 'string') continue;
    const match = m.price.match(/\$([0-9.]+)\/1M/);
    if (!match) continue;
    const pricePerM = parseFloat(match[1]);
    if (!Number.isFinite(pricePerM)) continue;

    const dailyCost = (dailyTokens / 1_000_000) * pricePerM;
    const monthlyCost = dailyCost * 30;
    rows.push({ name: m.name, type: m.type === 'embedding' ? 'embed' : 'rerank', pricePerM, dailyCost, monthlyCost });
  }

  rows.sort((a, b) => a.monthlyCost - b.monthlyCost);
  // The 0.01 floor avoids dividing by zero when every cost is 0.
  const maxMonthly = Math.max(...rows.map(r => r.monthlyCost), 0.01);

  // Precision shrinks as amounts grow: cents below $1, dimes below $100,
  // whole dollars above. "&lt;" because the string is written via innerHTML.
  const formatMonthly = (cost) => {
    if (cost < 0.01) return '&lt;$0.01';
    if (cost < 1) return '$' + cost.toFixed(2);
    if (cost < 100) return '$' + cost.toFixed(1);
    return '$' + cost.toFixed(0);
  };

  rows.forEach(r => {
    const tr = document.createElement('tr');
    // Minimum 2% so even the cheapest model shows a visible bar.
    const barPct = Math.max(2, (r.monthlyCost / maxMonthly) * 100);
    const monthlyStr = formatMonthly(r.monthlyCost);
    const dailyStr = r.dailyCost < 0.01 ? '&lt;$0.01' : '$' + r.dailyCost.toFixed(2);

    tr.innerHTML = `
      <td style="color:var(--text)">${r.name}</td>
      <td style="color:var(--text-dim)">${r.type}</td>
      <td>$${r.pricePerM.toFixed(2)}</td>
      <td>${dailyStr}</td>
      <td class="cost-highlight">${monthlyStr}</td>
      <td class="cost-bar-cell" style="position:relative;padding-left:8px;">
        <div class="cost-bar" style="width:${barPct}%;"></div>
        <span style="position:relative;z-index:1;font-size:12px;color:var(--text-dim);">${monthlyStr}</span>
      </td>
    `;
    tbody.appendChild(tr);
  });
}
1862
+
1863
+ // ── Benchmark: History ──
1864
// localStorage key under which latency benchmark runs are persisted.
const HISTORY_KEY = 'vai-bench-history';

/**
 * Append one latency benchmark run to the persisted history, keeping only
 * the 20 most recent runs.
 *
 * Storage problems never propagate: unreadable or malformed stored data is
 * replaced by a fresh history, and a failed write (quota exceeded, storage
 * disabled) is logged and otherwise ignored, since history is best-effort.
 *
 * @param {Array<{model:string, avg:number, p50:number, dims:number}>} results
 * @param {number} textCount - Number of texts embedded per request.
 * @param {number} rounds - Rounds run per model.
 */
function saveBenchHistory(results, textCount, rounds) {
  let history;
  try {
    history = JSON.parse(localStorage.getItem(HISTORY_KEY) || '[]');
  } catch (err) {
    // Corrupted JSON or inaccessible storage: start over.
    history = [];
  }
  if (!Array.isArray(history)) history = [];

  history.push({
    timestamp: Date.now(),
    textCount,
    rounds,
    // Persist only the fields the history chart needs.
    results: results.map(r => ({ model: r.model, avg: r.avg, p50: r.p50, dims: r.dims })),
  });

  // Keep last 20
  if (history.length > 20) history.splice(0, history.length - 20);

  try {
    localStorage.setItem(HISTORY_KEY, JSON.stringify(history));
  } catch (err) {
    console.warn('Could not save benchmark history:', err);
  }
}
1878
+
1879
/**
 * Render the persisted latency benchmark history as grouped bars (one group
 * per run, one bar per model), followed by a colour legend and, when there
 * is more than one run, first/last timestamps.
 *
 * Stored data is treated as untrusted: malformed JSON or entries without a
 * `results` array are ignored, non-numeric averages are skipped, and model
 * names are HTML-escaped before being written into the markup.
 */
function renderHistory() {
  const container = document.getElementById('benchHistoryContent');
  const escapeHtml = (value) => String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');

  let stored;
  try {
    stored = JSON.parse(localStorage.getItem(HISTORY_KEY) || '[]');
  } catch (err) {
    // Corrupted JSON or inaccessible storage: behave as if empty.
    stored = [];
  }
  const history = Array.isArray(stored)
    ? stored.filter(h => h && Array.isArray(h.results))
    : [];

  if (history.length === 0) {
    container.innerHTML = '<div class="history-empty">No benchmarks recorded yet. Run a latency benchmark to start tracking.</div>';
    return;
  }

  // Collect all models that appear, in first-seen order, and give each a
  // stable colour.
  const models = [...new Set(history.flatMap(h => h.results.map(r => r.model)))];
  const colorMap = new Map(models.map((m, i) => [m, MODEL_COLORS[i % MODEL_COLORS.length]]));

  // Find global max for scale; fall back to 1 so an all-zero (or empty)
  // history cannot produce NaN heights.
  const averages = history.flatMap(h => h.results.map(r => r.avg)).filter(Number.isFinite);
  const maxAvg = Math.max(...averages, 0);
  const scale = maxAvg > 0 ? maxAvg : 1;

  let html = '<div class="history-chart">';
  history.forEach(h => {
    html += '<div class="history-bar-group" title="' + escapeHtml(new Date(h.timestamp).toLocaleString()) + '">';
    models.forEach(m => {
      const r = h.results.find(entry => entry.model === m && Number.isFinite(entry.avg));
      // Runs that did not include this model get a zero-height placeholder
      // so bars stay aligned across groups.
      const height = r ? Math.max(4, (r.avg / scale) * 100) : 0;
      const label = r ? `${m}: ${r.avg.toFixed(0)}ms` : '';
      html += `<div class="history-bar" style="height:${height}%;background:${colorMap.get(m)};" title="${escapeHtml(label)}"></div>`;
    });
    html += '</div>';
  });
  html += '</div>';

  // Legend
  html += '<div class="history-legend">';
  models.forEach(m => {
    html += `<span><span class="history-legend-dot" style="background:${colorMap.get(m)}"></span>${escapeHtml(m)}</span>`;
  });
  html += '</div>';

  // Time labels
  if (history.length > 1) {
    const first = new Date(history[0].timestamp);
    const last = new Date(history[history.length - 1].timestamp);
    html += `<div class="history-labels"><span>${first.toLocaleDateString()} ${first.toLocaleTimeString([], {hour:'2-digit',minute:'2-digit'})}</span><span>${last.toLocaleDateString()} ${last.toLocaleTimeString([], {hour:'2-digit',minute:'2-digit'})}</span></div>`;
  }

  container.innerHTML = html;
}
1927
+
1928
// Forget every stored benchmark run, then redraw the history panel so it
// shows its empty state.
window.clearHistory = function() {
  localStorage.removeItem(HISTORY_KEY);
  renderHistory();
};
1932
+
1933
+ // ── Patch init to include benchmark setup ──
1934
// Wrap the existing init so that, once it has finished, the benchmark tab's
// widgets are set up in order: checkboxes, rank selects, cost calculator,
// then the history chart.
const baseInit = init;
init = async function() {
  await baseInit();

  const benchmarkSetupSteps = [
    buildModelCheckboxes,
    populateBenchRankSelects,
    initCostCalculator,
    renderHistory,
  ];
  for (const runStep of benchmarkSetupSteps) {
    runStep();
  }
};
1942
+
1106
1943
  // ── Start ──
1107
1944
  init();
1108
1945
  })();
@@ -14,6 +14,10 @@ describe('ping command', () => {
14
14
  let output;
15
15
  let errorOutput;
16
16
 
17
+ // Strip ANSI escape codes for reliable string assertions in CI
18
+ // (GitHub Actions sets FORCE_COLOR which adds ANSI codes via picocolors)
19
+ const stripAnsi = (s) => s.replace(/\x1b\[[0-9;]*m/g, '');
20
+
17
21
  beforeEach(() => {
18
22
  originalLog = console.log;
19
23
  originalError = console.error;
@@ -70,7 +74,7 @@ describe('ping command', () => {
70
74
 
71
75
  await program.parseAsync(['node', 'test', 'ping']);
72
76
 
73
- const combined = output.join('\n');
77
+ const combined = stripAnsi(output.join('\n'));
74
78
  assert.ok(combined.includes('✓ Connected to Voyage AI API'), 'Should show success message');
75
79
  assert.ok(combined.includes('voyage-4-lite'), 'Should show model name');
76
80
  assert.ok(combined.includes('1024'), 'Should show dimensions');
@@ -101,7 +105,7 @@ describe('ping command', () => {
101
105
  );
102
106
 
103
107
  assert.equal(exitCode, 1);
104
- const combined = errorOutput.join('\n');
108
+ const combined = stripAnsi(errorOutput.join('\n'));
105
109
  assert.ok(combined.includes('Authentication failed'), 'Should show auth error');
106
110
  });
107
111
 
@@ -134,7 +138,7 @@ describe('ping command', () => {
134
138
  );
135
139
 
136
140
  assert.equal(exitCode, 1);
137
- const combined = errorOutput.join('\n');
141
+ const combined = stripAnsi(errorOutput.join('\n'));
138
142
  assert.ok(combined.includes('VOYAGE_API_KEY'), 'Should mention missing key');
139
143
  } finally {
140
144
  config.getConfigValue = origGetConfigValue;