genarena 0.0.1__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. genarena/__init__.py +49 -2
  2. genarena/__main__.py +10 -0
  3. genarena/arena.py +1685 -0
  4. genarena/battle.py +337 -0
  5. genarena/bt_elo.py +507 -0
  6. genarena/cli.py +1581 -0
  7. genarena/data.py +476 -0
  8. genarena/deploy/Dockerfile +25 -0
  9. genarena/deploy/README.md +55 -0
  10. genarena/deploy/__init__.py +5 -0
  11. genarena/deploy/app.py +84 -0
  12. genarena/experiments.py +121 -0
  13. genarena/leaderboard.py +270 -0
  14. genarena/logs.py +409 -0
  15. genarena/models.py +412 -0
  16. genarena/prompts/__init__.py +127 -0
  17. genarena/prompts/mmrb2.py +373 -0
  18. genarena/sampling.py +336 -0
  19. genarena/state.py +656 -0
  20. genarena/sync/__init__.py +105 -0
  21. genarena/sync/auto_commit.py +118 -0
  22. genarena/sync/deploy_ops.py +543 -0
  23. genarena/sync/git_ops.py +422 -0
  24. genarena/sync/hf_ops.py +891 -0
  25. genarena/sync/init_ops.py +431 -0
  26. genarena/sync/packer.py +587 -0
  27. genarena/sync/submit.py +837 -0
  28. genarena/utils.py +103 -0
  29. genarena/validation/__init__.py +19 -0
  30. genarena/validation/schema.py +327 -0
  31. genarena/validation/validator.py +329 -0
  32. genarena/visualize/README.md +148 -0
  33. genarena/visualize/__init__.py +14 -0
  34. genarena/visualize/app.py +938 -0
  35. genarena/visualize/data_loader.py +2335 -0
  36. genarena/visualize/static/app.js +3762 -0
  37. genarena/visualize/static/model_aliases.json +86 -0
  38. genarena/visualize/static/style.css +4104 -0
  39. genarena/visualize/templates/index.html +413 -0
  40. genarena/vlm.py +519 -0
  41. genarena-0.1.0.dist-info/METADATA +178 -0
  42. genarena-0.1.0.dist-info/RECORD +44 -0
  43. {genarena-0.0.1.dist-info → genarena-0.1.0.dist-info}/WHEEL +1 -2
  44. genarena-0.1.0.dist-info/entry_points.txt +2 -0
  45. genarena-0.0.1.dist-info/METADATA +0 -26
  46. genarena-0.0.1.dist-info/RECORD +0 -5
  47. genarena-0.0.1.dist-info/top_level.txt +0 -1
@@ -0,0 +1,3762 @@
1
+ /**
2
+ * GenArena Arena Visualizer - Frontend Application
3
+ */
4
+
5
+ // ========== State ==========
6
+ const state = {
7
+ currentPage: 'overview', // 'overview' or 'gallery'
8
+ subset: null, // Selected subset name; assigned by navigateToSubset
9
+ experiment: null, // Selected experiment; reset to null by navigateToSubset
10
+ models: [], // Models of the current subset; filled by loadSubsetInfo
11
+ promptSources: [], // Prompt sources of the current subset; filled by loadSubsetInfo
12
+ page: 1,
13
+ pageSize: 20,
14
+ totalPages: 1,
15
+ totalBattles: 0,
16
+ filters: {
17
+ models: [], // Selected model names; loadH2HStats only runs when exactly 2 are set
18
+ result: null,
19
+ consistent: null,
20
+ minImages: null,
21
+ maxImages: null,
22
+ promptSource: null,
23
+ },
24
+ h2h: null, // Head-to-head stats when 2 models selected
25
+ imageRange: { min: 1, max: 1 }, // Available image count range for current subset
26
+ favorites: [], // Array of {subset, exp_name, sample_index}
27
+ viewMode: 'battles', // 'battles' or 'prompts'
28
+ promptsPageSize: 10, // Prompts have more data, so use smaller page size
29
+ promptsModelFilter: [], // Model filter for prompts view
30
+ favoritesModelFilter: [], // Model filter for favorites modal
31
+ favoritesStatsScope: 'filtered', // 'filtered' = only selected models, 'all' = all opponents
32
+ searchQuery: '', // Search query for filtering by instruction text
33
+ // Overview page state
34
+ overviewData: null, // Payload of api/overview/leaderboards; set by loadOverviewLeaderboards
35
+ overviewSortColumn: 'basic', // 'model' or a subset name (sorts by that subset's ELO)
36
+ overviewSortDirection: 'desc', // 'asc' or 'desc'
37
+ // Cross-subset modal state
38
+ crossSubsetState: {
39
+ subsets: [], // Subset names from api/subsets; set by loadCrossSubsetData
40
+ selectedSubsets: new Set(), // Subset names whose checkbox is currently checked
41
+ subsetInfo: {}, // Payload of api/cross-subset/info; set by updateCrossSubsetInfo
42
+ },
43
+ };
44
+
45
+ // ========== Model Aliases ==========
46
let modelAliases = {};

/**
 * Load model aliases from the static JSON file into `modelAliases`.
 *
 * A non-OK HTTP response leaves the current aliases untouched (and is logged).
 * A network/parse failure or a payload that is not a plain object resets the
 * aliases to an empty map so later lookups by model name cannot throw.
 *
 * @returns {Promise<void>}
 */
async function loadModelAliases() {
    try {
        const response = await fetch('static/model_aliases.json');
        if (!response.ok) {
            console.warn(`Failed to load model aliases: HTTP ${response.status}`);
            return;
        }
        const payload = await response.json();
        // JSON such as `null` or an array would break `modelAliases[name]` lookups.
        const isPlainMap = payload !== null && typeof payload === 'object' && !Array.isArray(payload);
        modelAliases = isPlainMap ? payload : {};
    } catch (error) {
        console.warn('Failed to load model aliases:', error);
        modelAliases = {};
    }
}
62
+
63
/**
 * Resolve the name shown in the UI for a model.
 * @param {string} modelName - Original model name
 * @returns {string} The alias from `modelAliases` when a non-empty one exists, otherwise the original name
 */
function getModelDisplayName(modelName) {
    const alias = modelAliases[modelName]?.alias;
    return alias ? alias : modelName;
}
74
+
75
/**
 * Look up the external link configured for a model.
 * @param {string} modelName - Original model name
 * @returns {string|null} The link from `modelAliases`, or null when it is missing, empty or the '#' placeholder
 */
function getModelLink(modelName) {
    const link = modelAliases[modelName]?.link;
    if (!link || link === '#') {
        return null;
    }
    return link;
}
86
+
87
+ // ========== DOM Elements ==========
88
// Cache of DOM nodes used throughout the app, looked up once when the script runs.
// Entries are null when the corresponding node is absent from the page.
const elements = (() => {
    const byId = (id) => document.getElementById(id);
    const query = (selector) => document.querySelector(selector);

    return {
        // Navigation elements
        logoLink: byId('logo-link'),
        navOverview: byId('nav-overview'),
        navGallery: byId('nav-gallery'),
        // Page containers
        overviewPage: byId('overview-page'),
        galleryPage: byId('gallery-page'),
        overviewContent: byId('overview-content'),
        // Overview page elements
        crossSubsetBtn: byId('cross-subset-btn'),
        // Gallery controls
        galleryControls: query('.gallery-controls'),
        subsetSelect: byId('subset-select'),
        expSelect: byId('exp-select'),
        // Sidebar filter elements
        modelCheckboxes: byId('model-checkboxes'),
        modelCount: byId('model-count'),
        selectAllModels: byId('select-all-models'),
        clearAllModels: byId('clear-all-models'),
        resultFilter: byId('result-filter'),
        resultFilterGroup: byId('result-filter-group'),
        consistencyFilter: byId('consistency-filter'),
        promptSourceFilterGroup: byId('prompt-source-filter-group'),
        promptSourceFilter: byId('prompt-source-filter'),
        imageCountFilterGroup: byId('image-count-filter-group'),
        minImagesSlider: byId('min-images-slider'),
        maxImagesSlider: byId('max-images-slider'),
        imageRangeDisplay: byId('image-range-display'),
        minImagesLabel: byId('min-images-label'),
        maxImagesLabel: byId('max-images-label'),
        applyFilters: byId('apply-filters'),
        clearFilters: byId('clear-filters'),
        battleList: byId('battle-list'),
        statsPanel: byId('stats-panel'),
        h2hSection: byId('h2h-section'),
        h2hPanel: byId('h2h-panel'),
        paginationInfo: byId('pagination-info'),
        pageNumbers: byId('page-numbers'),
        pageNumbersBottom: byId('page-numbers-bottom'),
        firstPage: byId('first-page'),
        prevPage: byId('prev-page'),
        nextPage: byId('next-page'),
        lastPage: byId('last-page'),
        firstPageBottom: byId('first-page-bottom'),
        prevPageBottom: byId('prev-page-bottom'),
        nextPageBottom: byId('next-page-bottom'),
        lastPageBottom: byId('last-page-bottom'),
        pageInput: byId('page-input'),
        pageGo: byId('page-go'),
        pageInputBottom: byId('page-input-bottom'),
        pageGoBottom: byId('page-go-bottom'),
        modal: byId('detail-modal'),
        modalContent: byId('detail-content'),
        modalClose: query('.modal-close'),
        modalBackdrop: query('.modal-backdrop'),
        lightbox: byId('lightbox'),
        lightboxImg: byId('lightbox-img'),
        lightboxLabel: byId('lightbox-label'),
        lightboxClose: query('.lightbox-close'),
        // Favorites elements
        favoritesBtn: byId('favorites-btn'),
        favoritesCount: byId('favorites-count'),
        favoritesModal: byId('favorites-modal'),
        favoritesContent: byId('favorites-content'),
        favoritesModalClose: query('#favorites-modal .modal-close'),
        favoritesModalBackdrop: query('#favorites-modal .modal-backdrop'),
        clearAllFavorites: byId('clear-all-favorites'),
        // View toggle elements
        viewBattlesBtn: byId('view-battles'),
        viewPromptsBtn: byId('view-prompts'),
        promptsList: byId('prompts-list'),
        // Prompts model filter elements
        promptsModelFilterGroup: byId('prompts-model-filter-group'),
        promptsModelCheckboxes: byId('prompts-model-checkboxes'),
        promptsModelCount: byId('prompts-model-count'),
        promptsSelectAllModels: byId('prompts-select-all-models'),
        promptsClearAllModels: byId('prompts-clear-all-models'),
        promptsApplyModelFilter: byId('prompts-apply-model-filter'),
        // Favorites model filter elements
        favoritesModelFilterGroup: byId('favorites-model-filter-group'),
        favoritesModelCheckboxes: byId('favorites-model-checkboxes'),
        favoritesSelectAllModels: byId('favorites-select-all-models'),
        favoritesClearAllModels: byId('favorites-clear-all-models'),
        favoritesApplyModelFilter: byId('favorites-apply-model-filter'),
        favoritesStatsScopeAll: byId('favorites-stats-scope-all'),
        // ELO Leaderboard elements
        eloSection: byId('elo-section'),
        eloPanel: byId('elo-panel'),
        viewFullLeaderboard: byId('view-full-leaderboard'),
        leaderboardModal: byId('leaderboard-modal'),
        leaderboardContent: byId('leaderboard-content'),
        leaderboardModalClose: query('#leaderboard-modal .modal-close'),
        leaderboardModalBackdrop: query('#leaderboard-modal .modal-backdrop'),
        leaderboardSubsetName: byId('leaderboard-subset-name'),
        // Model Stats Modal elements
        modelStatsModal: byId('model-stats-modal'),
        modelStatsContent: byId('model-stats-content'),
        modelStatsModalClose: query('#model-stats-modal .modal-close'),
        modelStatsModalBackdrop: query('#model-stats-modal .modal-backdrop'),
        // Search elements
        searchInput: byId('search-input'),
        searchBtn: byId('search-btn'),
        clearSearchBtn: byId('clear-search-btn'),
        // Cross-subset modal elements
        crossSubsetModal: byId('cross-subset-modal'),
        crossSubsetModalClose: query('#cross-subset-modal .modal-close'),
        crossSubsetModalBackdrop: query('#cross-subset-modal .modal-backdrop'),
        crossSubsetCheckboxes: byId('cross-subset-checkboxes'),
        crossSubsetSelectAll: byId('cross-subset-select-all'),
        crossSubsetClearAll: byId('cross-subset-clear-all'),
        commonModelCount: byId('common-model-count'),
        unionModelCount: byId('union-model-count'),
        totalBattlesCount: byId('total-battles-count'),
        calculateMergedElo: byId('calculate-merged-elo'),
        crossSubsetResults: byId('cross-subset-results'),
        // Matrix modal elements
        viewMatrixBtn: byId('view-matrix'),
        matrixModal: byId('matrix-modal'),
        matrixContent: byId('matrix-content'),
        matrixSubsetName: byId('matrix-subset-name'),
        matrixModalClose: query('#matrix-modal .modal-close'),
        matrixModalBackdrop: query('#matrix-modal .modal-backdrop'),
        // ELO History modal elements
        viewEloHistoryBtn: byId('view-elo-history'),
        eloHistoryModal: byId('elo-history-modal'),
        eloHistoryContent: byId('elo-history-content'),
        eloHistoryLegend: byId('elo-history-legend'),
        eloHistoryGranularity: byId('elo-history-granularity'),
        eloHistoryModalClose: query('#elo-history-modal .modal-close'),
        eloHistoryModalBackdrop: query('#elo-history-modal .modal-backdrop'),
        // ELO by Source modal elements
        viewEloBySourceBtn: byId('view-elo-by-source'),
        eloBySourceModal: byId('elo-by-source-modal'),
        eloBySourceContent: byId('elo-by-source-content'),
        eloBySourceSubsetName: byId('elo-by-source-subset-name'),
        eloBySourceModalClose: query('#elo-by-source-modal .modal-close'),
        eloBySourceModalBackdrop: query('#elo-by-source-modal .modal-backdrop'),
    };
})();
227
+
228
+ // ========== API Functions ==========
229
/**
 * GET a URL and parse the response body as JSON.
 * @param {string} url - URL to request
 * @returns {Promise<any>} Parsed JSON body
 * @throws {Error} `HTTP <status>` when the response status is not OK
 */
async function fetchJSON(url) {
    const response = await fetch(url);
    if (response.ok) {
        return response.json();
    }
    throw new Error(`HTTP ${response.status}`);
}
236
+
237
+ // ========== Page Navigation Functions ==========
238
+
239
/**
 * Switch to a different page (overview or gallery).
 *
 * Records the page in `state.currentPage`, updates the nav highlight and page
 * visibility, loads the overview leaderboards when showing the overview, and
 * finally syncs the state to the URL.
 *
 * @param {string} page - 'overview' or 'gallery'
 */
function switchToPage(page) {
    state.currentPage = page;

    const isOverview = page === 'overview';
    const isGallery = page === 'gallery';

    // Nav links are optional, just like the page containers below: a missing
    // node must not abort the page switch.
    elements.navOverview?.classList.toggle('active', isOverview);
    elements.navGallery?.classList.toggle('active', isGallery);

    // Show/hide pages
    if (elements.overviewPage) {
        elements.overviewPage.style.display = isOverview ? 'block' : 'none';
    }
    if (elements.galleryPage) {
        elements.galleryPage.style.display = isGallery ? 'flex' : 'none';
    }

    // Gallery data is loaded when a subset/experiment is selected, so only the
    // overview needs an explicit load here.
    if (isOverview) {
        loadOverviewLeaderboards();
    }

    syncStateToURL();
}
266
+
267
/**
 * Open the gallery page on the given subset.
 *
 * Clears the selected experiment, mirrors the subset into the subset dropdown,
 * then kicks off loading of the subset info and its ELO leaderboard.
 *
 * @param {string} subset - Subset name to show
 */
function navigateToSubset(subset) {
    Object.assign(state, { subset, experiment: null });
    elements.subsetSelect.value = subset;

    switchToPage('gallery');
    loadSubsetInfo(subset);
    loadEloLeaderboard();
}
278
+
279
+ // ========== Overview Page Functions ==========
280
+
281
/**
 * Fetch the overview leaderboards and render them.
 *
 * Shows a loading placeholder in the overview container, stores the payload of
 * api/overview/leaderboards in `state.overviewData` and renders the table.
 * On failure the container shows an error placeholder instead.
 * Does nothing when the overview container is not on the page.
 */
async function loadOverviewLeaderboards() {
    const container = elements.overviewContent;
    if (!container) {
        return;
    }

    container.innerHTML = '<div class="loading">Loading leaderboards...</div>';

    try {
        state.overviewData = await fetchJSON('api/overview/leaderboards');
        renderOverviewTable();
    } catch (error) {
        console.error('Failed to load overview leaderboards:', error);
        container.innerHTML = '<div class="empty-state"><p>Failed to load leaderboard data</p></div>';
    }
}
295
+
296
/**
 * Render the overview table: one row per model, one ELO column per subset.
 *
 * Reads `state.overviewData` together with `state.overviewSortColumn` and
 * `state.overviewSortDirection`, writes the table into
 * `elements.overviewContent`, then attaches the handlers:
 *   - click on a header: sort by that column (toggles direction on repeat),
 *   - double-click on a subset header: open that subset's leaderboard modal,
 *   - click on a model cell: load stats for the model in the first subset that has it.
 *
 * Every server-provided string interpolated into markup (including `title`
 * attributes) goes through `escapeHtml`.
 */
function renderOverviewTable() {
    const data = state.overviewData;
    if (!data || !data.subsets || data.subsets.length === 0) {
        elements.overviewContent.innerHTML = '<div class="empty-state"><p>No subset data available</p></div>';
        return;
    }

    const rawSubsets = data.subsets;
    // Tolerate payloads that omit optional sections instead of throwing mid-render.
    const models = data.models ?? [];
    const subsetData = data.data ?? {};
    const subsetInfo = data.subset_info ?? {};

    // Sort subsets: basic, reasoning, multiref first, then others alphabetically
    const subsetOrder = ['basic', 'reasoning', 'multiref'];
    const subsets = [...rawSubsets].sort((a, b) => {
        const aIdx = subsetOrder.indexOf(a);
        const bIdx = subsetOrder.indexOf(b);
        if (aIdx !== -1 && bIdx !== -1) return aIdx - bIdx;
        if (aIdx !== -1) return -1;
        if (bIdx !== -1) return 1;
        return a.localeCompare(b);
    });

    // Sort models based on current sort settings
    const sortedModels = [...models].sort((a, b) => {
        if (state.overviewSortColumn === 'model') {
            const nameA = a.toLowerCase();
            const nameB = b.toLowerCase();
            return state.overviewSortDirection === 'asc'
                ? nameA.localeCompare(nameB)
                : nameB.localeCompare(nameA);
        }

        // Any other column name is a subset: sort by that subset's ELO.
        const subset = state.overviewSortColumn;
        const eloA = subsetData[subset]?.[a]?.elo ?? null;
        const eloB = subsetData[subset]?.[b]?.elo ?? null;

        // Models without a rating in this subset always go last.
        if (eloA === null && eloB === null) return 0;
        if (eloA === null) return 1;
        if (eloB === null) return -1;

        return state.overviewSortDirection === 'asc' ? eloA - eloB : eloB - eloA;
    });

    // Build table header
    const sortIcon = (col) => {
        if (state.overviewSortColumn !== col) return '';
        return state.overviewSortDirection === 'asc' ? ' ▲' : ' ▼';
    };
    const sortedClass = (col) => (state.overviewSortColumn === col ? 'sorted-' + state.overviewSortDirection : '');

    let headerHtml = `
        <th class="model-header sortable ${sortedClass('model')}"
            data-sort="model">Model${sortIcon('model')}</th>
    `;

    subsets.forEach(subset => {
        const info = subsetInfo[subset] || {};
        const safeSubset = escapeHtml(subset);
        headerHtml += `
            <th class="subset-header sortable ${sortedClass(subset)}"
                data-sort="${safeSubset}"
                data-subset="${safeSubset}"
                title="Click to view ${safeSubset} leaderboard">
                ${safeSubset}
                <span class="subset-header-info">${info.model_count || 0} models</span>
            </th>
        `;
    });

    // Build table body
    let bodyHtml = '';
    sortedModels.forEach((model, idx) => {
        const safeDisplayName = escapeHtml(getModelDisplayName(model));
        let rowHtml = `<td class="model-cell" data-model="${escapeHtml(model)}" title="View ${safeDisplayName} stats">`;

        // Add rank badge for top 3 when sorting by a subset column
        if (idx < 3 && state.overviewSortColumn !== 'model' && state.overviewSortDirection === 'desc') {
            rowHtml += `<span class="rank-badge rank-${idx + 1}">${idx + 1}</span>`;
        }
        rowHtml += `${safeDisplayName}</td>`;

        // Add ELO for each subset
        subsets.forEach(subset => {
            const modelData = subsetData[subset]?.[model];
            if (modelData) {
                const elo = Math.round(modelData.elo);
                rowHtml += `<td class="elo-cell" title="Rank #${modelData.rank}">${elo}</td>`;
            } else {
                rowHtml += `<td class="elo-cell no-data">-</td>`;
            }
        });

        bodyHtml += `<tr>${rowHtml}</tr>`;
    });

    elements.overviewContent.innerHTML = `
        <div class="overview-table-container">
            <table class="overview-table">
                <thead><tr>${headerHtml}</tr></thead>
                <tbody>${bodyHtml}</tbody>
            </table>
        </div>
    `;

    // Add event listeners for sorting
    elements.overviewContent.querySelectorAll('th.sortable').forEach(th => {
        th.addEventListener('click', () => {
            const sortCol = th.dataset.sort;
            if (state.overviewSortColumn === sortCol) {
                state.overviewSortDirection = state.overviewSortDirection === 'asc' ? 'desc' : 'asc';
            } else {
                state.overviewSortColumn = sortCol;
                state.overviewSortDirection = 'desc';
            }
            renderOverviewTable();
        });
    });

    // Double-click on a subset header opens that subset's leaderboard modal
    // (a single click is already taken by sorting).
    elements.overviewContent.querySelectorAll('th.subset-header').forEach(th => {
        th.addEventListener('dblclick', (e) => {
            e.stopPropagation();
            showSubsetLeaderboardModal(th.dataset.subset);
        });
    });

    // Add event listeners for model cell clicks
    elements.overviewContent.querySelectorAll('td.model-cell').forEach(td => {
        td.addEventListener('click', () => {
            const model = td.dataset.model;
            // Show model stats for the first subset that has this model
            const subsetWithModel = subsets.find(s => subsetData[s]?.[model]);
            if (subsetWithModel) {
                state.subset = subsetWithModel;
                loadModelStats(model);
            }
        });
    });
}
435
+
436
+ // ========== Cross-Subset Modal Functions ==========
437
+
438
/**
 * Open the cross-subset modal, lock page scrolling and load its subset list.
 * Does nothing when the modal is not on the page.
 */
function showCrossSubsetModal() {
    const modal = elements.crossSubsetModal;
    if (modal) {
        modal.classList.remove('hidden');
        document.body.style.overflow = 'hidden';
        loadCrossSubsetData();
    }
}
444
+
445
/**
 * Close the cross-subset modal and restore page scrolling.
 * Does nothing when the modal is not on the page.
 */
function hideCrossSubsetModal() {
    const modal = elements.crossSubsetModal;
    if (modal) {
        modal.classList.add('hidden');
        document.body.style.overflow = '';
    }
}
450
+
451
/**
 * Fetch the subset list from api/subsets, store it in
 * `state.crossSubsetState.subsets` and render the modal's checkboxes.
 * Failures are logged and otherwise ignored.
 */
async function loadCrossSubsetData() {
    try {
        const { subsets } = await fetchJSON('api/subsets');
        state.crossSubsetState.subsets = subsets;
        renderCrossSubsetCheckboxes();
    } catch (error) {
        console.error('Failed to load subsets for cross-subset modal:', error);
    }
}
460
+
461
/**
 * Render one checkbox per subset in the cross-subset modal.
 *
 * Boxes for subsets already in `state.crossSubsetState.selectedSubsets` start
 * checked. Toggling a box adds/removes its value from that set and refreshes
 * the cross-subset info. Does nothing when the container is not on the page.
 */
function renderCrossSubsetCheckboxes() {
    const container = elements.crossSubsetCheckboxes;
    if (!container) {
        return;
    }

    const { subsets, selectedSubsets } = state.crossSubsetState;
    const renderItem = (subset) => `
        <div class="checkbox-item">
            <input type="checkbox" id="cross-subset-${escapeHtml(subset)}" value="${escapeHtml(subset)}"
                ${selectedSubsets.has(subset) ? 'checked' : ''}>
            <label for="cross-subset-${escapeHtml(subset)}">${escapeHtml(subset)}</label>
        </div>
    `;
    container.innerHTML = subsets.map((subset) => renderItem(subset)).join('');

    // Keep the selection set in sync with the boxes.
    container.querySelectorAll('input[type="checkbox"]').forEach((checkbox) => {
        checkbox.addEventListener('change', () => {
            const selection = state.crossSubsetState.selectedSubsets;
            if (checkbox.checked) {
                selection.add(checkbox.value);
            } else {
                selection.delete(checkbox.value);
            }
            updateCrossSubsetInfo();
        });
    });
}
485
+
486
/**
 * Refresh the common-model / union-model / total-battle counters for the
 * currently selected subsets.
 *
 * With no subsets selected the counters are reset to '-'. Otherwise the info is
 * fetched from api/cross-subset/info, stored in
 * `state.crossSubsetState.subsetInfo` and shown in the counters. Each subset
 * name is URL-encoded, and a response is dropped when the selection changed
 * while the request was in flight, so a slow reply cannot overwrite the
 * counters of a newer selection.
 */
async function updateCrossSubsetInfo() {
    const selected = Array.from(state.crossSubsetState.selectedSubsets);

    if (selected.length === 0) {
        elements.commonModelCount.textContent = '-';
        elements.unionModelCount.textContent = '-';
        elements.totalBattlesCount.textContent = '-';
        return;
    }

    const requestedKey = selected.join(',');
    // Encode names individually so the comma separators stay literal.
    const subsetsParam = selected.map((name) => encodeURIComponent(name)).join(',');

    try {
        const data = await fetchJSON(`api/cross-subset/info?subsets=${subsetsParam}`);

        // Ignore stale responses: only apply data for the selection still on screen.
        const currentKey = Array.from(state.crossSubsetState.selectedSubsets).join(',');
        if (currentKey !== requestedKey) {
            return;
        }

        state.crossSubsetState.subsetInfo = data;

        elements.commonModelCount.textContent = data.common_models?.length || 0;
        elements.unionModelCount.textContent = data.all_models?.length || 0;
        elements.totalBattlesCount.textContent = data.total_battles || 0;
    } catch (error) {
        console.error('Failed to load cross-subset info:', error);
    }
}
507
+
508
/**
 * Request the merged ELO leaderboard for the selected subsets and render it.
 *
 * Alerts when no subset is selected. The model scope comes from the checked
 * `model-scope` radio input and defaults to 'all'. All query values are
 * URL-encoded. While the request runs a loading placeholder is shown; on
 * failure an error placeholder replaces it.
 */
async function calculateMergedEloForPage() {
    const selected = Array.from(state.crossSubsetState.selectedSubsets);

    if (selected.length === 0) {
        alert('Please select at least one subset');
        return;
    }

    const modelScope = document.querySelector('input[name="model-scope"]:checked')?.value || 'all';

    if (!elements.crossSubsetResults) return;
    elements.crossSubsetResults.innerHTML = '<div class="loading">Calculating merged ELO...</div>';

    // Encode names individually so the comma separators stay literal.
    const subsetsParam = selected.map((name) => encodeURIComponent(name)).join(',');
    const url = `api/cross-subset/elo?subsets=${subsetsParam}&model_scope=${encodeURIComponent(modelScope)}`;

    try {
        const data = await fetchJSON(url);
        renderCrossSubsetResults(data);
    } catch (error) {
        console.error('Failed to calculate merged ELO:', error);
        elements.crossSubsetResults.innerHTML = '<div class="empty-state"><p>Failed to calculate merged ELO</p></div>';
    }
}
529
+
530
/**
 * Render the merged ELO leaderboard returned by the cross-subset endpoint.
 *
 * Rows are ranked by their position in `data.leaderboard`; the first three get
 * a `rank-N` class. An empty or missing leaderboard shows a placeholder.
 * Does nothing when the results container is not on the page.
 *
 * @param {{leaderboard: Array<Object>, subsets: string[], total_battles: number}} data
 */
function renderCrossSubsetResults(data) {
    const target = elements.crossSubsetResults;
    if (!target) {
        return;
    }

    const { leaderboard, subsets, total_battles } = data;

    if (!leaderboard || leaderboard.length === 0) {
        target.innerHTML = '<div class="empty-state"><p>No results available</p></div>';
        return;
    }

    const renderRow = (entry, position) => {
        const rank = position + 1;
        const rankClass = rank <= 3 ? `rank-${rank}` : '';
        const winRatePercent = (entry.win_rate * 100).toFixed(1);
        // Fall back to the sum of outcomes when the entry carries no total.
        const battleCount = entry.total || (entry.wins + entry.losses + entry.ties);

        return `
            <tr>
                <td class="rank-cell ${rankClass}">#${rank}</td>
                <td class="model-cell">${escapeHtml(getModelDisplayName(entry.model))}</td>
                <td class="elo-cell">${Math.round(entry.elo)}</td>
                <td class="stat-cell wins">${entry.wins}</td>
                <td class="stat-cell losses">${entry.losses}</td>
                <td class="stat-cell ties">${entry.ties}</td>
                <td class="stat-cell">${battleCount}</td>
                <td class="win-rate-cell">${winRatePercent}%</td>
            </tr>
        `;
    };
    const tableRows = leaderboard.map((entry, position) => renderRow(entry, position)).join('');

    target.innerHTML = `
        <h3>Merged ELO Results</h3>
        <div class="merged-elo-info">
            <p>Combined ELO from ${subsets.length} subset(s): ${escapeHtml(subsets.join(', '))}</p>
            <p>Total battles: ${total_battles}</p>
        </div>
        <table class="merged-leaderboard">
            <thead>
                <tr>
                    <th>Rank</th>
                    <th>Model</th>
                    <th>ELO</th>
                    <th>Wins</th>
                    <th>Losses</th>
                    <th>Ties</th>
                    <th>Total</th>
                    <th>Win %</th>
                </tr>
            </thead>
            <tbody>${tableRows}</tbody>
        </table>
    `;
}
582
+
583
+ // ========== Subset Data Loading Functions ==========
584
+
585
/**
 * Fetch the subset list from api/subsets and fill the subset dropdown with a
 * placeholder option followed by one option per subset.
 *
 * @returns {Promise<{subsets: string[]}>} The fetched payload, or `{ subsets: [] }` when loading fails
 */
async function loadSubsets() {
    try {
        const data = await fetchJSON('api/subsets');
        const select = elements.subsetSelect;
        select.innerHTML = '<option value="">Select subset...</option>';

        for (const subset of data.subsets) {
            const option = document.createElement('option');
            option.value = subset;
            option.textContent = subset;
            select.appendChild(option);
        }
        return data;
    } catch (error) {
        console.error('Failed to load subsets:', error);
        return { subsets: [] };
    }
}
601
+
602
/**
 * Load a subset's metadata and refresh every control that depends on it:
 * the experiment dropdown, the model checkboxes (sidebar and prompts view),
 * the prompt source filter and the image-count range slider.
 *
 * The subset name is URL-encoded before it is placed in the request path, and
 * missing `experiments` / `models` lists are treated as empty so one absent
 * field does not abort the rest of the update. Failures are logged.
 *
 * @param {string} subset - Subset name
 */
async function loadSubsetInfo(subset) {
    try {
        const data = await fetchJSON(`api/subsets/${encodeURIComponent(subset)}/info`);
        const experiments = data.experiments ?? [];
        const models = data.models ?? [];

        // Sort experiments by date suffix (descending - newest first)
        // Format: xxx_yyyymmdd; names without a date suffix sort last.
        const dateSuffix = (name) => name.match(/_(\d{8})$/)?.[1] || '00000000';
        const sortedExperiments = [...experiments].sort((a, b) => dateSuffix(b).localeCompare(dateSuffix(a)));

        // Update experiments dropdown
        elements.expSelect.innerHTML = '<option value="">Select experiment...</option>';
        // Add "Show All" option (always show if there are experiments)
        if (sortedExperiments.length >= 1) {
            const allOption = document.createElement('option');
            allOption.value = '__all__';
            allOption.textContent = '📊 Show All';
            elements.expSelect.appendChild(allOption);
        }
        sortedExperiments.forEach(exp => {
            const option = document.createElement('option');
            option.value = exp;
            // Non-GenArena_ experiments get indented with two non-breaking spaces
            const isGenArena = exp.startsWith('GenArena_');
            option.textContent = isGenArena ? exp : '\u00A0\u00A0' + exp;
            elements.expSelect.appendChild(option);
        });
        elements.expSelect.disabled = false;

        // Update model checkboxes
        state.models = models;
        renderModelCheckboxes(models);

        // Also update prompts model filter checkboxes
        renderPromptsModelCheckboxes();

        // Update prompt source filter
        state.promptSources = data.prompt_sources || [];
        updatePromptSourceFilter(state.promptSources);

        // Update image range filter
        state.imageRange = {
            min: data.min_input_images || 1,
            max: data.max_input_images || 1
        };
        updateImageRangeSlider();

    } catch (error) {
        console.error('Failed to load subset info:', error);
    }
}
655
+
656
/**
 * Rebuild the prompt source dropdown.
 *
 * The dropdown always starts with an "All sources" entry. When sources exist,
 * one option per source is appended and the filter group is shown; otherwise
 * the filter group is hidden.
 *
 * @param {string[]} sources - Available prompt sources
 */
function updatePromptSourceFilter(sources) {
    const select = elements.promptSourceFilter;
    select.innerHTML = '<option value="">All sources</option>';

    if (!(sources.length > 0)) {
        elements.promptSourceFilterGroup.style.display = 'none';
        return;
    }

    sources.forEach((source) => {
        const option = document.createElement('option');
        option.value = source;
        option.textContent = source;
        select.appendChild(option);
    });
    elements.promptSourceFilterGroup.style.display = 'block';
}
671
+
672
/**
 * Render one checkbox per model in the sidebar filter.
 *
 * The checkbox value is the original model name while the label shows the
 * display name. Each box reports changes to `updateModelSelection`, and the
 * "selected" counter is refreshed afterwards.
 *
 * @param {string[]} models - Original model names
 */
function renderModelCheckboxes(models) {
    const container = elements.modelCheckboxes;

    const renderItem = (model) => `
        <div class="checkbox-item">
            <input type="checkbox" id="model-${escapeHtml(model)}" value="${escapeHtml(model)}">
            <label for="model-${escapeHtml(model)}">${escapeHtml(getModelDisplayName(model))}</label>
        </div>
    `;
    container.innerHTML = models.map((model) => renderItem(model)).join('');

    // Re-evaluate the dependent filters whenever a box is toggled.
    container.querySelectorAll('input[type="checkbox"]').forEach((checkbox) => {
        checkbox.addEventListener('change', updateModelSelection);
    });

    updateModelCount();
}
687
+
688
/**
 * React to a change of the selected models: refresh the "selected" counter and
 * rebuild the result filter to match the selection size.
 *
 *   - 1 model:  filter by that model's wins / losses / ties
 *   - 2 models: filter by which of the two won, or ties; the option value is the
 *               original model name and the label uses the display name, matching
 *               the labels of the model checkboxes
 *   - otherwise the result filter is hidden and cleared
 */
function updateModelSelection() {
    const selected = getSelectedModels();
    updateModelCount();

    // Update result filter based on selection
    if (selected.length === 1) {
        // Single model: show wins/losses/ties
        elements.resultFilterGroup.style.display = 'block';
        elements.resultFilter.innerHTML = `
            <option value="">All results</option>
            <option value="wins">Wins</option>
            <option value="losses">Losses</option>
            <option value="ties">Ties</option>
        `;
    } else if (selected.length === 2) {
        // Two models: show filter by winner
        const winnerOptions = selected
            .map((model) => `<option value="${escapeHtml(model)}">${escapeHtml(getModelDisplayName(model))} wins</option>`)
            .join('\n            ');
        elements.resultFilterGroup.style.display = 'block';
        elements.resultFilter.innerHTML = `
            <option value="">All results</option>
            ${winnerOptions}
            <option value="ties">Ties</option>
        `;
    } else {
        elements.resultFilterGroup.style.display = 'none';
        elements.resultFilter.value = '';
    }
}
716
+
717
/**
 * Collect the values of all checked model checkboxes in the sidebar filter.
 * @returns {string[]} Values of the checked boxes, in document order
 */
function getSelectedModels() {
    const checkedBoxes = elements.modelCheckboxes.querySelectorAll('input[type="checkbox"]:checked');
    return Array.from(checkedBoxes, (checkbox) => checkbox.value);
}
721
+
722
/**
 * Show how many models are currently checked, as "(N selected)".
 */
function updateModelCount() {
    const { length: selectedCount } = getSelectedModels();
    elements.modelCount.textContent = `(${selectedCount} selected)`;
}
726
+
727
/**
 * Configure the min/max image-count sliders from `state.imageRange`.
 *
 * When the range is degenerate (min equals max) the whole filter group is
 * hidden and nothing else is touched. Otherwise both sliders get the same
 * bounds, the min slider is moved to the lower bound, the max slider to the
 * upper bound, and the labels and range display are refreshed.
 */
function updateImageRangeSlider() {
    const lower = state.imageRange.min;
    const upper = state.imageRange.max;
    const filterGroup = elements.imageCountFilterGroup;

    // Nothing to filter on when every sample has the same image count.
    if (lower === upper) {
        filterGroup.style.display = 'none';
        return;
    }
    filterGroup.style.display = 'block';

    // Bounds are assigned before the value for each slider.
    const configureSlider = (slider, position) => {
        slider.min = lower;
        slider.max = upper;
        slider.value = position;
    };
    configureSlider(elements.minImagesSlider, lower);
    configureSlider(elements.maxImagesSlider, upper);

    elements.minImagesLabel.textContent = lower;
    elements.maxImagesLabel.textContent = upper;
    updateImageRangeDisplay();
}
752
+
753
/**
 * Refresh the "min-max" text that mirrors the two image-count sliders.
 *
 * Slider values are strings; they are parsed as base-10 integers
 * explicitly so the result never depends on radix auto-detection.
 */
function updateImageRangeDisplay() {
    const minVal = Number.parseInt(elements.minImagesSlider.value, 10);
    const maxVal = Number.parseInt(elements.maxImagesSlider.value, 10);
    elements.imageRangeDisplay.textContent = `${minVal}-${maxVal}`;
}
758
+
759
/**
 * Load and display head-to-head statistics for the two selected models.
 *
 * The panel is only meaningful when exactly two models are selected and a
 * subset and experiment are chosen; otherwise it is hidden and `state.h2h`
 * is cleared. The same reset is applied when the request or rendering
 * fails, so `state.h2h` never keeps data for a panel that is not shown.
 *
 * @returns {Promise<void>}
 */
async function loadH2HStats() {
    const models = state.filters.models;

    const hidePanel = () => {
        elements.h2hSection.style.display = 'none';
        state.h2h = null;
    };

    if (models.length !== 2 || !state.subset || !state.experiment) {
        hidePanel();
        return;
    }

    try {
        const url = `api/subsets/${state.subset}/experiments/${state.experiment}/h2h?model_a=${encodeURIComponent(models[0])}&model_b=${encodeURIComponent(models[1])}`;
        const data = await fetchJSON(url);
        state.h2h = data;
        renderH2HStats(data);
        elements.h2hSection.style.display = 'block';
    } catch (error) {
        console.error('Failed to load H2H stats:', error);
        // Keep state consistent with the hidden panel (no stale stats).
        hidePanel();
    }
}
778
+
779
/**
 * Render the head-to-head panel for two models.
 *
 * Shows both model labels, a stacked bar whose segments are proportional to
 * wins of A / ties / wins of B (empty segments are omitted), the total
 * battle count and the three rates formatted with one decimal.
 *
 * @param {object} h2h - Head-to-head payload; the fields read here are
 *     model_a, model_b, wins_a, wins_b, ties, total, win_rate_a,
 *     win_rate_b and tie_rate.
 */
function renderH2HStats(h2h) {
    const formatRate = (rate) => (rate * 100).toFixed(1);

    // Bar widths are derived from the raw counts, not from the rates.
    const outcomeCount = h2h.wins_a + h2h.wins_b + h2h.ties;
    const barSegment = (cssClass, count) => {
        const width = outcomeCount > 0 ? (count / outcomeCount * 100) : 0;
        return width > 0 ? `<div class="${cssClass}" style="width: ${width}%">${count}</div>` : '';
    };

    const labelFor = (model) =>
        `<span class="h2h-label" title="${escapeHtml(model)}">${escapeHtml(getModelDisplayName(model))}</span>`;

    elements.h2hPanel.innerHTML = `
        <div class="h2h-labels">
            ${labelFor(h2h.model_a)}
            ${labelFor(h2h.model_b)}
        </div>
        <div class="h2h-bar">
            ${barSegment('h2h-bar-a', h2h.wins_a)}
            ${barSegment('h2h-bar-tie', h2h.ties)}
            ${barSegment('h2h-bar-b', h2h.wins_b)}
        </div>
        <div class="h2h-stats-row">
            <span>Total battles</span>
            <span class="value">${h2h.total}</span>
        </div>
        <div class="h2h-stats-row">
            <span>Win rate</span>
            <span class="value">${formatRate(h2h.win_rate_a)}% / ${formatRate(h2h.tie_rate)}% / ${formatRate(h2h.win_rate_b)}%</span>
        </div>
    `;
}
810
+
811
/**
 * Fetch one page of battles for the current subset/experiment and render it.
 *
 * Active filters are sent as query parameters. When a search query is set,
 * it is added as `q` and the `search` endpoint is used instead of
 * `battles`. On success the pagination state, the battle list, the stats
 * panel and the head-to-head panel are refreshed; on failure an error
 * placeholder is shown in the list.
 *
 * @returns {Promise<void>}
 */
async function loadBattles() {
    if (!state.subset || !state.experiment) {
        return;
    }

    const { filters } = state;
    const query = new URLSearchParams({
        page: state.page,
        page_size: state.pageSize,
    });

    // [parameter name, whether to send it, value] - order is the wire order.
    const hasModels = Boolean(filters.models && filters.models.length > 0);
    const optionalParams = [
        ['models', hasModels, hasModels ? filters.models.join(',') : ''],
        ['result', Boolean(filters.result), filters.result],
        ['consistent', filters.consistent !== null, filters.consistent],
        ['min_images', filters.minImages !== null, filters.minImages],
        ['max_images', filters.maxImages !== null, filters.maxImages],
        ['prompt_source', Boolean(filters.promptSource), filters.promptSource],
    ];
    for (const [name, shouldSend, value] of optionalParams) {
        if (shouldSend) {
            query.append(name, value);
        }
    }

    // A search query switches to the search endpoint.
    let endpoint = 'battles';
    if (state.searchQuery) {
        query.append('q', state.searchQuery);
        endpoint = 'search';
    }
    const url = `api/subsets/${state.subset}/experiments/${state.experiment}/${endpoint}?${query}`;

    // Show loading state
    elements.battleList.innerHTML = '<div class="loading">Loading battles</div>';

    try {
        const response = await fetchJSON(url);

        state.totalPages = response.total_pages;
        state.totalBattles = response.total;

        renderBattles(response.battles);
        updatePagination();
        updateStats();
        loadH2HStats();
    } catch (error) {
        console.error('Failed to load battles:', error);
        elements.battleList.innerHTML = '<div class="empty-state"><p>Failed to load battles</p></div>';
    }
}
869
+
870
/**
 * Fetch one page of prompts for the current subset/experiment and render it.
 *
 * Image-count, prompt-source and prompts-view model filters are sent as
 * query parameters. When a search query is set, it is added as `q` and the
 * `search/prompts` endpoint is used instead of `prompts`. On success the
 * pagination state, prompt list and stats panel are refreshed; on failure
 * an error placeholder is shown in the list.
 *
 * @returns {Promise<void>}
 */
async function loadPrompts() {
    console.log('loadPrompts called, state:', { subset: state.subset, experiment: state.experiment, viewMode: state.viewMode });
    if (!state.subset || !state.experiment) {
        console.log('loadPrompts: subset or experiment not selected, returning');
        return;
    }

    const { filters } = state;
    const query = new URLSearchParams({
        page: state.page,
        page_size: state.promptsPageSize,
    });

    // [parameter name, whether to send it, value] - order is the wire order.
    const modelFilter = state.promptsModelFilter;
    const hasModelFilter = Boolean(modelFilter && modelFilter.length > 0);
    const optionalParams = [
        ['min_images', filters.minImages !== null, filters.minImages],
        ['max_images', filters.maxImages !== null, filters.maxImages],
        ['prompt_source', Boolean(filters.promptSource), filters.promptSource],
        ['models', hasModelFilter, hasModelFilter ? modelFilter.join(',') : ''],
    ];
    for (const [name, shouldSend, value] of optionalParams) {
        if (shouldSend) {
            query.append(name, value);
        }
    }

    // A search query switches to the prompt-search endpoint.
    let endpoint = 'prompts';
    if (state.searchQuery) {
        query.append('q', state.searchQuery);
        endpoint = 'search/prompts';
    }
    const url = `api/subsets/${state.subset}/experiments/${state.experiment}/${endpoint}?${query}`;

    // Show loading state
    elements.promptsList.innerHTML = '<div class="loading">Loading prompts</div>';

    try {
        const response = await fetchJSON(url);

        state.totalPages = response.total_pages;
        state.totalBattles = response.total;

        renderPrompts(response.prompts);
        updatePagination();
        updateStats();
    } catch (error) {
        console.error('Failed to load prompts:', error);
        elements.promptsList.innerHTML = '<div class="empty-state"><p>Failed to load prompts</p></div>';
    }
}
924
+
925
/**
 * Load the full record of one battle and open it in the detail modal.
 *
 * The battle id is built as "<model_a>_vs_<model_b>:<sample_index>" and is
 * percent-encoded before being placed in the URL path, because model names
 * may contain characters such as '#', '?' or spaces that would otherwise
 * corrupt the request URL.
 *
 * The input-image count is fetched in parallel with the battle record. It
 * is best-effort: any failure (or a falsy count) falls back to 1.
 *
 * @param {object} battle - Battle summary; model_a, model_b and
 *     sample_index are read here.
 * @returns {Promise<void>}
 */
async function loadBattleDetail(battle) {
    const battleId = `${battle.model_a}_vs_${battle.model_b}:${battle.sample_index}`;
    const detailUrl = `api/subsets/${state.subset}/experiments/${state.experiment}/battles/${encodeURIComponent(battleId)}`;
    const countUrl = `api/subsets/${state.subset}/samples/${battle.sample_index}/input_count`;

    // Never rejects: a missing count must not block the detail view.
    const fetchInputImageCount = async () => {
        try {
            const countData = await fetchJSON(countUrl);
            return countData.count || 1;
        } catch (e) {
            // Ignore, default to 1
            return 1;
        }
    };

    try {
        const [data, inputImageCount] = await Promise.all([
            fetchJSON(detailUrl),
            fetchInputImageCount(),
        ]);

        renderDetailModal(data, inputImageCount);
        showModal();
    } catch (error) {
        console.error('Failed to load battle detail:', error);
    }
}
947
+
948
/**
 * Refresh the statistics panel for the current subset.
 *
 * When an experiment is selected it is passed as the `exp_name` query
 * parameter. The value is percent-encoded so that names containing
 * characters such as '+', '&' or '#' reach the server unchanged.
 * Failures are logged and leave the panel untouched.
 *
 * @returns {Promise<void>}
 */
async function updateStats() {
    if (!state.subset) return;

    try {
        const params = state.experiment ? `?exp_name=${encodeURIComponent(state.experiment)}` : '';
        const data = await fetchJSON(`api/subsets/${state.subset}/stats${params}`);

        const consistencyRate = (data.consistency_rate * 100).toFixed(1);

        elements.statsPanel.innerHTML = `
            <div class="stat-item">
                <span class="stat-label">Total Battles</span>
                <span class="stat-value">${data.total_battles}</span>
            </div>
            <div class="stat-item">
                <span class="stat-label">Consistent</span>
                <span class="stat-value">${data.consistent_battles} (${consistencyRate}%)</span>
            </div>
            <div class="stat-item">
                <span class="stat-label">Ties</span>
                <span class="stat-value">${data.tie_battles}</span>
            </div>
            <div class="stat-item">
                <span class="stat-label">Models</span>
                <span class="stat-value">${data.models.length}</span>
            </div>
        `;
    } catch (error) {
        console.error('Failed to load stats:', error);
    }
}
979
+
980
+ // ========== ELO Leaderboard Functions ==========
981
/**
 * Populate the sidebar ELO ranking for the current subset.
 *
 * Without a subset a "select a subset" placeholder is shown. Otherwise the
 * leaderboard is fetched and handed to renderEloSidebar(); a failure is
 * logged and replaced by an error placeholder.
 *
 * @returns {Promise<void>}
 */
async function loadEloLeaderboard() {
    const panel = elements.eloPanel;

    if (!state.subset) {
        panel.innerHTML = '<p class="placeholder">Select a subset to view rankings</p>';
        return;
    }

    try {
        const { leaderboard } = await fetchJSON(`api/subsets/${state.subset}/leaderboard`);
        renderEloSidebar(leaderboard);
    } catch (error) {
        console.error('Failed to load ELO leaderboard:', error);
        panel.innerHTML = '<p class="placeholder">Failed to load rankings</p>';
    }
}
995
+
996
/**
 * Render the compact ELO ranking in the sidebar.
 *
 * Every leaderboard entry becomes one row with rank, a middle-truncated
 * display name, a bar and the rounded ELO. Bar widths are scaled linearly
 * between the lowest and highest ELO in the list, from 30% up to 100%.
 * Clicking a row opens the stats of that model.
 *
 * @param {Array<object>} leaderboard - Entries; `model`, `elo` and `rank`
 *     are read here.
 */
function renderEloSidebar(leaderboard) {
    if (!leaderboard || leaderboard.length === 0) {
        elements.eloPanel.innerHTML = '<p class="placeholder">No ELO data available</p>';
        return;
    }

    // ELO span used for scaling; falls back to 1 when all ratings are equal.
    const ratings = leaderboard.map((entry) => entry.elo);
    const lowest = Math.min(...ratings);
    const highest = Math.max(...ratings);
    const span = highest - lowest || 1;

    const rows = leaderboard.map((entry) => {
        // Min 30%, max 100%
        const widthPercent = ((entry.elo - lowest) / span * 70 + 30);
        const podiumClass = entry.rank <= 3 ? `rank-${entry.rank}` : '';
        const shortName = truncateMiddle(getModelDisplayName(entry.model), 10);

        return `
            <div class="elo-item" data-model="${escapeHtml(entry.model)}" title="Click to view details">
                <span class="elo-rank ${podiumClass}">#${entry.rank}</span>
                <span class="elo-model-name" title="${escapeHtml(entry.model)}">${escapeHtml(shortName)}</span>
                <div class="elo-bar-container">
                    <div class="elo-bar" style="width: ${widthPercent.toFixed(1)}%"></div>
                </div>
                <span class="elo-value">${Math.round(entry.elo)}</span>
            </div>
        `;
    });
    elements.eloPanel.innerHTML = rows.join('');

    // Clicking a row shows the details of its model.
    for (const row of elements.eloPanel.querySelectorAll('.elo-item')) {
        row.addEventListener('click', () => {
            loadModelStats(row.dataset.model);
        });
    }
}
1035
+
1036
/**
 * Fetch the statistics of one model in the current subset and show them in
 * the model-stats modal. Does nothing when no subset is selected; request
 * failures are only logged.
 *
 * @param {string} modelName - Raw model id; percent-encoded for the URL.
 * @returns {Promise<void>}
 */
async function loadModelStats(modelName) {
    if (!state.subset) {
        return;
    }

    const statsUrl = `api/subsets/${state.subset}/models/${encodeURIComponent(modelName)}/stats`;
    try {
        const stats = await fetchJSON(statsUrl);
        renderModelStatsModal(stats);
        showModelStatsModal();
    } catch (error) {
        console.error('Failed to load model stats:', error);
    }
}
1047
+
1048
/**
 * Fill the model-stats modal with a summary header and, when available, a
 * per-opponent table.
 *
 * @param {object} data - Model statistics; the fields read here are model,
 *     elo, wins, losses, ties, win_rate, total_battles and the optional
 *     vs_stats array (opponent, opponent_elo, wins, losses, ties,
 *     win_rate per entry).
 */
function renderModelStatsModal(data) {
    const asPercent = (rate) => (rate * 100).toFixed(1);

    // One table row per opponent.
    const opponentRow = (vs) => {
        const rate = asPercent(vs.win_rate);
        return `
            <tr>
                <td class="opponent-cell">
                    ${escapeHtml(getModelDisplayName(vs.opponent))}
                    <span class="opponent-elo">(${Math.round(vs.opponent_elo)})</span>
                </td>
                <td class="wlt-cell">
                    <span class="wins">${vs.wins}</span> /
                    <span class="losses">${vs.losses}</span> /
                    <span class="ties">${vs.ties}</span>
                </td>
                <td>
                    <div class="win-rate-bar">
                        <div class="win-rate-bar-bg">
                            <div class="win-rate-bar-fill" style="width: ${rate}%"></div>
                        </div>
                        <span class="win-rate-text">${rate}%</span>
                    </div>
                </td>
            </tr>
        `;
    };

    const hasOpponents = Boolean(data.vs_stats && data.vs_stats.length > 0);
    const vsStatsHtml = !hasOpponents ? '' : `
        <div class="vs-stats-section">
            <h3>Win Rate vs Opponents</h3>
            <table class="vs-stats-table">
                <thead>
                    <tr>
                        <th>Opponent</th>
                        <th>W / L / T</th>
                        <th>Win Rate</th>
                    </tr>
                </thead>
                <tbody>
                    ${data.vs_stats.map((vs) => opponentRow(vs)).join('')}
                </tbody>
            </table>
        </div>
    `;

    // [label, CSS classes of the value element, value] for the summary tiles.
    const summaryTiles = [
        ['ELO Rating', 'stat-value elo-value', Math.round(data.elo)],
        ['Wins', 'stat-value wins-value', data.wins],
        ['Losses', 'stat-value losses-value', data.losses],
        ['Ties', 'stat-value ties-value', data.ties],
        ['Win Rate', 'stat-value', `${asPercent(data.win_rate)}%`],
        ['Total Battles', 'stat-value', data.total_battles],
    ];
    const summaryHtml = summaryTiles.map(([label, valueClass, value]) => `
                <div class="model-stat-item">
                    <div class="stat-label">${label}</div>
                    <div class="${valueClass}">${value}</div>
                </div>
    `).join('');

    elements.modelStatsContent.innerHTML = `
        <div class="model-stats-header">
            <h2>${escapeHtml(getModelDisplayName(data.model))}</h2>
            <div class="model-stats-summary">
                ${summaryHtml}
            </div>
        </div>
        ${vsStatsHtml}
    `;
}
1128
+
1129
/**
 * Reveal the model-stats modal and lock page scrolling while it is open.
 */
function showModelStatsModal() {
    const { modelStatsModal } = elements;
    modelStatsModal.classList.remove('hidden');
    document.body.style.overflow = 'hidden';
}
1133
+
1134
/**
 * Hide the model-stats modal and restore page scrolling.
 */
function hideModelStatsModal() {
    const { modelStatsModal } = elements;
    modelStatsModal.classList.add('hidden');
    document.body.style.overflow = '';
}
1138
+
1139
/**
 * Fetch the leaderboard of the current subset and open it in the full
 * leaderboard modal, labelled with the subset name. Does nothing when no
 * subset is selected; request failures are only logged.
 *
 * @returns {Promise<void>}
 */
async function loadFullLeaderboard() {
    if (!state.subset) {
        return;
    }

    try {
        const { leaderboard } = await fetchJSON(`api/subsets/${state.subset}/leaderboard`);
        renderFullLeaderboard(leaderboard);
        elements.leaderboardSubsetName.textContent = state.subset;
        showLeaderboardModal();
    } catch (error) {
        console.error('Failed to load full leaderboard:', error);
    }
}
1151
+
1152
/**
 * Render the full leaderboard table inside the leaderboard modal.
 *
 * Each entry becomes one row with rank, display name, rounded ELO,
 * win/loss/tie counts, total battles and a win-rate bar. Clicking a row
 * closes the leaderboard modal and opens the stats of that model.
 *
 * @param {Array<object>} leaderboard - Entries; the fields read here are
 *     model, rank, elo, wins, losses, ties, total_battles and win_rate.
 */
function renderFullLeaderboard(leaderboard) {
    if (!leaderboard || leaderboard.length === 0) {
        elements.leaderboardContent.innerHTML = '<p class="placeholder">No ELO data available</p>';
        return;
    }

    const tableRow = (entry) => {
        const podiumClass = entry.rank <= 3 ? `rank-${entry.rank}` : '';
        const rate = (entry.win_rate * 100).toFixed(1);
        return `
            <tr data-model="${escapeHtml(entry.model)}">
                <td class="rank-cell ${podiumClass}">#${entry.rank}</td>
                <td class="model-cell">${escapeHtml(getModelDisplayName(entry.model))}</td>
                <td class="elo-cell">${Math.round(entry.elo)}</td>
                <td class="stat-cell" style="color: var(--accent-green)">${entry.wins}</td>
                <td class="stat-cell" style="color: var(--accent-red)">${entry.losses}</td>
                <td class="stat-cell" style="color: var(--accent-yellow)">${entry.ties}</td>
                <td class="stat-cell">${entry.total_battles}</td>
                <td class="win-rate-cell">
                    <div class="win-rate-bar">
                        <div class="win-rate-bar-bg">
                            <div class="win-rate-bar-fill" style="width: ${rate}%"></div>
                        </div>
                        <span class="win-rate-text">${rate}%</span>
                    </div>
                </td>
            </tr>
        `;
    };
    const bodyRows = leaderboard.map((entry) => tableRow(entry)).join('');

    elements.leaderboardContent.innerHTML = `
        <table class="leaderboard-table">
            <thead>
                <tr>
                    <th>Rank</th>
                    <th>Model</th>
                    <th>ELO</th>
                    <th>Wins</th>
                    <th>Losses</th>
                    <th>Ties</th>
                    <th>Total</th>
                    <th class="win-rate-cell">Win Rate</th>
                </tr>
            </thead>
            <tbody>
                ${bodyRows}
            </tbody>
        </table>
    `;

    // Clicking a row swaps the leaderboard modal for that model's details.
    for (const row of elements.leaderboardContent.querySelectorAll('tbody tr')) {
        row.addEventListener('click', () => {
            const chosenModel = row.dataset.model;
            hideLeaderboardModal();
            loadModelStats(chosenModel);
        });
    }
}
1209
+
1210
/**
 * Reveal the full leaderboard modal and lock page scrolling while it is
 * open.
 */
function showLeaderboardModal() {
    const { leaderboardModal } = elements;
    leaderboardModal.classList.remove('hidden');
    document.body.style.overflow = 'hidden';
}
1214
+
1215
/**
 * Hide the full leaderboard modal and restore page scrolling.
 */
function hideLeaderboardModal() {
    const { leaderboardModal } = elements;
    leaderboardModal.classList.add('hidden');
    document.body.style.overflow = '';
}
1219
+
1220
+ // ========== Render Functions ==========
1221
/**
 * Render a page of battle cards into the battle list and wire up their
 * interactions.
 *
 * - Clicking a card (anywhere except its favorite button) opens the battle
 *   detail. Cards are matched to battles by position, so the n-th
 *   `.battle-card` element corresponds to `battles[n]`.
 * - Clicking a favorite button toggles the favorite for that sample and
 *   updates the button's star, class and tooltip.
 *
 * @param {Array<object>} battles - Battles of the current page; an empty or
 *     missing list shows a "No battles found" placeholder.
 */
function renderBattles(battles) {
    if (!battles || battles.length === 0) {
        elements.battleList.innerHTML = '<div class="empty-state"><p>No battles found</p></div>';
        return;
    }

    elements.battleList.innerHTML = battles.map(battle => renderBattleCard(battle)).join('');

    // Add click handlers for battle detail
    elements.battleList.querySelectorAll('.battle-card').forEach((card, index) => {
        // Don't open detail when clicking favorite button
        card.addEventListener('click', (e) => {
            if (!e.target.closest('.btn-favorite-toggle')) {
                loadBattleDetail(battles[index]);
            }
        });
    });

    // Add click handlers for favorite buttons
    elements.battleList.querySelectorAll('.btn-favorite-toggle').forEach(btn => {
        btn.addEventListener('click', (e) => {
            e.stopPropagation();
            // data-* attributes are strings; always parse as base 10.
            const sampleIndex = Number.parseInt(btn.dataset.sampleIndex, 10);
            const instruction = btn.dataset.instruction || '';
            const added = toggleFavorite(state.subset, state.experiment, sampleIndex, instruction);

            // Update button appearance
            btn.classList.toggle('favorited', added);
            btn.textContent = added ? '★' : '☆';
            btn.title = added ? 'Remove from favorites' : 'Add to favorites';
        });
    });
}
1254
+
1255
/**
 * Render a page of prompt cards into the prompts list and wire up their
 * interactions.
 *
 * - Clicking an input or model image opens it in the lightbox.
 * - Clicking a favorite button toggles the favorite for that sample and
 *   updates the button's star, class and tooltip.
 * - Clicking a clickable model name opens that model's battles for the
 *   sample, using the subset stored on the element and the currently
 *   selected experiment.
 *
 * @param {Array<object>} prompts - Prompts of the current page; an empty or
 *     missing list shows a "No prompts found" placeholder.
 */
function renderPrompts(prompts) {
    if (!prompts || prompts.length === 0) {
        elements.promptsList.innerHTML = '<div class="empty-state"><p>No prompts found</p></div>';
        return;
    }

    elements.promptsList.innerHTML = prompts.map(prompt => renderPromptCard(prompt)).join('');

    // Add click handlers for images (lightbox)
    elements.promptsList.querySelectorAll('.prompt-input-image img, .prompt-model-image img').forEach(img => {
        img.addEventListener('click', (e) => {
            e.stopPropagation();
            openLightbox(img.src, img.alt || img.dataset.label || '');
        });
    });

    // Add click handlers for favorite buttons
    elements.promptsList.querySelectorAll('.btn-favorite-toggle').forEach(btn => {
        btn.addEventListener('click', (e) => {
            e.stopPropagation();
            // data-* attributes are strings; always parse as base 10.
            const sampleIndex = Number.parseInt(btn.dataset.sampleIndex, 10);
            const instruction = btn.dataset.instruction || '';
            const added = toggleFavorite(state.subset, state.experiment, sampleIndex, instruction);

            // Update button appearance
            btn.classList.toggle('favorited', added);
            btn.textContent = added ? '★' : '☆';
            btn.title = added ? 'Remove from favorites' : 'Add to favorites';
        });
    });

    // Add click handlers for model names to show battle details
    elements.promptsList.querySelectorAll('.prompt-model-name.clickable').forEach(nameEl => {
        nameEl.addEventListener('click', (e) => {
            e.stopPropagation();
            const model = nameEl.dataset.model;
            const sampleIndex = Number.parseInt(nameEl.dataset.sampleIndex, 10);
            const subset = nameEl.dataset.subset;
            showModelBattlesModal(subset, state.experiment, sampleIndex, model);
        });
    });
}
1297
+
1298
/**
 * Build the HTML of one prompt card: instruction, meta line, favorite
 * button, input images and the grid of model outputs.
 *
 * The experiment name is only shown when the special "__all__" experiment
 * is selected; task type and prompt source are shown when present.
 * The input image count falls back to 1 when missing or falsy.
 *
 * @param {object} prompt - Prompt record; the fields read here are subset,
 *     sample_index, input_image_count, models, instruction, exp_name,
 *     task_type and prompt_source.
 * @returns {string} HTML markup for the card.
 */
function renderPromptCard(prompt) {
    const imageCount = prompt.input_image_count || 1;
    const inputImagesHtml = renderPromptInputImages(prompt.subset, prompt.sample_index, imageCount);
    const modelsHtml = renderPromptModelsGrid(prompt.subset, prompt.sample_index, prompt.models || []);
    const favorited = isFavorited(state.subset, state.experiment, prompt.sample_index);

    // Optional meta entries render as an empty string when not applicable.
    const expMeta = state.experiment === '__all__' ? `<span>Exp: ${escapeHtml(prompt.exp_name)}</span>` : '';
    const taskMeta = prompt.task_type ? `<span>Task: ${escapeHtml(prompt.task_type)}</span>` : '';
    const sourceMeta = prompt.prompt_source ? `<span>Source: ${escapeHtml(prompt.prompt_source)}</span>` : '';

    let favoriteClass = '';
    let favoriteTitle = 'Add to favorites';
    let favoriteIcon = '☆';
    if (favorited) {
        favoriteClass = 'favorited';
        favoriteTitle = 'Remove from favorites';
        favoriteIcon = '★';
    }

    return `
        <div class="prompt-card">
            <div class="prompt-card-header">
                <div class="prompt-card-info">
                    <div class="prompt-card-instruction">${escapeHtml(prompt.instruction || 'No instruction')}</div>
                    <div class="prompt-card-meta">
                        <span>Index: ${prompt.sample_index}</span>
                        ${expMeta}
                        ${taskMeta}
                        ${sourceMeta}
                        <span>Input Images: ${imageCount}</span>
                    </div>
                </div>
                <div class="prompt-card-actions">
                    <button class="btn-favorite-toggle ${favoriteClass}"
                        data-sample-index="${prompt.sample_index}"
                        data-instruction="${escapeHtml(prompt.instruction || '')}"
                        title="${favoriteTitle}">
                        ${favoriteIcon}
                    </button>
                </div>
            </div>
            ${inputImagesHtml}
            ${modelsHtml}
        </div>
    `;
}
1330
+
1331
/**
 * Build the "Input Images" section of a prompt card.
 *
 * One lazily loaded image is emitted per input, addressed as
 * `images/<subset>/input/<sampleIndex>/<position>` with zero-based
 * positions and one-based "Input N" alt texts.
 *
 * @param {string} subset - Subset used in the image URL.
 * @param {number} sampleIndex - Sample index used in the image URL.
 * @param {number} count - Number of input images.
 * @returns {string} HTML markup, or an empty string when `count` is 0.
 */
function renderPromptInputImages(subset, sampleIndex, count) {
    if (count === 0) {
        return '';
    }

    const tiles = [];
    for (let position = 0; position < count; position += 1) {
        const source = `images/${subset}/input/${sampleIndex}/${position}`;
        tiles.push(`
            <div class="prompt-input-image">
                <img src="${source}" alt="Input ${position + 1}" loading="lazy">
            </div>
        `);
    }

    return `
        <div class="prompt-input-section">
            <div class="prompt-input-title">Input Images (${count})</div>
            <div class="prompt-input-images">
                ${tiles.join('')}
            </div>
        </div>
    `;
}
1353
+
1354
/**
 * Build the "Model Outputs" section of a prompt card.
 *
 * Models are rendered in the order given (the section title states they are
 * sorted by win rate); the first three positions get a medal badge and a
 * `rank-N` class. Every card shows the output image, the clickable model
 * name and the win rate with W/L/T counts.
 *
 * The subset is HTML-escaped wherever it is written into an attribute
 * (`data-subset` and the image `src`), in line with the model name next to
 * it, so a subset containing quotes or angle brackets cannot break the
 * markup.
 *
 * @param {string} subset - Subset used for image URLs and data attributes.
 * @param {number} sampleIndex - Sample index of the prompt.
 * @param {Array<object>} models - Per-model results; model, win_rate,
 *     wins, losses and ties are read here.
 * @returns {string} HTML markup for the section.
 */
function renderPromptModelsGrid(subset, sampleIndex, models) {
    if (models.length === 0) {
        return `
            <div class="prompt-models-section">
                <div class="prompt-models-title">Model Outputs</div>
                <p class="placeholder">No model outputs available</p>
            </div>
        `;
    }

    // Medal for list positions 1..3; later positions get no badge.
    const medals = ['🥇', '🥈', '🥉'];
    const safeSubset = escapeHtml(String(subset));

    const modelsHtml = models.map((m, idx) => {
        const rank = idx + 1;
        const imgUrl = `images/${subset}/${encodeURIComponent(m.model)}/${sampleIndex}`;
        const winRatePercent = (m.win_rate * 100).toFixed(1);

        const hasMedal = rank <= medals.length;
        const rankClass = hasMedal ? `rank-${rank}` : '';
        const rankBadge = hasMedal
            ? `<span class="prompt-model-rank rank-${rank}">${medals[idx]}</span>`
            : '';

        return `
            <div class="prompt-model-card ${rankClass}" data-model="${escapeHtml(m.model)}" data-sample-index="${sampleIndex}" data-subset="${safeSubset}">
                <div class="prompt-model-image">
                    <img src="${escapeHtml(imgUrl)}" alt="${escapeHtml(m.model)}" data-label="${escapeHtml(getModelDisplayName(m.model))}" loading="lazy">
                </div>
                <div class="prompt-model-info">
                    <div class="prompt-model-name clickable" data-model="${escapeHtml(m.model)}" data-sample-index="${sampleIndex}" data-subset="${safeSubset}">${rankBadge}${escapeHtml(getModelDisplayName(m.model))}</div>
                    <div class="prompt-model-stats">
                        <span class="win-rate">${winRatePercent}%</span>
                        (<span class="wins">${m.wins}W</span>/<span class="losses">${m.losses}L</span>/<span class="ties">${m.ties}T</span>)
                    </div>
                </div>
            </div>
        `;
    }).join('');

    return `
        <div class="prompt-models-section">
            <div class="prompt-models-title">Model Outputs (sorted by win rate)</div>
            <div class="prompt-models-grid">
                ${modelsHtml}
            </div>
        </div>
    `;
}
1407
+
1408
/**
 * Build the HTML of one battle card for the battle list.
 *
 * The card shows both model names (styled as winner/loser), badges, the
 * instruction, the input image(s), both model outputs and a meta line.
 *
 * Badges:
 * - "Tie" whenever the final winner is 'tie';
 * - otherwise "Win"/"Loss" from the point of view of the selected model,
 *   but only when exactly one model is selected in the filters;
 * - "Consistent"/"Inconsistent" from `battle.is_consistent`.
 *
 * The battle id is HTML-escaped before it is written into `data-id`, like
 * every other server-provided string in this card.
 *
 * @param {object} battle - Battle summary; the fields read here are id,
 *     model_a, model_b, final_winner, is_consistent, sample_index,
 *     instruction, input_image_count, exp_name, prompt_source and
 *     winner_display.
 * @returns {string} HTML markup for the card.
 */
function renderBattleCard(battle) {
    const selectedModels = state.filters.models || [];
    const isSingleModelFilter = selectedModels.length === 1;
    const selectedModel = isSingleModelFilter ? selectedModels[0] : null;

    const isWin = selectedModel && battle.final_winner === selectedModel;
    const isLoss = selectedModel && battle.final_winner !== 'tie' && battle.final_winner !== selectedModel;
    const isTie = battle.final_winner === 'tie';

    // Determine winner/loser styling for model names
    let modelAClass = 'model-name';
    let modelBClass = 'model-name';

    if (battle.final_winner === battle.model_a) {
        modelAClass += ' winner';
        modelBClass += ' loser';
    } else if (battle.final_winner === battle.model_b) {
        modelBClass += ' winner';
        modelAClass += ' loser';
    }

    // Result badge
    let resultBadge = '';
    if (isTie) {
        resultBadge = '<span class="badge badge-tie">Tie</span>';
    } else if (isWin) {
        resultBadge = '<span class="badge badge-win">Win</span>';
    } else if (isLoss) {
        resultBadge = '<span class="badge badge-loss">Loss</span>';
    }

    // Consistency badge
    const consistencyBadge = battle.is_consistent
        ? '<span class="badge badge-consistent">Consistent</span>'
        : '<span class="badge badge-inconsistent">Inconsistent</span>';

    // Favorite button
    const favorited = isFavorited(state.subset, state.experiment, battle.sample_index);
    const favoriteBtn = `
        <button class="btn-favorite-toggle ${favorited ? 'favorited' : ''}"
            data-sample-index="${battle.sample_index}"
            data-instruction="${escapeHtml(battle.instruction || '')}"
            title="${favorited ? 'Remove from favorites' : 'Add to favorites'}">
            ${favorited ? '★' : '☆'}
        </button>
    `;

    // Generate input images HTML (support multiple); a missing or falsy
    // count is treated as a single input image.
    const inputImageCount = battle.input_image_count || 1;
    let inputImagesHtml = '';

    if (inputImageCount === 1) {
        // Single input image - normal size
        const inputImageUrl = `images/${state.subset}/input/${battle.sample_index}/0`;
        inputImagesHtml = `
            <div class="battle-image-container">
                <img src="${inputImageUrl}" alt="Input" loading="lazy">
                <span class="image-label">Input</span>
            </div>
        `;
    } else {
        // Multiple input images - show in a grid within the input column
        let inputThumbs = '';
        for (let i = 0; i < inputImageCount; i++) {
            const inputImageUrl = `images/${state.subset}/input/${battle.sample_index}/${i}`;
            inputThumbs += `
                <div class="input-thumb">
                    <img src="${inputImageUrl}" alt="Input ${i + 1}" loading="lazy">
                </div>
            `;
        }
        inputImagesHtml = `
            <div class="battle-image-container multi-input" data-count="${inputImageCount}">
                <div class="input-thumbs-grid">${inputThumbs}</div>
                <span class="image-label">Input (${inputImageCount})</span>
            </div>
        `;
    }

    const modelAImageUrl = `images/${state.subset}/${encodeURIComponent(battle.model_a)}/${battle.sample_index}`;
    const modelBImageUrl = `images/${state.subset}/${encodeURIComponent(battle.model_b)}/${battle.sample_index}`;

    return `
        <div class="battle-card" data-id="${escapeHtml(String(battle.id))}">
            <div class="battle-card-header">
                <div class="battle-models">
                    <span class="${modelAClass}">${escapeHtml(getModelDisplayName(battle.model_a))}</span>
                    <span class="vs-label">vs</span>
                    <span class="${modelBClass}">${escapeHtml(getModelDisplayName(battle.model_b))}</span>
                </div>
                <div class="battle-badges">
                    ${favoriteBtn}
                    ${resultBadge}
                    ${consistencyBadge}
                </div>
            </div>
            <div class="battle-instruction">${escapeHtml(battle.instruction || 'No instruction')}</div>
            <div class="battle-images">
                ${inputImagesHtml}
                <div class="battle-image-container">
                    <img src="${modelAImageUrl}" alt="${escapeHtml(battle.model_a)}" loading="lazy">
                    <span class="image-label">${escapeHtml(getModelDisplayName(battle.model_a))}</span>
                </div>
                <div class="battle-image-container">
                    <img src="${modelBImageUrl}" alt="${escapeHtml(battle.model_b)}" loading="lazy">
                    <span class="image-label">${escapeHtml(getModelDisplayName(battle.model_b))}</span>
                </div>
            </div>
            <div class="battle-meta">
                <span>Index: ${battle.sample_index}</span>
                ${state.experiment === '__all__' ? `<span>Exp: ${escapeHtml(battle.exp_name)}</span>` : ''}
                ${battle.prompt_source ? `<span>Source: ${escapeHtml(battle.prompt_source)}</span>` : ''}
                <span>Winner: ${escapeHtml(battle.winner_display)}</span>
            </div>
        </div>
    `;
}
1527
+
1528
+ function renderDetailModal(battle, inputImageCount = 1) {
1529
+ // Determine winner/loser classes
1530
+ const modelAIsWinner = battle.final_winner === battle.model_a;
1531
+ const modelBIsWinner = battle.final_winner === battle.model_b;
1532
+ const modelAClass = modelAIsWinner ? 'winner' : (battle.final_winner !== 'tie' ? 'loser' : '');
1533
+ const modelBClass = modelBIsWinner ? 'winner' : (battle.final_winner !== 'tie' ? 'loser' : '');
1534
+
1535
+ // Generate input image elements
1536
+ let inputImagesHtml = '';
1537
+ for (let i = 0; i < inputImageCount; i++) {
1538
+ const inputImageUrl = `images/${state.subset}/input/${battle.sample_index}/${i}`;
1539
+ const inputLabel = inputImageCount > 1 ? `Input ${i + 1}` : 'Input';
1540
+ inputImagesHtml += `
1541
+ <div class="detail-image-box input-image">
1542
+ <h4>${inputLabel}</h4>
1543
+ <img src="${inputImageUrl}" alt="${inputLabel}" data-label="${inputLabel}" class="zoomable">
1544
+ </div>
1545
+ `;
1546
+ }
1547
+
1548
+ const modelAImageUrl = `images/${state.subset}/${encodeURIComponent(battle.model_a)}/${battle.sample_index}`;
1549
+ const modelBImageUrl = `images/${state.subset}/${encodeURIComponent(battle.model_b)}/${battle.sample_index}`;
1550
+
1551
+ // VLM outputs
1552
+ let vlmOutputsHtml = '';
1553
+
1554
+ if (battle.original_call || battle.swapped_call) {
1555
+ vlmOutputsHtml = '<div class="detail-vlm-outputs"><h3>VLM Judge Outputs</h3>';
1556
+
1557
+ if (battle.original_call) {
1558
+ const parsed = battle.original_call.parsed_result || {};
1559
+ vlmOutputsHtml += `
1560
+ <div class="vlm-call">
1561
+ <h4>Original Order (A=${escapeHtml(getModelDisplayName(battle.model_a))}, B=${escapeHtml(getModelDisplayName(battle.model_b))})</h4>
1562
+ <div class="vlm-call-meta">
1563
+ Winner: <strong>${escapeHtml(parsed.winner || 'N/A')}</strong> |
1564
+ Parse: ${battle.original_call.parse_success ? '✓' : '✗'}
1565
+ </div>
1566
+ <div class="vlm-response">${escapeHtml(battle.original_call.raw_response || 'No response')}</div>
1567
+ </div>
1568
+ `;
1569
+ }
1570
+
1571
+ if (battle.swapped_call) {
1572
+ const parsed = battle.swapped_call.parsed_result || {};
1573
+ vlmOutputsHtml += `
1574
+ <div class="vlm-call">
1575
+ <h4>Swapped Order (A=${escapeHtml(getModelDisplayName(battle.model_b))}, B=${escapeHtml(getModelDisplayName(battle.model_a))})</h4>
1576
+ <div class="vlm-call-meta">
1577
+ Winner: <strong>${escapeHtml(parsed.winner || 'N/A')}</strong> |
1578
+ Parse: ${battle.swapped_call.parse_success ? '✓' : '✗'}
1579
+ </div>
1580
+ <div class="vlm-response">${escapeHtml(battle.swapped_call.raw_response || 'No response')}</div>
1581
+ </div>
1582
+ `;
1583
+ }
1584
+
1585
+ vlmOutputsHtml += '</div>';
1586
+ } else {
1587
+ vlmOutputsHtml = `
1588
+ <div class="detail-vlm-outputs">
1589
+ <h3>VLM Judge Outputs</h3>
1590
+ <p class="placeholder">Audit logs not available for this battle</p>
1591
+ </div>
1592
+ `;
1593
+ }
1594
+
1595
+ // Format original_metadata as JSON if it exists
1596
+ let originalMetadataHtml = '';
1597
+ if (battle.original_metadata) {
1598
+ try {
1599
+ const metaStr = typeof battle.original_metadata === 'string'
1600
+ ? battle.original_metadata
1601
+ : JSON.stringify(battle.original_metadata, null, 2);
1602
+ originalMetadataHtml = `
1603
+ <div class="detail-metadata-section">
1604
+ <h4>Original Metadata</h4>
1605
+ <pre class="metadata-json">${escapeHtml(metaStr)}</pre>
1606
+ </div>
1607
+ `;
1608
+ } catch (e) {
1609
+ originalMetadataHtml = `
1610
+ <div class="detail-metadata-section">
1611
+ <h4>Original Metadata</h4>
1612
+ <pre class="metadata-json">${escapeHtml(String(battle.original_metadata))}</pre>
1613
+ </div>
1614
+ `;
1615
+ }
1616
+ }
1617
+
1618
+ elements.modalContent.innerHTML = `
1619
+ <div class="detail-header">
1620
+ <h2>
1621
+ <span class="model-name ${battle.final_winner === battle.model_a ? 'winner' : (battle.final_winner !== 'tie' ? 'loser' : '')}">${escapeHtml(getModelDisplayName(battle.model_a))}</span>
1622
+ <span class="vs-label">vs</span>
1623
+ <span class="model-name ${battle.final_winner === battle.model_b ? 'winner' : (battle.final_winner !== 'tie' ? 'loser' : '')}">${escapeHtml(getModelDisplayName(battle.model_b))}</span>
1624
+ <span class="badge ${battle.is_consistent ? 'badge-consistent' : 'badge-inconsistent'}">${battle.is_consistent ? 'Consistent' : 'Inconsistent'}</span>
1625
+ </h2>
1626
+ <div class="detail-meta-info">
1627
+ <span class="meta-tag"><strong>Index:</strong> ${battle.sample_index}</span>
1628
+ ${battle.task_type ? `<span class="meta-tag"><strong>Task:</strong> ${escapeHtml(battle.task_type)}</span>` : ''}
1629
+ ${battle.prompt_source ? `<span class="meta-tag"><strong>Source:</strong> ${escapeHtml(battle.prompt_source)}</span>` : ''}
1630
+ </div>
1631
+ <div class="detail-instruction">${escapeHtml(battle.instruction || 'No instruction')}</div>
1632
+ ${originalMetadataHtml}
1633
+ </div>
1634
+
1635
+ <div class="detail-images">
1636
+ ${inputImagesHtml}
1637
+ <div class="detail-image-box output-image ${modelAClass}">
1638
+ <h4>${escapeHtml(getModelDisplayName(battle.model_a))} ${modelAIsWinner ? '👑' : ''}</h4>
1639
+ <img src="${modelAImageUrl}" alt="${escapeHtml(battle.model_a)}" data-label="${escapeHtml(getModelDisplayName(battle.model_a))}" class="zoomable">
1640
+ </div>
1641
+ <div class="detail-image-box output-image ${modelBClass}">
1642
+ <h4>${escapeHtml(getModelDisplayName(battle.model_b))} ${modelBIsWinner ? '👑' : ''}</h4>
1643
+ <img src="${modelBImageUrl}" alt="${escapeHtml(battle.model_b)}" data-label="${escapeHtml(getModelDisplayName(battle.model_b))}" class="zoomable">
1644
+ </div>
1645
+ </div>
1646
+
1647
+ ${vlmOutputsHtml}
1648
+ `;
1649
+
1650
+ // Add click handlers for zoomable images
1651
+ setTimeout(() => {
1652
+ elements.modalContent.querySelectorAll('img.zoomable').forEach(img => {
1653
+ img.addEventListener('click', (e) => {
1654
+ e.stopPropagation();
1655
+ openLightbox(img.src, img.dataset.label || img.alt);
1656
+ });
1657
+ });
1658
+ }, 0);
1659
+ }
1660
+
1661
/**
 * Refresh every pagination control (top and bottom toolbars) from `state`.
 *
 * Updates the "Showing X-Y of N" label, enables/disables the first/prev/
 * next/last buttons, sets the bounds of the "go to page" inputs and redraws
 * the numbered page buttons.
 */
function updatePagination() {
    const { page, pageSize, totalBattles } = state;
    // Fall back to a single page when there are no results so the page input
    // never advertises an impossible "1-0" range (renderPageNumbers uses the
    // same fallback).
    const totalPages = state.totalPages || 1;

    let rangeLabel = '';
    if (totalBattles > 0) {
        const start = (page - 1) * pageSize + 1;
        const end = Math.min(page * pageSize, totalBattles);
        rangeLabel = `Showing ${start}-${end} of ${totalBattles}`;
    }
    elements.paginationInfo.textContent = rangeLabel;

    const canPrev = page > 1;
    const canNext = page < state.totalPages;

    const backwardButtons = [
        elements.firstPage,
        elements.prevPage,
        elements.firstPageBottom,
        elements.prevPageBottom,
    ];
    const forwardButtons = [
        elements.nextPage,
        elements.lastPage,
        elements.nextPageBottom,
        elements.lastPageBottom,
    ];
    backwardButtons.forEach((button) => {
        button.disabled = !canPrev;
    });
    forwardButtons.forEach((button) => {
        button.disabled = !canNext;
    });

    // Keep both "go to page" inputs bounded by the real page count.
    [elements.pageInput, elements.pageInputBottom].forEach((input) => {
        input.max = totalPages;
        input.placeholder = `1-${totalPages}`;
    });

    renderPageNumbers(elements.pageNumbers);
    renderPageNumbers(elements.pageNumbersBottom);
}
1691
+
1692
/**
 * Build the list of page labels to display for a pager.
 *
 * When `total` fits within `maxVisible` every page is listed. Otherwise the
 * list holds the first page, a window of one page either side of `current`,
 * and the last page. A gap of exactly one page is filled with that page's
 * number (an ellipsis would occupy the same slot while hiding a clickable
 * page); wider gaps are represented by the string '...'.
 *
 * @param {number} total - Total number of pages (>= 1).
 * @param {number} current - Currently selected page (1-based).
 * @param {number} [maxVisible=7] - Page count up to which all pages are shown.
 * @returns {Array<number|string>} Page numbers and '...' markers, in order.
 */
function computeVisiblePages(total, current, maxVisible = 7) {
    if (total <= maxVisible) {
        const all = [];
        for (let p = 1; p <= total; p += 1) {
            all.push(p);
        }
        return all;
    }

    const pages = [1];
    const windowStart = Math.max(2, current - 1);
    const windowEnd = Math.min(total - 1, current + 1);

    if (windowStart > 2) {
        pages.push(windowStart === 3 ? 2 : '...');
    }
    for (let p = windowStart; p <= windowEnd; p += 1) {
        pages.push(p);
    }
    if (windowEnd < total - 1) {
        pages.push(windowEnd === total - 2 ? total - 1 : '...');
    }
    pages.push(total);
    return pages;
}

/**
 * Draw the numbered page buttons into `container` and wire their clicks.
 *
 * Reads `state.totalPages` (treated as 1 when falsy) and `state.page`.
 * Clicking a button other than the active one stores the new page in
 * `state.page` and calls `loadCurrentView()`.
 *
 * @param {Element} container - Element whose contents are replaced.
 */
function renderPageNumbers(container) {
    const total = state.totalPages || 1;
    const current = state.page;

    container.innerHTML = computeVisiblePages(total, current).map((p) => {
        if (p === '...') {
            return '<span class="page-number ellipsis">...</span>';
        }
        const activeClass = p === current ? 'active' : '';
        return `<button class="page-number ${activeClass}" data-page="${p}">${p}</button>`;
    }).join('');

    container.querySelectorAll('.page-number:not(.ellipsis)').forEach((btn) => {
        btn.addEventListener('click', () => {
            const page = Number.parseInt(btn.dataset.page, 10);
            // Ignore malformed attributes and clicks on the active page.
            if (Number.isNaN(page) || page === state.page) {
                return;
            }
            state.page = page;
            loadCurrentView();
        });
    });
}
1752
+
1753
+ // ========== Modal Functions ==========
1754
/**
 * Reveal the shared detail modal and lock scrolling of the page behind it.
 */
function showModal() {
    const { modal } = elements;
    modal.classList.remove('hidden');
    document.body.style.overflow = 'hidden';
}
1758
+
1759
/**
 * Hide the shared detail modal and restore the page's default scrolling.
 */
function hideModal() {
    const { modal } = elements;
    modal.classList.add('hidden');
    document.body.style.overflow = '';
}
1763
+
1764
+ // ========== Model Battles Modal Functions ==========
1765
// Working state of the "Battle Records" modal. It is filled in by
// showModelBattlesModal() and read back whenever the opponent filter changes
// and the modal has to be re-rendered.
let modelBattlesState = {
    // Identity of the request currently shown in the modal.
    subset: null,
    expName: null,
    sampleIndex: null,
    model: null,
    // Every opponent reported by the API for this model/sample.
    allOpponents: [],
    // Opponents currently ticked in the filter.
    selectedOpponents: new Set(),
    // Battle records returned by the API.
    battles: [],
};
1775
+
1776
/**
 * Open the modal listing every battle of `model` on one sample.
 *
 * Records the request in `modelBattlesState`, shows a loading placeholder,
 * fetches the battle records and hands the response to
 * `renderModelBattlesModal`. On failure the error is logged and the modal
 * shows a short error message instead.
 *
 * @param {string} subset - Subset name used in the API path.
 * @param {string} expName - Experiment name used in the API path.
 * @param {number|string} sampleIndex - Sample index used in the API path.
 * @param {string} model - Model whose battles are requested.
 * @returns {Promise<void>}
 */
async function showModelBattlesModal(subset, expName, sampleIndex, model) {
    modelBattlesState.subset = subset;
    modelBattlesState.expName = expName;
    modelBattlesState.sampleIndex = sampleIndex;
    modelBattlesState.model = model;

    const headingHtml = `<h2>Battle Records: ${escapeHtml(getModelDisplayName(model))}</h2>`;

    // Loading state. String() keeps a sample index of 0 visible even if
    // escapeHtml treats falsy input as empty.
    elements.modalContent.innerHTML = `
        <div class="model-battles-modal">
            <div class="model-battles-header">
                ${headingHtml}
                <p class="model-battles-subtitle">Sample Index: ${escapeHtml(String(sampleIndex))}</p>
            </div>
            <div class="loading">Loading battle records...</div>
        </div>
    `;
    showModal();

    try {
        // Encode every path segment, not just the model, so names containing
        // spaces, '#', '?' or '/' cannot change the request path.
        const [subsetSeg, expSeg, sampleSeg, modelSeg] = [subset, expName, sampleIndex, model]
            .map((segment) => encodeURIComponent(segment));
        const url = `api/subsets/${subsetSeg}/experiments/${expSeg}/samples/${sampleSeg}/models/${modelSeg}/battles`;
        const data = await fetchJSON(url);

        modelBattlesState.allOpponents = data.all_opponents || [];
        // Every opponent starts selected.
        modelBattlesState.selectedOpponents = new Set(modelBattlesState.allOpponents);
        modelBattlesState.battles = data.battles || [];

        renderModelBattlesModal(data);
    } catch (error) {
        console.error('Failed to load model battles:', error);
        elements.modalContent.innerHTML = `
            <div class="model-battles-modal">
                <div class="model-battles-header">
                    ${headingHtml}
                </div>
                <div class="empty-state"><p>Failed to load battle records</p></div>
            </div>
        `;
    }
}
1819
+
1820
/**
 * Render the "Battle Records" modal body for one model on one sample.
 *
 * Battles are filtered by `modelBattlesState.selectedOpponents`, grouped per
 * opponent into collapsible sections and summarised twice: the overall
 * numbers supplied in `data` and the numbers recomputed for the filtered
 * subset. After the markup is written, listeners are attached to the
 * opponent checkboxes and the Select All / Clear All buttons; each of them
 * updates the selection and re-renders the modal.
 *
 * @param {Object} data - Payload with `model`, `sample_index`, `wins`,
 *   `losses`, `ties`, `total`, `win_rate`, `battles` and `all_opponents`.
 *   Missing `battles` / `all_opponents` are treated as empty lists.
 */
function renderModelBattlesModal(data) {
    const { model, sample_index, wins, losses, ties, total, win_rate } = data;
    // The caller already tolerates responses without these arrays; do the
    // same here instead of throwing on `.filter` / `.map`.
    const battles = data.battles || [];
    const all_opponents = data.all_opponents || [];

    // A missing win rate renders as 0.0 rather than "NaN".
    const overallRate = Number(win_rate);
    const winRatePercent = Number.isFinite(overallRate) ? (overallRate * 100).toFixed(1) : '0.0';

    // Count wins/losses/ties in one pass; anything that is neither 'win' nor
    // 'loss' counts as a tie.
    const summarize = (records) => {
        let w = 0;
        let l = 0;
        let t = 0;
        records.forEach((b) => {
            if (b.result === 'win') {
                w += 1;
            } else if (b.result === 'loss') {
                l += 1;
            } else {
                t += 1;
            }
        });
        const count = w + l + t;
        return {
            wins: w,
            losses: l,
            ties: t,
            total: count,
            winRate: count > 0 ? ((w / count) * 100).toFixed(1) : '0.0',
        };
    };

    const filteredBattles = battles.filter((b) => modelBattlesState.selectedOpponents.has(b.opponent));

    // Group with a Map: opponent names come from data, and a plain object
    // would collide with inherited keys such as "constructor".
    const battlesByOpponent = new Map();
    filteredBattles.forEach((b) => {
        const bucket = battlesByOpponent.get(b.opponent);
        if (bucket) {
            bucket.push(b);
        } else {
            battlesByOpponent.set(b.opponent, [b]);
        }
    });

    // Markup for a single judge call (original or swapped presentation order).
    const renderJudgeCall = (call, label, modelA, modelB) => {
        if (!call) return '';
        const parsed = call.parsed_result || {};
        const winner = parsed.winner || 'N/A';
        const parseSuccess = call.parse_success ? '✓' : '✗';
        const rawResponse = call.raw_response || 'No response';

        return `
            <div class="judge-call">
                <div class="judge-call-header">
                    <span class="judge-call-label">${label}</span>
                    <span class="judge-call-order">(A=${escapeHtml(getModelDisplayName(modelA))}, B=${escapeHtml(getModelDisplayName(modelB))})</span>
                </div>
                <div class="judge-call-meta">
                    Winner: <strong>${escapeHtml(winner)}</strong> | Parse: ${parseSuccess}
                </div>
                <div class="judge-call-response">${escapeHtml(rawResponse)}</div>
            </div>
        `;
    };

    // Markup for one battle record inside an opponent section.
    const renderBattleRecord = (b) => {
        const resultClass = b.result === 'win' ? 'result-win' : (b.result === 'loss' ? 'result-loss' : 'result-tie');
        const resultText = b.result === 'win' ? 'Win' : (b.result === 'loss' ? 'Loss' : 'Tie');
        const consistentBadge = b.is_consistent
            ? '<span class="badge badge-consistent">Consistent</span>'
            : '<span class="badge badge-inconsistent">Inconsistent</span>';

        let judgeOutputsHtml;
        if (b.original_call || b.swapped_call) {
            judgeOutputsHtml = `
                <div class="battle-judge-outputs">
                    ${renderJudgeCall(b.original_call, 'Original Order', b.model_a, b.model_b)}
                    ${renderJudgeCall(b.swapped_call, 'Swapped Order', b.model_b, b.model_a)}
                </div>
            `;
        } else {
            judgeOutputsHtml = `
                <div class="battle-judge-outputs">
                    <p class="placeholder">Judge outputs not available</p>
                </div>
            `;
        }

        return `
            <div class="battle-record-item">
                <div class="battle-record-item-header">
                    <span class="badge ${resultClass}">${resultText}</span>
                    ${consistentBadge}
                    ${b.exp_name ? `<span class="battle-exp-name">${escapeHtml(b.exp_name)}</span>` : ''}
                </div>
                ${judgeOutputsHtml}
            </div>
        `;
    };

    // One collapsible section per opponent that has battles after filtering.
    const sortedOpponents = Array.from(battlesByOpponent.keys()).sort();
    const opponentSectionsHtml = sortedOpponents.length > 0 ? sortedOpponents.map((opponent) => {
        const opponentBattles = battlesByOpponent.get(opponent);
        // The filter works per opponent, so this group holds all of the
        // opponent's battles and its tally is the opponent's full record.
        const stats = summarize(opponentBattles);
        const isSelected = modelBattlesState.selectedOpponents.has(opponent);

        let overallResultClass = '';
        if (stats.wins > stats.losses) {
            overallResultClass = 'result-win';
        } else if (stats.losses > stats.wins) {
            overallResultClass = 'result-loss';
        } else if (stats.ties > 0 && stats.wins === 0 && stats.losses === 0) {
            overallResultClass = 'result-tie';
        }

        const battlesHtml = opponentBattles.map(renderBattleRecord).join('');

        return `
            <div class="opponent-section ${isSelected ? '' : 'hidden'}" data-opponent="${escapeHtml(opponent)}">
                <div class="opponent-section-header" onclick="this.parentElement.classList.toggle('expanded')">
                    <div class="opponent-section-info">
                        <span class="opponent-name ${overallResultClass}">vs ${escapeHtml(getModelDisplayName(opponent))}</span>
                        <span class="opponent-stats">
                            ${stats.winRate}% (<span class="wins">${stats.wins}W</span>/<span class="losses">${stats.losses}L</span>/<span class="ties">${stats.ties}T</span>)
                        </span>
                    </div>
                    <span class="expand-icon">▼</span>
                </div>
                <div class="opponent-section-content">
                    ${battlesHtml}
                </div>
            </div>
        `;
    }).join('') : '<p class="empty-state">No battles match the current filter</p>';

    const opponentCheckboxesHtml = all_opponents.map((opponent) => {
        const checked = modelBattlesState.selectedOpponents.has(opponent) ? 'checked' : '';
        return `
            <label class="opponent-checkbox">
                <input type="checkbox" value="${escapeHtml(opponent)}" ${checked}>
                <span>${escapeHtml(getModelDisplayName(opponent))}</span>
            </label>
        `;
    }).join('');

    const filtered = summarize(filteredBattles);

    elements.modalContent.innerHTML = `
        <div class="model-battles-modal">
            <div class="model-battles-header">
                <h2>Battle Records: ${escapeHtml(getModelDisplayName(model))}</h2>
                <p class="model-battles-subtitle">Sample Index: ${escapeHtml(String(sample_index))}</p>
                <div class="model-battles-stats">
                    <span class="stat-item"><strong>Overall:</strong> ${winRatePercent}% win rate (${wins}W / ${losses}L / ${ties}T)</span>
                    <span class="stat-item"><strong>Filtered:</strong> ${filtered.winRate}% win rate (${filtered.wins}W / ${filtered.losses}L / ${filtered.ties}T)</span>
                </div>
            </div>

            <div class="model-battles-filter">
                <div class="filter-header">
                    <h4>Filter by Opponent:</h4>
                    <div class="filter-actions">
                        <button class="btn btn-small" id="select-all-opponents">Select All</button>
                        <button class="btn btn-small" id="clear-all-opponents">Clear All</button>
                    </div>
                </div>
                <div class="opponent-checkboxes">
                    ${opponentCheckboxesHtml}
                </div>
            </div>

            <div class="model-battles-list">
                <h4>Battle Records by Opponent (${sortedOpponents.length} opponents, ${filteredBattles.length} battles)</h4>
                <p class="model-battles-hint">Click on an opponent to expand/collapse battle details</p>
                <div class="opponent-sections-container">
                    ${opponentSectionsHtml}
                </div>
            </div>
        </div>
    `;

    // Re-render from the stored battles while carrying the overall numbers
    // of this call forward (they are not kept in modelBattlesState).
    const rerender = () => {
        renderModelBattlesModal({
            model: modelBattlesState.model,
            sample_index: modelBattlesState.sampleIndex,
            wins, losses, ties, total, win_rate,
            battles: modelBattlesState.battles,
            all_opponents: modelBattlesState.allOpponents,
        });
    };

    setTimeout(() => {
        elements.modalContent.querySelectorAll('.opponent-checkbox input').forEach((checkbox) => {
            checkbox.addEventListener('change', () => {
                if (checkbox.checked) {
                    modelBattlesState.selectedOpponents.add(checkbox.value);
                } else {
                    modelBattlesState.selectedOpponents.delete(checkbox.value);
                }
                rerender();
            });
        });

        const selectAllBtn = elements.modalContent.querySelector('#select-all-opponents');
        if (selectAllBtn) {
            selectAllBtn.addEventListener('click', () => {
                modelBattlesState.selectedOpponents = new Set(modelBattlesState.allOpponents);
                rerender();
            });
        }

        const clearAllBtn = elements.modalContent.querySelector('#clear-all-opponents');
        if (clearAllBtn) {
            clearAllBtn.addEventListener('click', () => {
                modelBattlesState.selectedOpponents = new Set();
                rerender();
            });
        }
    }, 0);
}
2058
+
2059
+ // ========== Lightbox Functions ==========
2060
/**
 * Show the full-size image overlay.
 *
 * @param {string} src - URL assigned to the lightbox image.
 * @param {string} [label] - Caption text; a falsy value clears the caption.
 */
function openLightbox(src, label) {
    const { lightbox, lightboxImg, lightboxLabel } = elements;
    lightboxImg.src = src;
    lightboxLabel.textContent = label ? label : '';
    lightbox.classList.add('active');
}
2065
+
2066
/**
 * Hide the image overlay and clear the image source it was showing.
 */
function closeLightbox() {
    const { lightbox, lightboxImg } = elements;
    lightbox.classList.remove('active');
    lightboxImg.src = '';
}
2070
+
2071
+ // ========== Favorites Functions ==========
2072
/**
 * Restore the favorites list from `localStorage` into `state.favorites`.
 *
 * When nothing is stored, `state.favorites` is left untouched. Stored data
 * that cannot be parsed, or that parses to something other than an array,
 * resets the list to empty. Entries that are not objects are dropped. The
 * favorites counter is refreshed in every case.
 */
function loadFavoritesFromStorage() {
    try {
        const stored = localStorage.getItem('genarena_favorites');
        if (stored) {
            const parsed = JSON.parse(stored);
            // Valid JSON is not necessarily a list (e.g. "null" or "{}").
            // The rest of the UI calls array methods on state.favorites and
            // reads properties of each entry, so validate both levels.
            state.favorites = Array.isArray(parsed)
                ? parsed.filter((entry) => entry !== null && typeof entry === 'object')
                : [];
        }
    } catch (e) {
        console.error('Failed to load favorites:', e);
        state.favorites = [];
    }
    updateFavoritesCount();
}
2084
+
2085
/**
 * Write `state.favorites` to `localStorage` as JSON and refresh the counter.
 * A storage failure is logged; the counter is refreshed regardless.
 */
function saveFavoritesToStorage() {
    try {
        const serialized = JSON.stringify(state.favorites);
        localStorage.setItem('genarena_favorites', serialized);
    } catch (err) {
        console.error('Failed to save favorites:', err);
    }
    updateFavoritesCount();
}
2093
+
2094
/**
 * Show the current number of favorites in the favorites counter element.
 */
function updateFavoritesCount() {
    const { length: favoriteTotal } = state.favorites;
    elements.favoritesCount.textContent = favoriteTotal;
}
2097
+
2098
/**
 * Tell whether a prompt is in the favorites list.
 *
 * All three identifying fields are compared with strict equality, so the
 * sample index must have the same type as the stored one.
 *
 * @param {string} subset
 * @param {string} expName
 * @param {number} sampleIndex
 * @returns {boolean} True when a matching favorite exists.
 */
function isFavorited(subset, expName, sampleIndex) {
    const isSamePrompt = (entry) => (
        entry.subset === subset
        && entry.exp_name === expName
        && entry.sample_index === sampleIndex
    );
    return state.favorites.some(isSamePrompt);
}
2103
+
2104
/**
 * Add the prompt to the favorites, or remove it when it is already there,
 * then persist the list.
 *
 * @param {string} subset
 * @param {string} expName
 * @param {number} sampleIndex
 * @param {string} [instruction=''] - Prompt text stored with a new favorite.
 * @returns {boolean} True when the prompt was added, false when removed.
 */
function toggleFavorite(subset, expName, sampleIndex, instruction = '') {
    const position = state.favorites.findIndex((entry) => (
        entry.subset === subset
        && entry.exp_name === expName
        && entry.sample_index === sampleIndex
    ));
    const wasAbsent = position < 0;

    if (wasAbsent) {
        const record = {
            subset,
            exp_name: expName,
            sample_index: sampleIndex,
            instruction: instruction,
            added_at: new Date().toISOString()
        };
        state.favorites.push(record);
    } else {
        state.favorites.splice(position, 1);
    }

    saveFavoritesToStorage();
    return wasAbsent;
}
2126
+
2127
/**
 * Remove the matching prompt from the favorites and persist the list.
 * Nothing happens (and nothing is saved) when the prompt is not a favorite.
 *
 * @param {string} subset
 * @param {string} expName
 * @param {number} sampleIndex
 */
function removeFavorite(subset, expName, sampleIndex) {
    const position = state.favorites.findIndex((entry) => (
        entry.subset === subset
        && entry.exp_name === expName
        && entry.sample_index === sampleIndex
    ));
    if (position < 0) {
        return;
    }
    state.favorites.splice(position, 1);
    saveFavoritesToStorage();
}
2137
+
2138
/**
 * Ask for confirmation, then empty the favorites list, persist the change
 * and redraw the favorites modal. Declining the prompt changes nothing.
 */
function clearAllFavorites() {
    const approved = confirm('Are you sure you want to clear all favorites?');
    if (!approved) {
        return;
    }
    state.favorites = [];
    saveFavoritesToStorage();
    renderFavoritesModal();
}
2145
+
2146
/**
 * Open the favorites modal, lock page scrolling and draw its contents.
 */
function showFavoritesModal() {
    const { favoritesModal } = elements;
    favoritesModal.classList.remove('hidden');
    document.body.style.overflow = 'hidden';
    renderFavoritesModal();
}
2151
+
2152
/**
 * Close the favorites modal and restore the page's default scrolling.
 */
function hideFavoritesModal() {
    const { favoritesModal } = elements;
    favoritesModal.classList.add('hidden');
    document.body.style.overflow = '';
}
2156
+
2157
/**
 * Draw the favorites modal: the model filter, then one card per favorite.
 *
 * Each favorite's data is fetched from the `all_models` endpoint, passing
 * the current model filter and stats scope as query parameters. A favorite
 * whose request fails is logged and shown as an error card. After the cards
 * are written, "Remove" buttons and image thumbnails get click handlers.
 *
 * @returns {Promise<void>}
 */
async function renderFavoritesModal() {
    // Tag this invocation so that a slower, older invocation cannot
    // overwrite the output of a newer one (re-renders are triggered from
    // click handlers while earlier fetches may still be in flight).
    const renderToken = (renderFavoritesModal.latestToken || 0) + 1;
    renderFavoritesModal.latestToken = renderToken;

    renderFavoritesModelFilter();

    if (state.favorites.length === 0) {
        elements.favoritesContent.innerHTML = `
            <div class="favorites-empty">
                <p>No favorite prompts yet.</p>
                <p>Click the ☆ icon on any battle card to add it to favorites.</p>
            </div>
        `;
        return;
    }

    elements.favoritesContent.innerHTML = '<div class="favorite-loading">Loading favorites...</div>';

    // Encode each value so names containing '&', '#', '+' or spaces cannot
    // corrupt the query string; commas stay literal as the list separator.
    const params = [];
    if (state.favoritesModelFilter.length > 0) {
        const modelList = state.favoritesModelFilter
            .map((name) => encodeURIComponent(name))
            .join(',');
        params.push(`models=${modelList}`);
    }
    params.push(`stats_scope=${encodeURIComponent(state.favoritesStatsScope)}`);
    const queryString = `?${params.join('&')}`;

    // The requests are independent, so issue them together. Promise.all
    // keeps the results in favorites order.
    const cards = await Promise.all(state.favorites.map(async (fav) => {
        try {
            const path = [
                'api/subsets', encodeURIComponent(fav.subset),
                'experiments', encodeURIComponent(fav.exp_name),
                'samples', encodeURIComponent(fav.sample_index),
                'all_models',
            ].join('/');
            const data = await fetchJSON(`${path}${queryString}`);
            return renderFavoritePromptCard(fav, data);
        } catch (e) {
            console.error('Failed to load favorite:', fav, e);
            return renderFavoritePromptCardError(fav);
        }
    }));

    if (renderToken !== renderFavoritesModal.latestToken) {
        return;
    }

    elements.favoritesContent.innerHTML = cards.join('');

    elements.favoritesContent.querySelectorAll('.btn-unfavorite').forEach((btn) => {
        btn.addEventListener('click', () => {
            const { subset, expName, sampleIndex } = btn.dataset;
            removeFavorite(subset, expName, Number.parseInt(sampleIndex, 10));
            renderFavoritesModal();
        });
    });

    // Open thumbnails in the lightbox. Prefer data-label (the display name)
    // over alt, matching the battle detail view.
    elements.favoritesContent.querySelectorAll('.favorite-input-image img, .favorite-model-image img').forEach((img) => {
        img.addEventListener('click', (e) => {
            e.stopPropagation();
            openLightbox(img.src, img.dataset.label || img.alt || '');
        });
    });
}
2216
+
2217
/**
 * Build the HTML card for a favorite whose data loaded successfully.
 *
 * @param {Object} fav - Stored favorite (`subset`, `exp_name`,
 *   `sample_index`); used for the Remove button's data attributes.
 * @param {Object} data - API response for the sample (`subset`, `exp_name`,
 *   `sample_index`, `instruction`, `models`, `input_image_count`, and
 *   optional `task_type` / `prompt_source`).
 * @returns {string} HTML markup of the card.
 */
function renderFavoritePromptCard(fav, data) {
    // Default to one input image only when the count is absent. A reported
    // count of 0 must stay 0 so no placeholder image is requested for
    // samples without inputs.
    const inputImageCount = data.input_image_count == null ? 1 : data.input_image_count;
    const inputImagesHtml = renderFavoriteInputImages(data.subset, data.sample_index, inputImageCount);
    const modelsHtml = renderFavoriteModelsGrid(data.subset, data.sample_index, data.models || []);

    return `
        <div class="favorite-prompt-card">
            <div class="favorite-prompt-header">
                <div class="favorite-prompt-info">
                    <div class="favorite-prompt-instruction">${escapeHtml(data.instruction || 'No instruction')}</div>
                    <div class="favorite-prompt-meta">
                        <span>Subset: ${escapeHtml(data.subset)}</span>
                        <span>Experiment: ${escapeHtml(data.exp_name)}</span>
                        <span>Index: ${escapeHtml(String(data.sample_index))}</span>
                        ${data.task_type ? `<span>Task: ${escapeHtml(data.task_type)}</span>` : ''}
                        ${data.prompt_source ? `<span>Source: ${escapeHtml(data.prompt_source)}</span>` : ''}
                    </div>
                </div>
                <div class="favorite-prompt-actions">
                    <button class="btn-unfavorite" data-subset="${escapeHtml(fav.subset)}" data-exp-name="${escapeHtml(fav.exp_name)}" data-sample-index="${escapeHtml(String(fav.sample_index))}">Remove</button>
                </div>
            </div>
            ${inputImagesHtml}
            ${modelsHtml}
        </div>
    `;
}
2243
+
2244
/**
 * Build the HTML card shown for a favorite whose data could not be loaded.
 * It shows the stored identifying fields, an error note and a Remove button.
 *
 * @param {Object} fav - Stored favorite (`subset`, `exp_name`,
 *   `sample_index`, optional `instruction`).
 * @returns {string} HTML markup of the card.
 */
function renderFavoritePromptCardError(fav) {
    const subsetHtml = escapeHtml(fav.subset);
    const experimentHtml = escapeHtml(fav.exp_name);
    const headlineHtml = escapeHtml(fav.instruction || 'Failed to load');
    const sampleIndex = fav.sample_index;

    return `
        <div class="favorite-prompt-card">
            <div class="favorite-prompt-header">
                <div class="favorite-prompt-info">
                    <div class="favorite-prompt-instruction">${headlineHtml}</div>
                    <div class="favorite-prompt-meta">
                        <span>Subset: ${subsetHtml}</span>
                        <span>Experiment: ${experimentHtml}</span>
                        <span>Index: ${sampleIndex}</span>
                        <span style="color: var(--accent-red);">Error loading data</span>
                    </div>
                </div>
                <div class="favorite-prompt-actions">
                    <button class="btn-unfavorite" data-subset="${subsetHtml}" data-exp-name="${experimentHtml}" data-sample-index="${sampleIndex}">Remove</button>
                </div>
            </div>
        </div>
    `;
}
2264
+
2265
/**
 * Build the "Input Images" section of a favorite card.
 *
 * @param {string} subset - Subset name used in the image URL.
 * @param {number} sampleIndex - Sample index used in the image URL.
 * @param {number} count - Number of input images; 0 yields an empty string.
 * @returns {string} HTML markup of the section.
 */
function renderFavoriteInputImages(subset, sampleIndex, count) {
    if (count === 0) {
        return '';
    }

    const tiles = [];
    for (let position = 0; position < count; position += 1) {
        const source = `images/${subset}/input/${sampleIndex}/${position}`;
        tiles.push(`
            <div class="favorite-input-image">
                <img src="${source}" alt="Input ${position + 1}" loading="lazy">
            </div>
        `);
    }
    const imagesHtml = tiles.join('');

    return `
        <div class="favorite-input-section">
            <div class="favorite-input-title">Input Images (${count})</div>
            <div class="favorite-input-images">
                ${imagesHtml}
            </div>
        </div>
    `;
}
2287
+
2288
/**
 * Build the "Model Outputs" grid of a favorite card.
 *
 * Models are rendered in the order given; the first three receive rank
 * classes and medal badges.
 *
 * @param {string} subset - Subset name used in the image URL.
 * @param {number} sampleIndex - Sample index used in the image URL.
 * @param {Array<Object>} models - Entries with `model`, `win_rate`, `wins`,
 *   `losses` and `ties`.
 * @returns {string} HTML markup of the section.
 */
function renderFavoriteModelsGrid(subset, sampleIndex, models) {
    if (models.length === 0) {
        return `
            <div class="favorite-models-section">
                <div class="favorite-models-title">Model Outputs</div>
                <p class="placeholder">No model outputs available</p>
            </div>
        `;
    }

    // Medal for each podium position, indexed by rank - 1.
    const medals = ['🥇', '🥈', '🥉'];

    const cards = models.map((entry, position) => {
        const rank = position + 1;
        const medal = medals[position];
        const rankClass = medal ? `rank-${rank}` : '';
        const rankBadge = medal
            ? `<span class="favorite-model-rank rank-${rank}">${medal}</span>`
            : '';
        const imgUrl = `images/${subset}/${encodeURIComponent(entry.model)}/${sampleIndex}`;
        const winRatePercent = (entry.win_rate * 100).toFixed(1);
        const displayName = escapeHtml(getModelDisplayName(entry.model));

        return `
            <div class="favorite-model-card ${rankClass}">
                <div class="favorite-model-image">
                    <img src="${imgUrl}" alt="${escapeHtml(entry.model)}" data-label="${displayName}" loading="lazy">
                </div>
                <div class="favorite-model-info">
                    <div class="favorite-model-name">${rankBadge}${displayName}</div>
                    <div class="favorite-model-stats">
                        <span class="win-rate">${winRatePercent}%</span>
                        (<span class="wins">${entry.wins}W</span>/<span class="losses">${entry.losses}L</span>/<span class="ties">${entry.ties}T</span>)
                    </div>
                </div>
            </div>
        `;
    });
    const modelsHtml = cards.join('');

    return `
        <div class="favorite-models-section">
            <div class="favorite-models-title">Model Outputs (sorted by win rate)</div>
            <div class="favorite-models-grid">
                ${modelsHtml}
            </div>
        </div>
    `;
}
2341
+
2342
+ // ========== Utility Functions ==========
2343
/**
 * Escape a value for safe insertion into HTML text or a quoted attribute.
 *
 * Replaces `&`, `<`, `>`, `"` and `'` with character references. Quotes are
 * included because the result is interpolated into attribute values
 * (`alt="..."`, `value="..."`, `data-*="..."`), where an unescaped quote
 * would end the attribute.
 *
 * @param {*} text - Value to escape; non-strings are converted with String().
 * @returns {string} Escaped text, or '' for null / undefined.
 */
function escapeHtml(text) {
    // Only null/undefined collapse to empty; 0 and other falsy values are
    // legitimate content and must remain visible.
    if (text === null || text === undefined) {
        return '';
    }
    const replacements = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    };
    return String(text).replace(/[&<>"']/g, (ch) => replacements[ch]);
}
2349
+
2350
/**
 * Shorten `text` to at most `maxLen` characters by replacing its middle
 * with '..'.
 *
 * Text that already fits (or is empty / nullish) is returned unchanged.
 * When `maxLen` leaves no room for characters around the marker, the result
 * is just '..' (or a plain prefix when `maxLen` is below 2), so the output
 * is never longer than `maxLen`.
 *
 * @param {string} text - Text to shorten.
 * @param {number} [maxLen=10] - Maximum length of the result.
 * @returns {string} The original or shortened text.
 */
function truncateMiddle(text, maxLen = 10) {
    if (!text || text.length <= maxLen) return text;

    // Not even the two-character marker fits: fall back to a plain prefix.
    if (maxLen < 2) return text.slice(0, Math.max(0, maxLen));

    const half = Math.floor((maxLen - 2) / 2);
    // With half === 0, slice(-0) would return the whole string and make the
    // result longer than the input, so emit only the marker.
    if (!(half > 0)) return '..';

    return `${text.slice(0, half)}..${text.slice(-half)}`;
}
2356
+
2357
+ // ========== Event Handlers ==========
2358
// Subset dropdown: selecting another subset discards the experiment, the
// page, the search text and every filter, resets the dependent controls and
// lists, then loads the new subset's info and ELO leaderboard.
elements.subsetSelect.addEventListener('change', (event) => {
    const hide = (node) => {
        node.style.display = 'none';
    };

    state.subset = event.target.value || null;
    state.experiment = null;
    state.page = 1;
    state.searchQuery = '';

    // Filters never carry over between subsets.
    state.filters = { models: [], result: null, consistent: null, minImages: null, maxImages: null, promptSource: null };
    elements.resultFilter.value = '';
    elements.consistencyFilter.value = '';
    hide(elements.resultFilterGroup);

    // The search box may be absent from the page.
    if (elements.searchInput) {
        elements.searchInput.value = '';
    }

    // Hidden here; loadSubsetInfo shows it again when applicable.
    hide(elements.imageCountFilterGroup);

    // Experiment dropdown back to its disabled placeholder state.
    elements.expSelect.innerHTML = '<option value="">Select experiment...</option>';
    elements.expSelect.disabled = true;

    // Model checkboxes and their counter.
    elements.modelCheckboxes.innerHTML = '';
    updateModelCount();

    // Prompt source filter.
    elements.promptSourceFilter.innerHTML = '<option value="">All sources</option>';
    hide(elements.promptSourceFilterGroup);

    // Head-to-head section.
    hide(elements.h2hSection);

    // Both result lists return to their placeholders.
    elements.battleList.innerHTML = '<div class="empty-state"><p>Select an experiment to view battles</p></div>';
    elements.promptsList.innerHTML = '<div class="empty-state"><p>Select an experiment to view prompts</p></div>';

    if (state.subset) {
        loadSubsetInfo(state.subset);
        loadEloLeaderboard();
    }

    syncStateToURL();
});
2404
+
2405
// Experiment dropdown: remember the choice, return to the first page, load
// the current view when an experiment is selected, and sync the URL.
elements.expSelect.addEventListener('change', ({ target }) => {
    const chosenExperiment = target.value || null;
    state.experiment = chosenExperiment;
    state.page = 1;

    if (chosenExperiment) {
        loadCurrentView();
    }

    syncStateToURL();
});
2415
+
2416
// "Select all" button of the model filter: tick every model checkbox, then
// refresh the model selection.
elements.selectAllModels.addEventListener('click', () => {
    const boxes = elements.modelCheckboxes.querySelectorAll('input[type="checkbox"]');
    boxes.forEach((box) => {
        box.checked = true;
    });
    updateModelSelection();
});
2423
+
2424
+ elements.clearAllModels.addEventListener('click', () => {
2425
+ elements.modelCheckboxes.querySelectorAll('input[type="checkbox"]').forEach(cb => {
2426
+ cb.checked = false;
2427
+ });
2428
+ updateModelSelection();
2429
+ });
2430
+
2431
// Image range slider handlers.
// The two sliders are kept consistent with each other: dragging one past the
// other drags the other along, so the minimum never exceeds the maximum.
// Slider values are parsed with an explicit radix of 10.
elements.minImagesSlider.addEventListener('input', () => {
    const minVal = Number.parseInt(elements.minImagesSlider.value, 10);
    const maxVal = Number.parseInt(elements.maxImagesSlider.value, 10);
    if (minVal > maxVal) {
        // Minimum was dragged above the maximum: pull the maximum up with it.
        elements.maxImagesSlider.value = minVal;
    }
    updateImageRangeDisplay();
});

elements.maxImagesSlider.addEventListener('input', () => {
    const minVal = Number.parseInt(elements.minImagesSlider.value, 10);
    const maxVal = Number.parseInt(elements.maxImagesSlider.value, 10);
    if (maxVal < minVal) {
        // Maximum was dragged below the minimum: push the minimum down with it.
        elements.minImagesSlider.value = maxVal;
    }
    updateImageRangeDisplay();
});
2449
+
2450
// "Apply filters" button: copy the filter controls into state.filters, rewind
// to page 1, reload the active view and persist the state to the URL.
elements.applyFilters.addEventListener('click', () => {
    state.filters.models = getSelectedModels();
    state.filters.result = elements.resultFilter.value || null;
    state.filters.consistent = elements.consistencyFilter.value || null;
    state.filters.promptSource = elements.promptSourceFilter.value || null;

    // The image-count filter only applies when its control group is visible
    // AND the chosen range is narrower than the full range; in every other
    // case both bounds stay null (meaning "no filter").
    let minImages = null;
    let maxImages = null;
    if (elements.imageCountFilterGroup.style.display !== 'none') {
        const minVal = Number.parseInt(elements.minImagesSlider.value, 10);
        const maxVal = Number.parseInt(elements.maxImagesSlider.value, 10);
        if (minVal > state.imageRange.min || maxVal < state.imageRange.max) {
            minImages = minVal;
            maxImages = maxVal;
        }
    }
    state.filters.minImages = minImages;
    state.filters.maxImages = maxImages;

    state.page = 1;
    loadCurrentView();
    syncStateToURL();
});
2477
+
2478
// "Clear filters" button: reset every filter and the search query in state,
// reset the matching controls, then reload page 1 of the active view and
// persist the cleared state to the URL.
elements.clearFilters.addEventListener('click', () => {
    state.filters = { models: [], result: null, consistent: null, minImages: null, maxImages: null, promptSource: null };
    state.searchQuery = '';
    elements.modelCheckboxes.querySelectorAll('input[type="checkbox"]').forEach((cb) => {
        cb.checked = false;
    });
    elements.resultFilter.value = '';
    elements.consistencyFilter.value = '';
    elements.promptSourceFilter.value = '';
    elements.resultFilterGroup.style.display = 'none';
    elements.h2hSection.style.display = 'none';

    // Clear search input if exists
    if (elements.searchInput) {
        elements.searchInput.value = '';
    }

    // The search query was just cleared, so the "clear search" button must be
    // hidden as well (clearSearch() does the same); otherwise it stays visible
    // next to an empty search box.
    if (elements.clearSearchBtn) {
        elements.clearSearchBtn.style.display = 'none';
    }

    // Reset image range sliders to the full range (only when the group is shown)
    if (elements.imageCountFilterGroup.style.display !== 'none') {
        elements.minImagesSlider.value = state.imageRange.min;
        elements.maxImagesSlider.value = state.imageRange.max;
        updateImageRangeDisplay();
    }

    updateModelCount();
    state.page = 1;
    loadCurrentView();
    syncStateToURL();
});
2507
+
2508
+ // Pagination handlers
2509
/**
 * Jump to the first page. Does nothing when already on page 1.
 * On a change, reloads the active view and syncs the URL.
 */
function goToFirstPage() {
    const alreadyFirst = !(state.page > 1);
    if (alreadyFirst) {
        return;
    }

    state.page = 1;
    loadCurrentView();
    syncStateToURL();
}
2516
+
2517
/**
 * Step back one page. Does nothing when already on page 1.
 * On a change, reloads the active view and syncs the URL.
 */
function goToPrevPage() {
    const atStart = !(state.page > 1);
    if (atStart) {
        return;
    }

    state.page -= 1;
    loadCurrentView();
    syncStateToURL();
}
2524
+
2525
/**
 * Step forward one page. Does nothing when already on the last page
 * (state.totalPages). On a change, reloads the active view and syncs the URL.
 */
function goToNextPage() {
    const atEnd = !(state.page < state.totalPages);
    if (atEnd) {
        return;
    }

    state.page += 1;
    loadCurrentView();
    syncStateToURL();
}
2532
+
2533
/**
 * Jump to the last page (state.totalPages). Does nothing when already there.
 * On a change, reloads the active view and syncs the URL.
 */
function goToLastPage() {
    const lastPage = state.totalPages;
    if (!(state.page < lastPage)) {
        return;
    }

    state.page = lastPage;
    loadCurrentView();
    syncStateToURL();
}
2540
+
2541
// Wire the top and the bottom pagination bars to the same four handlers.
for (const [button, handler] of [
    [elements.firstPage, goToFirstPage],
    [elements.prevPage, goToPrevPage],
    [elements.nextPage, goToNextPage],
    [elements.lastPage, goToLastPage],
    [elements.firstPageBottom, goToFirstPage],
    [elements.prevPageBottom, goToPrevPage],
    [elements.nextPageBottom, goToNextPage],
    [elements.lastPageBottom, goToLastPage],
]) {
    button.addEventListener('click', handler);
}
2549
+
2550
+ // Page input handlers
2551
/**
 * Jump to a specific page typed by the user.
 *
 * The request is ignored unless the value parses to an integer between 1 and
 * state.totalPages (inclusive) that differs from the current page. On a
 * change, reloads the active view and syncs the URL.
 *
 * @param {string|number} pageNum - Raw page value, typically an input's text.
 */
function goToPage(pageNum) {
    // Explicit radix: without it a typed "0x10" would be read as page 16.
    const page = Number.parseInt(pageNum, 10);
    const inRange = !Number.isNaN(page) && page >= 1 && page <= state.totalPages;
    if (!inRange || page === state.page) {
        return;
    }

    state.page = page;
    loadCurrentView();
    syncStateToURL();
}
2559
+
2560
/**
 * Keydown handler for the page-number inputs: pressing Enter submits the
 * typed value to goToPage() and then empties the input. Other keys are ignored.
 *
 * @param {KeyboardEvent} e - Keydown event whose target is the page input.
 */
function handlePageInputKeydown(e) {
    if (e.key !== 'Enter') {
        return;
    }

    const field = e.target;
    goToPage(field.value);
    field.value = '';
}
2566
+
2567
// "Go" buttons: submit the value of the paired page input, then empty it.
// The input is looked up at click time, once per click.
for (const [goButton, inputKey] of [
    [elements.pageGo, 'pageInput'],
    [elements.pageGoBottom, 'pageInputBottom'],
]) {
    goButton.addEventListener('click', () => {
        const field = elements[inputKey];
        goToPage(field.value);
        field.value = '';
    });
}

// Pressing Enter inside either page input behaves like its "Go" button.
for (const field of [elements.pageInput, elements.pageInputBottom]) {
    field.addEventListener('keydown', handlePageInputKeydown);
}
2579
+
2580
// Modal handlers: both the close button and the backdrop dismiss the modal.
for (const closer of [elements.modalClose, elements.modalBackdrop]) {
    closer.addEventListener('click', hideModal);
}

// Lightbox handlers
elements.lightboxClose.addEventListener('click', closeLightbox);
elements.lightbox.addEventListener('click', ({ target }) => {
    // Only a click on the lightbox container itself or on its label closes
    // it; clicks on other children (e.g. the image) are left alone.
    const hitBackdrop = target === elements.lightbox || target === elements.lightboxLabel;
    if (hitBackdrop) {
        closeLightbox();
    }
});

// Favorites handlers
elements.favoritesBtn.addEventListener('click', showFavoritesModal);
for (const closer of [elements.favoritesModalClose, elements.favoritesModalBackdrop]) {
    closer.addEventListener('click', hideFavoritesModal);
}
elements.clearAllFavorites.addEventListener('click', clearAllFavorites);
2598
+
2599
// Keyboard shortcuts.
//
// Priority order: an open lightbox, then any open modal (in the order listed
// below), then page-level shortcuts. While an overlay is open it swallows
// every key; only Escape does something (closes that overlay).
document.addEventListener('keydown', (e) => {
    const isEscape = e.key === 'Escape';

    // Handle lightbox first (highest priority)
    if (elements.lightbox.classList.contains('active')) {
        if (isEscape) {
            closeLightbox();
        }
        return;
    }

    // Modals in priority order, each paired with the function that hides it.
    // A modal element missing from the `elements` map is skipped.
    const modalLayers = [
        [elements.modelStatsModal, hideModelStatsModal],
        [elements.leaderboardModal, hideLeaderboardModal],
        [elements.matrixModal, hideMatrixModal],
        [elements.eloBySourceModal, hideEloBySourceModal],
        [elements.crossSubsetModal, hideCrossSubsetModal],
        [elements.eloHistoryModal, hideEloHistoryModal],
        [elements.favoritesModal, hideFavoritesModal],
        [elements.modal, hideModal],
    ];
    for (const [modalEl, hide] of modalLayers) {
        if (modalEl && !modalEl.classList.contains('hidden')) {
            if (isEscape) {
                hide();
            }
            return;
        }
    }

    // Leave browser/OS chords alone: without this check Ctrl+F / Cmd+F would
    // also open the favorites modal, and Alt+Left / Cmd+Left would also page.
    if (e.ctrlKey || e.metaKey || e.altKey) {
        return;
    }

    // Only when not typing into a form control or editable region
    const active = document.activeElement;
    if (active && (['SELECT', 'INPUT', 'TEXTAREA'].includes(active.tagName) || active.isContentEditable)) {
        return;
    }

    switch (e.key) {
        case 'j':
        case 'ArrowRight':
            goToNextPage();
            break;
        case 'k':
        case 'ArrowLeft':
            goToPrevPage();
            break;
        case 'Home':
            goToFirstPage();
            break;
        case 'End':
            goToLastPage();
            break;
        case 'f':
        case 'F':
            showFavoritesModal();
            break;
        default:
            break;
    }
});
2688
+
2689
+ // ========== View Toggle Functions ==========
2690
/**
 * Switch the gallery between the "battles" and "prompts" views.
 *
 * Records the view in state, rewinds to page 1, updates the toggle buttons,
 * shows the matching list and view-specific filter groups, hides the H2H
 * section outside the battles view, then reloads data and syncs the URL.
 *
 * @param {string} viewMode - 'battles' or 'prompts'.
 */
function switchToView(viewMode) {
    console.log('switchToView called:', viewMode);

    const showBattles = viewMode === 'battles';
    const showPrompts = viewMode === 'prompts';

    // Applies block/none to every element matching a view-specific selector.
    const setGroupVisible = (selector, visible) => {
        for (const node of document.querySelectorAll(selector)) {
            node.style.display = visible ? 'block' : 'none';
        }
    };

    state.viewMode = viewMode;
    state.page = 1;

    // Toggle buttons
    elements.viewBattlesBtn.classList.toggle('active', showBattles);
    elements.viewPromptsBtn.classList.toggle('active', showPrompts);

    // Result lists
    elements.battleList.style.display = showBattles ? 'flex' : 'none';
    elements.promptsList.style.display = showPrompts ? 'flex' : 'none';

    // View-specific filter groups
    setGroupVisible('.battles-only', showBattles);
    setGroupVisible('.prompts-only', showPrompts);

    // The H2H section is hidden in any view other than battles
    if (!showBattles) {
        elements.h2hSection.style.display = 'none';
    }

    loadCurrentView();
    syncStateToURL();
}
2722
+
2723
/**
 * Load data for whichever view state.viewMode names: loadBattles() for
 * 'battles', loadPrompts() for 'prompts'. Any other value loads nothing.
 */
function loadCurrentView() {
    console.log('loadCurrentView called, viewMode:', state.viewMode);

    switch (state.viewMode) {
        case 'battles':
            loadBattles();
            break;
        case 'prompts':
            loadPrompts();
            break;
        default:
            break;
    }
}
2731
+
2732
// View toggle event handlers: each button switches to its own view mode.
for (const [button, mode] of [
    [elements.viewBattlesBtn, 'battles'],
    [elements.viewPromptsBtn, 'prompts'],
]) {
    button.addEventListener('click', () => switchToView(mode));
}
2735
+
2736
+ // ========== Search Functions ==========
2737
+
2738
/**
 * Run a search with the trimmed text of the search input.
 *
 * Does nothing unless both a subset and an experiment are selected.
 * Otherwise stores the query, rewinds to page 1, shows the clear-search
 * button only for a non-empty query, reloads the active view and syncs the URL.
 */
async function performSearch() {
    const input = elements.searchInput;
    const query = input ? input.value.trim() : '';

    if (!(state.subset && state.experiment)) {
        return;
    }

    state.searchQuery = query;
    state.page = 1;

    // Show/hide clear button
    const clearBtn = elements.clearSearchBtn;
    if (clearBtn) {
        clearBtn.style.display = query ? 'inline-block' : 'none';
    }

    loadCurrentView();
    syncStateToURL();
}
2756
+
2757
/**
 * Clear the search: empty the input and the stored query, rewind to page 1,
 * hide the clear-search button, reload the active view and sync the URL.
 */
function clearSearch() {
    const { searchInput, clearSearchBtn } = elements;

    if (searchInput) {
        searchInput.value = '';
    }
    if (clearSearchBtn) {
        clearSearchBtn.style.display = 'none';
    }

    state.searchQuery = '';
    state.page = 1;

    loadCurrentView();
    syncStateToURL();
}
2771
+
2772
// Search event handlers. Each control is optional, so every hookup is guarded.
if (elements.searchInput) {
    // Enter inside the search box triggers the search.
    elements.searchInput.addEventListener('keydown', ({ key }) => {
        if (key === 'Enter') {
            performSearch();
        }
    });
}

if (elements.searchBtn) {
    elements.searchBtn.addEventListener('click', performSearch);
}

if (elements.clearSearchBtn) {
    elements.clearSearchBtn.addEventListener('click', clearSearch);
}
2788
+
2789
+ // ========== Prompts Model Filter Functions ==========
2790
+
2791
/**
 * Render the model checkbox list for the prompts view.
 *
 * Shows a placeholder when state.models is empty or missing. Otherwise emits
 * one checkbox per model (pre-checked when listed in
 * state.promptsModelFilter), attaches a change listener to each and refreshes
 * the selected-count label. Does nothing if the container element is absent.
 */
function renderPromptsModelCheckboxes() {
    const container = elements.promptsModelCheckboxes;
    const available = state.models || [];

    if (!container) {
        return;
    }

    if (available.length === 0) {
        container.innerHTML = '<p class="placeholder">No models available</p>';
        return;
    }

    const rows = [];
    for (const model of available) {
        const safeName = escapeHtml(model);
        const checkedAttr = state.promptsModelFilter.includes(model) ? 'checked' : '';
        rows.push(`
            <div class="checkbox-item">
                <input type="checkbox" id="prompts-model-${safeName}" value="${safeName}"
                    ${checkedAttr}>
                <label for="prompts-model-${safeName}">${escapeHtml(getModelDisplayName(model))}</label>
            </div>
        `);
    }
    container.innerHTML = rows.join('');

    // Add change listeners
    for (const box of container.querySelectorAll('input[type="checkbox"]')) {
        box.addEventListener('change', updatePromptsModelFilter);
    }

    updatePromptsModelCount();
}
2817
+
2818
/**
 * Rebuild state.promptsModelFilter from the currently checked boxes in the
 * prompts model list (empty when the list element is absent), then refresh
 * the selected-count label.
 */
function updatePromptsModelFilter() {
    const container = elements.promptsModelCheckboxes;
    const picked = container
        ? Array.from(container.querySelectorAll('input[type="checkbox"]:checked'), (box) => box.value)
        : [];

    state.promptsModelFilter = picked;
    updatePromptsModelCount();
}
2828
+
2829
/**
 * Update the "(N of M selected)" label next to the prompts model filter.
 *
 * N is the length of state.promptsModelFilter, M the number of models in
 * state.models. Shows "(0 selected)" when nothing is selected. Does nothing
 * if the label element is absent.
 */
function updatePromptsModelCount() {
    const label = elements.promptsModelCount;
    if (!label) {
        return;
    }

    const count = state.promptsModelFilter.length;
    // state.models may not be populated yet; renderPromptsModelCheckboxes()
    // tolerates that with the same fallback, so this must not throw either.
    const total = (state.models || []).length;

    label.textContent = count > 0 ? `(${count} of ${total} selected)` : '(0 selected)';
}
2836
+
2837
/**
 * Apply the prompts model filter: rewind to page 1 and reload the prompts.
 *
 * The URL is synced as well, like every other handler in this file that
 * changes state.page; otherwise a stale `p` parameter would stay in the
 * address bar after the page was reset.
 */
function applyPromptsModelFilter() {
    state.page = 1;
    loadPrompts();
    syncStateToURL();
}
2841
+
2842
// Prompts model filter event handlers. All three buttons are optional.
// "Select all" / "Clear all" set every checkbox, recompute the filter and
// apply it immediately; they do nothing if the checkbox list is absent.
if (elements.promptsSelectAllModels) {
    elements.promptsSelectAllModels.addEventListener('click', () => {
        const container = elements.promptsModelCheckboxes;
        if (!container) {
            return;
        }
        for (const box of container.querySelectorAll('input[type="checkbox"]')) {
            box.checked = true;
        }
        updatePromptsModelFilter();
        applyPromptsModelFilter();
    });
}

if (elements.promptsClearAllModels) {
    elements.promptsClearAllModels.addEventListener('click', () => {
        const container = elements.promptsModelCheckboxes;
        if (!container) {
            return;
        }
        for (const box of container.querySelectorAll('input[type="checkbox"]')) {
            box.checked = false;
        }
        updatePromptsModelFilter();
        applyPromptsModelFilter();
    });
}

if (elements.promptsApplyModelFilter) {
    elements.promptsApplyModelFilter.addEventListener('click', () => {
        applyPromptsModelFilter();
    });
}
2872
+
2873
+ // ========== Favorites Model Filter Functions ==========
2874
+
2875
/**
 * Render the model filter shown in the favorites modal.
 *
 * The list is built from the current subset's models (state.models),
 * de-duplicated and sorted. Rows listed in state.favoritesModelFilter are
 * pre-checked and marked `selected`. Clicking anywhere on a row toggles its
 * checkbox and updates the filter. Finally the stats-scope checkbox is synced
 * with state.favoritesStatsScope. With no models, a placeholder is shown and
 * the function returns early.
 */
function renderFavoritesModelFilter() {
    const container = elements.favoritesModelCheckboxes;

    // Only the current subset's models are used as the source.
    const models = Array.from(new Set(state.models || [])).sort();

    if (models.length === 0) {
        if (container) {
            container.innerHTML = '<p class="placeholder">No models available</p>';
        }
        return;
    }

    if (container) {
        // Set lookup instead of repeated Array.includes() per row.
        const active = new Set(state.favoritesModelFilter);

        container.innerHTML = models.map((model) => {
            const isOn = active.has(model);
            const value = escapeHtml(model);
            return `
                <div class="checkbox-item${isOn ? ' selected' : ''}" data-model="${value}">
                    <input type="checkbox" value="${value}"
                        ${isOn ? 'checked' : ''}>
                    <span class="checkbox-label">${escapeHtml(getModelDisplayName(model))}</span>
                </div>
            `;
        }).join('');

        // Add click listeners for the entire item
        container.querySelectorAll('.checkbox-item').forEach((item) => {
            item.addEventListener('click', (event) => {
                const checkbox = item.querySelector('input[type="checkbox"]');
                if (!checkbox) {
                    return;
                }
                // When the click landed on the checkbox itself, the browser has
                // already flipped it before this bubbled handler runs; flipping
                // it again would undo the user's click. Only toggle manually
                // for clicks elsewhere on the row.
                if (event.target !== checkbox) {
                    checkbox.checked = !checkbox.checked;
                }
                item.classList.toggle('selected', checkbox.checked);
                updateFavoritesModelFilter();
            });
        });
    }

    // Sync the stats scope toggle checkbox with current state
    if (elements.favoritesStatsScopeAll) {
        elements.favoritesStatsScopeAll.checked = state.favoritesStatsScope === 'all';
    }
}
2920
+
2921
/**
 * Rebuild state.favoritesModelFilter from the currently checked boxes in the
 * favorites model list (empty when the list element is absent).
 */
function updateFavoritesModelFilter() {
    const container = elements.favoritesModelCheckboxes;

    state.favoritesModelFilter = container
        ? Array.from(container.querySelectorAll('input[type="checkbox"]:checked'), (box) => box.value)
        : [];
}
2930
+
2931
/**
 * Apply the favorites model filter by re-rendering the favorites modal.
 */
function applyFavoritesModelFilter() {
    renderFavoritesModal();
}
2934
+
2935
// Favorites model filter event handlers. Every control is optional.
// "Select all" / "Clear all" update each row's checkbox together with its
// `selected` class, then recompute and apply the filter; they do nothing if
// the checkbox list is absent.
if (elements.favoritesSelectAllModels) {
    elements.favoritesSelectAllModels.addEventListener('click', () => {
        const container = elements.favoritesModelCheckboxes;
        if (!container) {
            return;
        }
        for (const row of container.querySelectorAll('.checkbox-item')) {
            const box = row.querySelector('input[type="checkbox"]');
            if (!box) {
                continue;
            }
            box.checked = true;
            row.classList.add('selected');
        }
        updateFavoritesModelFilter();
        applyFavoritesModelFilter();
    });
}

if (elements.favoritesClearAllModels) {
    elements.favoritesClearAllModels.addEventListener('click', () => {
        const container = elements.favoritesModelCheckboxes;
        if (!container) {
            return;
        }
        for (const row of container.querySelectorAll('.checkbox-item')) {
            const box = row.querySelector('input[type="checkbox"]');
            if (!box) {
                continue;
            }
            box.checked = false;
            row.classList.remove('selected');
        }
        updateFavoritesModelFilter();
        applyFavoritesModelFilter();
    });
}

if (elements.favoritesApplyModelFilter) {
    elements.favoritesApplyModelFilter.addEventListener('click', () => {
        applyFavoritesModelFilter();
    });
}

// Stats scope toggle - controls whether win rate includes all opponents or only filtered models
if (elements.favoritesStatsScopeAll) {
    elements.favoritesStatsScopeAll.addEventListener('change', ({ target }) => {
        state.favoritesStatsScope = target.checked ? 'all' : 'filtered';
        applyFavoritesModelFilter();
    });
}
2981
+
2982
+ // ========== ELO Leaderboard Event Handlers ==========
2983
// Click wiring for the ELO leaderboard and model stats modals. Each element
// is optional; a missing one is skipped.
for (const [node, onClick] of [
    [elements.viewFullLeaderboard, () => { loadFullLeaderboard(); }],
    [elements.leaderboardModalClose, hideLeaderboardModal],
    [elements.leaderboardModalBackdrop, hideLeaderboardModal],
    [elements.modelStatsModalClose, hideModelStatsModal],
    [elements.modelStatsModalBackdrop, hideModelStatsModal],
]) {
    if (node) {
        node.addEventListener('click', onClick);
    }
}
3004
+
3005
+ // ========== URL State Management ==========
3006
+
3007
/**
 * Mirror the current UI state into the URL query string.
 *
 * Parameters are written only on the gallery page with a subset selected, and
 * only when they differ from their defaults (view 'battles', page 1, empty
 * search, unset filters). In every other case the URL is reset to the bare
 * pathname. The history entry is replaced, not pushed.
 */
function syncStateToURL() {
    const query = new URLSearchParams();
    const onGallery = state.currentPage === 'gallery' && state.subset;

    if (onGallery) {
        const { filters } = state;

        query.set('subset', state.subset);
        if (state.experiment) {
            query.set('exp', state.experiment);
        }

        // View mode (only if not default)
        if (state.viewMode !== 'battles') {
            query.set('view', state.viewMode);
        }

        // Page number (only if not 1)
        if (state.page > 1) {
            query.set('p', state.page);
        }

        // Search query
        if (state.searchQuery) {
            query.set('q', state.searchQuery);
        }

        // Filters
        if (filters.models && filters.models.length > 0) {
            query.set('models', filters.models.join(','));
        }
        if (filters.result) {
            query.set('result', filters.result);
        }
        if (filters.consistent !== null) {
            query.set('consistent', filters.consistent);
        }
        if (filters.minImages !== null) {
            query.set('minImg', filters.minImages);
        }
        if (filters.maxImages !== null) {
            query.set('maxImg', filters.maxImages);
        }
        if (filters.promptSource) {
            query.set('source', filters.promptSource);
        }
    }
    // Overview page has no params (default)

    const serialized = query.toString();
    const target = serialized ? `?${serialized}` : window.location.pathname;
    window.history.replaceState({}, '', target);
}
3044
+
3045
/**
 * Read UI state from the URL query string on page load.
 *
 * When a `subset` parameter is present, the parsed state is stored in
 * window._urlStateToRestore and applied later by applyURLState().
 * Malformed numeric parameters are dropped instead of being stored as NaN:
 * an unparsable or non-positive `p` falls back to page 1, and unparsable
 * `minImg` / `maxImg` are treated as unset (null).
 *
 * @returns {boolean} true if there was state to restore, false otherwise.
 */
function loadStateFromURL() {
    const params = new URLSearchParams(window.location.search);

    // Gallery state is only present when a subset is named in the URL.
    if (!params.has('subset')) {
        // Default to overview (no state to restore)
        return false;
    }

    // Integer parameter, or null when absent or not a number.
    const intParam = (name) => {
        if (!params.has(name)) {
            return null;
        }
        const parsed = Number.parseInt(params.get(name), 10);
        return Number.isNaN(parsed) ? null : parsed;
    };

    const page = intParam('p');
    const modelsParam = params.get('models');

    window._urlStateToRestore = {
        currentPage: 'gallery',
        subset: params.get('subset'),
        experiment: params.get('exp'),
        viewMode: params.get('view') || 'battles',
        page: page !== null && page >= 1 ? page : 1,
        searchQuery: params.get('q') || '',
        filters: {
            // filter(Boolean) drops empty names produced by stray commas
            models: modelsParam ? modelsParam.split(',').filter(Boolean) : [],
            result: params.get('result') || null,
            consistent: params.has('consistent') ? params.get('consistent') : null,
            minImages: intParam('minImg'),
            maxImages: intParam('maxImg'),
            promptSource: params.get('source') || null,
        },
    };
    return true;
}
3076
+
3077
/**
 * Apply the state captured by loadStateFromURL() (window._urlStateToRestore).
 *
 * Switches to the gallery page, selects the subset, waits for its info and
 * ELO leaderboard to load, then restores experiment, view mode, page, search
 * query and filters before loading the active view. The stored URL state is
 * cleared afterwards. Does nothing when there is no stored state.
 */
async function applyURLState() {
    const urlState = window._urlStateToRestore;
    if (!urlState) return;

    // Handle gallery page
    if (urlState.currentPage === 'gallery' && urlState.subset) {
        // Switch to gallery page first
        switchToPage('gallery');

        elements.subsetSelect.value = urlState.subset;
        state.subset = urlState.subset;
        await loadSubsetInfo(urlState.subset);
        await loadEloLeaderboard();

        // Set experiment
        if (urlState.experiment) {
            elements.expSelect.value = urlState.experiment;
            state.experiment = urlState.experiment;
        }

        // Set view mode. 'prompts' is the only non-default view; any other
        // value from a hand-edited URL is ignored so the UI stays on the
        // default battles view instead of hiding both lists.
        if (urlState.viewMode === 'prompts') {
            state.viewMode = 'prompts';
            elements.viewBattlesBtn.classList.toggle('active', false);
            elements.viewPromptsBtn.classList.toggle('active', true);
            elements.battleList.style.display = 'none';
            elements.promptsList.style.display = 'flex';

            // Keep the view-specific filter groups in step with the restored
            // view, exactly as switchToView('prompts') does.
            document.querySelectorAll('.battles-only').forEach((el) => {
                el.style.display = 'none';
            });
            document.querySelectorAll('.prompts-only').forEach((el) => {
                el.style.display = 'block';
            });
        }

        // Set page
        state.page = urlState.page || 1;

        // Set search query
        if (urlState.searchQuery) {
            state.searchQuery = urlState.searchQuery;
            if (elements.searchInput) {
                elements.searchInput.value = urlState.searchQuery;
            }
            // A restored query must be clearable, as after performSearch().
            if (elements.clearSearchBtn) {
                elements.clearSearchBtn.style.display = 'inline-block';
            }
        }

        // Set filters
        const restored = urlState.filters;

        if (restored.models && restored.models.length > 0) {
            state.filters.models = restored.models;
            // Check the corresponding checkboxes
            elements.modelCheckboxes.querySelectorAll('input[type="checkbox"]').forEach((cb) => {
                cb.checked = restored.models.includes(cb.value);
            });
            updateModelCount();
            updateModelSelection();
        }

        if (restored.result) {
            state.filters.result = restored.result;
            elements.resultFilter.value = restored.result;
        }

        if (restored.consistent !== null) {
            state.filters.consistent = restored.consistent;
            elements.consistencyFilter.value = restored.consistent;
        }

        if (restored.promptSource) {
            state.filters.promptSource = restored.promptSource;
            elements.promptSourceFilter.value = restored.promptSource;
        }

        if (restored.minImages !== null) {
            state.filters.minImages = restored.minImages;
            elements.minImagesSlider.value = restored.minImages;
        }

        if (restored.maxImages !== null) {
            state.filters.maxImages = restored.maxImages;
            elements.maxImagesSlider.value = restored.maxImages;
        }

        updateImageRangeDisplay();

        // Load data if experiment is set
        if (state.experiment) {
            loadCurrentView();
        }
    }

    // Clear the stored state
    window._urlStateToRestore = null;
}
3167
+
3168
+ // ========== Win Rate Matrix Modal Functions ==========
3169
+
3170
/**
 * Open the win rate matrix modal for the current subset and fill it.
 *
 * Shows a loading placeholder and the subset name, opens the modal, then
 * fetches `api/subsets/<subset>/matrix` and renders the result. On any
 * failure the error is logged and an error message replaces the content.
 * Does nothing when no subset is selected.
 */
async function loadWinRateMatrix() {
    const subset = state.subset;
    if (!subset) {
        return;
    }

    try {
        elements.matrixContent.innerHTML = '<div class="loading">Loading matrix...</div>';
        elements.matrixSubsetName.textContent = subset;
        showMatrixModal();

        const payload = await fetchJSON(`api/subsets/${subset}/matrix`);
        renderWinRateMatrix(payload);
    } catch (error) {
        console.error('Failed to load win rate matrix:', error);
        elements.matrixContent.innerHTML = '<div class="empty-state"><p>Failed to load matrix data</p></div>';
    }
}
3185
+
3186
/**
 * Render the pairwise win rate matrix into the matrix modal.
 *
 * @param {Object} data - Matrix payload.
 * @param {string[]} data.models - Model names; used for both rows and columns.
 * @param {number[][]} data.matrix - matrix[row][col] is read as the row
 *     model's win rate against the column model and shown as a percentage.
 * @param {number[][]} data.counts - counts[row][col] is the number of battles
 *     for that pair; a count of 0 renders a "no data" cell.
 */
function renderWinRateMatrix(data) {
    const { models, matrix, counts: pairCounts } = data;

    if (!models || models.length === 0) {
        elements.matrixContent.innerHTML = '<div class="empty-state"><p>No model data available</p></div>';
        return;
    }

    // Header row: empty corner cell followed by one shortened name per model
    const headerCells = ['<th class="matrix-corner"></th>']
        .concat(models.map((model) => (
            `<th class="matrix-header-cell" title="${escapeHtml(model)}">${escapeHtml(truncateMiddle(getModelDisplayName(model), 8))}</th>`
        )))
        .join('');

    // One cell of the body: diagonal, no-data, or a coloured percentage
    const renderCell = (rowModel, rowIdx, colModel, colIdx) => {
        if (rowIdx === colIdx) {
            // Diagonal - same model
            return '<td class="matrix-cell matrix-diagonal">-</td>';
        }

        const winRate = matrix[rowIdx][colIdx];
        const battleCount = pairCounts[rowIdx][colIdx];
        if (battleCount === 0) {
            return '<td class="matrix-cell matrix-no-data" title="No battles">-</td>';
        }

        // Color based on win rate: red (0%) -> white (50%) -> green (100%)
        const bgColor = getWinRateColor(winRate);
        const textColor = getWinRateTextColor(winRate);
        const winRatePercent = (winRate * 100).toFixed(1);
        return `<td class="matrix-cell" style="background-color: ${bgColor}; color: ${textColor}" title="${escapeHtml(getModelDisplayName(rowModel))} vs ${escapeHtml(getModelDisplayName(colModel))}: ${winRatePercent}% (${battleCount} battles)">${winRatePercent}%</td>`;
    };

    // Body rows: row header followed by one cell per column model
    const bodyRows = models.map((rowModel, rowIdx) => {
        const rowHeader = `<td class="matrix-row-header">${escapeHtml(getModelDisplayName(rowModel))}</td>`;
        const cells = models.map((colModel, colIdx) => renderCell(rowModel, rowIdx, colModel, colIdx)).join('');
        return `<tr>${rowHeader}${cells}</tr>`;
    }).join('');

    elements.matrixContent.innerHTML = `
        <div class="matrix-scroll-container">
            <table class="win-rate-matrix">
                <thead><tr>${headerCells}</tr></thead>
                <tbody>${bodyRows}</tbody>
            </table>
        </div>
        <div class="matrix-legend">
            <span class="matrix-legend-label">Row model win rate vs column model:</span>
            <div class="matrix-legend-gradient">
                <span class="legend-low">0%</span>
                <div class="legend-bar"></div>
                <span class="legend-high">100%</span>
            </div>
        </div>
    `;
}
3246
+
3247
/**
 * Map a win rate to a cell background colour on a
 * red (0) -> white (0.5) -> green (1) scale.
 *
 * @param {number} winRate - Win rate as a fraction (the scale is defined for 0..1).
 * @returns {string} CSS colour in `rgb(r, g, b)` form.
 */
function getWinRateColor(winRate) {
    const toCss = (r, g, b) => `rgb(${r}, ${g}, ${b})`;

    if (winRate < 0.5) {
        // Lower half: red stays at full strength while green and blue rise
        // together from 55 to 255, fading the red toward white.
        const t = winRate * 2; // 0 to 1
        const fade = Math.round(200 * t + 55);
        return toCss(255, fade, fade);
    }

    // Upper half: red and blue drop together while green dips slightly,
    // moving from white toward green.
    const t = (winRate - 0.5) * 2; // 0 to 1
    const dim = Math.round(255 * (1 - t * 0.6));
    return toCss(dim, Math.round(255 - t * 55), dim);
}
3265
+
3266
/**
 * Pick a text colour for a win rate matrix cell.
 *
 * Win rates below 0.25 get white text (the comments in the original pair this
 * with the strong red background); every other value gets black text.
 *
 * @param {number} winRate - Win rate as a fraction.
 * @returns {string} '#fff' or '#000'.
 */
function getWinRateTextColor(winRate) {
    return winRate < 0.25 ? '#fff' : '#000';
}
3279
+
3280
/**
 * Show the win rate matrix modal and lock page scrolling while it is open.
 * Does nothing if the modal element is absent.
 */
function showMatrixModal() {
    const modal = elements.matrixModal;
    if (!modal) {
        return;
    }

    modal.classList.remove('hidden');
    document.body.style.overflow = 'hidden';
}
3286
+
3287
/**
 * Hide the win rate matrix modal and restore page scrolling.
 * Does nothing if the modal element is absent.
 */
function hideMatrixModal() {
    const modal = elements.matrixModal;
    if (!modal) {
        return;
    }

    modal.classList.add('hidden');
    document.body.style.overflow = '';
}
3293
+
3294
+ // ========== ELO by Source Modal Functions ==========
3295
+
3296
/**
 * Open the "ELO by source" modal for the current subset and fill it.
 *
 * Shows a loading placeholder and the subset name, opens the modal, then
 * fetches `api/subsets/<subset>/leaderboard/by-source` and renders the
 * result. On any failure the error is logged and an error message replaces
 * the content. Does nothing when no subset is selected.
 */
async function loadEloBySource() {
    const subset = state.subset;
    if (!subset) {
        return;
    }

    try {
        elements.eloBySourceContent.innerHTML = '<div class="loading">Loading ELO by source...</div>';
        elements.eloBySourceSubsetName.textContent = subset;
        showEloBySourceModal();

        const payload = await fetchJSON(`api/subsets/${subset}/leaderboard/by-source`);
        renderEloBySource(payload);
    } catch (error) {
        console.error('Failed to load ELO by source:', error);
        elements.eloBySourceContent.innerHTML = '<div class="empty-state"><p>Failed to load ELO by source data</p></div>';
    }
}
3311
+
3312
/**
 * Render one collapsible leaderboard section per source into the
 * "ELO by source" modal.
 *
 * Sources with leaderboard rows are rendered expanded with a ranked table;
 * sources without rows are rendered collapsed with a placeholder. Missing
 * `leaderboards` / `battle_counts` maps are treated as empty rather than
 * raising a TypeError.
 *
 * @param {Object} data - Response payload.
 * @param {string[]} data.sources - Source names, in display order.
 * @param {Object<string, Object[]>} [data.leaderboards] - Rows per source;
 *     each row is read for `model`, `elo`, `wins`, `losses`, `ties`, `win_rate`.
 * @param {Object<string, number>} [data.battle_counts] - Battle count per source.
 */
function renderEloBySource(data) {
    const payload = data || {};
    const sources = payload.sources;
    const leaderboards = payload.leaderboards || {};
    const battleCounts = payload.battle_counts || {};

    if (!sources || sources.length === 0) {
        elements.eloBySourceContent.innerHTML = '<div class="empty-state"><p>No source-specific ELO data available</p></div>';
        return;
    }

    // Shared wrapper so the empty and populated variants cannot drift apart.
    const renderSection = (sourceName, summary, contentHtml, expanded) => {
        const sectionClass = expanded ? 'source-section expanded' : 'source-section';
        return `
            <div class="${sectionClass}">
                <div class="source-section-header" onclick="this.parentElement.classList.toggle('expanded')">
                    <span class="source-name">${escapeHtml(sourceName)}</span>
                    <span class="source-battles">(${summary})</span>
                    <span class="expand-icon">▼</span>
                </div>
                <div class="source-section-content">
                    ${contentHtml}
                </div>
            </div>
        `;
    };

    const renderRow = (model, index) => {
        const rank = index + 1;
        // Only the top three ranks get a dedicated styling class.
        const rankClass = rank <= 3 ? `rank-${rank}` : '';
        const winRatePercent = (model.win_rate * 100).toFixed(1);
        return `
            <tr>
                <td class="rank-cell ${rankClass}">#${rank}</td>
                <td class="model-cell">${escapeHtml(getModelDisplayName(model.model))}</td>
                <td class="elo-cell">${Math.round(model.elo)}</td>
                <td class="stat-cell wins">${model.wins}</td>
                <td class="stat-cell losses">${model.losses}</td>
                <td class="stat-cell ties">${model.ties}</td>
                <td class="win-rate-cell">${winRatePercent}%</td>
            </tr>
        `;
    };

    const sectionsHtml = sources.map((sourceName) => {
        const leaderboard = leaderboards[sourceName] || [];
        const battleCount = battleCounts[sourceName] || 0;

        if (leaderboard.length === 0) {
            return renderSection(
                sourceName,
                `${battleCount} battles`,
                '<p class="placeholder">No ELO data for this source</p>',
                false
            );
        }

        const tableHtml = `
            <table class="source-leaderboard">
                <thead>
                    <tr>
                        <th>Rank</th>
                        <th>Model</th>
                        <th>ELO</th>
                        <th>W</th>
                        <th>L</th>
                        <th>T</th>
                        <th>Win %</th>
                    </tr>
                </thead>
                <tbody>${leaderboard.map(renderRow).join('')}</tbody>
            </table>
        `;
        return renderSection(
            sourceName,
            `${battleCount} battles, ${leaderboard.length} models`,
            tableHtml,
            true
        );
    }).join('');

    elements.eloBySourceContent.innerHTML = `
        <div class="source-sections-container">
            ${sectionsHtml}
        </div>
    `;
}
3392
+
3393
/**
 * Reveal the "ELO by source" modal and lock scrolling of the page behind it.
 * Does nothing when the modal element is not available.
 */
function showEloBySourceModal() {
    const modal = elements.eloBySourceModal;
    if (!modal) {
        return;
    }
    modal.classList.remove('hidden');
    document.body.style.overflow = 'hidden';
}
3399
+
3400
/**
 * Hide the "ELO by source" modal and restore the page's default scrolling.
 * Does nothing when the modal element is not available.
 */
function hideEloBySourceModal() {
    const modal = elements.eloBySourceModal;
    if (!modal) {
        return;
    }
    modal.classList.add('hidden');
    document.body.style.overflow = '';
}
3406
+
3407
+ // ========== ELO History Modal Functions ==========
3408
+
3409
/**
 * Mutable state backing the ELO history modal.
 *
 * - `data`: transformed history payload set by `loadEloHistory`, or null
 *   before the first successful load.
 * - `visibleModels`: names of the models currently drawn in the chart.
 * - `granularity`: granularity of the last request. The default matches the
 *   `'experiment'` fallback used by `loadEloHistory`, so the string
 *   comparison in `renderEloHistory` is meaningful before any load.
 */
let eloHistoryState = {
    data: null,
    visibleModels: new Set(),
    granularity: 'experiment',
};
3414
+
3415
/**
 * Open the ELO history modal for the currently selected subset, fetch the
 * history at the selected granularity and render the chart and legend.
 *
 * The backend payload
 *   { timestamps: [], models: { model -> [elo values] }, battle_counts: [] }
 * is reshaped into
 *   { history: [{ timestamp, elos: { model -> elo }, battle_count }], models: [names] }
 * and stored in `eloHistoryState.data`. All models start visible.
 *
 * Does nothing when no subset is selected. On failure an empty-state message
 * is shown in the modal.
 *
 * @returns {Promise<void>}
 */
async function loadEloHistory() {
    if (!state.subset) return;

    try {
        const subset = state.subset;
        const granularity = (elements.eloHistoryGranularity && elements.eloHistoryGranularity.value) || 'experiment';
        eloHistoryState.granularity = granularity;

        elements.eloHistoryContent.innerHTML = '<div class="loading">Loading ELO history...</div>';
        elements.eloHistoryLegend.innerHTML = '';
        showEloHistoryModal();

        // Encode both components so unusual subset names or selector values
        // cannot corrupt the path or the query string.
        const url = `api/subsets/${encodeURIComponent(subset)}/elo-history?granularity=${encodeURIComponent(granularity)}`;
        const rawData = (await fetchJSON(url)) || {};

        // A newer request (different granularity or subset) was started while
        // this one was in flight; drop the stale response instead of letting
        // it overwrite the newer state.
        if (eloHistoryState.granularity !== granularity || state.subset !== subset) {
            return;
        }

        // Missing fields are treated as empty so a sparse but valid payload
        // renders as "no history" rather than as a load failure.
        const timestamps = rawData.timestamps || [];
        const modelElos = rawData.models || {};
        const battleCounts = rawData.battle_counts || [];
        const modelNames = Object.keys(modelElos);

        const history = timestamps.map((timestamp, i) => {
            const elos = {};
            for (const model of modelNames) {
                const series = modelElos[model] || [];
                const eloValue = series[i];
                // null/undefined mean "no rating at this point"; omit the key.
                if (eloValue != null) {
                    elos[model] = eloValue;
                }
            }
            return { timestamp, elos, battle_count: battleCounts[i] };
        });

        const data = { history, models: modelNames };
        eloHistoryState.data = data;

        // Initialize all models as visible
        eloHistoryState.visibleModels = new Set(data.models);

        renderEloHistory();
        renderEloHistoryLegend();
    } catch (error) {
        console.error('Failed to load ELO history:', error);
        elements.eloHistoryContent.innerHTML = '<div class="empty-state"><p>Failed to load ELO history</p></div>';
    }
}
3457
+
3458
/**
 * Draw the ELO history line chart (inline SVG) for the models currently
 * marked visible in `eloHistoryState`, and wire up the hover tooltip.
 *
 * Shows an empty-state message when there is no history, when no model is
 * selected, or when none of the selected models has a data point. A history
 * with a single entry is drawn centered horizontally instead of producing
 * NaN coordinates. Tooltip text is HTML-escaped because attribute values read
 * back from the DOM are already unescaped.
 */
function renderEloHistory() {
    const data = eloHistoryState.data;
    if (!data || !data.history || data.history.length === 0) {
        elements.eloHistoryContent.innerHTML = '<div class="empty-state"><p>No ELO history available</p></div>';
        return;
    }

    const { history, models } = data;

    // Filter to visible models
    const visibleModels = models.filter((m) => eloHistoryState.visibleModels.has(m));
    if (visibleModels.length === 0) {
        elements.eloHistoryContent.innerHTML = '<div class="empty-state"><p>No models selected. Click on models in the legend to show them.</p></div>';
        return;
    }

    // Calculate bounds over every plotted value
    let minElo = Infinity;
    let maxElo = -Infinity;
    for (const point of history) {
        for (const model of visibleModels) {
            const elo = point.elos[model];
            if (elo !== undefined) {
                minElo = Math.min(minElo, elo);
                maxElo = Math.max(maxElo, elo);
            }
        }
    }

    // No visible model has any data point: the bounds are still infinite and
    // every derived coordinate would be NaN, so there is nothing to draw.
    if (!Number.isFinite(minElo) || !Number.isFinite(maxElo)) {
        elements.eloHistoryContent.innerHTML = '<div class="empty-state"><p>No ELO history available</p></div>';
        return;
    }

    // Add padding (10% of the range, or a fixed 50 when the range is zero)
    const eloPadding = (maxElo - minElo) * 0.1 || 50;
    minElo -= eloPadding;
    maxElo += eloPadding;

    // SVG dimensions
    const width = 800;
    const height = 400;
    const margin = { top: 20, right: 120, bottom: 50, left: 60 };
    const plotWidth = width - margin.left - margin.right;
    const plotHeight = height - margin.top - margin.bottom;

    // Scales. With a single history entry there is no horizontal extent, so
    // place it in the middle of the plot rather than dividing by zero.
    const lastIndex = history.length - 1;
    const xScale = (i) => margin.left + (lastIndex > 0 ? i / lastIndex : 0.5) * plotWidth;
    const yScale = (elo) => margin.top + (1 - (elo - minElo) / (maxElo - minElo)) * plotHeight;

    // Colors are assigned over ALL models (not just the visible ones) so a
    // model keeps its color when others are toggled.
    const colors = generateModelColors(models.length);
    const modelColorMap = {};
    models.forEach((model, i) => {
        modelColorMap[model] = colors[i];
    });

    // Build SVG paths and points for each visible model
    let pathsHtml = '';
    let pointsHtml = '';
    visibleModels.forEach((model) => {
        const color = modelColorMap[model];
        let pathData = '';

        history.forEach((point, i) => {
            const elo = point.elos[model];
            if (elo === undefined) {
                return;
            }
            const x = xScale(i);
            const y = yScale(elo);
            // The first plotted sample starts the path; later ones extend it.
            pathData += pathData === '' ? `M ${x} ${y}` : ` L ${x} ${y}`;

            // Add interactive point
            const eloRounded = Math.round(elo);
            const timestamp = point.timestamp || '';
            pointsHtml += `<circle cx="${x}" cy="${y}" r="4" fill="${color}" class="elo-point" data-model="${escapeHtml(model)}" data-elo="${eloRounded}" data-timestamp="${escapeHtml(timestamp)}" data-display-name="${escapeHtml(getModelDisplayName(model))}"/>`;
        });

        if (pathData) {
            pathsHtml += `<path d="${pathData}" stroke="${color}" fill="none" stroke-width="2" class="elo-line" data-model="${escapeHtml(model)}"/>`;
        }
    });

    // X-axis labels (one per history entry)
    const labelY = height - margin.bottom + 15;
    let xAxisHtml = '';
    history.forEach((point, i) => {
        const x = xScale(i);
        const label = point.timestamp || '';
        xAxisHtml += `<text x="${x}" y="${labelY}" text-anchor="end" class="axis-label" transform="rotate(-45, ${x}, ${labelY})">${escapeHtml(label)}</text>`;
    });

    // Y-axis labels (ELO values) with matching horizontal grid lines
    let yAxisHtml = '';
    const eloStep = Math.ceil((maxElo - minElo) / 5);
    for (let elo = Math.ceil(minElo); elo <= maxElo; elo += eloStep) {
        const y = yScale(elo);
        yAxisHtml += `<text x="${margin.left - 10}" y="${y + 4}" text-anchor="end" class="axis-label">${Math.round(elo)}</text>`;
        yAxisHtml += `<line x1="${margin.left}" y1="${y}" x2="${width - margin.right}" y2="${y}" class="grid-line"/>`;
    }

    // Determine X-axis title based on granularity
    const xAxisTitle = eloHistoryState.granularity === 'experiment' ? 'Experiment' : 'Time';

    elements.eloHistoryContent.innerHTML = `
        <div class="elo-history-chart-container">
            <svg width="100%" height="${height}" viewBox="0 0 ${width} ${height}" class="elo-history-chart">
                <!-- Grid lines -->
                ${yAxisHtml}

                <!-- Axes -->
                <line x1="${margin.left}" y1="${margin.top}" x2="${margin.left}" y2="${height - margin.bottom}" class="axis-line"/>
                <line x1="${margin.left}" y1="${height - margin.bottom}" x2="${width - margin.right}" y2="${height - margin.bottom}" class="axis-line"/>

                <!-- X-axis labels -->
                ${xAxisHtml}
                <text x="${width / 2}" y="${height - 2}" text-anchor="middle" class="axis-title">${xAxisTitle}</text>

                <!-- Y-axis title -->
                <text x="15" y="${height / 2}" text-anchor="middle" transform="rotate(-90, 15, ${height / 2})" class="axis-title">ELO</text>

                <!-- Data lines -->
                ${pathsHtml}

                <!-- Interactive points -->
                ${pointsHtml}
            </svg>
            <div class="elo-tooltip" id="elo-tooltip"></div>
        </div>
    `;

    // Add tooltip event listeners
    const tooltip = document.getElementById('elo-tooltip');
    if (!tooltip) {
        return;
    }
    const points = elements.eloHistoryContent.querySelectorAll('.elo-point');
    points.forEach((point) => {
        point.addEventListener('mouseenter', () => {
            // getAttribute returns the decoded attribute value, so it must be
            // escaped again before being placed into markup.
            const model = point.getAttribute('data-model') || '';
            const elo = point.getAttribute('data-elo') || '';
            const timestamp = point.getAttribute('data-timestamp') || '';
            tooltip.innerHTML = `<strong>${escapeHtml(model)}</strong><br>ELO: ${escapeHtml(elo)}<br>${escapeHtml(timestamp)}`;
            tooltip.classList.add('visible');
        });
        point.addEventListener('mousemove', (e) => {
            // Position relative to the chart container, offset from the cursor.
            const container = elements.eloHistoryContent.querySelector('.elo-history-chart-container');
            const rect = container.getBoundingClientRect();
            tooltip.style.left = (e.clientX - rect.left + 10) + 'px';
            tooltip.style.top = (e.clientY - rect.top - 10) + 'px';
        });
        point.addEventListener('mouseleave', () => {
            tooltip.classList.remove('visible');
        });
    });
}
3609
+
3610
/**
 * Rebuild the ELO history legend: one clickable entry per model, dimmed via
 * the `hidden-model` class when the model is not currently visible.
 * Clicking an entry toggles that model's visibility and redraws the chart.
 * Does nothing when no history data (or model list) has been loaded.
 */
function renderEloHistoryLegend() {
    const models = eloHistoryState.data && eloHistoryState.data.models;
    if (!models) return;

    const palette = generateModelColors(models.length);
    const legendEntries = [];

    models.forEach((model, idx) => {
        const hiddenClass = eloHistoryState.visibleModels.has(model) ? '' : 'hidden-model';
        legendEntries.push(`
            <div class="legend-item ${hiddenClass}" data-model="${escapeHtml(model)}">
                <span class="legend-color" style="background-color: ${palette[idx]}"></span>
                <span class="legend-label">${escapeHtml(truncateMiddle(getModelDisplayName(model), 15))}</span>
            </div>
        `);
    });

    elements.eloHistoryLegend.innerHTML = legendEntries.join('');

    // Each legend entry toggles its own model, then the chart is redrawn.
    const onLegendClick = (legendItem) => () => {
        const model = legendItem.dataset.model;
        const wasVisible = eloHistoryState.visibleModels.has(model);
        if (wasVisible) {
            eloHistoryState.visibleModels.delete(model);
        } else {
            eloHistoryState.visibleModels.add(model);
        }
        // Mark the entry as hidden exactly when the model was just removed.
        legendItem.classList.toggle('hidden-model', wasVisible);
        renderEloHistory();
    };

    elements.eloHistoryLegend.querySelectorAll('.legend-item').forEach((legendItem) => {
        legendItem.addEventListener('click', onLegendClick(legendItem));
    });
}
3642
+
3643
/**
 * Generate `count` visually distinct colors by spacing hues evenly around
 * the HSL color wheel.
 *
 * @param {number} count - Number of colors to produce.
 * @param {Object} [options] - Optional overrides; defaults reproduce the
 *     original fixed palette.
 * @param {number} [options.saturation=70] - HSL saturation, in percent.
 * @param {number} [options.lightness=50] - HSL lightness, in percent.
 * @returns {string[]} CSS `hsl(...)` color strings, one per index.
 */
function generateModelColors(count, { saturation = 70, lightness = 50 } = {}) {
    const colors = [];
    for (let i = 0; i < count; i++) {
        const hue = (i * 360 / count) % 360;
        colors.push(`hsl(${hue}, ${saturation}%, ${lightness}%)`);
    }
    return colors;
}
3651
+
3652
/**
 * Reveal the ELO history modal and lock scrolling of the page behind it.
 * Does nothing when the modal element is not available.
 */
function showEloHistoryModal() {
    const modal = elements.eloHistoryModal;
    if (!modal) {
        return;
    }
    modal.classList.remove('hidden');
    document.body.style.overflow = 'hidden';
}
3658
+
3659
/**
 * Hide the ELO history modal and restore the page's default scrolling.
 * Does nothing when the modal element is not available.
 */
function hideEloHistoryModal() {
    const modal = elements.eloHistoryModal;
    if (!modal) {
        return;
    }
    modal.classList.add('hidden');
    document.body.style.overflow = '';
}
3665
+
3666
+ // ========== Event Handlers for New Modals ==========
3667
+
3668
// Wire up modal, cross-subset and navigation controls. Each entry is
// [element, event name, handler]; entries whose element is missing from the
// page are skipped. Registration order matches the order listed here.
for (const [target, eventName, handler] of [
    // Matrix modal
    [elements.viewMatrixBtn, 'click', loadWinRateMatrix],
    [elements.matrixModalClose, 'click', hideMatrixModal],
    [elements.matrixModalBackdrop, 'click', hideMatrixModal],

    // ELO by Source modal
    [elements.viewEloBySourceBtn, 'click', loadEloBySource],
    [elements.eloBySourceModalClose, 'click', hideEloBySourceModal],
    [elements.eloBySourceModalBackdrop, 'click', hideEloBySourceModal],

    // Cross-Subset page event handlers
    [elements.crossSubsetSelectAll, 'click', () => {
        // Select every known subset, then refresh the checkbox list and summary.
        state.crossSubsetState.subsets.forEach(s => state.crossSubsetState.selectedSubsets.add(s));
        renderCrossSubsetCheckboxes();
        updateCrossSubsetInfo();
    }],
    [elements.crossSubsetClearAll, 'click', () => {
        state.crossSubsetState.selectedSubsets.clear();
        renderCrossSubsetCheckboxes();
        updateCrossSubsetInfo();
    }],
    [elements.calculateMergedElo, 'click', calculateMergedEloForPage],

    // Navigation event handlers
    [elements.logoLink, 'click', () => switchToPage('overview')],
    [elements.navOverview, 'click', () => switchToPage('overview')],
    [elements.navGallery, 'click', () => switchToPage('gallery')],

    // Cross-Subset modal
    [elements.crossSubsetBtn, 'click', showCrossSubsetModal],
    [elements.crossSubsetModalClose, 'click', hideCrossSubsetModal],
    [elements.crossSubsetModalBackdrop, 'click', hideCrossSubsetModal],

    // ELO History modal
    [elements.viewEloHistoryBtn, 'click', loadEloHistory],
    [elements.eloHistoryModalClose, 'click', hideEloHistoryModal],
    [elements.eloHistoryModalBackdrop, 'click', hideEloHistoryModal],
    // Changing the granularity selector reloads the history.
    [elements.eloHistoryGranularity, 'change', loadEloHistory],
]) {
    if (target) {
        target.addEventListener(eventName, handler);
    }
}
3744
+
3745
+ // ========== Initialize ==========
3746
loadFavoritesFromStorage();

// Start loading model aliases before anything else.
// NOTE(review): the result is not awaited here; if loadModelAliases is
// asynchronous, aliases may arrive after the first render. Confirm intent.
loadModelAliases();

// Check for URL state first (must be read before subsets are loaded)
const hasURLState = loadStateFromURL();

// Load subsets, then apply URL state or show overview
loadSubsets()
    .then(() => {
        if (hasURLState) {
            applyURLState();
        } else {
            // Default to overview page
            switchToPage('overview');
        }
    })
    .catch((error) => {
        // Surface startup failures instead of leaving an unhandled rejection.
        console.error('Failed to initialize application:', error);
    });