genarena 0.0.1__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. genarena/__init__.py +49 -2
  2. genarena/__main__.py +10 -0
  3. genarena/arena.py +1685 -0
  4. genarena/battle.py +337 -0
  5. genarena/bt_elo.py +507 -0
  6. genarena/cli.py +1581 -0
  7. genarena/data.py +476 -0
  8. genarena/deploy/Dockerfile +25 -0
  9. genarena/deploy/README.md +55 -0
  10. genarena/deploy/__init__.py +5 -0
  11. genarena/deploy/app.py +84 -0
  12. genarena/experiments.py +121 -0
  13. genarena/leaderboard.py +270 -0
  14. genarena/logs.py +409 -0
  15. genarena/models.py +412 -0
  16. genarena/prompts/__init__.py +127 -0
  17. genarena/prompts/mmrb2.py +373 -0
  18. genarena/sampling.py +336 -0
  19. genarena/state.py +656 -0
  20. genarena/sync/__init__.py +105 -0
  21. genarena/sync/auto_commit.py +118 -0
  22. genarena/sync/deploy_ops.py +543 -0
  23. genarena/sync/git_ops.py +422 -0
  24. genarena/sync/hf_ops.py +891 -0
  25. genarena/sync/init_ops.py +431 -0
  26. genarena/sync/packer.py +587 -0
  27. genarena/sync/submit.py +837 -0
  28. genarena/utils.py +103 -0
  29. genarena/validation/__init__.py +19 -0
  30. genarena/validation/schema.py +327 -0
  31. genarena/validation/validator.py +329 -0
  32. genarena/visualize/README.md +148 -0
  33. genarena/visualize/__init__.py +14 -0
  34. genarena/visualize/app.py +938 -0
  35. genarena/visualize/data_loader.py +2335 -0
  36. genarena/visualize/static/app.js +3762 -0
  37. genarena/visualize/static/model_aliases.json +86 -0
  38. genarena/visualize/static/style.css +4104 -0
  39. genarena/visualize/templates/index.html +413 -0
  40. genarena/vlm.py +519 -0
  41. genarena-0.1.0.dist-info/METADATA +178 -0
  42. genarena-0.1.0.dist-info/RECORD +44 -0
  43. {genarena-0.0.1.dist-info → genarena-0.1.0.dist-info}/WHEEL +1 -2
  44. genarena-0.1.0.dist-info/entry_points.txt +2 -0
  45. genarena-0.0.1.dist-info/METADATA +0 -26
  46. genarena-0.0.1.dist-info/RECORD +0 -5
  47. genarena-0.0.1.dist-info/top_level.txt +0 -1
@@ -0,0 +1,413 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>GenArena Explorer</title>
7
+ <link rel="stylesheet" href="static/style.css">
8
+ </head>
9
+ <body>
10
+ <div id="app">
11
+ <!-- Header -->
12
+ <header class="header">
13
+ <div class="header-left">
14
+ <h1 class="logo" id="logo-link" title="Back to Overview">GenArena</h1>
15
+ <nav class="header-nav">
16
+ <a id="nav-overview" class="nav-link active" title="All Subsets Overview">Overview</a>
17
+ <a id="nav-gallery" class="nav-link" title="Browse Battles">Gallery</a>
18
+ <span class="nav-separator">|</span>
19
+ <a href="#" class="nav-link nav-external" target="_blank" rel="noopener noreferrer" title="Project Page">Project Page <span class="external-icon">↗</span></a>
20
+ <a href="#" class="nav-link nav-external" target="_blank" rel="noopener noreferrer" title="arXiv Paper">arXiv <span class="external-icon">↗</span></a>
21
+ <a href="#" class="nav-link nav-external" target="_blank" rel="noopener noreferrer" title="GitHub Repository">GitHub <span class="external-icon">↗</span></a>
22
+ </nav>
23
+ </div>
24
+ <div class="header-right">
25
+ <button id="favorites-btn" class="btn-header-action" title="View Favorites">
26
+ <span class="header-action-icon">★</span>
27
+ <span id="favorites-count" class="header-action-count">0</span>
28
+ </button>
29
+ </div>
30
+ </header>
31
+
32
+ <!-- Main Content -->
33
+ <div class="main-container">
34
+ <!-- Overview Page (default landing) -->
35
+ <div id="overview-page" class="full-page">
36
+ <div class="page-header">
37
+ <h2>All Subsets Leaderboard</h2>
38
+ <button id="cross-subset-btn" class="btn btn-secondary">Cross-Subset</button>
39
+ </div>
40
+ <p class="page-description">ELO rankings across all evaluation subsets. Click on a subset column header to view details, or click a model row to see its performance.</p>
41
+ <div id="overview-content" class="overview-content">
42
+ <div class="loading">Loading leaderboards...</div>
43
+ </div>
44
+ </div>
45
+
46
+ <!-- Gallery Page (with sidebar) -->
47
+ <div id="gallery-page" class="gallery-page-container" style="display: none;">
48
+ <!-- Sidebar Filters -->
49
+ <aside class="sidebar">
50
+ <div class="filter-section">
51
+ <h3>Filters</h3>
52
+
53
+ <!-- Model filter - only visible in battles view -->
54
+ <div class="filter-group battles-only">
55
+ <label>Models: <span id="model-count">(0 selected)</span></label>
56
+ <div id="model-checkboxes" class="checkbox-group">
57
+ <!-- Populated by JavaScript -->
58
+ </div>
59
+ <div class="checkbox-actions">
60
+ <button id="select-all-models" class="btn btn-small">Select All</button>
61
+ <button id="clear-all-models" class="btn btn-small">Clear All</button>
62
+ </div>
63
+ </div>
64
+
65
+ <div class="filter-group battles-only" id="result-filter-group" style="display: none;">
66
+ <label for="result-filter">Result (single model only):</label>
67
+ <select id="result-filter" class="filter-select">
68
+ <option value="">All results</option>
69
+ <option value="wins">Wins</option>
70
+ <option value="losses">Losses</option>
71
+ <option value="ties">Ties</option>
72
+ </select>
73
+ </div>
74
+
75
+ <div class="filter-group battles-only">
76
+ <label for="consistency-filter">Consistency:</label>
77
+ <select id="consistency-filter" class="filter-select">
78
+ <option value="">All</option>
79
+ <option value="true">Consistent</option>
80
+ <option value="false">Inconsistent</option>
81
+ </select>
82
+ </div>
83
+
84
+ <div class="filter-group" id="prompt-source-filter-group" style="display: none;">
85
+ <label for="prompt-source-filter">Prompt Source:</label>
86
+ <select id="prompt-source-filter" class="filter-select">
87
+ <option value="">All sources</option>
88
+ </select>
89
+ </div>
90
+
91
+ <!-- Model filter for prompts view -->
92
+ <div class="filter-group prompts-only" id="prompts-model-filter-group" style="display: none;">
93
+ <label>Filter Models: <span id="prompts-model-count">(0 selected)</span></label>
94
+ <div id="prompts-model-checkboxes" class="checkbox-group">
95
+ <!-- Populated by JavaScript -->
96
+ </div>
97
+ <div class="checkbox-actions">
98
+ <button id="prompts-select-all-models" class="btn btn-small">Select All</button>
99
+ <button id="prompts-clear-all-models" class="btn btn-small">Clear All</button>
100
+ </div>
101
+ <button id="prompts-apply-model-filter" class="btn btn-primary btn-small" style="width: 100%; margin-top: 8px;">Apply Filter</button>
102
+ <p class="filter-hint">Show only selected models and their battles</p>
103
+ </div>
104
+
105
+ <div class="filter-group" id="image-count-filter-group" style="display: none;">
106
+ <label>Input Images: <span id="image-range-display">1-1</span></label>
107
+ <div class="range-slider-container">
108
+ <input type="range" id="min-images-slider" class="range-slider" min="1" max="10" value="1">
109
+ <input type="range" id="max-images-slider" class="range-slider" min="1" max="10" value="10">
110
+ </div>
111
+ <div class="range-labels">
112
+ <span id="min-images-label">1</span>
113
+ <span id="max-images-label">10</span>
114
+ </div>
115
+ </div>
116
+
117
+ <button id="apply-filters" class="btn btn-primary">Apply Filters</button>
118
+ <button id="clear-filters" class="btn btn-secondary">Clear</button>
119
+ </div>
120
+
121
+ <div class="stats-section">
122
+ <h3>Statistics</h3>
123
+ <div id="stats-panel">
124
+ <p class="placeholder">Select a subset and experiment</p>
125
+ </div>
126
+ </div>
127
+
128
+ <div class="h2h-section" id="h2h-section" style="display: none;">
129
+ <h3>Head-to-Head</h3>
130
+ <div id="h2h-panel">
131
+ <!-- Filled by JavaScript -->
132
+ </div>
133
+ </div>
134
+
135
+ <!-- ELO Leaderboard Section -->
136
+ <div class="elo-section" id="elo-section">
137
+ <div class="elo-header">
138
+ <h3>ELO Leaderboard</h3>
139
+ <button id="view-full-leaderboard" class="btn btn-small btn-link" title="View Full Leaderboard">
140
+ View All →
141
+ </button>
142
+ </div>
143
+ <div id="elo-panel">
144
+ <p class="placeholder">Select a subset to view rankings</p>
145
+ </div>
146
+ <div class="elo-actions">
147
+ <button id="view-matrix" class="btn btn-small btn-secondary" title="View Win Rate Matrix">Matrix</button>
148
+ <button id="view-elo-history" class="btn btn-small btn-secondary" title="View ELO History">History</button>
149
+ <button id="view-elo-by-source" class="btn btn-small btn-secondary" title="View ELO by Source">By Source</button>
150
+ </div>
151
+ </div>
152
+ </aside>
153
+
154
+ <!-- Battle List -->
155
+ <main class="content">
156
+ <!-- Gallery Controls -->
157
+ <div class="gallery-controls">
158
+ <div class="selector-group">
159
+ <label for="subset-select">Subset:</label>
160
+ <select id="subset-select" class="selector">
161
+ <option value="">Select subset...</option>
162
+ </select>
163
+ </div>
164
+ <div class="selector-group">
165
+ <label for="exp-select">Experiment:</label>
166
+ <select id="exp-select" class="selector" disabled>
167
+ <option value="">Select experiment...</option>
168
+ </select>
169
+ </div>
170
+ <div class="view-toggle">
171
+ <button id="view-battles" class="view-btn active" title="View Battles">
172
+ <span class="view-icon">⚔️</span>
173
+ <span>Battles</span>
174
+ </button>
175
+ <button id="view-prompts" class="view-btn" title="View by Prompt">
176
+ <span class="view-icon">📝</span>
177
+ <span>Prompts</span>
178
+ </button>
179
+ </div>
180
+ <div class="search-box">
181
+ <input type="text" id="search-input" class="search-input" placeholder="Search prompts..." title="Search by instruction, task type, or metadata">
182
+ <button id="search-btn" class="btn btn-small search-btn" title="Search">🔍</button>
183
+ <button id="clear-search-btn" class="btn btn-small clear-search-btn" title="Clear search" style="display: none;">✕</button>
184
+ </div>
185
+ </div>
186
+
187
+ <div class="content-header">
188
+ <div id="pagination-info" class="pagination-info"></div>
189
+ <div class="pagination-controls">
190
+ <button id="first-page" class="btn btn-small" disabled>&laquo;</button>
191
+ <button id="prev-page" class="btn btn-small" disabled>&lt;</button>
192
+ <div id="page-numbers" class="page-numbers">
193
+ <!-- Populated by JavaScript -->
194
+ </div>
195
+ <button id="next-page" class="btn btn-small" disabled>&gt;</button>
196
+ <button id="last-page" class="btn btn-small" disabled>&raquo;</button>
197
+ <div class="page-jump">
198
+ <input type="number" id="page-input" class="page-input" min="1" placeholder="Page">
199
+ <button id="page-go" class="btn btn-small">Go</button>
200
+ </div>
201
+ </div>
202
+ </div>
203
+
204
+ <div id="battle-list" class="battle-list">
205
+ <div class="empty-state">
206
+ <p>Select a subset and experiment to view battles</p>
207
+ </div>
208
+ </div>
209
+
210
+ <!-- Prompts View Container -->
211
+ <div id="prompts-list" class="prompts-list" style="display: none;">
212
+ <div class="empty-state">
213
+ <p>Select a subset and experiment to view prompts</p>
214
+ </div>
215
+ </div>
216
+
217
+ <div class="content-footer">
218
+ <div class="pagination-controls">
219
+ <button id="first-page-bottom" class="btn btn-small" disabled>&laquo;</button>
220
+ <button id="prev-page-bottom" class="btn btn-small" disabled>&lt;</button>
221
+ <div id="page-numbers-bottom" class="page-numbers">
222
+ <!-- Populated by JavaScript -->
223
+ </div>
224
+ <button id="next-page-bottom" class="btn btn-small" disabled>&gt;</button>
225
+ <button id="last-page-bottom" class="btn btn-small" disabled>&raquo;</button>
226
+ <div class="page-jump">
227
+ <input type="number" id="page-input-bottom" class="page-input" min="1" placeholder="Page">
228
+ <button id="page-go-bottom" class="btn btn-small">Go</button>
229
+ </div>
230
+ </div>
231
+ </div>
232
+ </main>
233
+ </div> <!-- End of gallery-page -->
234
+ </div>
235
+
236
+ <!-- Detail Modal -->
237
+ <div id="detail-modal" class="modal hidden">
238
+ <div class="modal-backdrop"></div>
239
+ <div class="modal-content">
240
+ <button class="modal-close">&times;</button>
241
+ <div id="detail-content">
242
+ <!-- Filled by JavaScript -->
243
+ </div>
244
+ </div>
245
+ </div>
246
+
247
+ <!-- Favorites Modal -->
248
+ <div id="favorites-modal" class="modal hidden">
249
+ <div class="modal-backdrop"></div>
250
+ <div class="modal-content modal-content-wide">
251
+ <button class="modal-close">&times;</button>
252
+ <div class="favorites-modal-header">
253
+ <h2>Favorite Prompts</h2>
254
+ <button id="clear-all-favorites" class="btn btn-secondary btn-small">Clear All</button>
255
+ </div>
256
+ <div id="favorites-scrollable" class="favorites-scrollable">
257
+ <!-- Favorites Model Filter - horizontal layout -->
258
+ <div class="favorites-model-filter" id="favorites-model-filter-group">
259
+ <label>Filter Models:</label>
260
+ <div id="favorites-model-checkboxes" class="checkbox-group-horizontal">
261
+ <!-- Populated by JavaScript -->
262
+ </div>
263
+ <div class="filter-controls-row">
264
+ <div class="checkbox-actions-inline">
265
+ <button id="favorites-select-all-models" class="btn btn-small">Select All</button>
266
+ <button id="favorites-clear-all-models" class="btn btn-small">Clear All</button>
267
+ <button id="favorites-apply-model-filter" class="btn btn-primary btn-small">Apply Filter</button>
268
+ </div>
269
+ <div class="stats-scope-toggle">
270
+ <label class="toggle-label">
271
+ <input type="checkbox" id="favorites-stats-scope-all">
272
+ <span class="toggle-text">Win rate includes all opponents</span>
273
+ </label>
274
+ </div>
275
+ </div>
276
+ </div>
277
+ <div id="favorites-content">
278
+ <!-- Filled by JavaScript -->
279
+ </div>
280
+ </div>
281
+ </div>
282
+ </div>
283
+
284
+ <!-- Image Lightbox -->
285
+ <div id="lightbox" class="lightbox">
286
+ <button class="lightbox-close">&times;</button>
287
+ <img id="lightbox-img" src="" alt="Enlarged image">
288
+ <div id="lightbox-label" class="lightbox-label"></div>
289
+ </div>
290
+
291
+ <!-- ELO Leaderboard Modal -->
292
+ <div id="leaderboard-modal" class="modal hidden">
293
+ <div class="modal-backdrop"></div>
294
+ <div class="modal-content modal-content-wide">
295
+ <button class="modal-close">&times;</button>
296
+ <div class="leaderboard-modal-header">
297
+ <h2>ELO Leaderboard</h2>
298
+ <span id="leaderboard-subset-name" class="subset-badge"></span>
299
+ </div>
300
+ <div id="leaderboard-content">
301
+ <!-- Filled by JavaScript -->
302
+ </div>
303
+ </div>
304
+ </div>
305
+
306
+ <!-- Model Stats Modal -->
307
+ <div id="model-stats-modal" class="modal hidden">
308
+ <div class="modal-backdrop"></div>
309
+ <div class="modal-content modal-content-wide">
310
+ <button class="modal-close">&times;</button>
311
+ <div id="model-stats-content">
312
+ <!-- Filled by JavaScript -->
313
+ </div>
314
+ </div>
315
+ </div>
316
+
317
+ <!-- Win Rate Matrix Modal -->
318
+ <div id="matrix-modal" class="modal hidden">
319
+ <div class="modal-backdrop"></div>
320
+ <div class="modal-content modal-content-wide">
321
+ <button class="modal-close">&times;</button>
322
+ <div class="matrix-modal-header">
323
+ <h2>Win Rate Matrix</h2>
324
+ <span id="matrix-subset-name" class="subset-badge"></span>
325
+ </div>
326
+ <div id="matrix-content" class="matrix-content">
327
+ <!-- Filled by JavaScript -->
328
+ </div>
329
+ </div>
330
+ </div>
331
+
332
+ <!-- ELO History Modal -->
333
+ <div id="elo-history-modal" class="modal hidden">
334
+ <div class="modal-backdrop"></div>
335
+ <div class="modal-content modal-content-wide">
336
+ <button class="modal-close">&times;</button>
337
+ <div class="elo-history-header">
338
+ <h2>ELO History</h2>
339
+ <div class="elo-history-controls">
340
+ <label for="elo-history-granularity">Group by:</label>
341
+ <select id="elo-history-granularity" class="selector">
342
+ <option value="experiment" selected>Experiment</option>
343
+ <option value="day">Day</option>
344
+ <option value="week">Week</option>
345
+ </select>
346
+ </div>
347
+ </div>
348
+ <div id="elo-history-content" class="elo-history-content">
349
+ <!-- Filled by JavaScript - SVG chart -->
350
+ </div>
351
+ <div id="elo-history-legend" class="elo-history-legend">
352
+ <!-- Filled by JavaScript -->
353
+ </div>
354
+ </div>
355
+ </div>
356
+
357
+ <!-- ELO by Source Modal -->
358
+ <div id="elo-by-source-modal" class="modal hidden">
359
+ <div class="modal-backdrop"></div>
360
+ <div class="modal-content modal-content-wide">
361
+ <button class="modal-close">&times;</button>
362
+ <div class="elo-by-source-header">
363
+ <h2>ELO Rankings by Prompt Source</h2>
364
+ <span id="elo-by-source-subset-name" class="subset-badge"></span>
365
+ </div>
366
+ <div id="elo-by-source-content" class="elo-by-source-content">
367
+ <!-- Filled by JavaScript -->
368
+ </div>
369
+ </div>
370
+ </div>
371
+
372
+ <!-- Cross-Subset Modal -->
373
+ <div id="cross-subset-modal" class="modal hidden">
374
+ <div class="modal-backdrop"></div>
375
+ <div class="modal-content modal-content-wide">
376
+ <button class="modal-close">&times;</button>
377
+ <div class="cross-subset-modal-header">
378
+ <h2>Cross-Subset ELO Analysis</h2>
379
+ </div>
380
+ <p class="modal-description">Merge battles from multiple subsets to compute combined ELO rankings.</p>
381
+ <div class="cross-subset-content">
382
+ <div class="cross-subset-selection">
383
+ <h4>Select subsets to merge:</h4>
384
+ <div id="cross-subset-checkboxes" class="checkbox-group">
385
+ <!-- Populated by JavaScript -->
386
+ </div>
387
+ <div class="checkbox-actions">
388
+ <button id="cross-subset-select-all" class="btn btn-small">Select All</button>
389
+ <button id="cross-subset-clear-all" class="btn btn-small">Clear All</button>
390
+ </div>
391
+ </div>
392
+ <div class="cross-subset-info">
393
+ <p>Common models (in all selected): <span id="common-model-count">-</span></p>
394
+ <p>Union models (in any selected): <span id="union-model-count">-</span></p>
395
+ <p>Total battles: <span id="total-battles-count">-</span></p>
396
+ </div>
397
+ <div class="cross-subset-options">
398
+ <label>Model scope:</label>
399
+ <label class="radio-label"><input type="radio" name="model-scope" value="all" checked> All models</label>
400
+ <label class="radio-label"><input type="radio" name="model-scope" value="common"> Common only</label>
401
+ </div>
402
+ <button id="calculate-merged-elo" class="btn btn-primary">Calculate Merged ELO</button>
403
+ </div>
404
+ <div id="cross-subset-results" class="cross-subset-results">
405
+ <!-- Filled by JavaScript -->
406
+ </div>
407
+ </div>
408
+ </div>
409
+ </div>
410
+
411
+ <script src="static/app.js"></script>
412
+ </body>
413
+ </html>