genarena 0.0.1__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genarena/__init__.py +49 -2
- genarena/__main__.py +10 -0
- genarena/arena.py +1685 -0
- genarena/battle.py +337 -0
- genarena/bt_elo.py +507 -0
- genarena/cli.py +1581 -0
- genarena/data.py +476 -0
- genarena/deploy/Dockerfile +25 -0
- genarena/deploy/README.md +55 -0
- genarena/deploy/__init__.py +5 -0
- genarena/deploy/app.py +84 -0
- genarena/experiments.py +121 -0
- genarena/leaderboard.py +270 -0
- genarena/logs.py +409 -0
- genarena/models.py +412 -0
- genarena/prompts/__init__.py +127 -0
- genarena/prompts/mmrb2.py +373 -0
- genarena/sampling.py +336 -0
- genarena/state.py +656 -0
- genarena/sync/__init__.py +105 -0
- genarena/sync/auto_commit.py +118 -0
- genarena/sync/deploy_ops.py +543 -0
- genarena/sync/git_ops.py +422 -0
- genarena/sync/hf_ops.py +891 -0
- genarena/sync/init_ops.py +431 -0
- genarena/sync/packer.py +587 -0
- genarena/sync/submit.py +837 -0
- genarena/utils.py +103 -0
- genarena/validation/__init__.py +19 -0
- genarena/validation/schema.py +327 -0
- genarena/validation/validator.py +329 -0
- genarena/visualize/README.md +148 -0
- genarena/visualize/__init__.py +14 -0
- genarena/visualize/app.py +938 -0
- genarena/visualize/data_loader.py +2335 -0
- genarena/visualize/static/app.js +3762 -0
- genarena/visualize/static/model_aliases.json +86 -0
- genarena/visualize/static/style.css +4104 -0
- genarena/visualize/templates/index.html +413 -0
- genarena/vlm.py +519 -0
- genarena-0.1.0.dist-info/METADATA +178 -0
- genarena-0.1.0.dist-info/RECORD +44 -0
- {genarena-0.0.1.dist-info → genarena-0.1.0.dist-info}/WHEEL +1 -2
- genarena-0.1.0.dist-info/entry_points.txt +2 -0
- genarena-0.0.1.dist-info/METADATA +0 -26
- genarena-0.0.1.dist-info/RECORD +0 -5
- genarena-0.0.1.dist-info/top_level.txt +0 -1
|
@@ -0,0 +1,938 @@
|
|
|
1
|
+
# Copyright 2026 Ruihang Li.
|
|
2
|
+
# Licensed under the Apache License, Version 2.0.
|
|
3
|
+
# See LICENSE file in the project root for details.
|
|
4
|
+
|
|
5
|
+
"""Flask application for arena visualization."""
|
|
6
|
+
|
|
7
|
+
import io
|
|
8
|
+
import os
|
|
9
|
+
|
|
10
|
+
from flask import Flask, jsonify, render_template, request, send_file, abort, redirect
|
|
11
|
+
|
|
12
|
+
from genarena.visualize.data_loader import ArenaDataLoader
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _split_csv(raw):
    """Split a comma-separated query value into stripped, non-empty items.

    Args:
        raw: Raw query-string value, or None/"" when the parameter is absent.

    Returns:
        None when ``raw`` is falsy (meaning "no filter was supplied");
        otherwise the list of non-empty items, which may itself be empty
        (for example when ``raw`` is ",").
    """
    if not raw:
        return None
    return [item.strip() for item in raw.split(",") if item.strip()]


def _csv_arg(name: str):
    """Read query parameter ``name`` from the current request as a CSV list."""
    return _split_csv(request.args.get(name, None, type=str))


def _tri_state(raw):
    """Map the strings "true"/"false" to True/False; anything else is None."""
    return {"true": True, "false": False}.get(raw)


def _read_pagination(default_page_size: int):
    """Read ``page`` and ``page_size`` from the current request.

    Both values are clamped to a minimum of 1. Without the clamp,
    ``page_size=0`` makes the ``total_pages`` division raise
    ZeroDivisionError (surfacing as HTTP 500), and zero/negative values
    would be forwarded to the data loader.

    Args:
        default_page_size: Page size used when the parameter is missing or
            is not a valid integer.

    Returns:
        A ``(page, page_size)`` tuple, each >= 1.
    """
    page = request.args.get("page", 1, type=int)
    page_size = request.args.get("page_size", default_page_size, type=int)
    return max(page, 1), max(page_size, 1)


def _paginated(items_key: str, items, total: int, page: int, page_size: int, **extra):
    """Build the JSON envelope shared by all paginated endpoints.

    Args:
        items_key: Top-level key under which ``items`` is returned.
        items: JSON-serialisable page content.
        total: Total number of matching items across all pages.
        page: Current page number (>= 1).
        page_size: Page size (>= 1, so the ceiling division below is safe).
        **extra: Additional top-level keys (e.g. ``query``).

    Returns:
        A Flask JSON response.
    """
    payload = {
        items_key: items,
        "total": total,
        "page": page,
        "page_size": page_size,
        # Ceiling division: number of pages needed to hold ``total`` items.
        "total_pages": (total + page_size - 1) // page_size,
    }
    payload.update(extra)
    return jsonify(payload)


def _parse_battle_id(battle_id: str):
    """Parse a battle id of the form ``model_a_vs_model_b:sample_index``.

    Returns:
        ``(model_a, model_b, sample_index)`` on success, or None when the id
        has no ``:`` separator, a non-integer sample index, or no ``_vs_``
        separator in the model part.
    """
    model_part, colon, index_part = battle_id.rpartition(":")
    if not colon:
        return None
    try:
        sample_index = int(index_part)
    except ValueError:
        return None
    if "_vs_" not in model_part:
        return None
    # NOTE(review): if a model name itself contains "_vs_", only the first two
    # segments are used (unchanged from the previous behaviour) — confirm
    # whether such names can occur.
    segments = model_part.split("_vs_")
    return segments[0], segments[1], sample_index


def _read_subsets():
    """Read and validate the required ``subsets`` CSV query parameter.

    Returns:
        ``(subsets, None)`` on success, or ``(None, error_message)`` when the
        parameter is missing or contains no non-empty entries.
    """
    raw = request.args.get("subsets", "", type=str)
    if not raw:
        return None, "subsets parameter is required"
    subsets = _split_csv(raw)
    if not subsets:
        return None, "At least 1 subset required"
    return subsets, None


def create_app(arena_dir: str, data_dir: str) -> Flask:
    """
    Create and configure the Flask application.

    Registers the index page, the JSON API under ``/api/...`` and the image
    routes under ``/images/...``. All routes read from a single
    ``ArenaDataLoader`` built from the two directories.

    Paginated endpoints clamp ``page`` and ``page_size`` to a minimum of 1,
    so invalid values no longer produce an HTTP 500.

    Args:
        arena_dir: Path to arena directory
        data_dir: Path to data directory

    Returns:
        Configured Flask app
    """
    # Templates and static assets live next to this module.
    app_dir = os.path.dirname(os.path.abspath(__file__))

    app = Flask(
        __name__,
        template_folder=os.path.join(app_dir, "templates"),
        static_folder=os.path.join(app_dir, "static"),
    )

    # Store paths in config
    app.config["ARENA_DIR"] = arena_dir
    app.config["DATA_DIR"] = data_dir

    # Single loader instance shared (via closure) by every route below.
    data_loader = ArenaDataLoader(arena_dir, data_dir)

    # ========== Page Routes ==========

    @app.route("/")
    def index():
        """Main page."""
        return render_template("index.html")

    # ========== API Routes ==========

    @app.route("/api/subsets")
    def api_subsets():
        """Get list of available subsets."""
        return jsonify({"subsets": data_loader.discover_subsets()})

    @app.route("/api/subsets/<subset>/info")
    def api_subset_info(subset: str):
        """Get information about a subset."""
        info = data_loader.get_subset_info(subset)
        if not info:
            return jsonify({"error": "Subset not found"}), 404

        return jsonify({
            "name": info.name,
            "models": info.models,
            "experiments": info.experiments,
            "total_battles": info.total_battles,
            "min_input_images": info.min_input_images,
            "max_input_images": info.max_input_images,
            "prompt_sources": info.prompt_sources,
        })

    @app.route("/api/subsets/<subset>/experiments/<exp_name>/battles")
    def api_battles(subset: str, exp_name: str):
        """Get paginated battle records."""
        page, page_size = _read_pagination(default_page_size=20)

        records, total = data_loader.get_battles(
            subset=subset,
            exp_name=exp_name,
            page=page,
            page_size=page_size,
            models=_csv_arg("models"),
            result_filter=request.args.get("result", None, type=str),
            consistency_filter=_tri_state(
                request.args.get("consistent", None, type=str)
            ),
            min_images=request.args.get("min_images", None, type=int),
            max_images=request.args.get("max_images", None, type=int),
            prompt_source=request.args.get("prompt_source", None, type=str),
        )

        return _paginated(
            "battles", [r.to_dict() for r in records], total, page, page_size
        )

    @app.route("/api/subsets/<subset>/experiments/<exp_name>/battles/<path:battle_id>")
    def api_battle_detail(subset: str, exp_name: str, battle_id: str):
        """Get detailed battle record."""
        parsed = _parse_battle_id(battle_id)
        if parsed is None:
            return jsonify({"error": "Invalid battle_id format"}), 400
        model_a, model_b, sample_index = parsed

        record = data_loader.get_battle_detail(
            subset, exp_name, model_a, model_b, sample_index
        )
        if not record:
            return jsonify({"error": "Battle not found"}), 404

        return jsonify(record.to_detail_dict())

    @app.route("/api/subsets/<subset>/stats")
    def api_stats(subset: str):
        """Get statistics for a subset."""
        exp_name = request.args.get("exp_name", None, type=str)
        stats = data_loader.get_stats(subset, exp_name)
        if not stats:
            return jsonify({"error": "Subset not found"}), 404

        return jsonify(stats)

    @app.route("/api/subsets/<subset>/leaderboard")
    def api_elo_leaderboard(subset: str):
        """Get ELO leaderboard for a subset."""
        leaderboard = data_loader.get_elo_leaderboard(subset, _csv_arg("models"))
        return jsonify({"leaderboard": leaderboard})

    @app.route("/api/subsets/<subset>/models/<path:model>/stats")
    def api_model_stats(subset: str, model: str):
        """Get detailed statistics for a specific model including win rates against all opponents."""
        exp_name = request.args.get("exp_name", "__all__", type=str)
        stats = data_loader.get_model_vs_stats(subset, model, exp_name)
        if not stats:
            return jsonify({"error": "Model not found"}), 404

        return jsonify(stats)

    @app.route("/api/subsets/<subset>/experiments/<exp_name>/h2h")
    def api_head_to_head(subset: str, exp_name: str):
        """Get head-to-head statistics between two models."""
        model_a = request.args.get("model_a", None, type=str)
        model_b = request.args.get("model_b", None, type=str)
        if not model_a or not model_b:
            return jsonify({"error": "model_a and model_b are required"}), 400

        h2h = data_loader.get_head_to_head(subset, exp_name, model_a, model_b)
        return jsonify(h2h)

    @app.route("/api/subsets/<subset>/samples/<int:sample_index>/input_count")
    def api_input_image_count(subset: str, sample_index: int):
        """Get the number of input images for a sample."""
        count = data_loader.get_input_image_count(subset, sample_index)
        return jsonify({"count": count})

    @app.route("/api/subsets/<subset>/experiments/<exp_name>/samples/<int:sample_index>/all_models")
    def api_sample_all_models(subset: str, exp_name: str, sample_index: int):
        """Get all model outputs for a specific sample, sorted by win rate."""
        # stats_scope: 'filtered' = only count battles between filtered models
        #              'all' = count all battles (but show only filtered models)
        stats_scope = request.args.get("stats_scope", "filtered", type=str)

        result = data_loader.get_sample_all_models(
            subset, exp_name, sample_index, _csv_arg("models"), stats_scope
        )
        if not result:
            return jsonify({"error": "Sample not found"}), 404

        return jsonify(result)

    @app.route("/api/subsets/<subset>/experiments/<exp_name>/samples/<int:sample_index>/models/<path:model>/battles")
    def api_model_battles_for_sample(subset: str, exp_name: str, sample_index: int, model: str):
        """Get all battle records for a specific model on a specific sample."""
        result = data_loader.get_model_battles_for_sample(
            subset=subset,
            exp_name=exp_name,
            sample_index=sample_index,
            model=model,
            # Optional opponent filter (comma-separated).
            opponent_models=_csv_arg("opponents"),
        )
        return jsonify(result)

    @app.route("/api/subsets/<subset>/experiments/<exp_name>/prompts")
    def api_prompts(subset: str, exp_name: str):
        """Get paginated list of prompts/samples with all model outputs."""
        page, page_size = _read_pagination(default_page_size=10)

        prompts, total = data_loader.get_prompts(
            subset=subset,
            exp_name=exp_name,
            page=page,
            page_size=page_size,
            min_images=request.args.get("min_images", None, type=int),
            max_images=request.args.get("max_images", None, type=int),
            prompt_source=request.args.get("prompt_source", None, type=str),
            filter_models=_csv_arg("models"),
        )

        return _paginated("prompts", prompts, total, page, page_size)

    @app.route("/api/subsets/<subset>/experiments/<exp_name>/search")
    def api_search(subset: str, exp_name: str):
        """Search battles by text query (full-text search across instruction, task_type, prompt_source, metadata)."""
        query = request.args.get("q", "", type=str)
        page, page_size = _read_pagination(default_page_size=20)

        records, total = data_loader.search_battles(
            subset=subset,
            exp_name=exp_name,
            query=query,
            page=page,
            page_size=page_size,
            models=_csv_arg("models"),
            consistency_filter=_tri_state(
                request.args.get("consistent", None, type=str)
            ),
        )

        return _paginated(
            "battles",
            [r.to_dict() for r in records],
            total,
            page,
            page_size,
            query=query,
        )

    @app.route("/api/subsets/<subset>/experiments/<exp_name>/search/prompts")
    def api_search_prompts(subset: str, exp_name: str):
        """Search prompts by text query."""
        query = request.args.get("q", "", type=str)
        page, page_size = _read_pagination(default_page_size=10)

        prompts, total = data_loader.search_prompts(
            subset=subset,
            exp_name=exp_name,
            query=query,
            page=page,
            page_size=page_size,
            filter_models=_csv_arg("models"),
        )

        return _paginated("prompts", prompts, total, page, page_size, query=query)

    @app.route("/api/subsets/<subset>/matrix")
    def api_win_rate_matrix(subset: str):
        """Get win rate matrix for all model pairs."""
        exp_name = request.args.get("exp_name", "__all__", type=str)
        result = data_loader.get_win_rate_matrix(subset, exp_name, _csv_arg("models"))
        return jsonify(result)

    @app.route("/api/subsets/<subset>/leaderboard/by-source")
    def api_elo_by_source(subset: str):
        """Get ELO rankings grouped by prompt source."""
        exp_name = request.args.get("exp_name", "__all__", type=str)
        result = data_loader.get_elo_by_source(subset, exp_name)
        return jsonify(result)

    @app.route("/api/subsets/<subset>/elo-history")
    def api_elo_history(subset: str):
        """Get ELO history over time."""
        exp_name = request.args.get("exp_name", "__all__", type=str)
        granularity = request.args.get("granularity", "day", type=str)

        result = data_loader.get_elo_history(
            subset, exp_name, granularity, _csv_arg("models")
        )
        return jsonify(result)

    @app.route("/api/overview/leaderboards")
    def api_overview_leaderboards():
        """Get leaderboard data for all subsets (for Overview page)."""
        return jsonify(data_loader.get_all_subsets_leaderboards())

    @app.route("/api/cross-subset/info")
    def api_cross_subset_info():
        """Get information about models across multiple subsets."""
        subsets, error = _read_subsets()
        if error:
            return jsonify({"error": error}), 400

        return jsonify(data_loader.get_cross_subset_info(subsets))

    @app.route("/api/cross-subset/elo")
    def api_cross_subset_elo():
        """Compute ELO rankings across multiple subsets."""
        subsets, error = _read_subsets()
        if error:
            return jsonify({"error": error}), 400

        exp_name = request.args.get("exp_name", "__all__", type=str)
        model_scope = request.args.get("model_scope", "all", type=str)

        result = data_loader.get_cross_subset_elo(subsets, exp_name, model_scope)
        return jsonify(result)

    # ========== Image Routes ==========

    @app.route("/images/<subset>/<model>/<int:sample_index>")
    def serve_model_image(subset: str, model: str, sample_index: int):
        """Serve model output image."""
        image_path = data_loader.get_image_path(subset, model, sample_index)
        if not image_path or not os.path.isfile(image_path):
            abort(404)

        # Pick the mime type from the file extension; unknown extensions
        # fall back to PNG.
        ext = os.path.splitext(image_path)[1].lower()
        mime_types = {
            ".png": "image/png",
            ".jpg": "image/jpeg",
            ".jpeg": "image/jpeg",
            ".webp": "image/webp",
        }

        return send_file(
            image_path,
            mimetype=mime_types.get(ext, "image/png"),
            max_age=3600,  # Cache for 1 hour
        )

    @app.route("/images/<subset>/input/<int:sample_index>")
    @app.route("/images/<subset>/input/<int:sample_index>/<int:img_idx>")
    def serve_input_image(subset: str, sample_index: int, img_idx: int = 0):
        """Serve input image from parquet dataset. Supports multiple images via img_idx."""
        image_bytes = data_loader.get_input_image_by_idx(subset, sample_index, img_idx)
        if not image_bytes:
            abort(404)

        # NOTE(review): the response is always labelled image/png regardless
        # of the stored bytes — confirm the dataset only holds PNG data.
        return send_file(
            io.BytesIO(image_bytes),
            mimetype="image/png",
            max_age=3600,
        )

    return app
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
def run_server(
    arena_dir: str,
    data_dir: str,
    host: str = "0.0.0.0",
    port: int = 8080,
    debug: bool = False,
):
    """
    Start the visualization server and block until it stops.

    Configures root logging at INFO level, prints a startup banner, builds
    the Flask app via ``create_app`` and then serves it with Flask's
    built-in threaded server.

    Args:
        arena_dir: Path to arena directory
        data_dir: Path to data directory
        host: Host to bind to
        port: Port to listen on
        debug: Enable debug mode
    """
    import logging

    logging.basicConfig(
        format="%(asctime)s [%(levelname)s] %(message)s",
        datefmt="%H:%M:%S",
        level=logging.INFO,
    )

    rule = "=" * 60

    # Banner shown before the app (and its data loader) is constructed.
    startup_lines = (
        "\n" + rule,
        " GenArena Arena Visualizer",
        rule,
        f" Arena Dir: {arena_dir}",
        f" Data Dir: {data_dir}",
        rule,
        " Preloading data (this may take a while)...",
        rule + "\n",
    )
    for line in startup_lines:
        print(line)

    app = create_app(arena_dir, data_dir)

    # Banner shown once the app has been built.
    ready_lines = (
        "\n" + rule,
        f" Server ready: http://{host}:{port}",
        rule + "\n",
    )
    for line in ready_lines:
        print(line)

    app.run(host=host, port=port, debug=debug, threaded=True)
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
def create_hf_app(
|
|
512
|
+
arena_dir: str,
|
|
513
|
+
data_dir: str,
|
|
514
|
+
hf_repo: str,
|
|
515
|
+
image_files: list[str],
|
|
516
|
+
) -> Flask:
|
|
517
|
+
"""
|
|
518
|
+
Create Flask app for HuggingFace Spaces deployment.
|
|
519
|
+
|
|
520
|
+
This version uses HF CDN URLs for model output images instead of
|
|
521
|
+
serving them from local filesystem.
|
|
522
|
+
|
|
523
|
+
Args:
|
|
524
|
+
arena_dir: Path to arena directory (metadata only, no images)
|
|
525
|
+
data_dir: Path to data directory containing parquet files
|
|
526
|
+
hf_repo: HuggingFace repo ID for image CDN URLs
|
|
527
|
+
image_files: List of image file paths in the HF repo
|
|
528
|
+
|
|
529
|
+
Returns:
|
|
530
|
+
Configured Flask app for HF Spaces
|
|
531
|
+
"""
|
|
532
|
+
from genarena.visualize.data_loader import HFArenaDataLoader
|
|
533
|
+
|
|
534
|
+
# Get the directory containing this file for templates/static
|
|
535
|
+
app_dir = os.path.dirname(os.path.abspath(__file__))
|
|
536
|
+
|
|
537
|
+
app = Flask(
|
|
538
|
+
__name__,
|
|
539
|
+
template_folder=os.path.join(app_dir, "templates"),
|
|
540
|
+
static_folder=os.path.join(app_dir, "static"),
|
|
541
|
+
)
|
|
542
|
+
|
|
543
|
+
# Store config
|
|
544
|
+
app.config["ARENA_DIR"] = arena_dir
|
|
545
|
+
app.config["DATA_DIR"] = data_dir
|
|
546
|
+
app.config["USE_HF_CDN"] = True
|
|
547
|
+
app.config["HF_REPO"] = hf_repo
|
|
548
|
+
|
|
549
|
+
# Create HF data loader
|
|
550
|
+
data_loader = HFArenaDataLoader(arena_dir, data_dir, hf_repo, image_files)
|
|
551
|
+
|
|
552
|
+
# ========== Page Routes ==========
|
|
553
|
+
|
|
554
|
+
@app.route("/")
|
|
555
|
+
def index():
|
|
556
|
+
"""Main page."""
|
|
557
|
+
return render_template("index.html")
|
|
558
|
+
|
|
559
|
+
# ========== API Routes ==========
|
|
560
|
+
# Copy all API routes from create_app - they work the same way
|
|
561
|
+
|
|
562
|
+
@app.route("/api/subsets")
|
|
563
|
+
def api_subsets():
|
|
564
|
+
"""Get list of available subsets."""
|
|
565
|
+
subsets = data_loader.discover_subsets()
|
|
566
|
+
return jsonify({"subsets": subsets})
|
|
567
|
+
|
|
568
|
+
@app.route("/api/subsets/<subset>/info")
|
|
569
|
+
def api_subset_info(subset: str):
|
|
570
|
+
"""Get information about a subset."""
|
|
571
|
+
info = data_loader.get_subset_info(subset)
|
|
572
|
+
if not info:
|
|
573
|
+
return jsonify({"error": "Subset not found"}), 404
|
|
574
|
+
|
|
575
|
+
return jsonify({
|
|
576
|
+
"name": info.name,
|
|
577
|
+
"models": info.models,
|
|
578
|
+
"experiments": info.experiments,
|
|
579
|
+
"total_battles": info.total_battles,
|
|
580
|
+
"min_input_images": info.min_input_images,
|
|
581
|
+
"max_input_images": info.max_input_images,
|
|
582
|
+
"prompt_sources": info.prompt_sources,
|
|
583
|
+
})
|
|
584
|
+
|
|
585
|
+
@app.route("/api/subsets/<subset>/experiments/<exp_name>/battles")
|
|
586
|
+
def api_battles(subset: str, exp_name: str):
|
|
587
|
+
"""Get paginated battle records."""
|
|
588
|
+
page = request.args.get("page", 1, type=int)
|
|
589
|
+
page_size = request.args.get("page_size", 20, type=int)
|
|
590
|
+
result_filter = request.args.get("result", None, type=str)
|
|
591
|
+
consistency = request.args.get("consistent", None, type=str)
|
|
592
|
+
min_images = request.args.get("min_images", None, type=int)
|
|
593
|
+
max_images = request.args.get("max_images", None, type=int)
|
|
594
|
+
prompt_source = request.args.get("prompt_source", None, type=str)
|
|
595
|
+
|
|
596
|
+
models_param = request.args.get("models", None, type=str)
|
|
597
|
+
models = None
|
|
598
|
+
if models_param:
|
|
599
|
+
models = [m.strip() for m in models_param.split(",") if m.strip()]
|
|
600
|
+
|
|
601
|
+
consistency_filter = None
|
|
602
|
+
if consistency == "true":
|
|
603
|
+
consistency_filter = True
|
|
604
|
+
elif consistency == "false":
|
|
605
|
+
consistency_filter = False
|
|
606
|
+
|
|
607
|
+
records, total = data_loader.get_battles(
|
|
608
|
+
subset=subset,
|
|
609
|
+
exp_name=exp_name,
|
|
610
|
+
page=page,
|
|
611
|
+
page_size=page_size,
|
|
612
|
+
models=models,
|
|
613
|
+
result_filter=result_filter,
|
|
614
|
+
consistency_filter=consistency_filter,
|
|
615
|
+
min_images=min_images,
|
|
616
|
+
max_images=max_images,
|
|
617
|
+
prompt_source=prompt_source,
|
|
618
|
+
)
|
|
619
|
+
|
|
620
|
+
return jsonify({
|
|
621
|
+
"battles": [r.to_dict() for r in records],
|
|
622
|
+
"total": total,
|
|
623
|
+
"page": page,
|
|
624
|
+
"page_size": page_size,
|
|
625
|
+
"total_pages": (total + page_size - 1) // page_size,
|
|
626
|
+
})
|
|
627
|
+
|
|
628
|
+
@app.route("/api/subsets/<subset>/experiments/<exp_name>/battles/<path:battle_id>")
|
|
629
|
+
def api_battle_detail(subset: str, exp_name: str, battle_id: str):
|
|
630
|
+
"""Get detailed battle record."""
|
|
631
|
+
try:
|
|
632
|
+
parts = battle_id.rsplit(":", 1)
|
|
633
|
+
sample_index = int(parts[1])
|
|
634
|
+
model_part = parts[0]
|
|
635
|
+
|
|
636
|
+
if "_vs_" in model_part:
|
|
637
|
+
models = model_part.split("_vs_")
|
|
638
|
+
model_a, model_b = models[0], models[1]
|
|
639
|
+
else:
|
|
640
|
+
return jsonify({"error": "Invalid battle_id format"}), 400
|
|
641
|
+
except (ValueError, IndexError):
|
|
642
|
+
return jsonify({"error": "Invalid battle_id format"}), 400
|
|
643
|
+
|
|
644
|
+
record = data_loader.get_battle_detail(
|
|
645
|
+
subset, exp_name, model_a, model_b, sample_index
|
|
646
|
+
)
|
|
647
|
+
|
|
648
|
+
if not record:
|
|
649
|
+
return jsonify({"error": "Battle not found"}), 404
|
|
650
|
+
|
|
651
|
+
return jsonify(record.to_detail_dict())
|
|
652
|
+
|
|
653
|
+
@app.route("/api/subsets/<subset>/stats")
|
|
654
|
+
def api_stats(subset: str):
|
|
655
|
+
"""Get statistics for a subset."""
|
|
656
|
+
exp_name = request.args.get("exp_name", None, type=str)
|
|
657
|
+
stats = data_loader.get_stats(subset, exp_name)
|
|
658
|
+
|
|
659
|
+
if not stats:
|
|
660
|
+
return jsonify({"error": "Subset not found"}), 404
|
|
661
|
+
|
|
662
|
+
return jsonify(stats)
|
|
663
|
+
|
|
664
|
+
@app.route("/api/subsets/<subset>/leaderboard")
|
|
665
|
+
def api_elo_leaderboard(subset: str):
|
|
666
|
+
"""Get ELO leaderboard for a subset."""
|
|
667
|
+
models_param = request.args.get("models", None, type=str)
|
|
668
|
+
filter_models = None
|
|
669
|
+
if models_param:
|
|
670
|
+
filter_models = [m.strip() for m in models_param.split(",") if m.strip()]
|
|
671
|
+
|
|
672
|
+
leaderboard = data_loader.get_elo_leaderboard(subset, filter_models)
|
|
673
|
+
return jsonify({"leaderboard": leaderboard})
|
|
674
|
+
|
|
675
|
+
@app.route("/api/subsets/<subset>/models/<path:model>/stats")
|
|
676
|
+
def api_model_stats(subset: str, model: str):
|
|
677
|
+
"""Get detailed statistics for a specific model."""
|
|
678
|
+
exp_name = request.args.get("exp_name", "__all__", type=str)
|
|
679
|
+
stats = data_loader.get_model_vs_stats(subset, model, exp_name)
|
|
680
|
+
|
|
681
|
+
if not stats:
|
|
682
|
+
return jsonify({"error": "Model not found"}), 404
|
|
683
|
+
|
|
684
|
+
return jsonify(stats)
|
|
685
|
+
|
|
686
|
+
@app.route("/api/subsets/<subset>/experiments/<exp_name>/h2h")
|
|
687
|
+
def api_head_to_head(subset: str, exp_name: str):
|
|
688
|
+
"""Get head-to-head statistics between two models."""
|
|
689
|
+
model_a = request.args.get("model_a", None, type=str)
|
|
690
|
+
model_b = request.args.get("model_b", None, type=str)
|
|
691
|
+
|
|
692
|
+
if not model_a or not model_b:
|
|
693
|
+
return jsonify({"error": "model_a and model_b are required"}), 400
|
|
694
|
+
|
|
695
|
+
h2h = data_loader.get_head_to_head(subset, exp_name, model_a, model_b)
|
|
696
|
+
return jsonify(h2h)
|
|
697
|
+
|
|
698
|
+
@app.route("/api/subsets/<subset>/samples/<int:sample_index>/input_count")
|
|
699
|
+
def api_input_image_count(subset: str, sample_index: int):
|
|
700
|
+
"""Get the number of input images for a sample."""
|
|
701
|
+
count = data_loader.get_input_image_count(subset, sample_index)
|
|
702
|
+
return jsonify({"count": count})
|
|
703
|
+
|
|
704
|
+
@app.route("/api/subsets/<subset>/experiments/<exp_name>/samples/<int:sample_index>/all_models")
def api_sample_all_models(subset: str, exp_name: str, sample_index: int):
    """Return every model's output for one sample as JSON.

    Query parameters:
        models: optional comma-separated model names; surrounding whitespace
            is stripped and empty entries are dropped.
        stats_scope: string forwarded to the data loader
            (defaults to "filtered").

    Responds with HTTP 404 and an error body when the loader returns a
    falsy result.
    """
    query = request.args

    raw_models = query.get("models", None, type=str)
    selected = (
        [name for name in map(str.strip, raw_models.split(",")) if name]
        if raw_models
        else None
    )

    scope = query.get("stats_scope", "filtered", type=str)

    payload = data_loader.get_sample_all_models(
        subset, exp_name, sample_index, selected, scope
    )
    if payload:
        return jsonify(payload)

    return jsonify({"error": "Sample not found"}), 404
|
|
722
|
+
|
|
723
|
+
@app.route("/api/subsets/<subset>/experiments/<exp_name>/samples/<int:sample_index>/models/<path:model>/battles")
def api_model_battles_for_sample(subset: str, exp_name: str, sample_index: int, model: str):
    """Return the battle records of one model on one sample as JSON.

    Query parameters:
        opponents: optional comma-separated opponent model names; surrounding
            whitespace is stripped and empty entries are dropped. When the
            parameter is absent or empty, ``None`` is passed to the loader.
    """
    raw_opponents = request.args.get("opponents", None, type=str)
    opponents = (
        [name for name in map(str.strip, raw_opponents.split(",")) if name]
        if raw_opponents
        else None
    )

    return jsonify(
        data_loader.get_model_battles_for_sample(
            subset=subset,
            exp_name=exp_name,
            sample_index=sample_index,
            model=model,
            opponent_models=opponents,
        )
    )
|
|
740
|
+
|
|
741
|
+
@app.route("/api/subsets/<subset>/experiments/<exp_name>/prompts")
def api_prompts(subset: str, exp_name: str):
    """Get paginated list of prompts/samples.

    Query parameters:
        page: page number, default 1; values below 1 are clamped to 1.
        page_size: items per page, default 10; values below 1 are clamped to 1.
        min_images / max_images: optional integers forwarded to the loader.
        prompt_source: optional string forwarded to the loader.
        models: optional comma-separated model names; surrounding whitespace
            is stripped and empty entries are dropped.

    Returns:
        JSON object with "prompts", "total", "page", "page_size" and
        "total_pages". "page" and "page_size" echo the (clamped) values
        actually used.
    """
    # Clamp pagination inputs. A client-supplied page_size=0 would otherwise
    # raise ZeroDivisionError in the total_pages computation below (HTTP 500),
    # and negative values would yield a nonsensical page count.
    page = max(request.args.get("page", 1, type=int), 1)
    page_size = max(request.args.get("page_size", 10, type=int), 1)
    min_images = request.args.get("min_images", None, type=int)
    max_images = request.args.get("max_images", None, type=int)
    prompt_source = request.args.get("prompt_source", None, type=str)

    models_param = request.args.get("models", None, type=str)
    filter_models = None
    if models_param:
        filter_models = [m.strip() for m in models_param.split(",") if m.strip()]

    prompts, total = data_loader.get_prompts(
        subset=subset,
        exp_name=exp_name,
        page=page,
        page_size=page_size,
        min_images=min_images,
        max_images=max_images,
        prompt_source=prompt_source,
        filter_models=filter_models,
    )

    return jsonify({
        "prompts": prompts,
        "total": total,
        "page": page,
        "page_size": page_size,
        # Ceiling division; safe because page_size >= 1 after clamping.
        "total_pages": (total + page_size - 1) // page_size,
    })
|
|
773
|
+
|
|
774
|
+
@app.route("/api/subsets/<subset>/experiments/<exp_name>/search")
def api_search(subset: str, exp_name: str):
    """Search battles by text query.

    Query parameters:
        q: search text, default "".
        page: page number, default 1; values below 1 are clamped to 1.
        page_size: items per page, default 20; values below 1 are clamped to 1.
        consistent: "true" or "false" selects a boolean consistency filter;
            any other value (or absence) means no filter (None).
        models: optional comma-separated model names; surrounding whitespace
            is stripped and empty entries are dropped.

    Returns:
        JSON object with "battles" (each record serialized via ``to_dict()``),
        "total", "page", "page_size", "total_pages" and the echoed "query".
    """
    query = request.args.get("q", "", type=str)
    # Clamp pagination inputs. A client-supplied page_size=0 would otherwise
    # raise ZeroDivisionError in the total_pages computation below (HTTP 500),
    # and negative values would yield a nonsensical page count.
    page = max(request.args.get("page", 1, type=int), 1)
    page_size = max(request.args.get("page_size", 20, type=int), 1)
    consistency = request.args.get("consistent", None, type=str)

    models_param = request.args.get("models", None, type=str)
    models = None
    if models_param:
        models = [m.strip() for m in models_param.split(",") if m.strip()]

    # Only the exact lowercase strings "true"/"false" enable the filter.
    consistency_filter = None
    if consistency == "true":
        consistency_filter = True
    elif consistency == "false":
        consistency_filter = False

    records, total = data_loader.search_battles(
        subset=subset,
        exp_name=exp_name,
        query=query,
        page=page,
        page_size=page_size,
        models=models,
        consistency_filter=consistency_filter,
    )

    return jsonify({
        "battles": [r.to_dict() for r in records],
        "total": total,
        "page": page,
        "page_size": page_size,
        # Ceiling division; safe because page_size >= 1 after clamping.
        "total_pages": (total + page_size - 1) // page_size,
        "query": query,
    })
|
|
811
|
+
|
|
812
|
+
@app.route("/api/subsets/<subset>/experiments/<exp_name>/search/prompts")
def api_search_prompts(subset: str, exp_name: str):
    """Search prompts by text query.

    Query parameters:
        q: search text, default "".
        page: page number, default 1; values below 1 are clamped to 1.
        page_size: items per page, default 10; values below 1 are clamped to 1.
        models: optional comma-separated model names; surrounding whitespace
            is stripped and empty entries are dropped.

    Returns:
        JSON object with "prompts", "total", "page", "page_size",
        "total_pages" and the echoed "query".
    """
    query = request.args.get("q", "", type=str)
    # Clamp pagination inputs. A client-supplied page_size=0 would otherwise
    # raise ZeroDivisionError in the total_pages computation below (HTTP 500),
    # and negative values would yield a nonsensical page count.
    page = max(request.args.get("page", 1, type=int), 1)
    page_size = max(request.args.get("page_size", 10, type=int), 1)

    models_param = request.args.get("models", None, type=str)
    filter_models = None
    if models_param:
        filter_models = [m.strip() for m in models_param.split(",") if m.strip()]

    prompts, total = data_loader.search_prompts(
        subset=subset,
        exp_name=exp_name,
        query=query,
        page=page,
        page_size=page_size,
        filter_models=filter_models,
    )

    return jsonify({
        "prompts": prompts,
        "total": total,
        "page": page,
        "page_size": page_size,
        # Ceiling division; safe because page_size >= 1 after clamping.
        "total_pages": (total + page_size - 1) // page_size,
        "query": query,
    })
|
|
841
|
+
|
|
842
|
+
@app.route("/api/subsets/<subset>/matrix")
def api_win_rate_matrix(subset: str):
    """Return the loader's win-rate matrix for a subset as JSON.

    Query parameters:
        exp_name: experiment name forwarded to the data loader
            (defaults to "__all__").
        models: optional comma-separated model names; surrounding whitespace
            is stripped and empty entries are dropped.
    """
    query = request.args
    experiment = query.get("exp_name", "__all__", type=str)

    raw_models = query.get("models", None, type=str)
    selected = (
        [name for name in map(str.strip, raw_models.split(",")) if name]
        if raw_models
        else None
    )

    return jsonify(data_loader.get_win_rate_matrix(subset, experiment, selected))
|
|
854
|
+
|
|
855
|
+
@app.route("/api/subsets/<subset>/leaderboard/by-source")
def api_elo_by_source(subset: str):
    """Return the loader's per-source ELO result for a subset as JSON.

    Query parameters:
        exp_name: experiment name forwarded to the data loader
            (defaults to "__all__").
    """
    experiment = request.args.get("exp_name", "__all__", type=str)
    return jsonify(data_loader.get_elo_by_source(subset, experiment))
|
|
861
|
+
|
|
862
|
+
@app.route("/api/subsets/<subset>/elo-history")
def api_elo_history(subset: str):
    """Return the loader's ELO history for a subset as JSON.

    Query parameters:
        exp_name: experiment name forwarded to the data loader
            (defaults to "__all__").
        granularity: string forwarded to the data loader (defaults to "day").
        models: optional comma-separated model names; surrounding whitespace
            is stripped and empty entries are dropped.
    """
    query = request.args
    experiment = query.get("exp_name", "__all__", type=str)
    step = query.get("granularity", "day", type=str)

    raw_models = query.get("models", None, type=str)
    selected = (
        [name for name in map(str.strip, raw_models.split(",")) if name]
        if raw_models
        else None
    )

    return jsonify(data_loader.get_elo_history(subset, experiment, step, selected))
|
|
875
|
+
|
|
876
|
+
@app.route("/api/overview/leaderboards")
def api_overview_leaderboards():
    """Return the loader's leaderboard data for all subsets as JSON."""
    return jsonify(data_loader.get_all_subsets_leaderboards())
|
|
881
|
+
|
|
882
|
+
@app.route("/api/cross-subset/info")
def api_cross_subset_info():
    """Return the loader's cross-subset model information as JSON.

    Query parameters:
        subsets: required comma-separated subset names; surrounding
            whitespace is stripped and empty entries are dropped.

    Responds with HTTP 400 when the parameter is missing/empty, or when no
    subset name remains after cleaning.
    """
    raw_subsets = request.args.get("subsets", "", type=str)
    if not raw_subsets:
        return jsonify({"error": "subsets parameter is required"}), 400

    names = [part for part in map(str.strip, raw_subsets.split(",")) if part]
    # A value such as "," is non-empty but yields no usable names.
    if not names:
        return jsonify({"error": "At least 1 subset required"}), 400

    return jsonify(data_loader.get_cross_subset_info(names))
|
|
895
|
+
|
|
896
|
+
@app.route("/api/cross-subset/elo")
def api_cross_subset_elo():
    """Return the loader's ELO result computed across several subsets as JSON.

    Query parameters:
        subsets: required comma-separated subset names; surrounding
            whitespace is stripped and empty entries are dropped.
        exp_name: experiment name forwarded to the data loader
            (defaults to "__all__").
        model_scope: string forwarded to the data loader (defaults to "all").

    Responds with HTTP 400 when ``subsets`` is missing/empty, or when no
    subset name remains after cleaning.
    """
    query = request.args

    raw_subsets = query.get("subsets", "", type=str)
    if not raw_subsets:
        return jsonify({"error": "subsets parameter is required"}), 400

    names = [part for part in map(str.strip, raw_subsets.split(",")) if part]
    # A value such as "," is non-empty but yields no usable names.
    if not names:
        return jsonify({"error": "At least 1 subset required"}), 400

    experiment = query.get("exp_name", "__all__", type=str)
    scope = query.get("model_scope", "all", type=str)

    return jsonify(data_loader.get_cross_subset_elo(names, experiment, scope))
|
|
912
|
+
|
|
913
|
+
# ========== Image Routes ==========
|
|
914
|
+
|
|
915
|
+
@app.route("/images/<subset>/<model>/<int:sample_index>")
def serve_model_image(subset: str, model: str, sample_index: int):
    """Redirect to the URL the data loader resolves for a model output image.

    Aborts with HTTP 404 when the loader returns no (falsy) URL.
    """
    target = data_loader.get_model_image_url(subset, model, sample_index)
    if not target:
        abort(404)
    return redirect(target)
|
|
922
|
+
|
|
923
|
+
@app.route("/images/<subset>/input/<int:sample_index>")
@app.route("/images/<subset>/input/<int:sample_index>/<int:img_idx>")
def serve_input_image(subset: str, sample_index: int, img_idx: int = 0):
    """Serve one input image of a sample from bytes supplied by the data loader.

    ``img_idx`` defaults to 0 when the shorter URL form is used. Aborts with
    HTTP 404 when the loader returns no (falsy) bytes. The response is always
    sent with the ``image/png`` mimetype and a ``max_age`` of 3600 seconds.
    """
    payload = data_loader.get_input_image_by_idx(subset, sample_index, img_idx)
    if not payload:
        abort(404)

    # Wrap the raw bytes in a file-like object for send_file.
    buffer = io.BytesIO(payload)
    return send_file(buffer, mimetype="image/png", max_age=3600)
|
|
937
|
+
|
|
938
|
+
return app
|