genarena 0.0.1-py3-none-any.whl → 0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genarena/__init__.py +49 -2
- genarena/__main__.py +10 -0
- genarena/arena.py +1685 -0
- genarena/battle.py +337 -0
- genarena/bt_elo.py +507 -0
- genarena/cli.py +1581 -0
- genarena/data.py +476 -0
- genarena/deploy/Dockerfile +25 -0
- genarena/deploy/README.md +55 -0
- genarena/deploy/__init__.py +5 -0
- genarena/deploy/app.py +84 -0
- genarena/experiments.py +121 -0
- genarena/leaderboard.py +270 -0
- genarena/logs.py +409 -0
- genarena/models.py +412 -0
- genarena/prompts/__init__.py +127 -0
- genarena/prompts/mmrb2.py +373 -0
- genarena/sampling.py +336 -0
- genarena/state.py +656 -0
- genarena/sync/__init__.py +105 -0
- genarena/sync/auto_commit.py +118 -0
- genarena/sync/deploy_ops.py +543 -0
- genarena/sync/git_ops.py +422 -0
- genarena/sync/hf_ops.py +891 -0
- genarena/sync/init_ops.py +431 -0
- genarena/sync/packer.py +587 -0
- genarena/sync/submit.py +837 -0
- genarena/utils.py +103 -0
- genarena/validation/__init__.py +19 -0
- genarena/validation/schema.py +327 -0
- genarena/validation/validator.py +329 -0
- genarena/visualize/README.md +148 -0
- genarena/visualize/__init__.py +14 -0
- genarena/visualize/app.py +938 -0
- genarena/visualize/data_loader.py +2335 -0
- genarena/visualize/static/app.js +3762 -0
- genarena/visualize/static/model_aliases.json +86 -0
- genarena/visualize/static/style.css +4104 -0
- genarena/visualize/templates/index.html +413 -0
- genarena/vlm.py +519 -0
- genarena-0.1.0.dist-info/METADATA +178 -0
- genarena-0.1.0.dist-info/RECORD +44 -0
- {genarena-0.0.1.dist-info → genarena-0.1.0.dist-info}/WHEEL +1 -2
- genarena-0.1.0.dist-info/entry_points.txt +2 -0
- genarena-0.0.1.dist-info/METADATA +0 -26
- genarena-0.0.1.dist-info/RECORD +0 -5
- genarena-0.0.1.dist-info/top_level.txt +0 -1
genarena/data.py
ADDED
@@ -0,0 +1,476 @@
+# Copyright 2026 Ruihang Li.
+# Licensed under the Apache License, Version 2.0.
+# See LICENSE file in the project root for details.
+
+"""Data loading module for parquet datasets."""
+
+import io
+import os
+import warnings
+from collections import defaultdict
+from dataclasses import dataclass
+from typing import Any, Optional
+
+import pandas as pd
+import pyarrow.parquet as pq
+from PIL import Image as PILImage
+
+
+@dataclass
+class DataSample:
+    """Data sample from parquet dataset."""
+
+    index: int
+    task_type: str
+    instruction: str
+    input_images: list[bytes]  # List of image bytes from parquet
+    prompt_source: Optional[str] = None
+    original_metadata: Optional[dict[str, Any]] = None
+
+
+def _convert_to_bytes(img: Any) -> Optional[bytes]:
+    """
+    Convert various image formats to bytes.
+
+    Handles:
+    - bytes: return as-is
+    - PIL.Image: convert to PNG bytes
+    - dict with 'bytes' key: extract bytes
+    - dict with 'image' key: recursively process
+    - io.BytesIO: read bytes from buffer
+    - str (file path): read file bytes
+    - pyarrow struct: extract bytes from pyarrow internal format
+
+    Args:
+        img: Image in any supported format
+
+    Returns:
+        Image bytes, or None if conversion fails
+    """
+    if img is None:
+        return None
+
+    # Already bytes
+    if isinstance(img, bytes):
+        return img
+
+    # BytesIO object
+    if isinstance(img, io.BytesIO):
+        img.seek(0)
+        return img.read()
+
+    # PIL Image
+    if isinstance(img, PILImage.Image):
+        buffer = io.BytesIO()
+        img.save(buffer, format="PNG")
+        return buffer.getvalue()
+
+    # Dict formats (from HuggingFace datasets Image() type)
+    if isinstance(img, dict):
+        # Try 'bytes' key first
+        if "bytes" in img:
+            raw = img["bytes"]
+            if isinstance(raw, bytes):
+                return raw
+            elif isinstance(raw, io.BytesIO):
+                raw.seek(0)
+                return raw.read()
+            # Recurse for nested structures
+            return _convert_to_bytes(raw)
+
+        # Try 'image' key (some formats use this)
+        if "image" in img:
+            return _convert_to_bytes(img["image"])
+
+        # Try 'path' key if it's a file path
+        if "path" in img and img["path"] and isinstance(img["path"], str):
+            path = img["path"]
+            if os.path.isfile(path):
+                with open(path, "rb") as f:
+                    return f.read()
+
+    # String (file path)
+    if isinstance(img, str):
+        if os.path.isfile(img):
+            with open(img, "rb") as f:
+                return f.read()
+
+    # Handle pyarrow struct (when reading HuggingFace datasets parquet with pyarrow)
+    # PyArrow may return a struct with 'bytes' and 'path' fields
+    try:
+        # Try to access as pyarrow scalar
+        if hasattr(img, "as_py"):
+            return _convert_to_bytes(img.as_py())
+
+        # Try to access struct fields
+        if hasattr(img, "__getitem__") and not isinstance(img, (str, bytes, dict, list, tuple)):
+            # Try to get 'bytes' field
+            try:
+                bytes_val = img["bytes"]
+                if bytes_val is not None:
+                    return _convert_to_bytes(bytes_val)
+            except (KeyError, TypeError, IndexError):
+                pass
+    except Exception:
+        pass
+
+    # Numpy array (convert to PIL first)
+    try:
+        import numpy as np
+        if isinstance(img, np.ndarray):
+            pil_img = PILImage.fromarray(img)
+            buffer = io.BytesIO()
+            pil_img.save(buffer, format="PNG")
+            return buffer.getvalue()
+    except Exception:  # covers missing numpy (ImportError) as well as conversion failures
+        pass
+
+    # Last resort: try to get bytes attribute
+    if hasattr(img, "tobytes"):
+        try:
+            return img.tobytes()
+        except Exception:
+            pass
+
+    # Debug: log the type we couldn't handle
+    warnings.warn(f"_convert_to_bytes: Unknown image type: {type(img)}, repr: {repr(img)[:200]}")
+    return None
+
+
+def discover_subsets(data_dir: str) -> list[str]:
+    """
+    Discover all subset directories in the data directory.
+
+    A valid subset directory should contain at least one data-*.parquet file.
+
+    Args:
+        data_dir: Path to the parquet data directory
+
+    Returns:
+        List of subset names (directory names)
+    """
+    subsets = []
+
+    if not os.path.isdir(data_dir):
+        warnings.warn(f"Data directory does not exist: {data_dir}")
+        return subsets
+
+    for name in os.listdir(data_dir):
+        subset_path = os.path.join(data_dir, name)
+        if os.path.isdir(subset_path):
+            # Check if directory contains parquet files
+            parquet_files = [
+                f for f in os.listdir(subset_path)
+                if f.startswith("data-") and f.endswith(".parquet")
+            ]
+            if parquet_files:
+                subsets.append(name)
+
+    return sorted(subsets)
+
+
+class ParquetDataset:
+    """
+    Dataset class for loading parquet formatted evaluation data.
+
+    Expected parquet columns:
+    - task_type: str
+    - instruction: str
+    - input_images: list of image bytes
+    - index: int (optional, will use row index if not present)
+    - prompt_source: str (optional)
+    - original_metadata: dict (optional)
+    """
+
+    def __init__(
+        self,
+        data_dir: str,
+        subset: str,
+        parquet_files: Optional[list[str]] = None,
+        load_mode: str = "full",
+    ):
+        """
+        Initialize the dataset.
+
+        Args:
+            data_dir: Root directory containing subset directories
+            subset: Name of the subset to load (e.g., 'basic', 'reasoning')
+            parquet_files: Optional explicit parquet file list to load. If provided,
+                only these files are loaded (useful for per-parquet multiprocessing).
+            load_mode: "full" loads all columns (and may decode images via HF datasets),
+                "index_only" only scans the parquet "index" column for fast sharding.
+        """
+        self.data_dir = data_dir
+        self.subset = subset
+        self.subset_path = os.path.join(data_dir, subset)
+        self.load_mode = load_mode
+
+        # Load and concatenate all parquet files in the subset
+        self._data: Optional[pd.DataFrame] = None
+        # Bookkeeping for sharding by parquet file
+        self._parquet_files: list[str] = list(parquet_files) if parquet_files else []
+        self._index_to_parquet: dict[int, str] = {}
+        self._all_indices: list[int] = []
+        self._load_data()
+
+    def _load_data(self) -> None:
+        """Load all parquet files in the subset directory.
+
+        Uses HuggingFace datasets library to properly decode Image features,
+        then converts to pandas DataFrame for consistent access.
+        """
+        if not os.path.isdir(self.subset_path):
+            warnings.warn(f"Subset directory does not exist: {self.subset_path}")
+            self._data = pd.DataFrame()
+            return
+
+        # Find all data-*.parquet files unless explicitly provided
+        if self._parquet_files:
+            parquet_files = list(self._parquet_files)
+        else:
+            parquet_files = sorted([
+                os.path.join(self.subset_path, f)
+                for f in os.listdir(self.subset_path)
+                if f.startswith("data-") and f.endswith(".parquet")
+            ])
+            self._parquet_files = parquet_files
+
+        if not parquet_files:
+            warnings.warn(f"No parquet files found in: {self.subset_path}")
+            self._data = pd.DataFrame()
+            return
+
+        # Fast path: only scan parquet index for sharding/grouping.
+        if self.load_mode == "index_only":
+            index_to_parquet: dict[int, str] = {}
+            all_indices: list[int] = []
+            for pf in parquet_files:
+                try:
+                    table = pq.read_table(pf, columns=["index"])
+                    col = table.column(0).to_pylist()
+                    for v in col:
+                        try:
+                            idx_int = int(v)
+                        except Exception:
+                            continue
+                        all_indices.append(idx_int)
+                        if idx_int not in index_to_parquet:
+                            index_to_parquet[idx_int] = pf
+                except Exception as e:
+                    warnings.warn(f"Failed to scan parquet index column {pf}: {e}")
+
+            self._index_to_parquet = index_to_parquet
+            self._all_indices = all_indices
+            self._data = pd.DataFrame()
+            return
+
+        # Try to use HuggingFace datasets library first (properly decodes Image features)
+        try:
+            from datasets import load_dataset
+
+            # Faster full load: let datasets load all files together.
+            ds = load_dataset(
+                "parquet",
+                data_files={"train": parquet_files},
+                split="train",
+            )
+
+            records: list[dict[str, Any]] = []
+            for i in range(len(ds)):
+                records.append(dict(ds[i]))
+
+            self._data = pd.DataFrame(records)
+            # Mapping is not guaranteed in this mode; multi-process sharding uses index_only mode.
+            if "index" in self._data.columns:
+                try:
+                    self._all_indices = [int(v) for v in self._data["index"].tolist()]
+                except Exception:
+                    self._all_indices = []
+
+        except ImportError:
+            # Fall back to pyarrow if datasets not available
+            warnings.warn(
+                "HuggingFace datasets library not available, "
+                "falling back to pyarrow (Image features may not decode correctly)"
+            )
+            dfs: list[pd.DataFrame] = []
+            index_to_parquet: dict[int, str] = {}
+            for pf in parquet_files:
+                try:
+                    df = pq.read_table(pf).to_pandas()
+                    # Preserve source mapping for sharding
+                    df["__source_parquet"] = pf
+                    if "index" in df.columns:
+                        for v in df["index"].tolist():
+                            try:
+                                idx_int = int(v)
+                                if idx_int not in index_to_parquet:
+                                    index_to_parquet[idx_int] = pf
+                            except Exception:
+                                continue
+                    dfs.append(df)
+                except Exception as e:
+                    warnings.warn(f"Failed to read parquet file {pf}: {e}")
+
+            if dfs:
+                self._data = pd.concat(dfs, ignore_index=True)
+            else:
+                self._data = pd.DataFrame()
+            self._index_to_parquet = index_to_parquet
+            if "index" in self._data.columns:
+                try:
+                    self._all_indices = [int(v) for v in self._data["index"].tolist()]
+                except Exception:
+                    self._all_indices = []
+
+    @property
+    def parquet_files(self) -> list[str]:
+        """Return the list of parquet files that back this dataset."""
+        return list(self._parquet_files)
+
+    def get_parquet_file_for_index(self, sample_index: int) -> Optional[str]:
+        """
+        Get the source parquet file for a given sample_index.
+
+        Args:
+            sample_index: The sample "index" field value.
+
+        Returns:
+            Parquet file path if known, else None.
+        """
+        try:
+            return self._index_to_parquet.get(int(sample_index))
+        except Exception:
+            return None
+
+    def group_indices_by_parquet(self, indices: Optional[list[int]] = None) -> dict[str, list[int]]:
+        """
+        Group indices by their source parquet file.
+
+        Args:
+            indices: Optional subset of indices to group. If None, uses all indices.
+
+        Returns:
+            Dict mapping parquet file path -> list of sample indices (in input order).
+            Indices whose parquet source is unknown are grouped under key "".
+        """
+        if indices is None:
+            indices = self.get_all_indices()
+
+        grouped: dict[str, list[int]] = defaultdict(list)
+        for idx in indices:
+            pf = self.get_parquet_file_for_index(idx) or ""
+            grouped[pf].append(idx)
+        return dict(grouped)
+
+    def __len__(self) -> int:
+        """Return the number of samples in the dataset."""
+        if self.load_mode == "index_only":
+            return len(self._all_indices)
+        return len(self._data) if self._data is not None else 0
+
+    def __getitem__(self, idx: int) -> DataSample:
+        """
+        Get a sample by index.
+
+        Args:
+            idx: Index of the sample
+
+        Returns:
+            DataSample object
+        """
+        if self._data is None or idx >= len(self._data):
+            raise IndexError(f"Index {idx} out of range for dataset of size {len(self)}")
+
+        row = self._data.iloc[idx]
+
+        # Extract fields with defaults
+        sample_index = row.get("index", idx)
+        task_type = row.get("task_type", "edit")
+        instruction = row.get("instruction", "")
+
+        # Handle input_images - could be various formats
+        # PyArrow reads HuggingFace Image() as dict: {"bytes": <bytes>, "path": None}
+        input_images_raw = row.get("input_images", [])
+        input_images = []
+
+        if input_images_raw is None:
+            pass  # input_images stays empty
+        elif isinstance(input_images_raw, (list, tuple)):
+            # List of images
+            for img in input_images_raw:
+                img_bytes = _convert_to_bytes(img)
+                if img_bytes is not None:
+                    input_images.append(img_bytes)
+                else:
+                    # Log warning but continue
+                    warnings.warn(f"Failed to convert image at index {sample_index}: {type(img)}")
+        else:
+            # Single image
+            img_bytes = _convert_to_bytes(input_images_raw)
+            if img_bytes is not None:
+                input_images.append(img_bytes)
+
+        prompt_source = row.get("prompt_source", None)
+        original_metadata = row.get("original_metadata", None)
+
+        return DataSample(
+            index=int(sample_index),
+            task_type=str(task_type),
+            instruction=str(instruction),
+            input_images=input_images,
+            prompt_source=prompt_source,
+            original_metadata=original_metadata,
+        )
+
+    def get_by_index(self, sample_index: int) -> Optional[DataSample]:
+        """
+        Get a sample by its index field (not row position).
+
+        Args:
+            sample_index: The 'index' field value to search for
+
+        Returns:
+            DataSample if found, None otherwise
+        """
+        if self.load_mode == "index_only":
+            return None
+
+        if self._data is None or len(self._data) == 0:
+            return None
+
+        # Check if 'index' column exists
+        if "index" in self._data.columns:
+            matches = self._data[self._data["index"] == sample_index]
+            if not matches.empty:
+                row_idx = matches.index[0]
+                return self[row_idx]
+
+        # Fall back to using position as index
+        if 0 <= sample_index < len(self._data):
+            return self[sample_index]
+
+        return None
+
+    def get_all_indices(self) -> list[int]:
+        """
+        Get all sample indices in the dataset.
+
+        Returns:
+            List of index values
+        """
+        if self.load_mode == "index_only":
+            return list(self._all_indices)
+
+        if self._data is None or len(self._data) == 0:
+            return []
+
+        if "index" in self._data.columns:
+            return self._data["index"].tolist()
+        else:
+            return list(range(len(self._data)))
+
+    @property
+    def is_empty(self) -> bool:
+        """Check if the dataset is empty."""
+        return len(self) == 0
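
To make the new loader concrete, here is a minimal usage sketch (not part of the package): it assumes a local `./data` directory laid out as `<data_dir>/<subset>/data-*.parquet`, which is the layout `discover_subsets` and `ParquetDataset` expect; the directory path and the `"basic"` subset name are illustrative.

```python
# Minimal sketch: enumerate subsets and inspect one sample from each.
# Assumes ./data follows the <data_dir>/<subset>/data-*.parquet layout.
from genarena.data import ParquetDataset, discover_subsets

data_dir = "./data"  # illustrative path
for subset in discover_subsets(data_dir):
    ds = ParquetDataset(data_dir, subset)  # load_mode="full" by default
    if ds.is_empty:
        continue
    sample = ds[0]  # DataSample with decoded image bytes
    print(subset, sample.index, sample.task_type, len(sample.input_images))

# Fast sharding path: scan only the "index" column, then group by source file.
index_ds = ParquetDataset(data_dir, "basic", load_mode="index_only")
print(index_ds.group_indices_by_parquet())
```
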
genarena/deploy/Dockerfile
ADDED
@@ -0,0 +1,25 @@
+FROM python:3.10-slim
+
+WORKDIR /app
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy project files
+COPY . .
+
+# Install Python dependencies
+RUN pip install --no-cache-dir -e .[web]
+
+# Download parquet benchmark data from HuggingFace
+# This dataset contains the prompt/benchmark data (not arena battle results)
+ENV HF_DATA_REPO="rhli/genarena"
+RUN python -c "from huggingface_hub import snapshot_download; snapshot_download('${HF_DATA_REPO}', repo_type='dataset', local_dir='/app/data')"
+
+# Expose port (HuggingFace Spaces default)
+EXPOSE 7860
+
+# Start the application
+CMD ["python", "genarena/deploy/app.py"]
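
The `snapshot_download` call baked into the image can also be run outside Docker to prefetch the same benchmark data; a minimal sketch, where `./data` is a local stand-in for the image's `/app/data`:

```python
# Minimal sketch: prefetch the parquet benchmark data without Docker.
# Mirrors the RUN step above; ./data is a local stand-in for /app/data.
from huggingface_hub import snapshot_download

local_path = snapshot_download(
    "rhli/genarena",      # HF_DATA_REPO default from the Dockerfile
    repo_type="dataset",
    local_dir="./data",
)
print(f"Benchmark data available at {local_path}")
```
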
genarena/deploy/README.md
ADDED
@@ -0,0 +1,55 @@
+---
+title: GenArena Leaderboard
+emoji: ⚔️
+colorFrom: purple
+colorTo: blue
+sdk: docker
+app_port: 7860
+pinned: false
+---
+
+# GenArena Leaderboard
+
+Interactive visualization for GenArena image generation model evaluations.
+
+## Features
+
+- Browse battle records across multiple subsets
+- View model outputs side-by-side with input images
+- ELO leaderboard and win rate matrix
+- Search and filter battles by model, result, or consistency
+- Detailed VLM judge reasoning for each battle
+- Head-to-head comparison between models
+
+## Data Sources
+
+- **Arena Data**: Loaded from the repo set by the `HF_ARENA_REPO` environment variable
+- **Benchmark Data**: Stored in this Space under `data/`
+
+## Environment Variables
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `HF_ARENA_REPO` | `genarena/leaderboard-data` | HuggingFace Dataset repo for arena data (battle logs, model outputs) |
+
+## Deployment
+
+This Space is deployed using Docker. The parquet benchmark data is stored directly in this repository under `data/`, while arena data (battle logs and model output images) is fetched from the configured `HF_ARENA_REPO`.
+
+Model output images are served via HuggingFace CDN URLs for efficient delivery.
+
+## Local Development
+
+```bash
+# Install genarena
+pip install -e .
+
+# Run local server
+genarena serve --arena_dir ./arena --data_dir ./data --port 8080
+```
+
+## Links
+
+- [GenArena GitHub](https://github.com/genarena/genarena)
+- [Official Arena Data](https://huggingface.co/datasets/rhli/genarena-battlefield)
+- [Benchmark Data](https://huggingface.co/datasets/rhli/genarena)
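
The CDN URLs mentioned under Deployment follow the Hub's standard `resolve` scheme; a sketch of how one could be constructed with `huggingface_hub` (the repo id is the `HF_ARENA_REPO` default, and the image path is illustrative):

```python
# Sketch: build the CDN-servable /resolve/ URL for one model-output image.
from huggingface_hub import hf_hub_url

url = hf_hub_url(
    repo_id="genarena/leaderboard-data",     # HF_ARENA_REPO default
    filename="outputs/example-model/0.png",  # illustrative image path
    repo_type="dataset",
)
# -> https://huggingface.co/datasets/genarena/leaderboard-data/resolve/main/outputs/example-model/0.png
print(url)
```
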
genarena/deploy/app.py
ADDED
@@ -0,0 +1,84 @@
+# Copyright 2026 Ruihang Li.
+# Licensed under the Apache License, Version 2.0.
+# See LICENSE file in the project root for details.
+
+"""HuggingFace Spaces startup script for GenArena Explorer."""
+
+import logging
+import os
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+    datefmt="%H:%M:%S",
+)
+logger = logging.getLogger(__name__)
+
+# Configuration from environment variables
+HF_ARENA_REPO = os.environ.get("HF_ARENA_REPO", "genarena/leaderboard-data")
+HF_DATA_REPO = os.environ.get("HF_DATA_REPO", "rhli/genarena")
+
+
+def main():
+    """Main entry point for HuggingFace Spaces."""
+    from huggingface_hub import list_repo_files, snapshot_download
+
+    logger.info("=" * 60)
+    logger.info(" GenArena Explorer - HuggingFace Spaces")
+    logger.info("=" * 60)
+    logger.info(f" Arena Repo: {HF_ARENA_REPO}")
+    logger.info(f" Data Repo: {HF_DATA_REPO}")
+    logger.info("=" * 60)
+
+    # 1. Parquet data - check if already downloaded during Docker build
+    data_dir = "/app/data"
+    if not os.path.isdir(data_dir) or not os.listdir(data_dir):
+        # Download parquet data at runtime if not present
+        logger.info(f"Downloading parquet data from {HF_DATA_REPO}...")
+        data_dir = snapshot_download(
+            HF_DATA_REPO,
+            repo_type="dataset",
+            local_dir="/app/data",
+        )
+        logger.info(f"Parquet data downloaded to: {data_dir}")
+    else:
+        logger.info(f"Using pre-downloaded parquet data: {data_dir}")
+
+    # 2. Download arena metadata (excluding images)
+    logger.info(f"Downloading arena metadata from {HF_ARENA_REPO}...")
+    arena_dir = snapshot_download(
+        HF_ARENA_REPO,
+        repo_type="dataset",
+        local_dir="/app/arena",
+        ignore_patterns=["*.png", "*.jpg", "*.jpeg", "*.webp"],
+    )
+    logger.info(f"Arena metadata downloaded to: {arena_dir}")
+
+    # 3. Get image file list for CDN URL mapping
+    logger.info("Scanning image files for CDN URL mapping...")
+    all_files = list_repo_files(HF_ARENA_REPO, repo_type="dataset")
+    image_files = [
+        f for f in all_files if f.endswith((".png", ".jpg", ".jpeg", ".webp"))
+    ]
+    logger.info(f"Found {len(image_files)} image files")
+
+    # 4. Start Flask server
+    logger.info("Starting Flask server...")
+    from genarena.visualize.app import create_hf_app
+
+    app = create_hf_app(
+        arena_dir=arena_dir,
+        data_dir=data_dir,
+        hf_repo=HF_ARENA_REPO,
+        image_files=image_files,
+    )
+
+    logger.info("=" * 60)
+    logger.info(" Server ready: http://0.0.0.0:7860")
+    logger.info("=" * 60)
+
+    app.run(host="0.0.0.0", port=7860, threaded=True)
+
+
+if __name__ == "__main__":
+    main()