genarena 0.0.1__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genarena/__init__.py +49 -2
- genarena/__main__.py +10 -0
- genarena/arena.py +1685 -0
- genarena/battle.py +337 -0
- genarena/bt_elo.py +507 -0
- genarena/cli.py +1581 -0
- genarena/data.py +476 -0
- genarena/deploy/Dockerfile +22 -0
- genarena/deploy/README.md +55 -0
- genarena/deploy/__init__.py +5 -0
- genarena/deploy/app.py +84 -0
- genarena/experiments.py +121 -0
- genarena/leaderboard.py +270 -0
- genarena/logs.py +409 -0
- genarena/models.py +412 -0
- genarena/prompts/__init__.py +127 -0
- genarena/prompts/mmrb2.py +373 -0
- genarena/sampling.py +336 -0
- genarena/state.py +656 -0
- genarena/sync/__init__.py +105 -0
- genarena/sync/auto_commit.py +118 -0
- genarena/sync/deploy_ops.py +543 -0
- genarena/sync/git_ops.py +422 -0
- genarena/sync/hf_ops.py +891 -0
- genarena/sync/init_ops.py +431 -0
- genarena/sync/packer.py +587 -0
- genarena/sync/submit.py +837 -0
- genarena/utils.py +103 -0
- genarena/validation/__init__.py +19 -0
- genarena/validation/schema.py +327 -0
- genarena/validation/validator.py +329 -0
- genarena/visualize/README.md +148 -0
- genarena/visualize/__init__.py +14 -0
- genarena/visualize/app.py +938 -0
- genarena/visualize/data_loader.py +2430 -0
- genarena/visualize/static/app.js +3762 -0
- genarena/visualize/static/model_aliases.json +86 -0
- genarena/visualize/static/style.css +4104 -0
- genarena/visualize/templates/index.html +413 -0
- genarena/vlm.py +519 -0
- genarena-0.1.1.dist-info/METADATA +178 -0
- genarena-0.1.1.dist-info/RECORD +44 -0
- {genarena-0.0.1.dist-info → genarena-0.1.1.dist-info}/WHEEL +1 -2
- genarena-0.1.1.dist-info/entry_points.txt +2 -0
- genarena-0.0.1.dist-info/METADATA +0 -26
- genarena-0.0.1.dist-info/RECORD +0 -5
- genarena-0.0.1.dist-info/top_level.txt +0 -1
genarena/sync/packer.py
ADDED
|
@@ -0,0 +1,587 @@
|
|
|
1
|
+
# Copyright 2026 Ruihang Li.
|
|
2
|
+
# Licensed under the Apache License, Version 2.0.
|
|
3
|
+
# See LICENSE file in the project root for details.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
ZIP packing utilities for GenArena.
|
|
7
|
+
|
|
8
|
+
This module provides functionality for packing and unpacking arena data
|
|
9
|
+
for Huggingface upload/download operations.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import logging
|
|
13
|
+
import os
|
|
14
|
+
import shutil
|
|
15
|
+
import tempfile
|
|
16
|
+
import zipfile
|
|
17
|
+
from dataclasses import dataclass
|
|
18
|
+
from enum import Enum
|
|
19
|
+
from typing import Optional
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
# Supported image file extensions for model directories
|
|
24
|
+
IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tiff", ".svg"}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class TaskType(Enum):
    """Kind of artifact moved during an upload/download sync.

    Every remote artifact is exactly one of: a directory packed into a
    ZIP (model images or experiment logs) or a small file copied verbatim.
    """
    MODEL_ZIP = "model_zip"  # ZIP file for experiment-scoped model images
    EXP_ZIP = "exp_zip"  # ZIP file for experiment logs
    SMALL_FILE = "small_file"  # Small file (state.json, README.md)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class PackTask:
    """Represents a file packing/upload task.

    Produced by collect_upload_tasks(); one task maps one local artifact
    to one remote path in the HF repo.
    """
    task_type: TaskType  # What kind of artifact this is (see TaskType)
    local_path: str  # Local path (directory for ZIP tasks, file for small files)
    remote_path: str  # Remote path in the HF repo
    subset: str  # Subset name
    name: str  # Model name or experiment name or file name
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
class UnpackTask:
    """Represents a file unpacking/download task.

    Produced by collect_download_tasks(); one task maps one remote repo
    path to the local target where it should be placed or extracted.
    """
    task_type: TaskType  # What kind of artifact this is (see TaskType)
    remote_path: str  # Remote path in the HF repo
    local_path: str  # Local target path (extraction dir for ZIPs, file path for small files)
    subset: str  # Subset name
    name: str  # Model name or experiment name or file name
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def pack_directory(
    source_dir: str,
    output_zip: str,
    file_extensions: Optional[set] = None,
    max_depth: Optional[int] = None,
) -> tuple[bool, str]:
    """
    Pack a directory into a ZIP file.

    The directory name is preserved as the root folder inside the ZIP.
    Symbolic links are followed and the actual file contents are packed;
    broken symlinks are skipped with a warning.

    Args:
        source_dir: Path to the directory to pack
        output_zip: Path to the output ZIP file
        file_extensions: Optional set of file extensions to include (e.g., {".png", ".jpg"}).
            If None, all files are included. Extensions should be lowercase with dot.
        max_depth: Optional maximum directory depth to traverse. None means unlimited.
            0 = only files directly in source_dir
            1 = files in source_dir and its immediate subdirectories
            etc.

    Returns:
        Tuple of (success, message)
    """
    if not os.path.isdir(source_dir):
        return False, f"Source directory does not exist: {source_dir}"

    # Resolve symlink if source_dir itself is a symlink
    resolved_source = os.path.realpath(source_dir)
    if not os.path.isdir(resolved_source):
        return False, f"Source directory symlink target does not exist: {resolved_source}"

    # Get the directory name to use as root in ZIP (use original name, not resolved)
    dir_name = os.path.basename(source_dir.rstrip(os.sep))

    try:
        # Ensure output directory exists. Guard against a bare filename:
        # os.path.dirname() returns "" then, and os.makedirs("") raises.
        out_parent = os.path.dirname(output_zip)
        if out_parent:
            os.makedirs(out_parent, exist_ok=True)

        file_count = 0
        with zipfile.ZipFile(output_zip, "w", zipfile.ZIP_DEFLATED) as zf:
            # followlinks=True to traverse symlinked directories
            for root, dirs, files in os.walk(resolved_source, followlinks=True):
                # Calculate current depth relative to source
                if max_depth is not None:
                    rel_root = os.path.relpath(root, resolved_source)
                    current_depth = 0 if rel_root == "." else len(rel_root.split(os.sep))

                    # Skip directories beyond max_depth
                    if current_depth > max_depth:
                        dirs[:] = []  # Prevent further recursion
                        continue

                    # Stop recursion at max_depth
                    if current_depth == max_depth:
                        dirs[:] = []

                for file in files:
                    # Filter by extension if specified
                    if file_extensions is not None:
                        ext = os.path.splitext(file)[1].lower()
                        if ext not in file_extensions:
                            continue

                    file_path = os.path.join(root, file)

                    # Skip broken symlinks
                    if os.path.islink(file_path) and not os.path.exists(file_path):
                        logging.getLogger(__name__).warning(f"Skipping broken symlink: {file_path}")
                        continue

                    # Calculate archive name: use original dir_name as root
                    rel_to_resolved = os.path.relpath(file_path, resolved_source)
                    archive_name = os.path.join(dir_name, rel_to_resolved)
                    zf.write(file_path, archive_name)
                    file_count += 1

        if file_count == 0:
            # Remove empty ZIP file
            os.remove(output_zip)
            return False, f"No files to pack in {source_dir}"

        return True, f"Packed {source_dir} -> {output_zip} ({file_count} files)"
    except Exception as e:
        # Don't leave a partially written ZIP behind on failure.
        try:
            if os.path.isfile(output_zip):
                os.remove(output_zip)
        except OSError:
            pass
        return False, f"Failed to pack directory: {e}"
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def pack_model_dir(model_dir: str, output_zip: str) -> tuple[bool, str]:
    """
    Pack a single model directory (containing images) into a ZIP file.

    Only image files (png, jpg, jpeg, gif, webp, bmp, tiff, svg) directly
    under the model directory are included; nested subdirectories (e.g.
    fail/) are excluded via the max_depth=0 setting.

    Args:
        model_dir: Path to the model directory (e.g., arena_dir/basic/models/exp_001/model_a/)
        output_zip: Path to the output ZIP file

    Returns:
        Tuple of (success, message)
    """
    result = pack_directory(
        model_dir,
        output_zip,
        file_extensions=IMAGE_EXTENSIONS,
        max_depth=0,
    )
    return result
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def pack_exp_dir(exp_dir: str, output_zip: str) -> tuple[bool, str]:
    """
    Pack an experiment directory (containing battle logs) into a ZIP file.

    Unlike pack_model_dir, every file at any depth is included (no
    extension filter and no depth limit).

    Args:
        exp_dir: Path to the experiment directory (e.g., arena_dir/basic/pk_logs/exp_001/)
        output_zip: Path to the output ZIP file

    Returns:
        Tuple of (success, message)
    """
    success, message = pack_directory(exp_dir, output_zip)
    return success, message
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def unpack_zip(
    zip_path: str,
    target_dir: str,
    overwrite: bool = False,
) -> tuple[bool, str]:
    """
    Unpack a ZIP file to a target directory.

    Members whose resolved path would land outside target_dir (path
    traversal, e.g. names containing "..") are skipped with a warning.
    This also keeps the skip/overwrite check consistent with where
    ZipFile.extract() actually writes.

    Args:
        zip_path: Path to the ZIP file
        target_dir: Target directory to extract to
        overwrite: If True, overwrite existing files

    Returns:
        Tuple of (success, message)
    """
    if not os.path.isfile(zip_path):
        return False, f"ZIP file does not exist: {zip_path}"

    log = logging.getLogger(__name__)
    try:
        os.makedirs(target_dir, exist_ok=True)
        # Resolved root used to detect path-traversal member names.
        safe_root = os.path.realpath(target_dir)

        with zipfile.ZipFile(zip_path, "r") as zf:
            for member in zf.namelist():
                target_path = os.path.join(target_dir, member)

                # Defense in depth: refuse members that would escape target_dir.
                resolved = os.path.realpath(target_path)
                if resolved != safe_root and not resolved.startswith(safe_root + os.sep):
                    log.warning(f"Skipping unsafe ZIP member: {member}")
                    continue

                # Check if file exists and skip if not overwriting
                if os.path.exists(target_path) and not overwrite:
                    log.debug(f"Skipping existing file: {target_path}")
                    continue

                # Extract file
                zf.extract(member, target_dir)

        return True, f"Unpacked {zip_path} -> {target_dir}"
    except Exception as e:
        return False, f"Failed to unpack ZIP: {e}"
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def discover_subsets(arena_dir: str) -> list[str]:
    """
    Discover all subset directories in the arena directory.

    A directory counts as a subset when it contains at least one of the
    marker subdirectories: models/, pk_logs/, or arena/.

    Args:
        arena_dir: Path to the arena directory

    Returns:
        Sorted list of subset names
    """
    if not os.path.isdir(arena_dir):
        return []

    markers = ("models", "pk_logs", "arena")
    found = []
    for entry in os.listdir(arena_dir):
        candidate = os.path.join(arena_dir, entry)
        if not os.path.isdir(candidate):
            continue
        # Any one marker directory qualifies the entry as a subset.
        if any(os.path.isdir(os.path.join(candidate, m)) for m in markers):
            found.append(entry)

    return sorted(found)
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def discover_models(arena_dir: str, subset: str) -> list[str]:
    """
    Discover all model names in a subset (v2 layout).

    Args:
        arena_dir: Path to the arena directory
        subset: Subset name

    Returns:
        List of model names (globally unique across experiments)
    """
    models_root = os.path.join(arena_dir, subset, "models")
    if not os.path.isdir(models_root):
        return []

    # Import deferred past the cheap existence check so the common
    # "nothing to scan" path never pays for the package import.
    from genarena.models import GlobalModelOutputManager

    try:
        mgr = GlobalModelOutputManager(models_root)
        return mgr.models
    except Exception:
        # For packer utilities, be conservative: return empty on scan failure.
        return []
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def discover_model_experiments(arena_dir: str, subset: str) -> list[str]:
    """
    Discover experiment directories under a subset's models (v2 layout).

    In v2, model outputs live under:
        models/<exp_name>/<model_name>/...
    This function returns exp_name directories that contain at least one model with images.

    Args:
        arena_dir: Path to the arena directory
        subset: Subset name

    Returns:
        List of experiment names (empty if the models root is missing or unscannable)
    """
    models_root = os.path.join(arena_dir, subset, "models")
    if not os.path.isdir(models_root):
        return []

    # Import deferred past the cheap existence check so the common
    # "nothing to scan" path never pays for the package import.
    from genarena.models import GlobalModelOutputManager

    try:
        mgr = GlobalModelOutputManager(models_root)
        return mgr.experiments
    except Exception:
        # Be conservative: return empty on scan failure.
        return []
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def discover_experiments(arena_dir: str, subset: str) -> list[str]:
    """
    Discover all experiment directories in a subset's pk_logs.

    Hidden entries (including .pk_logs_rm, which holds deleted/orphaned
    logs) are excluded.

    Args:
        arena_dir: Path to the arena directory
        subset: Subset name

    Returns:
        Sorted list of experiment names
    """
    logs_root = os.path.join(arena_dir, subset, "pk_logs")
    if not os.path.isdir(logs_root):
        return []

    names = [
        entry
        for entry in os.listdir(logs_root)
        if not entry.startswith(".") and os.path.isdir(os.path.join(logs_root, entry))
    ]
    return sorted(names)
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
def collect_upload_tasks(
    arena_dir: str,
    subsets: Optional[list[str]] = None,
    models: Optional[list[str]] = None,
    experiments: Optional[list[str]] = None,
) -> list[PackTask]:
    """
    Collect all files/directories that need to be uploaded.

    Args:
        arena_dir: Path to the arena directory
        subsets: List of subsets to include (None = all)
        models: List of models to include (None = all)
        experiments: List of experiments to include (None = all)

    Returns:
        List of PackTask objects
    """
    tasks: list[PackTask] = []

    # Discover subsets if not specified
    all_subsets = discover_subsets(arena_dir)
    target_subsets = subsets if subsets else all_subsets

    for subset in target_subsets:
        if subset not in all_subsets:
            logger.warning(f"Subset '{subset}' not found in arena directory")
            continue

        subset_path = os.path.join(arena_dir, subset)

        # Collect model directories (v2 layout: models/<exp_name>/<model_name>/):
        # Each model is packed as a separate ZIP file.
        # - Default: upload all models
        # - If experiments filter is provided: only models under those exp_name
        # - If models filter is provided: only those specific models
        all_model_exps = discover_model_experiments(arena_dir, subset)

        target_model_exps: list[str]
        if experiments:
            target_model_exps = [e for e in experiments if e in all_model_exps]
        else:
            target_model_exps = all_model_exps

        # Collect individual model directories
        for exp in target_model_exps:
            exp_model_path = os.path.join(subset_path, "models", exp)
            if not os.path.isdir(exp_model_path):
                continue

            # List all model directories under this experiment
            for model_name in os.listdir(exp_model_path):
                model_path = os.path.join(exp_model_path, model_name)
                if not os.path.isdir(model_path):
                    continue

                # Apply models filter if specified
                if models and model_name not in models:
                    continue

                remote_path = f"{subset}/models/{exp}/{model_name}.zip"

                tasks.append(PackTask(
                    task_type=TaskType.MODEL_ZIP,
                    local_path=model_path,
                    remote_path=remote_path,
                    subset=subset,
                    name=f"{exp}/{model_name}",
                ))

        # Collect experiment directories.
        # Note: pk_logs are always uploaded regardless of model filter.
        pk_experiments = discover_experiments(arena_dir, subset)
        if experiments:
            pk_experiments = [e for e in pk_experiments if e in set(experiments)]
        for exp in pk_experiments:
            exp_path = os.path.join(subset_path, "pk_logs", exp)
            remote_path = f"{subset}/pk_logs/{exp}.zip"

            tasks.append(PackTask(
                task_type=TaskType.EXP_ZIP,
                local_path=exp_path,
                remote_path=remote_path,
                subset=subset,
                name=exp,
            ))

        # Collect small files
        # state.json
        state_path = os.path.join(subset_path, "arena", "state.json")
        if os.path.isfile(state_path):
            tasks.append(PackTask(
                task_type=TaskType.SMALL_FILE,
                local_path=state_path,
                remote_path=f"{subset}/arena/state.json",
                subset=subset,
                name="state.json",
            ))

        # README.md
        readme_path = os.path.join(subset_path, "README.md")
        if os.path.isfile(readme_path):
            tasks.append(PackTask(
                task_type=TaskType.SMALL_FILE,
                local_path=readme_path,
                remote_path=f"{subset}/README.md",
                subset=subset,
                name="README.md",
            ))

    return tasks
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
def collect_download_tasks(
    repo_files: list[str],
    arena_dir: str,
    subsets: Optional[list[str]] = None,
    models: Optional[list[str]] = None,
    experiments: Optional[list[str]] = None,
) -> list[UnpackTask]:
    """
    Collect files to download based on repo contents and filters.

    Args:
        repo_files: List of file paths in the HF repo
        arena_dir: Local arena directory path
        subsets: List of subsets to download (None = all)
        models: List of models to download (None = all)
        experiments: List of experiments to download (None = all)

    Returns:
        List of UnpackTask objects
    """
    tasks: list[UnpackTask] = []

    for remote in repo_files:
        segs = remote.split("/")
        if len(segs) < 2:
            # Not a subset-scoped path; nothing to do.
            continue

        sub = segs[0]
        if subsets and sub not in subsets:
            continue

        # New format: <subset>/models/<exp_name>/<model_name>.zip
        if len(segs) >= 4 and segs[1] == "models" and segs[3].endswith(".zip"):
            exp = segs[2]
            model = segs[3][:-4]  # strip ".zip"
            if experiments and exp not in experiments:
                continue
            if models and model not in models:
                continue
            tasks.append(UnpackTask(
                task_type=TaskType.MODEL_ZIP,
                remote_path=remote,
                local_path=os.path.join(arena_dir, sub, "models", exp),
                subset=sub,
                name=f"{exp}/{model}",
            ))

        # Legacy format: <subset>/models/<exp_name>.zip (backward compatibility)
        elif len(segs) == 3 and segs[1] == "models" and segs[2].endswith(".zip"):
            exp = segs[2][:-4]
            # Legacy repos used the models filter as an experiment filter.
            legacy_filter = models if experiments is None else experiments
            if legacy_filter and exp not in legacy_filter:
                continue
            tasks.append(UnpackTask(
                task_type=TaskType.MODEL_ZIP,
                remote_path=remote,
                local_path=os.path.join(arena_dir, sub, "models"),
                subset=sub,
                name=exp,
            ))

        # Experiment (battle-log) ZIP: <subset>/pk_logs/<exp_name>.zip
        elif len(segs) >= 3 and segs[1] == "pk_logs" and segs[2].endswith(".zip"):
            exp = segs[2][:-4]
            if experiments and exp not in experiments:
                continue
            tasks.append(UnpackTask(
                task_type=TaskType.EXP_ZIP,
                remote_path=remote,
                local_path=os.path.join(arena_dir, sub, "pk_logs"),
                subset=sub,
                name=exp,
            ))

        # Arena state: <subset>/arena/state.json
        elif len(segs) >= 3 and segs[1] == "arena" and segs[2] == "state.json":
            tasks.append(UnpackTask(
                task_type=TaskType.SMALL_FILE,
                remote_path=remote,
                local_path=os.path.join(arena_dir, sub, "arena", "state.json"),
                subset=sub,
                name="state.json",
            ))

        # Subset README: <subset>/README.md
        elif segs[1] == "README.md":
            tasks.append(UnpackTask(
                task_type=TaskType.SMALL_FILE,
                remote_path=remote,
                local_path=os.path.join(arena_dir, sub, "README.md"),
                subset=sub,
                name="README.md",
            ))

    return tasks
|
|
551
|
+
|
|
552
|
+
|
|
553
|
+
class TempPackingContext:
    """
    Context manager for temporary packing operations.

    On entry a scratch directory is created for staging ZIP files; on
    exit the whole tree is removed (removal errors are ignored).
    """

    def __init__(self):
        # Populated by __enter__; None while the context is inactive.
        self.temp_dir: Optional[str] = None

    def __enter__(self) -> "TempPackingContext":
        self.temp_dir = tempfile.mkdtemp(prefix="genarena_pack_")
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        staging = self.temp_dir
        if staging and os.path.isdir(staging):
            shutil.rmtree(staging, ignore_errors=True)

    def get_temp_zip_path(self, remote_path: str) -> str:
        """
        Get a temporary path for a ZIP file.

        Args:
            remote_path: The remote path (used to generate a unique local path)

        Returns:
            Temporary file path (parent directories are created)

        Raises:
            RuntimeError: If called outside an active context.
        """
        if not self.temp_dir:
            raise RuntimeError("TempPackingContext not entered")

        # Mirror the remote path layout inside the scratch directory so
        # distinct remote paths never collide on a local file name.
        staged = os.path.join(self.temp_dir, remote_path)
        os.makedirs(os.path.dirname(staged), exist_ok=True)
        return staged
|