genarena 0.0.1__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. genarena/__init__.py +49 -2
  2. genarena/__main__.py +10 -0
  3. genarena/arena.py +1685 -0
  4. genarena/battle.py +337 -0
  5. genarena/bt_elo.py +507 -0
  6. genarena/cli.py +1581 -0
  7. genarena/data.py +476 -0
  8. genarena/deploy/Dockerfile +22 -0
  9. genarena/deploy/README.md +55 -0
  10. genarena/deploy/__init__.py +5 -0
  11. genarena/deploy/app.py +84 -0
  12. genarena/experiments.py +121 -0
  13. genarena/leaderboard.py +270 -0
  14. genarena/logs.py +409 -0
  15. genarena/models.py +412 -0
  16. genarena/prompts/__init__.py +127 -0
  17. genarena/prompts/mmrb2.py +373 -0
  18. genarena/sampling.py +336 -0
  19. genarena/state.py +656 -0
  20. genarena/sync/__init__.py +105 -0
  21. genarena/sync/auto_commit.py +118 -0
  22. genarena/sync/deploy_ops.py +543 -0
  23. genarena/sync/git_ops.py +422 -0
  24. genarena/sync/hf_ops.py +891 -0
  25. genarena/sync/init_ops.py +431 -0
  26. genarena/sync/packer.py +587 -0
  27. genarena/sync/submit.py +837 -0
  28. genarena/utils.py +103 -0
  29. genarena/validation/__init__.py +19 -0
  30. genarena/validation/schema.py +327 -0
  31. genarena/validation/validator.py +329 -0
  32. genarena/visualize/README.md +148 -0
  33. genarena/visualize/__init__.py +14 -0
  34. genarena/visualize/app.py +938 -0
  35. genarena/visualize/data_loader.py +2430 -0
  36. genarena/visualize/static/app.js +3762 -0
  37. genarena/visualize/static/model_aliases.json +86 -0
  38. genarena/visualize/static/style.css +4104 -0
  39. genarena/visualize/templates/index.html +413 -0
  40. genarena/vlm.py +519 -0
  41. genarena-0.1.1.dist-info/METADATA +178 -0
  42. genarena-0.1.1.dist-info/RECORD +44 -0
  43. {genarena-0.0.1.dist-info → genarena-0.1.1.dist-info}/WHEEL +1 -2
  44. genarena-0.1.1.dist-info/entry_points.txt +2 -0
  45. genarena-0.0.1.dist-info/METADATA +0 -26
  46. genarena-0.0.1.dist-info/RECORD +0 -5
  47. genarena-0.0.1.dist-info/top_level.txt +0 -1
@@ -0,0 +1,543 @@
1
+ # Copyright 2026 Ruihang Li.
2
+ # Licensed under the Apache License, Version 2.0.
3
+ # See LICENSE file in the project root for details.
4
+
5
+ """
6
+ Deploy operations for GenArena.
7
+
8
+ Handles uploading arena data to HuggingFace for Spaces deployment.
9
+ Unlike `hf upload`, this uploads images directly (not as ZIP) for CDN access.
10
+ Parquet benchmark data is downloaded from rhli/genarena during Docker build.
11
+ """
12
+
13
+ import logging
14
+ import os
15
+ from multiprocessing import Pool
16
+ from typing import Optional
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
# Default multiprocessing settings for batched uploads.
DEFAULT_NUM_WORKERS = 16  # parallel upload worker processes
DEFAULT_WORKER_TIMEOUT = 300  # seconds allowed per worker batch
24
+
25
+
26
def upload_for_deploy(
    arena_dir: str,
    arena_repo: str,
    space_repo: str,
    subsets: Optional[list[str]] = None,
    overwrite: bool = False,
    show_progress: bool = True,
    max_retries: int = 3,
    num_workers: int = DEFAULT_NUM_WORKERS,
    worker_timeout: int = DEFAULT_WORKER_TIMEOUT,
) -> tuple[bool, str]:
    """
    Upload everything a HuggingFace Spaces deployment needs.

    Two uploads run in order:
      1. Arena data (pk_logs, models, state.json) to ``arena_repo`` (Dataset).
         Images go up as individual files (not a ZIP) for CDN access, and
         symlinks are followed to reach the real image files.
      2. Deploy files (Dockerfile, app.py, README.md) to ``space_repo``.

    Parquet benchmark data is NOT uploaded here; the Space downloads it from
    rhli/genarena during its Docker build.

    Args:
        arena_dir: Local arena directory.
        arena_repo: HF Dataset repo for arena data.
        space_repo: HF Space repo for deployment.
        subsets: Subsets to upload (None = all).
        overwrite: Overwrite existing files.
        show_progress: Show progress bar.
        max_retries: Max retries per file.
            NOTE(review): currently not forwarded to the upload helpers,
            which use their own retry default — confirm whether intended.
        num_workers: Number of parallel workers for upload (default: 16).
        worker_timeout: Timeout in seconds for each worker (default: 300).

    Returns:
        Tuple of (success, message).
    """
    from genarena.sync.hf_ops import (
        require_hf_token,
        validate_dataset_repo,
    )

    # Resolve the HF token first; surface the error text to the caller.
    try:
        token = require_hf_token()
    except ValueError as e:
        return False, str(e)

    summary: list[str] = []

    # Step 1: arena data (images as individual files, not ZIP) to the
    # Dataset repo so the CDN can serve them directly.
    logger.info(f"Uploading arena data to {arena_repo}...")
    ok, detail = validate_dataset_repo(arena_repo, token)
    if not ok:
        return False, f"Arena repo validation failed: {detail}"

    ok, detail = upload_arena_data_for_cdn(
        arena_dir=arena_dir,
        repo_id=arena_repo,
        subsets=subsets,
        overwrite=overwrite,
        show_progress=show_progress,
        token=token,
        num_workers=num_workers,
        worker_timeout=worker_timeout,
    )
    if not ok:
        return False, f"Arena upload failed: {detail}"
    summary.append(f"Arena data: {detail}")

    # Step 2: deploy files to the Space repo.
    logger.info(f"Uploading deploy files to {space_repo}...")
    ok, detail = upload_deploy_files(
        space_repo=space_repo,
        overwrite=overwrite,
        token=token,
    )
    if not ok:
        return False, f"Deploy files upload failed: {detail}"
    summary.append(f"Deploy files: {detail}")

    return True, "\n".join(summary)
108
+
109
+
110
def collect_files_follow_symlinks(
    base_dir: str,
    path_prefix: str = "",
) -> list[tuple[str, str]]:
    """
    Collect all files under base_dir, following symlinks.

    Hidden files and directories, ``__pycache__`` and ``raw_outputs``
    directories are skipped. Remote paths always use forward slashes.

    Fix: the remote path is built from the actual filename; a previous
    revision interpolated a literal placeholder instead of the filename,
    which would have mapped every file to the same bogus remote path.

    Args:
        base_dir: Directory to scan.
        path_prefix: Prefix prepended to every remote path.

    Returns:
        List of (local_path, remote_path) tuples.
    """
    files: list[tuple[str, str]] = []

    if not os.path.isdir(base_dir):
        return files

    # followlinks=True so symlinked model/image directories are traversed.
    for root, dirs, filenames in os.walk(base_dir, followlinks=True):
        # Prune hidden and special directories in place so os.walk skips them.
        dirs[:] = [
            d for d in dirs
            if not d.startswith(".") and d not in ("__pycache__", "raw_outputs")
        ]

        rel_root = os.path.relpath(root, base_dir)
        if rel_root == ".":
            rel_root = ""

        for filename in filenames:
            if filename.startswith("."):
                continue

            local_path = os.path.join(root, filename)

            # Remote path = prefix + relative dir + filename, skipping
            # whichever components are empty.
            parts = [p for p in (path_prefix, rel_root, filename) if p]
            remote_path = "/".join(parts)

            # HF repo paths use forward slashes regardless of local OS.
            remote_path = remote_path.replace("\\", "/")

            files.append((local_path, remote_path))

    return files
156
+
157
+
158
def _upload_batch_worker(args: tuple) -> tuple[int, int]:
    """
    Upload one batch of files as a single commit (multiprocessing worker).

    Args:
        args: Tuple of (batch_index, batch, repo_id, token, total_batches,
            max_retries).

    Returns:
        Tuple of (uploaded_count, failed_count).
    """
    from huggingface_hub import HfApi, CommitOperationAdd

    batch_index, batch, repo_id, token, total_batches, max_retries = args

    api = HfApi(token=token)

    # Build the commit operations; files that cannot be read are counted
    # as failures but do not abort the batch.
    failed_read = 0
    operations = []
    for local_path, remote_path in batch:
        try:
            op = CommitOperationAdd(
                path_in_repo=remote_path,
                path_or_fileobj=local_path,
            )
        except Exception as e:
            logger.warning(f"Failed to read {local_path}: {e}")
            failed_read += 1
        else:
            operations.append(op)

    if not operations:
        return 0, failed_read

    # Commit with retries; on the final failure the whole batch is failed.
    attempt = 0
    while attempt < max_retries:
        try:
            api.create_commit(
                repo_id=repo_id,
                repo_type="dataset",
                operations=operations,
                commit_message=f"[genarena deploy] Upload batch {batch_index + 1}/{total_batches}",
            )
        except Exception as e:
            if attempt < max_retries - 1:
                logger.warning(f"Batch {batch_index + 1} failed (attempt {attempt + 1}), retrying: {e}")
                attempt += 1
            else:
                logger.error(f"Batch {batch_index + 1} failed after {max_retries} attempts: {e}")
                return 0, len(operations) + failed_read
        else:
            return len(operations), failed_read

    # Defensive fallback (only reachable when max_retries < 1).
    return 0, len(operations) + failed_read
209
+
210
+
211
def upload_arena_data_for_cdn(
    arena_dir: str,
    repo_id: str,
    subsets: Optional[list[str]] = None,
    overwrite: bool = False,
    show_progress: bool = True,
    token: Optional[str] = None,
    num_workers: int = DEFAULT_NUM_WORKERS,
    worker_timeout: int = DEFAULT_WORKER_TIMEOUT,
    max_retries: int = 3,
) -> tuple[bool, str]:
    """
    Upload arena data with images as individual files (not ZIP) for CDN access.

    Follows symlinks, so model directories pointing at external image
    directories still upload the real files.

    Directory structure uploaded:
        {subset}/models/{exp_name}/{model}/{index}.png  (individual images)
        {subset}/pk_logs/{exp_name}/*.jsonl             (battle logs)
        {subset}/arena/state.json                       (ELO state)

    Fixes/changes vs. previous revision:
    - The no-tqdm paths previously called ``apply_async(...).get(...)`` one
      batch at a time, which serialized the upload; batches are now all
      submitted before results are collected, restoring parallelism.
    - ``max_retries`` is a parameter (default 3, previously hard-coded),
      backward compatible with existing callers.

    Args:
        arena_dir: Path to the arena directory.
        repo_id: HuggingFace repository ID.
        subsets: List of subsets to upload (None = all).
        overwrite: If True, overwrite existing files.
        show_progress: If True, show progress bar.
        token: HuggingFace token.
        num_workers: Number of parallel workers for upload (default: 16).
        worker_timeout: Timeout in seconds for each worker (default: 300).
        max_retries: Max commit attempts per batch (default: 3).

    Returns:
        Tuple of (success, message).
    """
    from huggingface_hub import HfApi

    if token is None:
        from genarena.sync.hf_ops import require_hf_token
        token = require_hf_token()

    api = HfApi(token=token)

    if not os.path.isdir(arena_dir):
        return False, f"Arena directory not found: {arena_dir}"

    # Discover subsets: top-level non-hidden directories of the arena dir.
    available_subsets = [
        d for d in os.listdir(arena_dir)
        if os.path.isdir(os.path.join(arena_dir, d)) and not d.startswith(".")
    ]
    if subsets:
        target_subsets = [s for s in subsets if s in available_subsets]
    else:
        target_subsets = available_subsets
    if not target_subsets:
        return False, "No subsets found to upload"
    logger.info(f"Target subsets: {target_subsets}")

    # Collect all files to upload (following symlinks).
    all_files: list[tuple[str, str]] = []
    for subset in target_subsets:
        subset_dir = os.path.join(arena_dir, subset)
        logger.info(f"Scanning subset: {subset}")
        for subdir in ["models", "pk_logs", "arena"]:
            subdir_path = os.path.join(subset_dir, subdir)
            if os.path.isdir(subdir_path):
                files = collect_files_follow_symlinks(subdir_path, f"{subset}/{subdir}")
                all_files.extend(files)
                logger.info(f"  {subdir}: {len(files)} files")

    if not all_files:
        return False, "No files found to upload"
    logger.info(f"Total files to upload: {len(all_files)}")

    # Keep only relevant file types.
    valid_extensions = {".png", ".jpg", ".jpeg", ".webp", ".json", ".jsonl"}
    all_files = [
        (local, remote) for local, remote in all_files
        if os.path.splitext(local)[1].lower() in valid_extensions
    ]
    logger.info(f"Files after extension filtering: {len(all_files)}")

    # Drop files nested deeper than {subset}/models/{exp}/{model}/{file}.
    def is_valid_model_path(remote: str) -> bool:
        parts = remote.split("/")
        if len(parts) < 2 or parts[1] != "models":
            return True  # non-models paths are always valid
        return len(parts) == 5  # subset/models/exp/model/file

    before_depth_filter = len(all_files)
    all_files = [(local, remote) for local, remote in all_files if is_valid_model_path(remote)]
    depth_filtered = before_depth_filter - len(all_files)
    if depth_filtered > 0:
        logger.info(f"Skipped {depth_filtered} files in model subdirectories")
    logger.info(f"Files after filtering: {len(all_files)}")

    # Skip files that already exist in the repo unless overwriting.
    skipped = 0
    if not overwrite:
        existing_files: set[str] = set()
        try:
            existing_files = set(api.list_repo_files(repo_id=repo_id, repo_type="dataset"))
            logger.info(f"Existing files in repo: {len(existing_files)}")
        except Exception:
            pass  # best-effort: treat the repo as empty if listing fails
        original_count = len(all_files)
        all_files = [
            (local, remote) for local, remote in all_files
            if remote not in existing_files
        ]
        skipped = original_count - len(all_files)
        logger.info(f"Skipping {skipped} existing files, {len(all_files)} to upload")

    if not all_files:
        return True, f"All files already exist. Skipped {skipped} files."

    # Upload in batches via create_commit, fanned out over worker processes.
    batch_size = 500  # HuggingFace recommends smaller batches for large files
    batches = [all_files[i:i + batch_size] for i in range(0, len(all_files), batch_size)]
    total_batches = len(batches)
    logger.info(f"Uploading {total_batches} batches with {num_workers} workers (timeout: {worker_timeout}s per worker)")

    worker_args = [
        (i, batch, repo_id, token, total_batches, max_retries)
        for i, batch in enumerate(batches)
    ]

    def run_with_timeout(pool) -> list[tuple[int, int]]:
        # Submit every batch up front so they actually run in parallel,
        # then collect each result with a per-batch timeout. A timed-out
        # or crashed worker counts its whole batch as failed.
        pending = [(args, pool.apply_async(_upload_batch_worker, (args,))) for args in worker_args]
        collected: list[tuple[int, int]] = []
        for args, async_result in pending:
            try:
                collected.append(async_result.get(timeout=worker_timeout))
            except Exception as e:
                logger.error(f"Worker timeout or error: {e}")
                collected.append((0, len(args[1])))
        return collected

    with Pool(processes=num_workers) as pool:
        if show_progress:
            try:
                from tqdm import tqdm
            except ImportError:
                # tqdm unavailable: same upload path, just no progress bar.
                results = run_with_timeout(pool)
            else:
                results = list(tqdm(
                    pool.imap_unordered(_upload_batch_worker, worker_args),
                    total=total_batches,
                    desc="Uploading batches",
                    unit="batch",
                ))
        else:
            results = run_with_timeout(pool)

    # Aggregate per-batch (uploaded, failed) counts.
    total_uploaded = sum(uploaded for uploaded, _ in results)
    total_failed = sum(failed for _, failed in results)

    return True, f"Uploaded {total_uploaded}, skipped {skipped}, failed {total_failed} files"
403
+
404
+
405
def upload_deploy_files(
    space_repo: str,
    overwrite: bool = False,
    token: Optional[str] = None,
) -> tuple[bool, str]:
    """
    Upload deploy files (Dockerfile, app.py, README.md) to the Space repo,
    then upload the genarena package sources the Space needs to run.

    Args:
        space_repo: HF Space repo ID.
        overwrite: Overwrite existing files.
        token: HF token.

    Returns:
        Tuple of (success, message).
    """
    from huggingface_hub import HfApi

    from genarena.sync.hf_ops import upload_file

    if token is None:
        from genarena.sync.hf_ops import require_hf_token

        token = require_hf_token()

    api = HfApi(token=token)

    # Deploy assets live in <package root>/deploy, next to this module's dir.
    here = os.path.dirname(os.path.abspath(__file__))
    deploy_dir = os.path.join(os.path.dirname(here), "deploy")

    if not os.path.isdir(deploy_dir):
        return False, f"Deploy directory not found: {deploy_dir}"

    # (local filename, path inside the Space repo)
    deploy_files = [
        ("Dockerfile", "Dockerfile"),
        ("app.py", "genarena/deploy/app.py"),
        ("README.md", "README.md"),
    ]

    # Snapshot repo contents so existing files can be skipped when not
    # overwriting; listing failures are treated as an empty repo.
    existing: set[str] = set()
    if not overwrite:
        try:
            existing = set(api.list_repo_files(repo_id=space_repo, repo_type="space"))
        except Exception:
            pass

    uploaded = skipped = failed = 0

    for local_name, remote_path in deploy_files:
        local_path = os.path.join(deploy_dir, local_name)
        if not os.path.isfile(local_path):
            logger.warning(f"Deploy file not found: {local_path}")
            continue
        if not overwrite and remote_path in existing:
            skipped += 1
            continue
        ok, msg = upload_file(
            repo_id=space_repo,
            local_path=local_path,
            remote_path=remote_path,
            token=token,
            commit_message=f"Upload {remote_path}",
            repo_type="space",
        )
        if ok:
            uploaded += 1
        else:
            failed += 1
            logger.warning(f"Failed to upload {remote_path}: {msg}")

    # The Space also needs the genarena package itself to run.
    ok, msg = upload_genarena_package(space_repo, token, overwrite)
    if not ok:
        return False, f"Failed to upload genarena package: {msg}"

    return True, f"Uploaded {uploaded}, skipped {skipped}, failed {failed} deploy files. {msg}"
491
+
492
+
493
def upload_genarena_package(
    space_repo: str,
    token: str,
    overwrite: bool = False,
) -> tuple[bool, str]:
    """
    Upload the genarena package sources to the Space repo.

    Args:
        space_repo: HF Space repo ID.
        token: HF token.
        overwrite: Overwrite existing files.
            NOTE(review): accepted for interface symmetry but not used by
            this implementation — confirm whether it should gate anything.

    Returns:
        Tuple of (success, message).
    """
    from huggingface_hub import HfApi

    api = HfApi(token=token)

    # This module lives in genarena/sync/, so two dirname() hops reach the
    # genarena package directory and one more reaches the project root.
    genarena_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    project_root = os.path.dirname(genarena_dir)

    try:
        # pyproject.toml (if present) goes to the repo root.
        pyproject = os.path.join(project_root, "pyproject.toml")
        if os.path.isfile(pyproject):
            api.upload_file(
                repo_id=space_repo,
                path_or_fileobj=pyproject,
                path_in_repo="pyproject.toml",
                repo_type="space",
                commit_message="Upload pyproject.toml",
            )

        # Whole package in a single folder upload, limited to source/assets.
        api.upload_folder(
            repo_id=space_repo,
            folder_path=genarena_dir,
            path_in_repo="genarena",
            repo_type="space",
            commit_message="[genarena deploy] Upload genarena package",
            allow_patterns=["**/*.py", "**/*.html", "**/*.css", "**/*.js"],
            ignore_patterns=["**/__pycache__/**", "**/.pytest_cache/**"],
        )
    except Exception as e:
        logger.error(f"Failed to upload package: {e}")
        return False, str(e)

    return True, "Package uploaded successfully"