genarena 0.1.0__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. {genarena-0.1.0 → genarena-0.1.2}/PKG-INFO +1 -1
  2. {genarena-0.1.0 → genarena-0.1.2}/genarena/deploy/Dockerfile +2 -5
  3. {genarena-0.1.0 → genarena-0.1.2}/genarena/visualize/data_loader.py +175 -6
  4. {genarena-0.1.0 → genarena-0.1.2}/pyproject.toml +1 -1
  5. {genarena-0.1.0 → genarena-0.1.2}/.github/workflows/publish.yml +0 -0
  6. {genarena-0.1.0 → genarena-0.1.2}/.gitignore +0 -0
  7. {genarena-0.1.0 → genarena-0.1.2}/README.md +0 -0
  8. {genarena-0.1.0 → genarena-0.1.2}/docs/README.md +0 -0
  9. {genarena-0.1.0 → genarena-0.1.2}/docs/architecture.md +0 -0
  10. {genarena-0.1.0 → genarena-0.1.2}/docs/cli-reference.md +0 -0
  11. {genarena-0.1.0 → genarena-0.1.2}/docs/experiments.md +0 -0
  12. {genarena-0.1.0 → genarena-0.1.2}/docs/faq.md +0 -0
  13. {genarena-0.1.0 → genarena-0.1.2}/docs/maintainer-guide/README.md +0 -0
  14. {genarena-0.1.0 → genarena-0.1.2}/docs/maintainer-guide/deploy.md +0 -0
  15. {genarena-0.1.0 → genarena-0.1.2}/docs/quickstart.md +0 -0
  16. {genarena-0.1.0 → genarena-0.1.2}/genarena/__init__.py +0 -0
  17. {genarena-0.1.0 → genarena-0.1.2}/genarena/__main__.py +0 -0
  18. {genarena-0.1.0 → genarena-0.1.2}/genarena/arena.py +0 -0
  19. {genarena-0.1.0 → genarena-0.1.2}/genarena/battle.py +0 -0
  20. {genarena-0.1.0 → genarena-0.1.2}/genarena/bt_elo.py +0 -0
  21. {genarena-0.1.0 → genarena-0.1.2}/genarena/cli.py +0 -0
  22. {genarena-0.1.0 → genarena-0.1.2}/genarena/data.py +0 -0
  23. {genarena-0.1.0 → genarena-0.1.2}/genarena/deploy/README.md +0 -0
  24. {genarena-0.1.0 → genarena-0.1.2}/genarena/deploy/__init__.py +0 -0
  25. {genarena-0.1.0 → genarena-0.1.2}/genarena/deploy/app.py +0 -0
  26. {genarena-0.1.0 → genarena-0.1.2}/genarena/experiments.py +0 -0
  27. {genarena-0.1.0 → genarena-0.1.2}/genarena/leaderboard.py +0 -0
  28. {genarena-0.1.0 → genarena-0.1.2}/genarena/logs.py +0 -0
  29. {genarena-0.1.0 → genarena-0.1.2}/genarena/models.py +0 -0
  30. {genarena-0.1.0 → genarena-0.1.2}/genarena/prompts/__init__.py +0 -0
  31. {genarena-0.1.0 → genarena-0.1.2}/genarena/prompts/mmrb2.py +0 -0
  32. {genarena-0.1.0 → genarena-0.1.2}/genarena/sampling.py +0 -0
  33. {genarena-0.1.0 → genarena-0.1.2}/genarena/state.py +0 -0
  34. {genarena-0.1.0 → genarena-0.1.2}/genarena/sync/__init__.py +0 -0
  35. {genarena-0.1.0 → genarena-0.1.2}/genarena/sync/auto_commit.py +0 -0
  36. {genarena-0.1.0 → genarena-0.1.2}/genarena/sync/deploy_ops.py +0 -0
  37. {genarena-0.1.0 → genarena-0.1.2}/genarena/sync/git_ops.py +0 -0
  38. {genarena-0.1.0 → genarena-0.1.2}/genarena/sync/hf_ops.py +0 -0
  39. {genarena-0.1.0 → genarena-0.1.2}/genarena/sync/init_ops.py +0 -0
  40. {genarena-0.1.0 → genarena-0.1.2}/genarena/sync/packer.py +0 -0
  41. {genarena-0.1.0 → genarena-0.1.2}/genarena/sync/submit.py +0 -0
  42. {genarena-0.1.0 → genarena-0.1.2}/genarena/utils.py +0 -0
  43. {genarena-0.1.0 → genarena-0.1.2}/genarena/validation/__init__.py +0 -0
  44. {genarena-0.1.0 → genarena-0.1.2}/genarena/validation/schema.py +0 -0
  45. {genarena-0.1.0 → genarena-0.1.2}/genarena/validation/validator.py +0 -0
  46. {genarena-0.1.0 → genarena-0.1.2}/genarena/visualize/README.md +0 -0
  47. {genarena-0.1.0 → genarena-0.1.2}/genarena/visualize/__init__.py +0 -0
  48. {genarena-0.1.0 → genarena-0.1.2}/genarena/visualize/app.py +0 -0
  49. {genarena-0.1.0 → genarena-0.1.2}/genarena/visualize/static/app.js +0 -0
  50. {genarena-0.1.0 → genarena-0.1.2}/genarena/visualize/static/model_aliases.json +0 -0
  51. {genarena-0.1.0 → genarena-0.1.2}/genarena/visualize/static/style.css +0 -0
  52. {genarena-0.1.0 → genarena-0.1.2}/genarena/visualize/templates/index.html +0 -0
  53. {genarena-0.1.0 → genarena-0.1.2}/genarena/vlm.py +0 -0
  54. {genarena-0.1.0 → genarena-0.1.2}/requirements.txt +0 -0
  55. {genarena-0.1.0 → genarena-0.1.2}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: genarena
3
- Version: 0.1.0
3
+ Version: 0.1.2
4
4
  Summary: GenArena Arena Evaluation - VLM-based pairwise image generation evaluation
5
5
  Author: GenArena Team
6
6
  License: Apache-2.0
@@ -7,11 +7,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
7
7
  git \
8
8
  && rm -rf /var/lib/apt/lists/*
9
9
 
10
- # Copy project files
11
- COPY . .
12
-
13
10
  # Install Python dependencies
14
- RUN pip install --no-cache-dir -e .[web]
11
+ RUN pip install genarena[web]
15
12
 
16
13
  # Download parquet benchmark data from HuggingFace
17
14
  # This dataset contains the prompt/benchmark data (not arena battle results)
@@ -22,4 +19,4 @@ RUN python -c "from huggingface_hub import snapshot_download; snapshot_download(
22
19
  EXPOSE 7860
23
20
 
24
21
  # Start the application
25
- CMD ["python", "genarena/deploy/app.py"]
22
+ CMD ["python", "-m", "genarena.deploy.app"]
@@ -2267,14 +2267,15 @@ class HFArenaDataLoader(ArenaDataLoader):
2267
2267
  preload: If True, preload all data at initialization
2268
2268
  """
2269
2269
  self.hf_repo = hf_repo
2270
- self._image_url_index = self._build_image_index(image_files)
2270
+ # Build both indexes at once
2271
+ self._image_url_index, self._subset_models_index = self._build_image_index(image_files)
2271
2272
  super().__init__(arena_dir, data_dir, preload=preload)
2272
2273
 
2273
2274
  def _build_image_index(
2274
2275
  self, image_files: list[str]
2275
- ) -> dict[tuple[str, str, int], str]:
2276
+ ) -> tuple[dict[tuple[str, str, int], str], dict[str, list[str]]]:
2276
2277
  """
2277
- Build index: (subset, model, sample_index) -> hf_file_path
2278
+ Build indexes from HF image file list.
2278
2279
 
2279
2280
  Expected path format: {subset}/models/{exp_name}/{model}/{index}.png
2280
2281
 
@@ -2282,11 +2283,14 @@ class HFArenaDataLoader(ArenaDataLoader):
2282
2283
  image_files: List of image file paths from HF repo
2283
2284
 
2284
2285
  Returns:
2285
- Dict mapping (subset, model, sample_index) to HF file path
2286
+ Tuple of:
2287
+ - Dict mapping (subset, model, sample_index) to HF file path
2288
+ - Dict mapping subset to sorted list of model names
2286
2289
  """
2287
2290
  from genarena.models import parse_image_index
2288
2291
 
2289
2292
  index: dict[tuple[str, str, int], str] = {}
2293
+ subset_models: dict[str, set[str]] = {}
2290
2294
 
2291
2295
  for path in image_files:
2292
2296
  parts = path.split("/")
@@ -2300,9 +2304,18 @@ class HFArenaDataLoader(ArenaDataLoader):
2300
2304
  if idx is not None:
2301
2305
  # If duplicate, later entries overwrite earlier ones
2302
2306
  index[(subset, model, idx)] = path
2307
+ # Also track subset -> models mapping
2308
+ if subset not in subset_models:
2309
+ subset_models[subset] = set()
2310
+ subset_models[subset].add(model)
2311
+
2312
+ # Convert sets to sorted lists
2313
+ subset_models_sorted: dict[str, list[str]] = {
2314
+ subset: sorted(models) for subset, models in subset_models.items()
2315
+ }
2303
2316
 
2304
- logger.info(f"Built image URL index with {len(index)} entries")
2305
- return index
2317
+ logger.info(f"Built image URL index with {len(index)} entries across {len(subset_models_sorted)} subsets")
2318
+ return index, subset_models_sorted
2306
2319
 
2307
2320
  def get_model_image_url(
2308
2321
  self, subset: str, model: str, sample_index: int
@@ -2333,3 +2346,159 @@ class HFArenaDataLoader(ArenaDataLoader):
2333
2346
  """
2334
2347
  # Return None to indicate image should be fetched via CDN
2335
2348
  return None
2349
+
2350
+ def _get_available_models_for_subset(self, subset: str) -> list[str]:
2351
+ """
2352
+ Get list of models that have images in the HF CDN for this subset.
2353
+
2354
+ Uses pre-built index for O(1) lookup.
2355
+
2356
+ Returns:
2357
+ List of model names (sorted)
2358
+ """
2359
+ return self._subset_models_index.get(subset, [])
2360
+
2361
+ def _has_model_image(self, subset: str, model: str, sample_index: int) -> bool:
2362
+ """
2363
+ Check if a model has an image for a specific sample in the HF CDN.
2364
+
2365
+ Args:
2366
+ subset: Subset name
2367
+ model: Model name
2368
+ sample_index: Sample index
2369
+
2370
+ Returns:
2371
+ True if image exists in CDN index
2372
+ """
2373
+ return (subset, model, sample_index) in self._image_url_index
2374
+
2375
+ def get_subset_info(self, subset: str) -> Optional[SubsetInfo]:
2376
+ """
2377
+ Get information about a subset.
2378
+
2379
+ Override for HF deployment to use CDN image index for models list.
2380
+
2381
+ Args:
2382
+ subset: Subset name
2383
+
2384
+ Returns:
2385
+ SubsetInfo or None if subset doesn't exist
2386
+ """
2387
+ if subset in self._subset_info_cache:
2388
+ return self._subset_info_cache[subset]
2389
+
2390
+ subset_path = os.path.join(self.arena_dir, subset)
2391
+ if not os.path.isdir(subset_path):
2392
+ return None
2393
+
2394
+ # Get models from CDN index instead of local file system
2395
+ models = self._get_available_models_for_subset(subset)
2396
+
2397
+ # Get experiments
2398
+ pk_logs_dir = os.path.join(subset_path, "pk_logs")
2399
+ experiments = []
2400
+ if os.path.isdir(pk_logs_dir):
2401
+ for name in os.listdir(pk_logs_dir):
2402
+ exp_path = os.path.join(pk_logs_dir, name)
2403
+ if os.path.isdir(exp_path):
2404
+ # Check for battle logs
2405
+ has_logs = any(
2406
+ f.endswith(".jsonl")
2407
+ for f in os.listdir(exp_path)
2408
+ if os.path.isfile(os.path.join(exp_path, f))
2409
+ )
2410
+ if has_logs:
2411
+ experiments.append(name)
2412
+ experiments.sort()
2413
+
2414
+ # Load state
2415
+ state_path = os.path.join(subset_path, "arena", "state.json")
2416
+ state = load_state(state_path)
2417
+
2418
+ # Get image count range
2419
+ img_range = self._image_count_range.get(subset, (1, 1))
2420
+
2421
+ # Get prompt sources
2422
+ prompt_sources = self._prompt_sources.get(subset, [])
2423
+
2424
+ info = SubsetInfo(
2425
+ name=subset,
2426
+ models=models,
2427
+ experiments=experiments,
2428
+ total_battles=state.total_battles,
2429
+ state=state,
2430
+ min_input_images=img_range[0],
2431
+ max_input_images=img_range[1],
2432
+ prompt_sources=prompt_sources,
2433
+ )
2434
+
2435
+ self._subset_info_cache[subset] = info
2436
+ return info
2437
+
2438
+ def get_sample_all_models(
2439
+ self, subset: str, exp_name: str, sample_index: int,
2440
+ filter_models: Optional[list[str]] = None,
2441
+ stats_scope: str = "filtered"
2442
+ ) -> dict[str, Any]:
2443
+ """
2444
+ Get all model outputs for a specific sample, sorted by win rate.
2445
+
2446
+ Override for HF deployment to use CDN image index instead of local files.
2447
+
2448
+ Args:
2449
+ subset: Subset name
2450
+ exp_name: Experiment name
2451
+ sample_index: Sample index
2452
+ filter_models: Optional list of models to filter (show only these models)
2453
+ stats_scope: 'filtered' = only count battles between filtered models,
2454
+ 'all' = count all battles (but show only filtered models)
2455
+
2456
+ Returns:
2457
+ Dict with sample info and all model outputs sorted by win rate
2458
+ """
2459
+ # Get sample metadata
2460
+ sample_meta = self._get_sample_data(subset, sample_index)
2461
+
2462
+ # Determine which models to use for stats calculation
2463
+ stats_filter = filter_models if stats_scope == "filtered" else None
2464
+ model_stats = self.get_model_win_stats(subset, exp_name, sample_index, stats_filter)
2465
+
2466
+ # Get all models that have outputs in CDN (O(1) lookup)
2467
+ available_models_list = self._get_available_models_for_subset(subset)
2468
+
2469
+ # Apply filter if specified
2470
+ if filter_models:
2471
+ filter_set = set(filter_models)
2472
+ available_models_list = [m for m in available_models_list if m in filter_set]
2473
+
2474
+ # Build model info for models that have images for this sample
2475
+ available_models = []
2476
+ for model in available_models_list:
2477
+ # Check if model has image for this sample in CDN index
2478
+ if self._has_model_image(subset, model, sample_index):
2479
+ stats = model_stats.get(model, {
2480
+ "wins": 0, "losses": 0, "ties": 0, "total": 0, "win_rate": 0
2481
+ })
2482
+ available_models.append({
2483
+ "model": model,
2484
+ "wins": stats["wins"],
2485
+ "losses": stats["losses"],
2486
+ "ties": stats["ties"],
2487
+ "total": stats["total"],
2488
+ "win_rate": stats["win_rate"],
2489
+ })
2490
+
2491
+ # Sort by win rate (descending), then by wins (descending), then by model name
2492
+ available_models.sort(key=lambda x: (-x["win_rate"], -x["wins"], x["model"]))
2493
+
2494
+ return {
2495
+ "subset": subset,
2496
+ "exp_name": exp_name,
2497
+ "sample_index": sample_index,
2498
+ "instruction": sample_meta.get("instruction", ""),
2499
+ "task_type": sample_meta.get("task_type", ""),
2500
+ "input_image_count": sample_meta.get("input_image_count", 1),
2501
+ "prompt_source": sample_meta.get("prompt_source"),
2502
+ "original_metadata": sample_meta.get("original_metadata"),
2503
+ "models": available_models,
2504
+ }
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "genarena"
3
- version = "0.1.0"
3
+ version = "0.1.2"
4
4
  description = "GenArena Arena Evaluation - VLM-based pairwise image generation evaluation"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes