genarena 0.1.0__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. {genarena-0.1.0 → genarena-0.1.1}/PKG-INFO +1 -1
  2. {genarena-0.1.0 → genarena-0.1.1}/genarena/deploy/Dockerfile +2 -5
  3. {genarena-0.1.0 → genarena-0.1.1}/genarena/visualize/data_loader.py +95 -0
  4. {genarena-0.1.0 → genarena-0.1.1}/pyproject.toml +1 -1
  5. {genarena-0.1.0 → genarena-0.1.1}/.github/workflows/publish.yml +0 -0
  6. {genarena-0.1.0 → genarena-0.1.1}/.gitignore +0 -0
  7. {genarena-0.1.0 → genarena-0.1.1}/README.md +0 -0
  8. {genarena-0.1.0 → genarena-0.1.1}/docs/README.md +0 -0
  9. {genarena-0.1.0 → genarena-0.1.1}/docs/architecture.md +0 -0
  10. {genarena-0.1.0 → genarena-0.1.1}/docs/cli-reference.md +0 -0
  11. {genarena-0.1.0 → genarena-0.1.1}/docs/experiments.md +0 -0
  12. {genarena-0.1.0 → genarena-0.1.1}/docs/faq.md +0 -0
  13. {genarena-0.1.0 → genarena-0.1.1}/docs/maintainer-guide/README.md +0 -0
  14. {genarena-0.1.0 → genarena-0.1.1}/docs/maintainer-guide/deploy.md +0 -0
  15. {genarena-0.1.0 → genarena-0.1.1}/docs/quickstart.md +0 -0
  16. {genarena-0.1.0 → genarena-0.1.1}/genarena/__init__.py +0 -0
  17. {genarena-0.1.0 → genarena-0.1.1}/genarena/__main__.py +0 -0
  18. {genarena-0.1.0 → genarena-0.1.1}/genarena/arena.py +0 -0
  19. {genarena-0.1.0 → genarena-0.1.1}/genarena/battle.py +0 -0
  20. {genarena-0.1.0 → genarena-0.1.1}/genarena/bt_elo.py +0 -0
  21. {genarena-0.1.0 → genarena-0.1.1}/genarena/cli.py +0 -0
  22. {genarena-0.1.0 → genarena-0.1.1}/genarena/data.py +0 -0
  23. {genarena-0.1.0 → genarena-0.1.1}/genarena/deploy/README.md +0 -0
  24. {genarena-0.1.0 → genarena-0.1.1}/genarena/deploy/__init__.py +0 -0
  25. {genarena-0.1.0 → genarena-0.1.1}/genarena/deploy/app.py +0 -0
  26. {genarena-0.1.0 → genarena-0.1.1}/genarena/experiments.py +0 -0
  27. {genarena-0.1.0 → genarena-0.1.1}/genarena/leaderboard.py +0 -0
  28. {genarena-0.1.0 → genarena-0.1.1}/genarena/logs.py +0 -0
  29. {genarena-0.1.0 → genarena-0.1.1}/genarena/models.py +0 -0
  30. {genarena-0.1.0 → genarena-0.1.1}/genarena/prompts/__init__.py +0 -0
  31. {genarena-0.1.0 → genarena-0.1.1}/genarena/prompts/mmrb2.py +0 -0
  32. {genarena-0.1.0 → genarena-0.1.1}/genarena/sampling.py +0 -0
  33. {genarena-0.1.0 → genarena-0.1.1}/genarena/state.py +0 -0
  34. {genarena-0.1.0 → genarena-0.1.1}/genarena/sync/__init__.py +0 -0
  35. {genarena-0.1.0 → genarena-0.1.1}/genarena/sync/auto_commit.py +0 -0
  36. {genarena-0.1.0 → genarena-0.1.1}/genarena/sync/deploy_ops.py +0 -0
  37. {genarena-0.1.0 → genarena-0.1.1}/genarena/sync/git_ops.py +0 -0
  38. {genarena-0.1.0 → genarena-0.1.1}/genarena/sync/hf_ops.py +0 -0
  39. {genarena-0.1.0 → genarena-0.1.1}/genarena/sync/init_ops.py +0 -0
  40. {genarena-0.1.0 → genarena-0.1.1}/genarena/sync/packer.py +0 -0
  41. {genarena-0.1.0 → genarena-0.1.1}/genarena/sync/submit.py +0 -0
  42. {genarena-0.1.0 → genarena-0.1.1}/genarena/utils.py +0 -0
  43. {genarena-0.1.0 → genarena-0.1.1}/genarena/validation/__init__.py +0 -0
  44. {genarena-0.1.0 → genarena-0.1.1}/genarena/validation/schema.py +0 -0
  45. {genarena-0.1.0 → genarena-0.1.1}/genarena/validation/validator.py +0 -0
  46. {genarena-0.1.0 → genarena-0.1.1}/genarena/visualize/README.md +0 -0
  47. {genarena-0.1.0 → genarena-0.1.1}/genarena/visualize/__init__.py +0 -0
  48. {genarena-0.1.0 → genarena-0.1.1}/genarena/visualize/app.py +0 -0
  49. {genarena-0.1.0 → genarena-0.1.1}/genarena/visualize/static/app.js +0 -0
  50. {genarena-0.1.0 → genarena-0.1.1}/genarena/visualize/static/model_aliases.json +0 -0
  51. {genarena-0.1.0 → genarena-0.1.1}/genarena/visualize/static/style.css +0 -0
  52. {genarena-0.1.0 → genarena-0.1.1}/genarena/visualize/templates/index.html +0 -0
  53. {genarena-0.1.0 → genarena-0.1.1}/genarena/vlm.py +0 -0
  54. {genarena-0.1.0 → genarena-0.1.1}/requirements.txt +0 -0
  55. {genarena-0.1.0 → genarena-0.1.1}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: genarena
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: GenArena Arena Evaluation - VLM-based pairwise image generation evaluation
5
5
  Author: GenArena Team
6
6
  License: Apache-2.0
@@ -7,11 +7,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
7
7
  git \
8
8
  && rm -rf /var/lib/apt/lists/*
9
9
 
10
- # Copy project files
11
- COPY . .
12
-
13
10
  # Install Python dependencies
14
- RUN pip install --no-cache-dir -e .[web]
11
+ RUN pip install genarena[web]
15
12
 
16
13
  # Download parquet benchmark data from HuggingFace
17
14
  # This dataset contains the prompt/benchmark data (not arena battle results)
@@ -22,4 +19,4 @@ RUN python -c "from huggingface_hub import snapshot_download; snapshot_download(
22
19
  EXPOSE 7860
23
20
 
24
21
  # Start the application
25
- CMD ["python", "genarena/deploy/app.py"]
22
+ CMD ["python", "-m", "genarena.deploy.app"]
@@ -2333,3 +2333,98 @@ class HFArenaDataLoader(ArenaDataLoader):
2333
2333
  """
2334
2334
  # Return None to indicate image should be fetched via CDN
2335
2335
  return None
2336
+
2337
+ def _get_available_models_for_subset(self, subset: str) -> list[str]:
2338
+ """
2339
+ Get list of models that have images in the HF CDN for this subset.
2340
+
2341
+ Returns:
2342
+ List of model names
2343
+ """
2344
+ models = set()
2345
+ for (s, model, _) in self._image_url_index.keys():
2346
+ if s == subset:
2347
+ models.add(model)
2348
+ return sorted(models)
2349
+
2350
+ def _has_model_image(self, subset: str, model: str, sample_index: int) -> bool:
2351
+ """
2352
+ Check if a model has an image for a specific sample in the HF CDN.
2353
+
2354
+ Args:
2355
+ subset: Subset name
2356
+ model: Model name
2357
+ sample_index: Sample index
2358
+
2359
+ Returns:
2360
+ True if image exists in CDN index
2361
+ """
2362
+ return (subset, model, sample_index) in self._image_url_index
2363
+
2364
+ def get_sample_all_models(
2365
+ self, subset: str, exp_name: str, sample_index: int,
2366
+ filter_models: Optional[list[str]] = None,
2367
+ stats_scope: str = "filtered"
2368
+ ) -> dict[str, Any]:
2369
+ """
2370
+ Get all model outputs for a specific sample, sorted by win rate.
2371
+
2372
+ Override for HF deployment to use CDN image index instead of local files.
2373
+
2374
+ Args:
2375
+ subset: Subset name
2376
+ exp_name: Experiment name
2377
+ sample_index: Sample index
2378
+ filter_models: Optional list of models to filter (show only these models)
2379
+ stats_scope: 'filtered' = only count battles between filtered models,
2380
+ 'all' = count all battles (but show only filtered models)
2381
+
2382
+ Returns:
2383
+ Dict with sample info and all model outputs sorted by win rate
2384
+ """
2385
+ # Get sample metadata
2386
+ sample_meta = self._get_sample_data(subset, sample_index)
2387
+
2388
+ # Determine which models to use for stats calculation
2389
+ stats_filter = filter_models if stats_scope == "filtered" else None
2390
+ model_stats = self.get_model_win_stats(subset, exp_name, sample_index, stats_filter)
2391
+
2392
+ # Get all models that have outputs in CDN
2393
+ available_models_list = self._get_available_models_for_subset(subset)
2394
+
2395
+ # Apply filter if specified
2396
+ if filter_models:
2397
+ filter_set = set(filter_models)
2398
+ available_models_list = [m for m in available_models_list if m in filter_set]
2399
+
2400
+ # Build model info for models that have images for this sample
2401
+ available_models = []
2402
+ for model in available_models_list:
2403
+ # Check if model has image for this sample in CDN index
2404
+ if self._has_model_image(subset, model, sample_index):
2405
+ stats = model_stats.get(model, {
2406
+ "wins": 0, "losses": 0, "ties": 0, "total": 0, "win_rate": 0
2407
+ })
2408
+ available_models.append({
2409
+ "model": model,
2410
+ "wins": stats["wins"],
2411
+ "losses": stats["losses"],
2412
+ "ties": stats["ties"],
2413
+ "total": stats["total"],
2414
+ "win_rate": stats["win_rate"],
2415
+ })
2416
+
2417
+ # Sort by win rate (descending), then by wins (descending), then by model name
2418
+ available_models.sort(key=lambda x: (-x["win_rate"], -x["wins"], x["model"]))
2419
+
2420
+ return {
2421
+ "subset": subset,
2422
+ "exp_name": exp_name,
2423
+ "sample_index": sample_index,
2424
+ "instruction": sample_meta.get("instruction", ""),
2425
+ "task_type": sample_meta.get("task_type", ""),
2426
+ "input_image_count": sample_meta.get("input_image_count", 1),
2427
+ "prompt_source": sample_meta.get("prompt_source"),
2428
+ "original_metadata": sample_meta.get("original_metadata"),
2429
+ "models": available_models,
2430
+ }
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "genarena"
3
- version = "0.1.0"
3
+ version = "0.1.1"
4
4
  description = "GenArena Arena Evaluation - VLM-based pairwise image generation evaluation"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes