genarena 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,11 +7,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
7
7
  git \
8
8
  && rm -rf /var/lib/apt/lists/*
9
9
 
10
- # Copy project files
11
- COPY . .
12
-
13
10
  # Install Python dependencies
14
- RUN pip install --no-cache-dir -e .[web]
11
+ RUN pip install genarena[web]
15
12
 
16
13
  # Download parquet benchmark data from HuggingFace
17
14
  # This dataset contains the prompt/benchmark data (not arena battle results)
@@ -22,4 +19,4 @@ RUN python -c "from huggingface_hub import snapshot_download; snapshot_download(
22
19
  EXPOSE 7860
23
20
 
24
21
  # Start the application
25
- CMD ["python", "genarena/deploy/app.py"]
22
+ CMD ["python", "-m", "genarena.deploy.app"]
@@ -2333,3 +2333,98 @@ class HFArenaDataLoader(ArenaDataLoader):
2333
2333
  """
2334
2334
  # Return None to indicate image should be fetched via CDN
2335
2335
  return None
2336
+
2337
+ def _get_available_models_for_subset(self, subset: str) -> list[str]:
2338
+ """
2339
+ Get list of models that have images in the HF CDN for this subset.
2340
+
2341
+ Returns:
2342
+ List of model names
2343
+ """
2344
+ models = set()
2345
+ for (s, model, _) in self._image_url_index.keys():
2346
+ if s == subset:
2347
+ models.add(model)
2348
+ return sorted(models)
2349
+
2350
+ def _has_model_image(self, subset: str, model: str, sample_index: int) -> bool:
2351
+ """
2352
+ Check if a model has an image for a specific sample in the HF CDN.
2353
+
2354
+ Args:
2355
+ subset: Subset name
2356
+ model: Model name
2357
+ sample_index: Sample index
2358
+
2359
+ Returns:
2360
+ True if image exists in CDN index
2361
+ """
2362
+ return (subset, model, sample_index) in self._image_url_index
2363
+
2364
+ def get_sample_all_models(
2365
+ self, subset: str, exp_name: str, sample_index: int,
2366
+ filter_models: Optional[list[str]] = None,
2367
+ stats_scope: str = "filtered"
2368
+ ) -> dict[str, Any]:
2369
+ """
2370
+ Get all model outputs for a specific sample, sorted by win rate.
2371
+
2372
+ Override for HF deployment to use CDN image index instead of local files.
2373
+
2374
+ Args:
2375
+ subset: Subset name
2376
+ exp_name: Experiment name
2377
+ sample_index: Sample index
2378
+ filter_models: Optional list of models to filter (show only these models)
2379
+ stats_scope: 'filtered' = only count battles between filtered models,
2380
+ 'all' = count all battles (but show only filtered models)
2381
+
2382
+ Returns:
2383
+ Dict with sample info and all model outputs sorted by win rate
2384
+ """
2385
+ # Get sample metadata
2386
+ sample_meta = self._get_sample_data(subset, sample_index)
2387
+
2388
+ # Determine which models to use for stats calculation
2389
+ stats_filter = filter_models if stats_scope == "filtered" else None
2390
+ model_stats = self.get_model_win_stats(subset, exp_name, sample_index, stats_filter)
2391
+
2392
+ # Get all models that have outputs in CDN
2393
+ available_models_list = self._get_available_models_for_subset(subset)
2394
+
2395
+ # Apply filter if specified
2396
+ if filter_models:
2397
+ filter_set = set(filter_models)
2398
+ available_models_list = [m for m in available_models_list if m in filter_set]
2399
+
2400
+ # Build model info for models that have images for this sample
2401
+ available_models = []
2402
+ for model in available_models_list:
2403
+ # Check if model has image for this sample in CDN index
2404
+ if self._has_model_image(subset, model, sample_index):
2405
+ stats = model_stats.get(model, {
2406
+ "wins": 0, "losses": 0, "ties": 0, "total": 0, "win_rate": 0
2407
+ })
2408
+ available_models.append({
2409
+ "model": model,
2410
+ "wins": stats["wins"],
2411
+ "losses": stats["losses"],
2412
+ "ties": stats["ties"],
2413
+ "total": stats["total"],
2414
+ "win_rate": stats["win_rate"],
2415
+ })
2416
+
2417
+ # Sort by win rate (descending), then by wins (descending), then by model name
2418
+ available_models.sort(key=lambda x: (-x["win_rate"], -x["wins"], x["model"]))
2419
+
2420
+ return {
2421
+ "subset": subset,
2422
+ "exp_name": exp_name,
2423
+ "sample_index": sample_index,
2424
+ "instruction": sample_meta.get("instruction", ""),
2425
+ "task_type": sample_meta.get("task_type", ""),
2426
+ "input_image_count": sample_meta.get("input_image_count", 1),
2427
+ "prompt_source": sample_meta.get("prompt_source"),
2428
+ "original_metadata": sample_meta.get("original_metadata"),
2429
+ "models": available_models,
2430
+ }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: genarena
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: GenArena Arena Evaluation - VLM-based pairwise image generation evaluation
5
5
  Author: GenArena Team
6
6
  License: Apache-2.0
@@ -13,7 +13,7 @@ genarena/sampling.py,sha256=v3AeOASfrxBYyPGy3tlAgBetK5F1_AhdJa9HgB9-XQM,11607
13
13
  genarena/state.py,sha256=SK93_ACqHVS3FpHchp5Oj-UvwjDcPZv-ACBX3Cc-P8Q,24095
14
14
  genarena/utils.py,sha256=ppzphYoNryjBMQlgS4GAGC2lw1nmdE_zN4RTcDQk5Y8,2685
15
15
  genarena/vlm.py,sha256=kfgLtSd2wJ077O-VxlNbvRv70Hgg-jWN5ZcICruaZBw,18249
16
- genarena/deploy/Dockerfile,sha256=UN3lm5WgFIxR4plsLKqhFXnvsqZ-ZuDC7KvRQ5E9Qiw,710
16
+ genarena/deploy/Dockerfile,sha256=sbYetDT5ajJHJxwcO5DZzMaqlOZddHXLcUXN8zHaIMY,670
17
17
  genarena/deploy/README.md,sha256=7KcPVY73_5Gotr6a-E24xgeVxe5fokuT4KlupQead8w,1576
18
18
  genarena/deploy/__init__.py,sha256=BpXfurQ84w_Qr_C8Joy0Oh_9HCU--5cMSt4wvxsPV8Y,122
19
19
  genarena/deploy/app.py,sha256=BPifFGz9p0J7-TFw19JPuzmxamp-hdNvVpIxcvHAPsc,2716
@@ -33,12 +33,12 @@ genarena/validation/validator.py,sha256=-yfVMXJBOSMmfajczGjpW3K0Xe1LFHPbdXh5cuMM
33
33
  genarena/visualize/README.md,sha256=8YOEBRicm35G6wEbA-qBbHBkZwozl0Zdl8zNqmb-t_Y,4525
34
34
  genarena/visualize/__init__.py,sha256=Id0QCPo_QuxjZOG7QuqttdzNCwmDFrH26eeYqHLn-JU,283
35
35
  genarena/visualize/app.py,sha256=2TbGuH22zV2U3Fm8LjZLTxsoVeZHRmMqsBlSZ1xhz1A,34903
36
- genarena/visualize/data_loader.py,sha256=C28qx26iJT_cJbAJfRDVKprB9S6nZK63kbfzh87ofpk,86107
36
+ genarena/visualize/data_loader.py,sha256=rTmTMCH7jdKLEJjZyg9bX9DSJPfoxxCRsjuJZJaN8Go,89881
37
37
  genarena/visualize/static/app.js,sha256=g2sdB9zfa_Nee-sQ-JJOWOGKJeihD31LpWyg-vSB6JA,144584
38
38
  genarena/visualize/static/model_aliases.json,sha256=iZQ4IIm-Vv2ly8XSPT2QPmDHM4PlnJS3RTdskbfhQME,1594
39
39
  genarena/visualize/static/style.css,sha256=nIAyGr9PpY9C-wGR5TGPgHB1g9KRCWN-iEEN8F1tbdk,78265
40
40
  genarena/visualize/templates/index.html,sha256=cJoFWkXVXP9MDee16vq-ufMhbs89cVwaTVhS4RKMW1E,21725
41
- genarena-0.1.0.dist-info/METADATA,sha256=IJs5QDs2nGpGSxho3j4-zqfSY3GQu1iycZg6VFKV_Qc,6065
42
- genarena-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
43
- genarena-0.1.0.dist-info/entry_points.txt,sha256=yEZL7896wPLpHS9dWMQ82V5-04PJaYkm48mb7dNdlhM,47
44
- genarena-0.1.0.dist-info/RECORD,,
41
+ genarena-0.1.1.dist-info/METADATA,sha256=XFjDQHeAvZvqnkwrtEfHWFYltK-FH7hC2emNXNVbN-c,6065
42
+ genarena-0.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
43
+ genarena-0.1.1.dist-info/entry_points.txt,sha256=yEZL7896wPLpHS9dWMQ82V5-04PJaYkm48mb7dNdlhM,47
44
+ genarena-0.1.1.dist-info/RECORD,,