genarena 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genarena/deploy/Dockerfile +2 -5
- genarena/visualize/data_loader.py +95 -0
- {genarena-0.1.0.dist-info → genarena-0.1.1.dist-info}/METADATA +1 -1
- {genarena-0.1.0.dist-info → genarena-0.1.1.dist-info}/RECORD +6 -6
- {genarena-0.1.0.dist-info → genarena-0.1.1.dist-info}/WHEEL +0 -0
- {genarena-0.1.0.dist-info → genarena-0.1.1.dist-info}/entry_points.txt +0 -0
genarena/deploy/Dockerfile
CHANGED
|
@@ -7,11 +7,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
|
7
7
|
git \
|
|
8
8
|
&& rm -rf /var/lib/apt/lists/*
|
|
9
9
|
|
|
10
|
-
# Copy project files
|
|
11
|
-
COPY . .
|
|
12
|
-
|
|
13
10
|
# Install Python dependencies
|
|
14
|
-
RUN pip install
|
|
11
|
+
RUN pip install genarena[web]
|
|
15
12
|
|
|
16
13
|
# Download parquet benchmark data from HuggingFace
|
|
17
14
|
# This dataset contains the prompt/benchmark data (not arena battle results)
|
|
@@ -22,4 +19,4 @@ RUN python -c "from huggingface_hub import snapshot_download; snapshot_download(
|
|
|
22
19
|
EXPOSE 7860
|
|
23
20
|
|
|
24
21
|
# Start the application
|
|
25
|
-
CMD ["python", "genarena
|
|
22
|
+
CMD ["python", "-m", "genarena.deploy.app"]
|
|
@@ -2333,3 +2333,98 @@ class HFArenaDataLoader(ArenaDataLoader):
|
|
|
2333
2333
|
"""
|
|
2334
2334
|
# Return None to indicate image should be fetched via CDN
|
|
2335
2335
|
return None
|
|
2336
|
+
|
|
2337
|
+
def _get_available_models_for_subset(self, subset: str) -> list[str]:
|
|
2338
|
+
"""
|
|
2339
|
+
Get list of models that have images in the HF CDN for this subset.
|
|
2340
|
+
|
|
2341
|
+
Returns:
|
|
2342
|
+
List of model names
|
|
2343
|
+
"""
|
|
2344
|
+
models = set()
|
|
2345
|
+
for (s, model, _) in self._image_url_index.keys():
|
|
2346
|
+
if s == subset:
|
|
2347
|
+
models.add(model)
|
|
2348
|
+
return sorted(models)
|
|
2349
|
+
|
|
2350
|
+
def _has_model_image(self, subset: str, model: str, sample_index: int) -> bool:
|
|
2351
|
+
"""
|
|
2352
|
+
Check if a model has an image for a specific sample in the HF CDN.
|
|
2353
|
+
|
|
2354
|
+
Args:
|
|
2355
|
+
subset: Subset name
|
|
2356
|
+
model: Model name
|
|
2357
|
+
sample_index: Sample index
|
|
2358
|
+
|
|
2359
|
+
Returns:
|
|
2360
|
+
True if image exists in CDN index
|
|
2361
|
+
"""
|
|
2362
|
+
return (subset, model, sample_index) in self._image_url_index
|
|
2363
|
+
|
|
2364
|
+
def get_sample_all_models(
    self, subset: str, exp_name: str, sample_index: int,
    filter_models: Optional[list[str]] = None,
    stats_scope: str = "filtered"
) -> dict[str, Any]:
    """
    Get all model outputs for a specific sample, sorted by win rate.

    Override for HF deployment to use CDN image index instead of local files.

    Args:
        subset: Subset name
        exp_name: Experiment name
        sample_index: Sample index
        filter_models: Optional list of models to filter (show only these models).
            An empty list is treated the same as no filter for display purposes.
        stats_scope: 'filtered' = only count battles between filtered models,
                     'all' = count all battles (but show only filtered models)

    Returns:
        Dict with sample info (``subset``, ``exp_name``, ``sample_index``,
        ``instruction``, ``task_type``, ``input_image_count``,
        ``prompt_source``, ``original_metadata``) and ``models``: one entry
        per model that has an image for this sample, each carrying
        ``model``, ``wins``, ``losses``, ``ties``, ``total`` and ``win_rate``,
        sorted by win rate.
    """
    # NOTE(review): the return contract of _get_sample_data is not visible
    # here; fall back to an empty dict so a missing record yields the default
    # field values below instead of an AttributeError on .get().
    sample_meta = self._get_sample_data(subset, sample_index) or {}

    # Stats are restricted to the filtered models only when the caller asks
    # for the "filtered" scope; any other scope counts all battles.
    stats_filter = filter_models if stats_scope == "filtered" else None
    model_stats = self.get_model_win_stats(subset, exp_name, sample_index, stats_filter)

    # Start from every model known to the CDN index for this subset.
    candidates = self._get_available_models_for_subset(subset)

    # Apply the display filter if specified.
    if filter_models:
        filter_set = set(filter_models)
        candidates = [m for m in candidates if m in filter_set]

    # Loop-invariant: the stat fields to copy and the all-zero fallback used
    # for models that have an image but no recorded stats.
    stat_fields = ("wins", "losses", "ties", "total", "win_rate")
    zero_stats = dict.fromkeys(stat_fields, 0)

    # Keep only models that actually have an image for this sample.
    available_models = []
    for model in candidates:
        if not self._has_model_image(subset, model, sample_index):
            continue
        stats = model_stats.get(model, zero_stats)
        available_models.append(
            {"model": model, **{field: stats[field] for field in stat_fields}}
        )

    # Sort by win rate (descending), then by wins (descending), then by model name
    available_models.sort(key=lambda x: (-x["win_rate"], -x["wins"], x["model"]))

    return {
        "subset": subset,
        "exp_name": exp_name,
        "sample_index": sample_index,
        "instruction": sample_meta.get("instruction", ""),
        "task_type": sample_meta.get("task_type", ""),
        "input_image_count": sample_meta.get("input_image_count", 1),
        "prompt_source": sample_meta.get("prompt_source"),
        "original_metadata": sample_meta.get("original_metadata"),
        "models": available_models,
    }
|
|
@@ -13,7 +13,7 @@ genarena/sampling.py,sha256=v3AeOASfrxBYyPGy3tlAgBetK5F1_AhdJa9HgB9-XQM,11607
|
|
|
13
13
|
genarena/state.py,sha256=SK93_ACqHVS3FpHchp5Oj-UvwjDcPZv-ACBX3Cc-P8Q,24095
|
|
14
14
|
genarena/utils.py,sha256=ppzphYoNryjBMQlgS4GAGC2lw1nmdE_zN4RTcDQk5Y8,2685
|
|
15
15
|
genarena/vlm.py,sha256=kfgLtSd2wJ077O-VxlNbvRv70Hgg-jWN5ZcICruaZBw,18249
|
|
16
|
-
genarena/deploy/Dockerfile,sha256=
|
|
16
|
+
genarena/deploy/Dockerfile,sha256=sbYetDT5ajJHJxwcO5DZzMaqlOZddHXLcUXN8zHaIMY,670
|
|
17
17
|
genarena/deploy/README.md,sha256=7KcPVY73_5Gotr6a-E24xgeVxe5fokuT4KlupQead8w,1576
|
|
18
18
|
genarena/deploy/__init__.py,sha256=BpXfurQ84w_Qr_C8Joy0Oh_9HCU--5cMSt4wvxsPV8Y,122
|
|
19
19
|
genarena/deploy/app.py,sha256=BPifFGz9p0J7-TFw19JPuzmxamp-hdNvVpIxcvHAPsc,2716
|
|
@@ -33,12 +33,12 @@ genarena/validation/validator.py,sha256=-yfVMXJBOSMmfajczGjpW3K0Xe1LFHPbdXh5cuMM
|
|
|
33
33
|
genarena/visualize/README.md,sha256=8YOEBRicm35G6wEbA-qBbHBkZwozl0Zdl8zNqmb-t_Y,4525
|
|
34
34
|
genarena/visualize/__init__.py,sha256=Id0QCPo_QuxjZOG7QuqttdzNCwmDFrH26eeYqHLn-JU,283
|
|
35
35
|
genarena/visualize/app.py,sha256=2TbGuH22zV2U3Fm8LjZLTxsoVeZHRmMqsBlSZ1xhz1A,34903
|
|
36
|
-
genarena/visualize/data_loader.py,sha256=
|
|
36
|
+
genarena/visualize/data_loader.py,sha256=rTmTMCH7jdKLEJjZyg9bX9DSJPfoxxCRsjuJZJaN8Go,89881
|
|
37
37
|
genarena/visualize/static/app.js,sha256=g2sdB9zfa_Nee-sQ-JJOWOGKJeihD31LpWyg-vSB6JA,144584
|
|
38
38
|
genarena/visualize/static/model_aliases.json,sha256=iZQ4IIm-Vv2ly8XSPT2QPmDHM4PlnJS3RTdskbfhQME,1594
|
|
39
39
|
genarena/visualize/static/style.css,sha256=nIAyGr9PpY9C-wGR5TGPgHB1g9KRCWN-iEEN8F1tbdk,78265
|
|
40
40
|
genarena/visualize/templates/index.html,sha256=cJoFWkXVXP9MDee16vq-ufMhbs89cVwaTVhS4RKMW1E,21725
|
|
41
|
-
genarena-0.1.
|
|
42
|
-
genarena-0.1.
|
|
43
|
-
genarena-0.1.
|
|
44
|
-
genarena-0.1.
|
|
41
|
+
genarena-0.1.1.dist-info/METADATA,sha256=XFjDQHeAvZvqnkwrtEfHWFYltK-FH7hC2emNXNVbN-c,6065
|
|
42
|
+
genarena-0.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
43
|
+
genarena-0.1.1.dist-info/entry_points.txt,sha256=yEZL7896wPLpHS9dWMQ82V5-04PJaYkm48mb7dNdlhM,47
|
|
44
|
+
genarena-0.1.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|