genarena 0.1.1__tar.gz → 0.1.2__tar.gz
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {genarena-0.1.1 → genarena-0.1.2}/PKG-INFO +1 -1
- {genarena-0.1.1 → genarena-0.1.2}/genarena/visualize/data_loader.py +87 -13
- {genarena-0.1.1 → genarena-0.1.2}/pyproject.toml +1 -1
- {genarena-0.1.1 → genarena-0.1.2}/.github/workflows/publish.yml +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/.gitignore +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/README.md +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/docs/README.md +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/docs/architecture.md +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/docs/cli-reference.md +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/docs/experiments.md +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/docs/faq.md +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/docs/maintainer-guide/README.md +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/docs/maintainer-guide/deploy.md +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/docs/quickstart.md +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/__init__.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/__main__.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/arena.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/battle.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/bt_elo.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/cli.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/data.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/deploy/Dockerfile +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/deploy/README.md +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/deploy/__init__.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/deploy/app.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/experiments.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/leaderboard.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/logs.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/models.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/prompts/__init__.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/prompts/mmrb2.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/sampling.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/state.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/sync/__init__.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/sync/auto_commit.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/sync/deploy_ops.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/sync/git_ops.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/sync/hf_ops.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/sync/init_ops.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/sync/packer.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/sync/submit.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/utils.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/validation/__init__.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/validation/schema.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/validation/validator.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/visualize/README.md +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/visualize/__init__.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/visualize/app.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/visualize/static/app.js +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/visualize/static/model_aliases.json +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/visualize/static/style.css +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/visualize/templates/index.html +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/genarena/vlm.py +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/requirements.txt +0 -0
- {genarena-0.1.1 → genarena-0.1.2}/setup.py +0 -0
|
@@ -2267,14 +2267,15 @@ class HFArenaDataLoader(ArenaDataLoader):
|
|
|
2267
2267
|
preload: If True, preload all data at initialization
|
|
2268
2268
|
"""
|
|
2269
2269
|
self.hf_repo = hf_repo
|
|
2270
|
-
|
|
2270
|
+
# Build both indexes at once
|
|
2271
|
+
self._image_url_index, self._subset_models_index = self._build_image_index(image_files)
|
|
2271
2272
|
super().__init__(arena_dir, data_dir, preload=preload)
|
|
2272
2273
|
|
|
2273
2274
|
def _build_image_index(
|
|
2274
2275
|
self, image_files: list[str]
|
|
2275
|
-
) -> dict[tuple[str, str, int], str]:
|
|
2276
|
+
) -> tuple[dict[tuple[str, str, int], str], dict[str, list[str]]]:
|
|
2276
2277
|
"""
|
|
2277
|
-
Build
|
|
2278
|
+
Build indexes from HF image file list.
|
|
2278
2279
|
|
|
2279
2280
|
Expected path format: {subset}/models/{exp_name}/{model}/{index}.png
|
|
2280
2281
|
|
|
@@ -2282,11 +2283,14 @@ class HFArenaDataLoader(ArenaDataLoader):
|
|
|
2282
2283
|
image_files: List of image file paths from HF repo
|
|
2283
2284
|
|
|
2284
2285
|
Returns:
|
|
2285
|
-
|
|
2286
|
+
Tuple of:
|
|
2287
|
+
- Dict mapping (subset, model, sample_index) to HF file path
|
|
2288
|
+
- Dict mapping subset to sorted list of model names
|
|
2286
2289
|
"""
|
|
2287
2290
|
from genarena.models import parse_image_index
|
|
2288
2291
|
|
|
2289
2292
|
index: dict[tuple[str, str, int], str] = {}
|
|
2293
|
+
subset_models: dict[str, set[str]] = {}
|
|
2290
2294
|
|
|
2291
2295
|
for path in image_files:
|
|
2292
2296
|
parts = path.split("/")
|
|
@@ -2300,9 +2304,18 @@ class HFArenaDataLoader(ArenaDataLoader):
|
|
|
2300
2304
|
if idx is not None:
|
|
2301
2305
|
# If duplicate, later entries overwrite earlier ones
|
|
2302
2306
|
index[(subset, model, idx)] = path
|
|
2307
|
+
# Also track subset -> models mapping
|
|
2308
|
+
if subset not in subset_models:
|
|
2309
|
+
subset_models[subset] = set()
|
|
2310
|
+
subset_models[subset].add(model)
|
|
2311
|
+
|
|
2312
|
+
# Convert sets to sorted lists
|
|
2313
|
+
subset_models_sorted: dict[str, list[str]] = {
|
|
2314
|
+
subset: sorted(models) for subset, models in subset_models.items()
|
|
2315
|
+
}
|
|
2303
2316
|
|
|
2304
|
-
logger.info(f"Built image URL index with {len(index)} entries")
|
|
2305
|
-
return index
|
|
2317
|
+
logger.info(f"Built image URL index with {len(index)} entries across {len(subset_models_sorted)} subsets")
|
|
2318
|
+
return index, subset_models_sorted
|
|
2306
2319
|
|
|
2307
2320
|
def get_model_image_url(
|
|
2308
2321
|
self, subset: str, model: str, sample_index: int
|
|
@@ -2338,14 +2351,12 @@ class HFArenaDataLoader(ArenaDataLoader):
|
|
|
2338
2351
|
"""
|
|
2339
2352
|
Get list of models that have images in the HF CDN for this subset.
|
|
2340
2353
|
|
|
2354
|
+
Uses pre-built index for O(1) lookup.
|
|
2355
|
+
|
|
2341
2356
|
Returns:
|
|
2342
|
-
List of model names
|
|
2357
|
+
List of model names (sorted)
|
|
2343
2358
|
"""
|
|
2344
|
-
|
|
2345
|
-
for (s, model, _) in self._image_url_index.keys():
|
|
2346
|
-
if s == subset:
|
|
2347
|
-
models.add(model)
|
|
2348
|
-
return sorted(models)
|
|
2359
|
+
return self._subset_models_index.get(subset, [])
|
|
2349
2360
|
|
|
2350
2361
|
def _has_model_image(self, subset: str, model: str, sample_index: int) -> bool:
|
|
2351
2362
|
"""
|
|
@@ -2361,6 +2372,69 @@ class HFArenaDataLoader(ArenaDataLoader):
|
|
|
2361
2372
|
"""
|
|
2362
2373
|
return (subset, model, sample_index) in self._image_url_index
|
|
2363
2374
|
|
|
2375
|
+
def get_subset_info(self, subset: str) -> Optional[SubsetInfo]:
|
|
2376
|
+
"""
|
|
2377
|
+
Get information about a subset.
|
|
2378
|
+
|
|
2379
|
+
Override for HF deployment to use CDN image index for models list.
|
|
2380
|
+
|
|
2381
|
+
Args:
|
|
2382
|
+
subset: Subset name
|
|
2383
|
+
|
|
2384
|
+
Returns:
|
|
2385
|
+
SubsetInfo or None if subset doesn't exist
|
|
2386
|
+
"""
|
|
2387
|
+
if subset in self._subset_info_cache:
|
|
2388
|
+
return self._subset_info_cache[subset]
|
|
2389
|
+
|
|
2390
|
+
subset_path = os.path.join(self.arena_dir, subset)
|
|
2391
|
+
if not os.path.isdir(subset_path):
|
|
2392
|
+
return None
|
|
2393
|
+
|
|
2394
|
+
# Get models from CDN index instead of local file system
|
|
2395
|
+
models = self._get_available_models_for_subset(subset)
|
|
2396
|
+
|
|
2397
|
+
# Get experiments
|
|
2398
|
+
pk_logs_dir = os.path.join(subset_path, "pk_logs")
|
|
2399
|
+
experiments = []
|
|
2400
|
+
if os.path.isdir(pk_logs_dir):
|
|
2401
|
+
for name in os.listdir(pk_logs_dir):
|
|
2402
|
+
exp_path = os.path.join(pk_logs_dir, name)
|
|
2403
|
+
if os.path.isdir(exp_path):
|
|
2404
|
+
# Check for battle logs
|
|
2405
|
+
has_logs = any(
|
|
2406
|
+
f.endswith(".jsonl")
|
|
2407
|
+
for f in os.listdir(exp_path)
|
|
2408
|
+
if os.path.isfile(os.path.join(exp_path, f))
|
|
2409
|
+
)
|
|
2410
|
+
if has_logs:
|
|
2411
|
+
experiments.append(name)
|
|
2412
|
+
experiments.sort()
|
|
2413
|
+
|
|
2414
|
+
# Load state
|
|
2415
|
+
state_path = os.path.join(subset_path, "arena", "state.json")
|
|
2416
|
+
state = load_state(state_path)
|
|
2417
|
+
|
|
2418
|
+
# Get image count range
|
|
2419
|
+
img_range = self._image_count_range.get(subset, (1, 1))
|
|
2420
|
+
|
|
2421
|
+
# Get prompt sources
|
|
2422
|
+
prompt_sources = self._prompt_sources.get(subset, [])
|
|
2423
|
+
|
|
2424
|
+
info = SubsetInfo(
|
|
2425
|
+
name=subset,
|
|
2426
|
+
models=models,
|
|
2427
|
+
experiments=experiments,
|
|
2428
|
+
total_battles=state.total_battles,
|
|
2429
|
+
state=state,
|
|
2430
|
+
min_input_images=img_range[0],
|
|
2431
|
+
max_input_images=img_range[1],
|
|
2432
|
+
prompt_sources=prompt_sources,
|
|
2433
|
+
)
|
|
2434
|
+
|
|
2435
|
+
self._subset_info_cache[subset] = info
|
|
2436
|
+
return info
|
|
2437
|
+
|
|
2364
2438
|
def get_sample_all_models(
|
|
2365
2439
|
self, subset: str, exp_name: str, sample_index: int,
|
|
2366
2440
|
filter_models: Optional[list[str]] = None,
|
|
@@ -2389,7 +2463,7 @@ class HFArenaDataLoader(ArenaDataLoader):
|
|
|
2389
2463
|
stats_filter = filter_models if stats_scope == "filtered" else None
|
|
2390
2464
|
model_stats = self.get_model_win_stats(subset, exp_name, sample_index, stats_filter)
|
|
2391
2465
|
|
|
2392
|
-
# Get all models that have outputs in CDN
|
|
2466
|
+
# Get all models that have outputs in CDN (O(1) lookup)
|
|
2393
2467
|
available_models_list = self._get_available_models_for_subset(subset)
|
|
2394
2468
|
|
|
2395
2469
|
# Apply filter if specified
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|