alloc-0.1.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alloc-0.1.0/PKG-INFO +127 -0
- alloc-0.1.0/README.md +98 -0
- alloc-0.1.0/pyproject.toml +43 -0
- alloc-0.1.0/setup.cfg +4 -0
- alloc-0.1.0/src/alloc/__init__.py +9 -0
- alloc-0.1.0/src/alloc/cli.py +393 -0
- alloc-0.1.0/src/alloc/config.py +65 -0
- alloc-0.1.0/src/alloc/display.py +85 -0
- alloc-0.1.0/src/alloc/ghost.py +155 -0
- alloc-0.1.0/src/alloc/offline.py +63 -0
- alloc-0.1.0/src/alloc/probe.py +176 -0
- alloc-0.1.0/src/alloc/upload.py +45 -0
- alloc-0.1.0/src/alloc.egg-info/PKG-INFO +127 -0
- alloc-0.1.0/src/alloc.egg-info/SOURCES.txt +18 -0
- alloc-0.1.0/src/alloc.egg-info/dependency_links.txt +1 -0
- alloc-0.1.0/src/alloc.egg-info/entry_points.txt +2 -0
- alloc-0.1.0/src/alloc.egg-info/requires.txt +11 -0
- alloc-0.1.0/src/alloc.egg-info/top_level.txt +1 -0
- alloc-0.1.0/tests/test_cli.py +38 -0
- alloc-0.1.0/tests/test_ghost.py +75 -0
alloc-0.1.0/PKG-INFO
ADDED
@@ -0,0 +1,127 @@
Metadata-Version: 2.4
Name: alloc
Version: 0.1.0
Summary: GPU intelligence for ML training — right-size before you launch.
Author-email: Alloc Labs <hello@alloclabs.com>
License: Apache-2.0
Project-URL: Homepage, https://alloclabs.com
Project-URL: Repository, https://github.com/alloc-labs/alloc
Classifier: Development Status :: 3 - Alpha
Classifier: Intended Audience :: Developers
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Requires-Python: >=3.8
Description-Content-Type: text/markdown
Requires-Dist: typer>=0.9.0
Requires-Dist: rich>=13.0.0
Requires-Dist: httpx>=0.24.0
Requires-Dist: pydantic>=2.0.0
Provides-Extra: gpu
Requires-Dist: pynvml>=11.5.0; extra == "gpu"
Provides-Extra: dev
Requires-Dist: pytest>=7.0.0; extra == "dev"
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"

# Alloc CLI

GPU intelligence for ML training. Right-size before you launch.

## Install

```bash
pip install alloc

# With GPU monitoring support
pip install alloc[gpu]
```

## Commands

### `alloc scan` — Remote Ghost Scan (no GPU needed)

```bash
alloc scan --model llama-3-70b --gpu A100-80GB
alloc scan --model mistral-7b --gpu A10G --strategy fsdp --num-gpus 4
alloc scan --param-count-b 13.0 --gpu H100-80GB --dtype bf16
```

### `alloc ghost` — Local static analysis

```bash
alloc ghost train_7b.py --dtype bf16 --batch-size 32
```

Analyzes model parameters from the script filename and computes a VRAM breakdown.
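
For example, `alloc ghost train_7b.py` is treated as a roughly 7.0B-parameter model because the filename carries the size token `7b`; a script whose name has no recognizable size token makes the command exit with a hint to pass the count explicitly (`alloc scan --param-count-b 7.0`). This mirrors the `_extract_param_count` helper in `cli.py`, shown later in this diff.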

### `alloc run` — Training with GPU monitoring

```bash
alloc run python train.py
alloc run torchrun --nproc_per_node=4 train.py
alloc run -- python train.py --epochs 10
```

Wraps your command, monitors GPU memory/utilization/power via `pynvml`, and outputs a profile report.

### `alloc version`

```bash
alloc version
```

## Python API

```python
import alloc

# Static VRAM analysis (never crashes your training)
report = alloc.ghost(model)
print(report.total_gb)  # e.g., 115.42

# Or from param count (no torch needed)
report = alloc.ghost(param_count_b=7.0, dtype="bf16")
```

## Configuration

All config via environment variables. Zero config files required.

| Variable | Default | Description |
|----------|---------|-------------|
| `ALLOC_API_URL` | `https://alloc-production-ffc2.up.railway.app` | API endpoint for remote scans |
| `ALLOC_TOKEN` | (empty) | Auth token for API calls |
| `ALLOC_UPLOAD` | `false` | Upload results to dashboard |

## Architecture

| Module | Purpose |
|--------|---------|
| `ghost.py` | Static VRAM analysis via parameter walking. With torch: `model.named_parameters()`. Without: pure math from param count. |
| `probe.py` | External GPU monitoring via `pynvml`. Runs user script unmodified as subprocess. |
| `offline.py` | Flight Recorder: writes `alloc_profile_report.json.gz`, optional W&B upload. |
| `cli.py` | Typer CLI with `ghost`, `run`, `scan`, `version` commands. |
| `display.py` | Rich terminal formatting for reports. |
| `config.py` | Env-var-only configuration. |

## Design Principles

1. **Zero config** — `alloc run python train.py` works out of the box
2. **No monkey-patching** — External monitoring only, explicit opt-in API
3. **Never crash user's training** — All Alloc failures are caught and silenced
4. **Progressive disclosure** — Individual use first, team governance later

## Deep GPU Metrics (via Probe)

| Metric | Why It Matters |
|--------|---------------|
| Memory bandwidth utilization | Identifies memory-bandwidth-bound workloads |
| Tensor core vs CUDA core utilization | Reveals if workload uses tensor cores (FP16/BF16) |
| SM occupancy | Low occupancy = kernel launch overhead or small batches |
| PCIe/NVLink transfer rates | Communication bottlenecks in multi-GPU setups |
| Compute throughput (TFLOPS) | Actual vs theoretical — feeds cost-efficiency analysis |
| Power draw | Thermal throttling detection |
alloc-0.1.0/README.md
ADDED
@@ -0,0 +1,98 @@
# Alloc CLI

GPU intelligence for ML training. Right-size before you launch.

## Install

```bash
pip install alloc

# With GPU monitoring support
pip install alloc[gpu]
```

## Commands

### `alloc scan` — Remote Ghost Scan (no GPU needed)

```bash
alloc scan --model llama-3-70b --gpu A100-80GB
alloc scan --model mistral-7b --gpu A10G --strategy fsdp --num-gpus 4
alloc scan --param-count-b 13.0 --gpu H100-80GB --dtype bf16
```

### `alloc ghost` — Local static analysis

```bash
alloc ghost train_7b.py --dtype bf16 --batch-size 32
```

Analyzes model parameters from the script filename and computes a VRAM breakdown.
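
As a rough mental model of what that breakdown contains, here is a minimal sketch. It is not `ghost.py` (whose body this diff does not display); the byte counts and the activation heuristic below are assumptions, while the component names match the report fields the CLI prints.

```python
# Hypothetical back-of-envelope estimate, NOT ghost.py's actual formula.
# Components mirror the rows the CLI reports: weights, Adam optimizer
# state, estimated activations, and a 10% buffer.
def estimate_vram_gb(param_count: int, dtype: str = "bf16") -> dict:
    bytes_per_param = {"fp16": 2, "bf16": 2, "fp32": 4}[dtype]
    weights = param_count * bytes_per_param / 1e9       # raw weights, GB
    optimizer = param_count * 8 / 1e9                   # Adam: two fp32 states per param (assumed)
    activations = 0.3 * weights                         # crude placeholder heuristic
    buffer = 0.1 * (weights + optimizer + activations)  # matches the "Buffer (10%)" row
    return {
        "weights_gb": weights,
        "optimizer_gb": optimizer,
        "activations_gb": activations,
        "buffer_gb": buffer,
        "total_gb": weights + optimizer + activations + buffer,
    }

print(f"{estimate_vram_gb(int(7e9))['total_gb']:.1f} GB")  # ~81.6 GB for a 7B model
```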

### `alloc run` — Training with GPU monitoring

```bash
alloc run python train.py
alloc run torchrun --nproc_per_node=4 train.py
alloc run -- python train.py --epochs 10
```

Wraps your command, monitors GPU memory/utilization/power via `pynvml`, and outputs a profile report.
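
The monitoring loop is plain NVML polling around a subprocess. Here is a minimal sketch of the idea, assuming a 1-second sample interval and a single GPU; the function name is illustrative and the real `probe.py` is not shown in this diff:

```python
from __future__ import annotations

import subprocess
import time

import pynvml

def sample_command(command: list[str], gpu_index: int = 0) -> dict:
    """Run `command` unmodified and poll NVML until it exits."""
    pynvml.nvmlInit()
    handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_index)
    proc = subprocess.Popen(command)
    peak_mb, utils, watts = 0.0, [], []
    try:
        while proc.poll() is None:
            mem = pynvml.nvmlDeviceGetMemoryInfo(handle)
            peak_mb = max(peak_mb, mem.used / 1e6)
            utils.append(pynvml.nvmlDeviceGetUtilizationRates(handle).gpu)
            watts.append(pynvml.nvmlDeviceGetPowerUsage(handle) / 1000.0)  # mW -> W
            time.sleep(1.0)  # assumed interval
    finally:
        pynvml.nvmlShutdown()
    return {
        "peak_vram_mb": peak_mb,
        "avg_gpu_util": sum(utils) / max(len(utils), 1),
        "avg_power_watts": sum(watts) / max(len(watts), 1),
        "exit_code": proc.returncode,
    }
```

The returned keys match the fields `cli.py` writes into its profile report, so the sketch lines up with what `alloc run` saves to disk.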

### `alloc version`

```bash
alloc version
```

## Python API

```python
import alloc

# Static VRAM analysis (never crashes your training)
report = alloc.ghost(model)
print(report.total_gb)  # e.g., 115.42

# Or from param count (no torch needed)
report = alloc.ghost(param_count_b=7.0, dtype="bf16")
```
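
A quick sanity check on the param-count path: 7.0B parameters at 2 bytes each (bf16) is roughly 14 GB of raw weights, so `report.total_gb` should come back well above that once optimizer state, activations, and a safety buffer are added on top.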

## Configuration

All config via environment variables. Zero config files required.

| Variable | Default | Description |
|----------|---------|-------------|
| `ALLOC_API_URL` | `https://alloc-production-ffc2.up.railway.app` | API endpoint for remote scans |
| `ALLOC_TOKEN` | (empty) | Auth token for API calls |
| `ALLOC_UPLOAD` | `false` | Upload results to dashboard |
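
For instance, pointing the CLI at a self-hosted endpoint and enabling uploads looks like this (the URL and token file are placeholders, not real values):

```bash
export ALLOC_API_URL="https://alloc.example.internal"  # placeholder endpoint
export ALLOC_TOKEN="$(cat ~/.alloc_token)"             # placeholder token source
export ALLOC_UPLOAD=true
alloc scan --model llama-2-13b --gpu A100-80GB
```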

## Architecture

| Module | Purpose |
|--------|---------|
| `ghost.py` | Static VRAM analysis via parameter walking. With torch: `model.named_parameters()`. Without: pure math from param count. |
| `probe.py` | External GPU monitoring via `pynvml`. Runs user script unmodified as subprocess. |
| `offline.py` | Flight Recorder: writes `alloc_profile_report.json.gz`, optional W&B upload. |
| `cli.py` | Typer CLI with `ghost`, `run`, `scan`, `version` commands. |
| `display.py` | Rich terminal formatting for reports. |
| `config.py` | Env-var-only configuration. |

## Design Principles

1. **Zero config** — `alloc run python train.py` works out of the box
2. **No monkey-patching** — External monitoring only, explicit opt-in API
3. **Never crash user's training** — All Alloc failures are caught and silenced
4. **Progressive disclosure** — Individual use first, team governance later

## Deep GPU Metrics (via Probe)

| Metric | Why It Matters |
|--------|---------------|
| Memory bandwidth utilization | Identifies memory-bandwidth-bound workloads |
| Tensor core vs CUDA core utilization | Reveals if workload uses tensor cores (FP16/BF16) |
| SM occupancy | Low occupancy = kernel launch overhead or small batches |
| PCIe/NVLink transfer rates | Communication bottlenecks in multi-GPU setups |
| Compute throughput (TFLOPS) | Actual vs theoretical — feeds cost-efficiency analysis |
| Power draw | Thermal throttling detection |

alloc-0.1.0/pyproject.toml
ADDED
@@ -0,0 +1,43 @@
[build-system]
requires = ["setuptools>=68.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "alloc"
version = "0.1.0"
description = "GPU intelligence for ML training — right-size before you launch."
readme = "README.md"
license = {text = "Apache-2.0"}
requires-python = ">=3.8"
authors = [{name = "Alloc Labs", email = "hello@alloclabs.com"}]
classifiers = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Developers",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
]
dependencies = [
    "typer>=0.9.0",
    "rich>=13.0.0",
    "httpx>=0.24.0",
    "pydantic>=2.0.0",
]

[project.optional-dependencies]
gpu = ["pynvml>=11.5.0"]
dev = ["pytest>=7.0.0", "pytest-cov>=4.0.0"]

[project.scripts]
alloc = "alloc.cli:app"

[project.urls]
Homepage = "https://alloclabs.com"
Repository = "https://github.com/alloc-labs/alloc"

[tool.setuptools.packages.find]
where = ["src"]
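
Given the extras declared above, a contributor setup is one editable install. This is standard pip syntax, not a command documented by the package:

```bash
# Editable install with both the dev and gpu extras from pyproject.toml
pip install -e ".[dev,gpu]"
```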

alloc-0.1.0/src/alloc/cli.py
ADDED
@@ -0,0 +1,393 @@
"""Alloc CLI — GPU intelligence for ML training.

Commands:
    alloc ghost <script.py>     Static VRAM scan (no GPU, no execution)
    alloc run <command...>      Wrap training with probe monitoring
    alloc scan --model <name>   Remote scan via API
    alloc login                 Authenticate with Alloc dashboard
    alloc upload <artifact>     Upload a profile report to Alloc
    alloc version               Show version
"""

from __future__ import annotations

import os
import sys
from typing import Optional

import typer
from rich.console import Console

from alloc import __version__
from alloc.config import get_api_url, get_token, should_upload

app = typer.Typer(
    name="alloc",
    help="GPU intelligence for ML training. Right-size before you launch.",
    no_args_is_help=True,
    add_completion=False,
)
console = Console()


@app.command()
def ghost(
    script: str = typer.Argument(..., help="Python script to analyze (e.g. train.py)"),
    dtype: str = typer.Option("fp16", help="Data type: fp16, bf16, fp32"),
    batch_size: int = typer.Option(32, help="Training batch size"),
    seq_length: int = typer.Option(2048, help="Sequence length"),
    hidden_dim: int = typer.Option(4096, help="Hidden dimension"),
):
    """Static VRAM scan — analyze a model without executing it."""
    from alloc.ghost import ghost as ghost_fn
    from alloc.display import print_ghost_report

    # Try to extract a param count from the script filename (no import, no execution)
    param_count = _extract_param_count(script)

    if param_count is None:
        console.print(f"[yellow]Could not extract model from {script}.[/yellow]")
        console.print("[dim]Tip: Use 'alloc scan --param-count-b 7.0' for direct param count input.[/dim]")
        raise typer.Exit(1)

    report = ghost_fn(
        param_count=param_count,
        dtype=dtype,
        batch_size=batch_size,
        seq_length=seq_length,
        hidden_dim=hidden_dim,
    )
    print_ghost_report(report)


@app.command()
def run(
    command: list[str] = typer.Argument(..., help="Command to run (e.g. python train.py)"),
    timeout: int = typer.Option(300, help="Max monitoring time in seconds"),
    gpu: int = typer.Option(0, help="GPU index to monitor"),
    save: bool = typer.Option(True, help="Save profile report to disk"),
    out: Optional[str] = typer.Option(None, "--out", help="Output path for profile report"),
    upload: bool = typer.Option(False, "--upload", help="Upload report to Alloc dashboard after run"),
):
    """Run a training command with GPU monitoring."""
    from alloc.probe import probe_command
    from alloc.display import print_probe_result
    from alloc.offline import write_report

    if not command:
        console.print("[red]No command provided.[/red]")
        console.print("Usage: alloc run python train.py")
        raise typer.Exit(1)

    console.print(f"[green]alloc[/green] [dim]v{__version__}[/dim] — Probe monitoring")
    console.print(f"[dim]Command: {' '.join(command)}[/dim]")
    console.print()

    result = probe_command(
        command,
        timeout_seconds=timeout,
        gpu_index=gpu,
    )

    if result.error and "pynvml" in result.error:
        console.print(f"[yellow]{result.error}[/yellow]")
        console.print("[dim]Process ran without GPU monitoring.[/dim]")
    elif result.error:
        console.print(f"[red]Error: {result.error}[/red]")

    if result.peak_vram_mb > 0:
        print_probe_result(result)

    artifact_path = ""
    if save:
        probe_dict = {
            "peak_vram_mb": result.peak_vram_mb,
            "avg_gpu_util": result.avg_gpu_util,
            "avg_power_watts": result.avg_power_watts,
            "duration_seconds": result.duration_seconds,
            "samples": result.samples,
            "exit_code": result.exit_code,
        }
        artifact_path = write_report(probe_result=probe_dict, output_path=out)
        if artifact_path:
            console.print(f"[dim]Report saved: {artifact_path}[/dim]")

    # Upload if --upload flag or ALLOC_UPLOAD env var
    if artifact_path and (upload or should_upload()):
        _try_upload(artifact_path)

    if result.exit_code and result.exit_code != 0:
        raise typer.Exit(result.exit_code)


@app.command()
def scan(
    model: str = typer.Option(..., "--model", "-m", help="Model name (e.g. llama-3-70b)"),
    gpu: str = typer.Option("A100-80GB", "--gpu", "-g", help="Target GPU type"),
    dtype: str = typer.Option("fp16", help="Data type: fp16, bf16, fp32"),
    strategy: str = typer.Option("ddp", help="Strategy: ddp, fsdp, deepspeed"),
    num_gpus: int = typer.Option(4, help="Number of GPUs"),
    param_count_b: Optional[float] = typer.Option(None, "--param-count-b", "-p", help="Param count in billions (overrides model lookup)"),
    batch_size: int = typer.Option(32, help="Batch size"),
    seq_length: int = typer.Option(2048, help="Sequence length"),
    hidden_dim: int = typer.Option(4096, help="Hidden dimension"),
):
    """Remote scan via Alloc API — no GPU needed."""
    import httpx

    # Resolve param count from model name or explicit flag
    resolved_param_count = param_count_b or _model_to_params(model)
    if resolved_param_count is None:
        console.print(f"[yellow]Unknown model: {model}[/yellow]")
        console.print("[dim]Use --param-count-b to specify directly.[/dim]")
        raise typer.Exit(1)

    api_url = get_api_url()
    token = get_token()

    payload = {
        "entrypoint": f"{model}.py",
        "param_count_b": resolved_param_count,
        "dtype": dtype,
        "strategy": strategy,
        "gpu_type": gpu,
        "num_gpus": num_gpus,
        "batch_size": batch_size,
        "seq_length": seq_length,
        "hidden_dim": hidden_dim,
    }

    console.print(f"[green]alloc[/green] [dim]v{__version__}[/dim] — Remote Ghost Scan")
    console.print(f"[dim]Model: {model} ({resolved_param_count}B) → {gpu} x{num_gpus}[/dim]")
    console.print()

    try:
        headers = {"Content-Type": "application/json"}
        if token:
            headers["Authorization"] = f"Bearer {token}"

        with httpx.Client(timeout=30) as client:
            resp = client.post(f"{api_url}/scans/cli", json=payload, headers=headers)
            resp.raise_for_status()
            result = resp.json()

        _print_scan_result(result, gpu, strategy)
    except httpx.HTTPStatusError as e:
        console.print(f"[red]API error {e.response.status_code}[/red]")
        console.print(f"[dim]{e.response.text[:200]}[/dim]")
        raise typer.Exit(1)
    except httpx.ConnectError:
        console.print(f"[red]Cannot connect to {api_url}[/red]")
        console.print("[dim]Check ALLOC_API_URL or try: alloc ghost <script.py> for local scan[/dim]")
        raise typer.Exit(1)


@app.command()
def login():
    """Authenticate with Alloc dashboard."""
    import httpx
    from alloc.config import get_supabase_url, get_supabase_anon_key, load_config, save_config

    email = typer.prompt("Email")
    password = typer.prompt("Password", hide_input=True)

    supabase_url = get_supabase_url()
    anon_key = get_supabase_anon_key()

    try:
        with httpx.Client(timeout=15) as client:
            resp = client.post(
                f"{supabase_url}/auth/v1/token?grant_type=password",
                json={"email": email, "password": password},
                headers={
                    "apikey": anon_key,
                    "Content-Type": "application/json",
                },
            )
            resp.raise_for_status()
            data = resp.json()

        token = data.get("access_token", "")
        refresh = data.get("refresh_token", "")
        if not token:
            console.print("[red]Login failed: no access token received.[/red]")
            raise typer.Exit(1)

        cfg = load_config()
        cfg["token"] = token
        cfg["refresh_token"] = refresh
        cfg["email"] = email
        cfg["api_url"] = get_api_url()
        save_config(cfg)

        console.print(f"[green]Logged in as {email}[/green]")
    except httpx.HTTPStatusError as e:
        detail = ""
        try:
            detail = e.response.json().get("error_description", e.response.text[:200])
        except Exception:
            detail = e.response.text[:200]
        console.print(f"[red]Login failed: {detail}[/red]")
        raise typer.Exit(1)
    except httpx.ConnectError:
        console.print(f"[red]Cannot connect to {supabase_url}[/red]")
        raise typer.Exit(1)
    except typer.Exit:
        raise  # let the explicit exit above propagate instead of being swallowed below
    except Exception as e:
        console.print(f"[red]Login failed: {e}[/red]")
        raise typer.Exit(1)


@app.command()
def upload(
    artifact: str = typer.Argument(..., help="Path to alloc_profile_report.json.gz"),
):
    """Upload a profile report to the Alloc dashboard."""
    if not os.path.isfile(artifact):
        console.print(f"[red]File not found: {artifact}[/red]")
        raise typer.Exit(1)

    if not artifact.endswith(".json.gz"):
        console.print("[red]Expected a .json.gz artifact file.[/red]")
        raise typer.Exit(1)

    _try_upload(artifact)


@app.command()
def version():
    """Show alloc version."""
    console.print(f"alloc v{__version__}")


def _try_upload(artifact_path: str) -> None:
    """Attempt to upload an artifact. Prints status, never raises."""
    try:
        from alloc.upload import upload_artifact

        token = get_token()
        if not token:
            console.print("[yellow]Not logged in. Run `alloc login` first.[/yellow]")
            return

        api_url = get_api_url()
        console.print(f"[dim]Uploading to {api_url}...[/dim]")
        result = upload_artifact(artifact_path, api_url, token)
        run_id = result.get("run_id", "unknown")
        console.print(f"[green]Uploaded.[/green] Run ID: {run_id}")
    except Exception as e:
        console.print(f"[yellow]Upload failed: {e}[/yellow]")
        console.print(f"[dim]You can retry later: alloc upload {artifact_path}[/dim]")


def _print_scan_result(result: dict, gpu: str, strategy: str) -> None:
    """Print remote scan result."""
    from rich.table import Table
    from rich.panel import Panel

    vram = result.get("vram_breakdown", {})
    verdict = result.get("strategy_verdict", {})

    table = Table(show_header=True, header_style="bold cyan", box=None, padding=(0, 2))
    table.add_column("Component", style="dim")
    table.add_column("Size", justify="right", style="bold")

    table.add_row("Model weights", f"{vram.get('weights_gb', 0):.2f} GB")
    table.add_row("Optimizer (Adam)", f"{vram.get('optimizer_gb', 0):.2f} GB")
    table.add_row("Activations (est.)", f"{vram.get('activations_gb', 0):.2f} GB")
    table.add_row("Buffer (10%)", f"{vram.get('buffer_gb', 0):.2f} GB")
    table.add_row("", "")
    table.add_row("[bold]Total VRAM[/bold]", f"[bold]{vram.get('total_gb', 0):.2f} GB[/bold]")

    console.print(Panel(table, title="VRAM Breakdown", border_style="green", padding=(1, 2)))

    feasible = verdict.get("feasible", False)
    status = "[green]FEASIBLE[/green]" if feasible else "[red]INFEASIBLE[/red]"
    console.print(f"  Strategy: {strategy.upper()} on {gpu} — {status}")

    if not feasible and verdict.get("recommendation"):
        rec = verdict["recommendation"]
        console.print(f"  [yellow]Suggestion: switch to {rec.upper()}[/yellow]")

    if verdict.get("reason"):
        console.print(f"  [dim]{verdict['reason']}[/dim]")

    # Cost estimate if present
    cost = result.get("est_cost_per_hour")
    if cost is not None:
        console.print(f"  [dim]Est. cost: ~${cost:.2f}/hr[/dim]")

    # Euler analysis if present
    euler = result.get("euler_analysis")
    if euler and euler.get("summary"):
        console.print()
        console.print("  [bold cyan]Euler Analysis[/bold cyan]")
        console.print(f"  {euler['summary']}")
        for rec in euler.get("recommendations", []):
            console.print(f"  [dim]• {rec}[/dim]")

    console.print()


def _extract_param_count(script: str) -> Optional[int]:
    """Try to extract param count from a Python script. Returns None if can't."""
    # For now, don't execute the script — just check common model names in filename
    basename = os.path.basename(script).lower()

    # Common model size patterns
    patterns = {
        "70b": int(70e9), "65b": int(65e9), "40b": int(40e9),
        "33b": int(33e9), "30b": int(30e9), "13b": int(13e9),
        "8b": int(8e9), "7b": int(7e9), "3b": int(3e9),
        "1.5b": int(1.5e9), "1b": int(1e9),
        "350m": int(350e6), "125m": int(125e6),
    }
    for pattern, count in patterns.items():
        if pattern in basename:
            return count

    return None


# Well-known model names → param count in billions
_MODEL_PARAMS = {
    "llama-3-70b": 70.0,
    "llama-3-8b": 8.03,
    "llama-2-70b": 70.0,
    "llama-2-13b": 13.0,
    "llama-2-7b": 7.0,
    "mistral-7b": 7.24,
    "mixtral-8x7b": 46.7,
    "gpt2": 0.124,
    "gpt2-medium": 0.355,
    "gpt2-large": 0.774,
    "gpt2-xl": 1.5,
    "bert-base": 0.110,
    "bert-large": 0.340,
    "t5-small": 0.060,
    "t5-base": 0.220,
    "t5-large": 0.770,
    "t5-xl": 3.0,
    "t5-xxl": 11.0,
    "falcon-7b": 7.0,
    "falcon-40b": 40.0,
    "phi-2": 2.78,
    "gemma-2b": 2.51,
    "gemma-7b": 8.54,
    "qwen-7b": 7.72,
    "qwen-14b": 14.2,
    "qwen-72b": 72.7,
    "deepseek-7b": 6.9,
    "deepseek-67b": 67.0,
    "vit-base": 0.086,
    "vit-large": 0.307,
    "whisper-small": 0.244,
    "whisper-medium": 0.769,
    "whisper-large": 1.55,
}


def _model_to_params(model: str) -> Optional[float]:
    """Look up model param count by name."""
    normalized = model.lower().strip()
    return _MODEL_PARAMS.get(normalized)
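
For reference, the two lookup helpers at the bottom of `cli.py` behave like this (an illustrative session, not part of the package):

```python
from alloc.cli import _extract_param_count, _model_to_params

_model_to_params("Mistral-7B")        # 7.24: names are lowercased and stripped
_extract_param_count("train_7b.py")   # 7000000000: "7b" size token in the filename
_extract_param_count("train.py")      # None: no size token, so the CLI exits with a hint
```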