PyPI - mlx-stack - Versions diffs - 0.1.0__py3-none-any.whl - Mend

mlx-stack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

mlx_stack/__init__.py +5 -0
mlx_stack/_version.py +24 -0
mlx_stack/cli/__init__.py +5 -0
mlx_stack/cli/bench.py +221 -0
mlx_stack/cli/config.py +166 -0
mlx_stack/cli/down.py +109 -0
mlx_stack/cli/init.py +180 -0
mlx_stack/cli/install.py +165 -0
mlx_stack/cli/logs.py +234 -0
mlx_stack/cli/main.py +187 -0
mlx_stack/cli/models.py +304 -0
mlx_stack/cli/profile.py +65 -0
mlx_stack/cli/pull.py +134 -0
mlx_stack/cli/recommend.py +397 -0
mlx_stack/cli/status.py +111 -0
mlx_stack/cli/up.py +163 -0
mlx_stack/cli/watch.py +252 -0
mlx_stack/core/__init__.py +1 -0
mlx_stack/core/benchmark.py +1182 -0
mlx_stack/core/catalog.py +560 -0
mlx_stack/core/config.py +471 -0
mlx_stack/core/deps.py +323 -0
mlx_stack/core/hardware.py +304 -0
mlx_stack/core/launchd.py +531 -0
mlx_stack/core/litellm_gen.py +188 -0
mlx_stack/core/log_rotation.py +231 -0
mlx_stack/core/log_viewer.py +386 -0
mlx_stack/core/models.py +639 -0
mlx_stack/core/paths.py +79 -0
mlx_stack/core/process.py +887 -0
mlx_stack/core/pull.py +815 -0
mlx_stack/core/scoring.py +611 -0
mlx_stack/core/stack_down.py +317 -0
mlx_stack/core/stack_init.py +524 -0
mlx_stack/core/stack_status.py +229 -0
mlx_stack/core/stack_up.py +856 -0
mlx_stack/core/watchdog.py +744 -0
mlx_stack/data/__init__.py +1 -0
mlx_stack/data/catalog/__init__.py +1 -0
mlx_stack/data/catalog/deepseek-r1-32b.yaml +46 -0
mlx_stack/data/catalog/deepseek-r1-8b.yaml +45 -0
mlx_stack/data/catalog/gemma3-12b.yaml +45 -0
mlx_stack/data/catalog/gemma3-27b.yaml +45 -0
mlx_stack/data/catalog/gemma3-4b.yaml +45 -0
mlx_stack/data/catalog/llama3.3-8b.yaml +44 -0
mlx_stack/data/catalog/nemotron-49b.yaml +41 -0
mlx_stack/data/catalog/nemotron-8b.yaml +44 -0
mlx_stack/data/catalog/qwen3-8b.yaml +45 -0
mlx_stack/data/catalog/qwen3.5-0.8b.yaml +45 -0
mlx_stack/data/catalog/qwen3.5-14b.yaml +46 -0
mlx_stack/data/catalog/qwen3.5-32b.yaml +45 -0
mlx_stack/data/catalog/qwen3.5-3b.yaml +44 -0
mlx_stack/data/catalog/qwen3.5-72b.yaml +42 -0
mlx_stack/data/catalog/qwen3.5-8b.yaml +45 -0
mlx_stack/py.typed +1 -0
mlx_stack/utils/__init__.py +1 -0
mlx_stack-0.1.0.dist-info/METADATA +397 -0
mlx_stack-0.1.0.dist-info/RECORD +61 -0
mlx_stack-0.1.0.dist-info/WHEEL +4 -0
mlx_stack-0.1.0.dist-info/entry_points.txt +2 -0
mlx_stack-0.1.0.dist-info/licenses/LICENSE +21 -0

mlx_stack/cli/main.py ADDED Viewed

@@ -0,0 +1,187 @@
+"""Main CLI entry point for mlx-stack.
+Provides the top-level Click command group with --help, --version,
+Rich-formatted output, and typo suggestions for unknown subcommands.
+"""
+from __future__ import annotations
+import difflib
+import click
+from rich.console import Console
+from rich.table import Table
+from rich.text import Text
+from mlx_stack import __version__
+from mlx_stack.cli.bench import bench as bench_command
+from mlx_stack.cli.config import config as config_group
+from mlx_stack.cli.down import down as down_command
+from mlx_stack.cli.init import init as init_command
+from mlx_stack.cli.install import install as install_command
+from mlx_stack.cli.install import uninstall as uninstall_command
+from mlx_stack.cli.logs import logs as logs_command
+from mlx_stack.cli.models import models as models_command
+from mlx_stack.cli.profile import profile as profile_command
+from mlx_stack.cli.pull import pull as pull_command
+from mlx_stack.cli.recommend import recommend as recommend_command
+from mlx_stack.cli.status import status as status_command
+from mlx_stack.cli.up import up as up_command
+from mlx_stack.cli.watch import watch as watch_command
+console = Console(stderr=True)  # stderr console; resolve_command prints its error text here
+class RichGroup(click.Group):
+    """Custom Click Group with Rich-formatted help and typo suggestions."""
+    def format_help(self, ctx: click.Context, formatter: click.HelpFormatter) -> None:
+        """Format help text using Rich tables."""
+        console_out = Console()
+        # Title
+        console_out.print()
+        title = Text("mlx-stack", style="bold cyan")
+        title.append(" — CLI control plane for local LLM infrastructure on Apple Silicon")
+        console_out.print(title)
+        console_out.print()
+        # Usage
+        usage = Text("Usage: ", style="bold") + Text("mlx-stack [OPTIONS] COMMAND [ARGS]...")
+        console_out.print(usage)
+        console_out.print()
+        # Options
+        console_out.print(Text("Options:", style="bold yellow"))
+        options_table = Table(show_header=False, box=None, padding=(0, 2))
+        options_table.add_column(style="green", min_width=20)
+        options_table.add_column()
+        options_table.add_row("--version", "Show version and exit.")
+        options_table.add_row("--help", "Show this message and exit.")
+        console_out.print(options_table)
+        console_out.print()
+        # Commands grouped by category
+        commands = self.list_commands(ctx)
+        if commands:
+            # Group commands by category
+            categories: dict[str, list[tuple[str, str]]] = {
+                "Setup & Configuration": [],
+                "Model Management": [],
+                "Stack Lifecycle": [],
+                "Diagnostics": [],
+            }
+            command_categories = {
+                "profile": "Setup & Configuration",
+                "config": "Setup & Configuration",
+                "init": "Setup & Configuration",
+                "recommend": "Model Management",
+                "models": "Model Management",
+                "pull": "Model Management",
+                "up": "Stack Lifecycle",
+                "down": "Stack Lifecycle",
+                "status": "Stack Lifecycle",
+                "watch": "Stack Lifecycle",
+                "install": "Stack Lifecycle",
+                "uninstall": "Stack Lifecycle",
+                "bench": "Diagnostics",
+                "logs": "Diagnostics",
+            }
+            for cmd_name in commands:
+                cmd = self.get_command(ctx, cmd_name)
+                if cmd is None:
+                    continue
+                help_text = cmd.get_short_help_str(limit=80)
+                category = command_categories.get(cmd_name, "Other")
+                if category in categories:
+                    categories[category].append((cmd_name, help_text))
+                else:
+                    categories.setdefault("Other", []).append((cmd_name, help_text))
+            for category_name, cmds in categories.items():
+                if not cmds:
+                    continue
+                console_out.print(Text(f"{category_name}:", style="bold yellow"))
+                cmd_table = Table(show_header=False, box=None, padding=(0, 2))
+                cmd_table.add_column(style="green", min_width=20)
+                cmd_table.add_column()
+                for cmd_name, help_text in cmds:
+                    cmd_table.add_row(cmd_name, help_text)
+                console_out.print(cmd_table)
+                console_out.print()
+    def resolve_command(
+        self, ctx: click.Context, args: list[str]
+    ) -> tuple[str | None, click.Command | None, list[str]]:
+        """Override resolve_command to provide typo suggestions."""
+        try:
+            return super().resolve_command(ctx, args)
+        except click.UsageError:
+            # Get the attempted command name
+            if args:
+                cmd_name = args[0]
+                available = self.list_commands(ctx)
+                matches = difflib.get_close_matches(cmd_name, available, n=3, cutoff=0.5)
+                error_msg = f"Error: No such command '{cmd_name}'."
+                if matches:
+                    suggestions = ", ".join(f"'{m}'" for m in matches)
+                    error_msg += f"\n\nDid you mean one of these?\n    {suggestions}"
+                error_msg += "\n\nRun 'mlx-stack --help' for a list of available commands."
+                console.print(f"[red]{error_msg}[/red]")
+                ctx.exit(2)
+                raise SystemExit(2)  # noqa: B904 — we want to exit, not chain
+            raise
+def version_callback(ctx: click.Context, _param: click.Parameter, value: bool) -> None:
+    """Print version and exit."""
+    if not value or ctx.resilient_parsing:
+        return
+    click.echo(f"mlx-stack, version {__version__}")
+    ctx.exit(0)
+@click.group(cls=RichGroup, invoke_without_command=True)
+@click.option(
+    "--version",
+    is_flag=True,
+    callback=version_callback,
+    expose_value=False,
+    is_eager=True,
+    help="Show version and exit.",
+)
+@click.pass_context
+def cli(ctx: click.Context) -> None:
+    """CLI control plane for local LLM infrastructure on Apple Silicon."""
+    if ctx.invoked_subcommand is None:
+        click.echo(ctx.get_help())
+# --- Subcommand registration ---
+# Each command imported at the top of this module is attached to the root group under its CLI name.
+cli.add_command(profile_command, "profile")
+cli.add_command(recommend_command, "recommend")
+cli.add_command(init_command, "init")
+cli.add_command(pull_command, "pull")
+cli.add_command(models_command, "models")
+cli.add_command(up_command, "up")
+cli.add_command(down_command, "down")
+cli.add_command(status_command, "status")
+cli.add_command(watch_command, "watch")
+cli.add_command(install_command, "install")
+cli.add_command(uninstall_command, "uninstall")
+cli.add_command(bench_command, "bench")
+cli.add_command(logs_command, "logs")
+cli.add_command(config_group, "config")

mlx_stack/cli/models.py ADDED Viewed

@@ -0,0 +1,304 @@
+"""CLI command for model listing — `mlx-stack models`.
+Lists locally downloaded models with disk size, quantization, and source type.
+Active stack models are marked with a visual indicator. The --catalog flag
+shows all 15 catalog models with hardware-specific benchmark data.
+Output is formatted as a Rich table with human-readable names.
+"""
+from __future__ import annotations
+import click
+from rich.console import Console
+from rich.table import Table
+from rich.text import Text
+from mlx_stack.core.catalog import (
+    CatalogError,
+    load_catalog,
+    query_by_capability,
+    query_by_family,
+    query_by_tag,
+)
+from mlx_stack.core.hardware import load_profile
+from mlx_stack.core.models import (
+    ModelsError,
+    format_size,
+    get_models_directory,
+    get_remote_stack_models,
+    list_catalog_models,
+    scan_local_models,
+)
+console = Console(stderr=True)  # stderr console; this module prints its error messages here
+# --------------------------------------------------------------------------- #
+# Local models display
+# --------------------------------------------------------------------------- #
+def _display_local_models() -> None:
+    """Display locally downloaded models in a Rich table."""
+    out = Console()
+    models_dir = get_models_directory()
+    try:
+        catalog = load_catalog()
+    except CatalogError:
+        catalog = []
+    local_models = scan_local_models(models_dir=models_dir, catalog=catalog)
+    remote_models = get_remote_stack_models(local_models=local_models, catalog=catalog)
+    if not local_models and not remote_models:
+        out.print()
+        out.print(
+            "[yellow]No models found.[/yellow] "
+            "Run [bold]mlx-stack pull[/bold] to download a model, "
+            "or [bold]mlx-stack init[/bold] to set up a stack."
+        )
+        out.print()
+        return
+    out.print()
+    out.print(Text("Local Models", style="bold cyan"))
+    out.print()
+    if local_models:
+        table = Table(show_header=True, header_style="bold cyan")
+        table.add_column("", min_width=2)  # Active indicator
+        table.add_column("Model", min_width=20)
+        table.add_column("Size", justify="right", min_width=8)
+        table.add_column("Quant", min_width=6)
+        table.add_column("Source", min_width=14)
+        for model in local_models:
+            # Active indicator
+            indicator = "✓" if model.is_active else ""
+            indicator_style = "bold green" if model.is_active else ""
+            # Display name: prefer catalog name, fall back to directory name
+            display_name = model.catalog_name if model.catalog_name else model.name
+            # Size
+            size_str = format_size(model.disk_size_bytes)
+            table.add_row(
+                Text(indicator, style=indicator_style),
+                display_name,
+                size_str,
+                model.quant,
+                model.source_type,
+            )
+        out.print(table)
+    else:
+        out.print("[dim]No local models downloaded yet.[/dim]")
+    # Show remote-only stack models
+    if remote_models:
+        out.print()
+        out.print(Text("Stack Models (not downloaded)", style="bold yellow"))
+        out.print()
+        remote_table = Table(show_header=True, header_style="bold yellow")
+        remote_table.add_column("", min_width=2)
+        remote_table.add_column("Model", min_width=20)
+        remote_table.add_column("Tier", min_width=10)
+        remote_table.add_column("Quant", min_width=6)
+        remote_table.add_column("Source", min_width=10)
+        remote_table.add_column("Est. Size", justify="right", min_width=10)
+        for rm in remote_models:
+            est_size = f"{rm['est_size_gb']:.1f} GB" if rm.get("est_size_gb") else "—"
+            remote_table.add_row(
+                Text("✓", style="bold green"),
+                rm["catalog_name"],
+                rm["tier"],
+                rm["quant"],
+                "remote",
+                est_size,
+            )
+        out.print(remote_table)
+    out.print()
+    out.print(f"[dim]Models directory: {models_dir}[/dim]")
+    if any(m.is_active for m in local_models) or remote_models:
+        out.print("[dim]✓ = active in current stack[/dim]")
+    out.print()
+# --------------------------------------------------------------------------- #
+# Catalog display
+# --------------------------------------------------------------------------- #
+def _display_catalog(
+    family: str | None = None,
+    tag: str | None = None,
+    tool_calling: bool = False,
+) -> None:
+    """Display the full model catalog with hardware-specific benchmark data.
+    Args:
+        family: Optional family name filter (case-insensitive).
+        tag: Optional tag filter (case-insensitive).
+        tool_calling: If True, filter to tool-calling-capable models only.
+    """
+    out = Console()
+    try:
+        catalog = load_catalog()
+    except CatalogError as exc:
+        console.print(f"[bold red]Error:[/bold red] Could not load catalog: {exc}")
+        raise SystemExit(1) from None
+    # Apply filters
+    filtered = catalog
+    if family:
+        filtered = query_by_family(filtered, family)
+    if tag:
+        filtered = query_by_tag(filtered, tag)
+    if tool_calling:
+        filtered = query_by_capability(filtered, tool_calling=True)
+    if not filtered:
+        out.print()
+        filter_parts: list[str] = []
+        if family:
+            filter_parts.append(f"family={family}")
+        if tag:
+            filter_parts.append(f"tag={tag}")
+        if tool_calling:
+            filter_parts.append("tool-calling")
+        filter_desc = ", ".join(filter_parts) if filter_parts else "filters"
+        out.print(
+            f"[yellow]No models match the given filters ({filter_desc}).[/yellow] "
+            "Run [bold]mlx-stack models --catalog[/bold] to see all models."
+        )
+        out.print()
+        return
+    profile = load_profile()
+    local_models = scan_local_models(catalog=catalog)
+    catalog_models = list_catalog_models(
+        catalog=filtered, profile=profile, local_models=local_models
+    )
+    out.print()
+    out.print(Text("Model Catalog", style="bold cyan"))
+    if profile:
+        out.print(f"[dim]Hardware: {profile.chip} ({profile.memory_gb} GB)[/dim]")
+    else:
+        out.print(
+            "[dim]No hardware profile — run 'mlx-stack profile' for hardware-specific data[/dim]"
+        )
+    out.print()
+    table = Table(show_header=True, header_style="bold cyan")
+    table.add_column("", width=1)  # Local indicator
+    table.add_column("Name", min_width=14, no_wrap=True)
+    table.add_column("Family", min_width=8)
+    table.add_column("Params", justify="right", min_width=5)
+    table.add_column("Quants", min_width=10)
+    if profile:
+        table.add_column("Gen t/s", justify="right", min_width=7)
+        table.add_column("Mem GB", justify="right", min_width=6)
+    for cm in catalog_models:
+        # Local indicator
+        local_indicator = "●" if cm.is_local else ""
+        local_style = "bold green" if cm.is_local else ""
+        # Parameters
+        params_str = f"{cm.params_b:.1f}B" if cm.params_b >= 1.0 else f"{cm.params_b:.1f}B"
+        # Quantizations
+        quants_str = ", ".join(cm.quants)
+        row: list[str | Text] = [
+            Text(local_indicator, style=local_style),
+            cm.name,
+            cm.family,
+            params_str,
+            quants_str,
+        ]
+        if profile:
+            # Gen t/s
+            if cm.gen_tps is not None:
+                tps_str = f"{cm.gen_tps:.0f}"
+                if cm.is_estimated:
+                    tps_str += "~"
+            else:
+                tps_str = "—"
+            # Memory
+            if cm.memory_gb is not None:
+                mem_str = f"{cm.memory_gb:.1f}"
+                if cm.is_estimated:
+                    mem_str += "~"
+            else:
+                mem_str = "—"
+            row.extend([tps_str, mem_str])
+        table.add_row(*row)
+    out.print(table)
+    out.print()
+    if profile and any(cm.is_estimated for cm in catalog_models):
+        out.print("[dim]~ = estimated values (run 'mlx-stack bench --save' to calibrate)[/dim]")
+    out.print("[dim]● = available locally[/dim]")
+    out.print()
+# --------------------------------------------------------------------------- #
+# Click command
+# --------------------------------------------------------------------------- #
+@click.command()
+@click.option("--catalog", is_flag=True, help="Show full catalog with benchmark data.")
+@click.option("--family", default=None, help="Filter catalog by model family (e.g., 'qwen3.5').")
+@click.option("--tag", default=None, help="Filter catalog by tag (e.g., 'agent-ready').")
+@click.option(
+    "--tool-calling", "tool_calling", is_flag=True,
+    help="Filter catalog to tool-calling-capable models only.",
+)
+def models(
+    catalog: bool,
+    family: str | None,
+    tag: str | None,
+    tool_calling: bool,
+) -> None:
+    """List local models or browse the catalog.
+    Without flags, shows locally downloaded models with disk size,
+    quantization, and source type. Active stack models are marked
+    with a visual indicator.
+    Use --catalog to display all 15 catalog models with hardware-specific
+    benchmark data (gen_tps, memory) for your detected hardware profile.
+    Filter flags (--family, --tag, --tool-calling) require --catalog.
+    """
+    try:
+        # If filter flags are used without --catalog, enable catalog mode
+        if (family or tag or tool_calling) and not catalog:
+            catalog = True
+        if catalog:
+            _display_catalog(family=family, tag=tag, tool_calling=tool_calling)
+        else:
+            _display_local_models()
+    except ModelsError as exc:
+        console.print(f"[bold red]Error:[/bold red] {exc}")
+        raise SystemExit(1) from None

mlx_stack/cli/profile.py ADDED Viewed

@@ -0,0 +1,65 @@
+"""CLI command for hardware detection — `mlx-stack profile`.
+Detects Apple Silicon hardware, displays results as a Rich table,
+and writes the profile to ~/.mlx-stack/profile.json.
+"""
+from __future__ import annotations
+import click
+from rich.console import Console
+from rich.table import Table
+from mlx_stack.core.hardware import HardwareError, detect_hardware, save_profile
+console = Console(stderr=True)  # stderr console; the profile command prints its errors here
+@click.command()
+def profile() -> None:
+    """Detect Apple Silicon hardware and write profile."""
+    try:
+        hw = detect_hardware()
+    except HardwareError as exc:
+        console.print(f"[bold red]Error:[/bold red] {exc}")
+        raise SystemExit(1) from None
+    # Save profile to disk
+    try:
+        save_profile(hw)
+    except OSError as exc:
+        console.print(f"[bold red]Error:[/bold red] Could not write profile: {exc}")
+        raise SystemExit(1) from None
+    # Display results as a Rich table
+    out = Console()
+    table = Table(title="Hardware Profile", show_header=True, header_style="bold cyan")
+    table.add_column("Property", style="bold")
+    table.add_column("Value")
+    table.add_row("Chip", hw.chip)
+    table.add_row("GPU Cores", str(hw.gpu_cores))
+    table.add_row("Unified Memory", f"{hw.memory_gb} GB")
+    bandwidth_str = f"{hw.bandwidth_gbps} GB/s"
+    if hw.is_estimate:
+        bandwidth_str += " (estimate)"
+    table.add_row("Memory Bandwidth", bandwidth_str)
+    table.add_row("Profile ID", hw.profile_id)
+    out.print()
+    out.print(table)
+    if hw.is_estimate:
+        out.print()
+        out.print(
+            "[yellow]⚠ Bandwidth is estimated for unknown chip.[/yellow]"
+        )
+        out.print(
+            "  Run [bold]mlx-stack bench --save[/bold] to calibrate with real measurements."
+        )
+    out.print()
+    from mlx_stack.core.paths import get_profile_path
+    out.print(f"[dim]Profile saved to {get_profile_path()}[/dim]")

mlx_stack/cli/pull.py ADDED Viewed

@@ -0,0 +1,134 @@
+"""CLI command for model download — `mlx-stack pull`.
+Downloads models from the catalog with source resolution, disk space
+checking, progress display, duplicate detection, inventory tracking,
+and optional post-download benchmark.
+Supports --quant for quantization override, --bench for post-download
+smoke test, and --force for re-downloading existing models.
+"""
+from __future__ import annotations
+import click
+from rich.console import Console
+from mlx_stack.core.catalog import CatalogError
+from mlx_stack.core.pull import (
+    ConversionError,
+    DiskSpaceError,
+    DownloadError,
+    InvalidModelError,
+    PullError,
+    pull_model,
+)
+console = Console(stderr=True)  # stderr console; the pull command prints its errors here
+@click.command()
+@click.argument("model", required=True)
+@click.option(
+    "--quant",
+    type=str,
+    default=None,
+    help="Quantization level (int4, int8, bf16). Default from config.",
+)
+@click.option(
+    "--bench",
+    is_flag=True,
+    default=False,
+    help="Run a quick benchmark after download.",
+)
+@click.option(
+    "--force",
+    is_flag=True,
+    default=False,
+    help="Re-download even if model already exists.",
+)
+def pull(model: str, quant: str | None, bench: bool, force: bool) -> None:
+    """Download a model from the catalog.
+    MODEL is the catalog model ID (e.g., qwen3.5-8b). Use 'mlx-stack models --catalog'
+    to see available models.
+    Without --quant, uses the default quantization from config (default: int4).
+    Invalid quantization values are rejected with a clear error.
+    Downloads are checked against available disk space before starting.
+    Already-downloaded models are detected and skipped unless --force is used.
+    With --bench, runs a quick benchmark after download completes. This
+    auto-installs vllm-mlx if needed.
+    """
+    out = Console()
+    try:
+        result = pull_model(
+            model_id=model,
+            quant=quant,
+            force=force,
+            console=out,
+        )
+        if bench:
+            _run_post_download_bench(model, result.quant, out)
+    except InvalidModelError as exc:
+        console.print(f"[bold red]Error:[/bold red] {exc}")
+        raise SystemExit(1) from None
+    except DiskSpaceError as exc:
+        console.print(f"[bold red]Error:[/bold red] {exc}")
+        raise SystemExit(1) from None
+    except DownloadError as exc:
+        console.print(f"[bold red]Download error:[/bold red] {exc}")
+        raise SystemExit(1) from None
+    except ConversionError as exc:
+        console.print(f"[bold red]Conversion error:[/bold red] {exc}")
+        raise SystemExit(1) from None
+    except PullError as exc:
+        console.print(f"[bold red]Error:[/bold red] {exc}")
+        raise SystemExit(1) from None
+    except CatalogError as exc:
+        console.print(f"[bold red]Catalog error:[/bold red] {exc}")
+        raise SystemExit(1) from None
+def _run_post_download_bench(model_id: str, quant: str, out: Console) -> None:
+    """Run a quick benchmark after downloading a model.
+    Auto-installs vllm-mlx if needed.
+    Args:
+        model_id: The model ID that was pulled.
+        quant: The quantization level.
+        out: Rich console for output.
+    """
+    out.print()
+    out.print("[bold cyan]Running post-download benchmark...[/bold cyan]")
+    try:
+        from mlx_stack.core.benchmark import BenchmarkError, run_benchmark
+        result = run_benchmark(target=model_id, save=True)
+        out.print(
+            f"  Prompt TPS: {result.prompt_tps_mean:.1f} ± {result.prompt_tps_std:.1f} tok/s"
+        )
+        out.print(
+            f"  Gen TPS:    {result.gen_tps_mean:.1f} ± {result.gen_tps_std:.1f} tok/s"
+        )
+        out.print()
+        out.print(
+            "[dim]Results saved for use by 'recommend' and 'init' scoring.[/dim]"
+        )
+    except BenchmarkError as exc:
+        out.print(
+            f"[yellow]Benchmark failed: {exc}[/yellow]\n"
+            f"Run 'mlx-stack bench {model_id}' to retry."
+        )
+    except Exception as exc:
+        out.print(
+            f"[yellow]Could not run benchmark: {exc}[/yellow]\n"
+            "Skipping benchmark. Install vllm-mlx manually and run "
+            f"'mlx-stack bench {model_id}'."
+        )