PyPI - mlx-stack - Versions diffs - 0.1.0__py3-none-any.whl - Mend

mlx-stack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

mlx_stack/__init__.py +5 -0
mlx_stack/_version.py +24 -0
mlx_stack/cli/__init__.py +5 -0
mlx_stack/cli/bench.py +221 -0
mlx_stack/cli/config.py +166 -0
mlx_stack/cli/down.py +109 -0
mlx_stack/cli/init.py +180 -0
mlx_stack/cli/install.py +165 -0
mlx_stack/cli/logs.py +234 -0
mlx_stack/cli/main.py +187 -0
mlx_stack/cli/models.py +304 -0
mlx_stack/cli/profile.py +65 -0
mlx_stack/cli/pull.py +134 -0
mlx_stack/cli/recommend.py +397 -0
mlx_stack/cli/status.py +111 -0
mlx_stack/cli/up.py +163 -0
mlx_stack/cli/watch.py +252 -0
mlx_stack/core/__init__.py +1 -0
mlx_stack/core/benchmark.py +1182 -0
mlx_stack/core/catalog.py +560 -0
mlx_stack/core/config.py +471 -0
mlx_stack/core/deps.py +323 -0
mlx_stack/core/hardware.py +304 -0
mlx_stack/core/launchd.py +531 -0
mlx_stack/core/litellm_gen.py +188 -0
mlx_stack/core/log_rotation.py +231 -0
mlx_stack/core/log_viewer.py +386 -0
mlx_stack/core/models.py +639 -0
mlx_stack/core/paths.py +79 -0
mlx_stack/core/process.py +887 -0
mlx_stack/core/pull.py +815 -0
mlx_stack/core/scoring.py +611 -0
mlx_stack/core/stack_down.py +317 -0
mlx_stack/core/stack_init.py +524 -0
mlx_stack/core/stack_status.py +229 -0
mlx_stack/core/stack_up.py +856 -0
mlx_stack/core/watchdog.py +744 -0
mlx_stack/data/__init__.py +1 -0
mlx_stack/data/catalog/__init__.py +1 -0
mlx_stack/data/catalog/deepseek-r1-32b.yaml +46 -0
mlx_stack/data/catalog/deepseek-r1-8b.yaml +45 -0
mlx_stack/data/catalog/gemma3-12b.yaml +45 -0
mlx_stack/data/catalog/gemma3-27b.yaml +45 -0
mlx_stack/data/catalog/gemma3-4b.yaml +45 -0
mlx_stack/data/catalog/llama3.3-8b.yaml +44 -0
mlx_stack/data/catalog/nemotron-49b.yaml +41 -0
mlx_stack/data/catalog/nemotron-8b.yaml +44 -0
mlx_stack/data/catalog/qwen3-8b.yaml +45 -0
mlx_stack/data/catalog/qwen3.5-0.8b.yaml +45 -0
mlx_stack/data/catalog/qwen3.5-14b.yaml +46 -0
mlx_stack/data/catalog/qwen3.5-32b.yaml +45 -0
mlx_stack/data/catalog/qwen3.5-3b.yaml +44 -0
mlx_stack/data/catalog/qwen3.5-72b.yaml +42 -0
mlx_stack/data/catalog/qwen3.5-8b.yaml +45 -0
mlx_stack/py.typed +1 -0
mlx_stack/utils/__init__.py +1 -0
mlx_stack-0.1.0.dist-info/METADATA +397 -0
mlx_stack-0.1.0.dist-info/RECORD +61 -0
mlx_stack-0.1.0.dist-info/WHEEL +4 -0
mlx_stack-0.1.0.dist-info/entry_points.txt +2 -0
mlx_stack-0.1.0.dist-info/licenses/LICENSE +21 -0

mlx_stack/cli/recommend.py ADDED Viewed

@@ -0,0 +1,397 @@
+"""CLI command for model recommendation — `mlx-stack recommend`.
+Recommends an optimal model stack based on hardware profile and user intent.
+Reads existing profile or auto-detects hardware. Display-only — no files written.
+Supports --budget, --intent (balanced/agent-fleet), and --show-all flags.
+"""
+from __future__ import annotations
+import json
+import re
+from typing import Any
+import click
+from rich.console import Console
+from rich.table import Table
+from rich.text import Text
+from mlx_stack.core.catalog import load_catalog
+from mlx_stack.core.config import ConfigCorruptError, get_value
+from mlx_stack.core.hardware import (
+    HardwareError,
+    HardwareProfile,
+    detect_hardware,
+    load_profile,
+)
+from mlx_stack.core.paths import get_benchmarks_dir
+from mlx_stack.core.scoring import (
+    VALID_INTENTS,
+    RecommendationResult,
+    ScoringError,
+)
+from mlx_stack.core.scoring import (
+    recommend as run_recommend,
+)
+# Diagnostics console bound to stderr; the display helpers below create their
+# own stdout Console for the actual recommendation output.
+console = Console(stderr=True)
+# --------------------------------------------------------------------------- #
+# Budget parsing
+# --------------------------------------------------------------------------- #
+_BUDGET_PATTERN = re.compile(r"^(\d+(?:\.\d+)?)\s*(gb|GB|Gb|gB)?$")
+def parse_budget(raw: str) -> float:
+    """Convert a CLI budget string into a number of gigabytes.
+    Accepts an unsigned decimal number optionally followed by a 'gb' suffix in
+    any letter case, e.g. '30gb', '30 GB' or '16'. Surrounding whitespace is
+    ignored; a value of zero is rejected.
+    Args:
+        raw: Budget text exactly as supplied on the command line.
+    Returns:
+        The budget in GB.
+    Raises:
+        click.BadParameter: When the text does not match the accepted format
+            or the parsed value is not strictly positive.
+    """
+    parsed = _BUDGET_PATTERN.match(raw.strip())
+    if parsed is None:
+        raise click.BadParameter(
+            f"Invalid budget format '{raw}'. "
+            f"Expected a positive number with optional 'gb' suffix (e.g., '30gb', '16').",
+            param_hint="'--budget'",
+        )
+    gigabytes = float(parsed.group(1))
+    if gigabytes > 0:
+        return gigabytes
+    # The pattern admits "0" and "0.0", which are not usable budgets.
+    raise click.BadParameter(
+        f"Invalid budget '{raw}'. Budget must be a positive value.",
+        param_hint="'--budget'",
+    )
+# --------------------------------------------------------------------------- #
+# Hardware profile resolution
+# --------------------------------------------------------------------------- #
+def _resolve_profile() -> HardwareProfile:
+    """Return the saved hardware profile, detecting hardware when none exists.
+    Returns:
+        The profile from load_profile() if it is not None, otherwise the
+        result of detect_hardware().
+    Raises:
+        SystemExit: With exit code 1 when detect_hardware() raises HardwareError.
+    """
+    saved = load_profile()
+    if saved is None:
+        # The detected profile is only returned, never persisted here:
+        # recommend is a display-only command.
+        console.print("[dim]No saved profile found — detecting hardware...[/dim]")
+        try:
+            return detect_hardware()
+        except HardwareError as exc:
+            console.print(f"[bold red]Error:[/bold red] {exc}")
+            raise SystemExit(1) from None
+    return saved
+# --------------------------------------------------------------------------- #
+# Saved benchmarks loading
+# --------------------------------------------------------------------------- #
+def _load_saved_benchmarks(profile_id: str) -> dict[str, Any] | None:
+    """Load saved benchmark data for the given profile, if available.
+    Reads <benchmarks dir>/<profile_id>.json, where the directory comes from
+    get_benchmarks_dir(). A file that cannot be read, decoded as UTF-8 or
+    parsed as JSON produces a warning on the diagnostics console rather than
+    an exception, so the caller can fall back to catalog data.
+    Args:
+        profile_id: The hardware profile ID, used as the file stem.
+    Returns:
+        The decoded JSON object when the file holds a dict; None when the file
+        is missing, unreadable, not valid UTF-8/JSON, or not a JSON object.
+    """
+    benchmark_file = get_benchmarks_dir() / f"{profile_id}.json"
+    if not benchmark_file.exists():
+        return None
+    try:
+        data = json.loads(benchmark_file.read_text(encoding="utf-8"))
+    except (ValueError, OSError):
+        # ValueError covers json.JSONDecodeError as well as the
+        # UnicodeDecodeError raised by read_text() for non-UTF-8 bytes, which
+        # previously escaped and aborted the command.
+        console.print(
+            f"[yellow]⚠ Warning:[/yellow] Could not parse saved benchmarks "
+            f"at {benchmark_file}. Falling back to catalog data."
+        )
+        return None
+    # A top-level value that is not a JSON object is ignored without a warning.
+    return data if isinstance(data, dict) else None
+# --------------------------------------------------------------------------- #
+# Display helpers
+# --------------------------------------------------------------------------- #
+def _format_tps(tps: float, is_estimated: bool) -> str:
+    """Render a tokens-per-second figure with one decimal place.
+    Args:
+        tps: Throughput value to render.
+        is_estimated: When true, ' (est.)' is appended to the rendered value.
+    Returns:
+        Text such as '42.0 tok/s' or '42.0 tok/s (est.)'.
+    """
+    suffix = " (est.)" if is_estimated else ""
+    return f"{tps:.1f} tok/s{suffix}"
+def _format_memory(memory_gb: float) -> str:
+    """Render a memory size in GB with one decimal place, e.g. '7.5 GB'."""
+    return format(memory_gb, ".1f") + " GB"
+def _print_result_header(out: Console, heading: str, result: RecommendationResult) -> None:
+    """Print the section heading (with intent) and the hardware/budget line."""
+    out.print()
+    title = Text(heading, style="bold cyan")
+    title.append(f"  ({result.intent})")
+    out.print(title)
+    out.print(
+        f"[dim]Hardware: {result.hardware_profile.chip} "
+        f"({result.hardware_profile.memory_gb} GB) · "
+        f"Budget: {result.memory_budget_gb:.1f} GB[/dim]"
+    )
+    out.print()
+def _print_cloud_fallback(out: Console, detail: str) -> None:
+    """Print the cloud-fallback note when an OpenRouter key is configured."""
+    try:
+        openrouter_key = get_value("openrouter-key")
+    except Exception:
+        # Best-effort: the note is optional, so any failure reading the config
+        # (ConfigCorruptError included) simply hides it.
+        openrouter_key = ""
+    if openrouter_key:
+        out.print()
+        out.print(f"[bold green]☁ Cloud Fallback[/bold green]  {detail}")
+def _print_estimate_warning(out: Console, has_estimates: bool) -> None:
+    """Print the 'values are estimated' hint when any shown value is estimated."""
+    if not has_estimates:
+        return
+    out.print()
+    out.print(
+        "[yellow]⚠ Some performance values are estimated from bandwidth ratio.[/yellow]"
+    )
+    out.print(
+        "  Run [bold]mlx-stack bench --save[/bold] to calibrate with real measurements."
+    )
+def _display_tier_table(result: RecommendationResult) -> None:
+    """Display the recommended tiers as a Rich table on stdout.
+    Prints a header, one row per entry of result.tiers (tier, model name,
+    quantization, generation speed, memory), the optional cloud-fallback and
+    estimate notes, and a reminder that nothing was written to disk.
+    Args:
+        result: The recommendation to render.
+    """
+    out = Console()
+    _print_result_header(out, "Recommended Stack", result)
+    table = Table(show_header=True, header_style="bold cyan")
+    table.add_column("Tier", style="bold", min_width=10)
+    table.add_column("Model", min_width=20)
+    table.add_column("Quant", min_width=6)
+    table.add_column("Gen TPS", justify="right", min_width=15)
+    table.add_column("Memory", justify="right", min_width=10)
+    for tier_assign in result.tiers:
+        table.add_row(
+            tier_assign.tier,
+            tier_assign.model.entry.name,
+            tier_assign.quant,
+            _format_tps(tier_assign.model.gen_tps, tier_assign.model.is_estimated),
+            _format_memory(tier_assign.model.memory_gb),
+        )
+    out.print(table)
+    _print_cloud_fallback(out, "Premium tier via OpenRouter (GPT-4o / Claude Sonnet)")
+    _print_estimate_warning(out, any(t.model.is_estimated for t in result.tiers))
+    out.print()
+    out.print("[dim]This is a recommendation only — no files were written.[/dim]")
+    out.print("[dim]Run [bold]mlx-stack init[/bold] to generate stack configuration.[/dim]")
+def _display_all_models(result: RecommendationResult) -> None:
+    """Display every entry of result.all_scored as a numbered Rich table.
+    Rows are emitted in the order result.all_scored provides them, numbered
+    from 1, followed by a count line, the optional cloud-fallback and estimate
+    notes, and a reminder that nothing was written to disk.
+    Args:
+        result: The recommendation to render.
+    """
+    out = Console()
+    _print_result_header(out, "All Budget-Fitting Models", result)
+    table = Table(show_header=True, header_style="bold cyan")
+    table.add_column("#", justify="right", style="dim", min_width=3)
+    table.add_column("Model", min_width=20)
+    table.add_column("Family", min_width=10)
+    table.add_column("Params", justify="right", min_width=8)
+    table.add_column("Score", justify="right", min_width=8)
+    table.add_column("Gen TPS", justify="right", min_width=15)
+    table.add_column("Memory", justify="right", min_width=10)
+    for idx, scored in enumerate(result.all_scored, 1):
+        table.add_row(
+            str(idx),
+            scored.entry.name,
+            scored.entry.family,
+            f"{scored.entry.params_b:.1f}B",
+            f"{scored.composite_score:.3f}",
+            _format_tps(scored.gen_tps, scored.is_estimated),
+            _format_memory(scored.memory_gb),
+        )
+    out.print(table)
+    out.print()
+    count = len(result.all_scored)
+    budget = f"{result.memory_budget_gb:.1f}"
+    out.print(f"[dim]{count} models fit within the {budget} GB budget.[/dim]")
+    _print_cloud_fallback(out, "Premium tier via OpenRouter also available.")
+    _print_estimate_warning(out, any(m.is_estimated for m in result.all_scored))
+    out.print()
+    out.print("[dim]This is a recommendation only — no files were written.[/dim]")
+# --------------------------------------------------------------------------- #
+# Click command
+# --------------------------------------------------------------------------- #
+@click.command()
+@click.option(
+    "--budget",
+    type=str,
+    default=None,
+    help="Memory budget override (e.g., '30gb', '16'). Defaults to 40%% of unified memory.",
+)
+@click.option(
+    "--intent",
+    type=str,
+    default=None,
+    help="Recommendation intent: balanced (default) or agent-fleet.",
+)
+@click.option(
+    "--show-all",
+    is_flag=True,
+    default=False,
+    help="Show all budget-fitting models sorted by score instead of tier assignments.",
+)
+def recommend(budget: str | None, intent: str | None, show_all: bool) -> None:
+    """Recommend an optimal model stack for your hardware.
+    Analyzes your hardware profile and the model catalog to recommend
+    an optimal stack with tier assignments (standard, fast, longctx).
+    Uses 40% of unified memory as the default budget. Override with --budget.
+    Supports --intent to change optimization strategy (balanced or agent-fleet).
+    Use --show-all to see all budget-fitting models ranked by composite score.
+    This command is display-only — no configuration files are written.
+    """
+    # NOTE: the docstring above is the text click shows for --help, so it is
+    # user-facing output rather than internal documentation.
+    # Every failure path below prints to the stderr console and exits with 1.
+    # --- Validate intent ---
+    if intent is None:
+        intent = "balanced"
+    elif intent not in VALID_INTENTS:
+        valid = ", ".join(sorted(VALID_INTENTS))
+        console.print(
+            f"[bold red]Error:[/bold red] Invalid intent '{intent}'. "
+            f"Valid intents: {valid}"
+        )
+        raise SystemExit(1)
+    # --- Parse budget ---
+    budget_gb_override: float | None = None
+    if budget is not None:
+        try:
+            budget_gb_override = parse_budget(budget)
+        except click.BadParameter as exc:
+            # Reported through the shared console so the message matches the
+            # other errors instead of click's default usage output.
+            console.print(f"[bold red]Error:[/bold red] {exc.format_message()}")
+            raise SystemExit(1) from None
+    # --- Resolve hardware profile ---
+    profile = _resolve_profile()
+    # --- Read memory-budget-pct from config (used when no --budget override) ---
+    budget_pct = 40
+    if budget_gb_override is None:
+        try:
+            budget_pct = int(get_value("memory-budget-pct"))
+        except (ConfigCorruptError, TypeError, ValueError):
+            # TypeError guards against int() receiving None or another
+            # non-scalar value (presumably possible when the key is unset —
+            # TODO confirm against get_value); fall back to the default.
+            budget_pct = 40
+    # --- Load catalog ---
+    try:
+        catalog = load_catalog()
+    except Exception as exc:
+        console.print(f"[bold red]Error:[/bold red] Could not load model catalog: {exc}")
+        raise SystemExit(1) from None
+    # --- Load saved benchmarks ---
+    saved_benchmarks = _load_saved_benchmarks(profile.profile_id)
+    # --- Run recommendation ---
+    try:
+        result = run_recommend(
+            catalog=catalog,
+            profile=profile,
+            intent=intent,
+            budget_pct=budget_pct,
+            budget_gb_override=budget_gb_override,
+            saved_benchmarks=saved_benchmarks,
+        )
+    except ScoringError as exc:
+        console.print(f"[bold red]Error:[/bold red] {exc}")
+        raise SystemExit(1) from None
+    # --- Check for zero results ---
+    if not result.all_scored:
+        console.print(
+            f"[bold red]Error:[/bold red] No models fit within the "
+            f"{result.memory_budget_gb:.1f} GB budget."
+        )
+        console.print(
+            "[dim]Try increasing the budget with --budget or "
+            "adjusting memory-budget-pct in config.[/dim]"
+        )
+        raise SystemExit(1)
+    # --- Display results ---
+    if show_all:
+        _display_all_models(result)
+    else:
+        _display_tier_table(result)

mlx_stack/cli/status.py ADDED Viewed

@@ -0,0 +1,111 @@
+"""CLI command for service status — `mlx-stack status`.
+Displays the health and metrics for all managed services in a
+formatted Rich table or as JSON (with --json). Read-only: does not
+modify any files or acquire the lockfile.
+"""
+from __future__ import annotations
+import json
+import click
+from rich.console import Console
+from rich.table import Table
+from rich.text import Text
+from mlx_stack.core.stack_status import StatusResult, run_status, status_to_dict
+# Console bound to stderr (not referenced by the functions visible in this module).
+console = Console(stderr=True)
+# Status display styling — maps state to Rich markup
+# Looked up with .get(status, status), so a state missing from this table is
+# shown as its raw text.
+_STATUS_STYLES: dict[str, str] = {
+    "healthy": "[bold green]healthy[/bold green]",
+    "degraded": "[bold yellow]degraded[/bold yellow]",
+    "down": "[bold red]down[/bold red]",
+    "crashed": "[bold red]crashed[/bold red]",
+    "stopped": "[dim]stopped[/dim]",
+}
+def _display_table(result: StatusResult) -> None:
+    """Render every service in the result as one row of a Rich table on stdout.
+    Columns: Tier, Model, Port, Status, Uptime. The status cell uses the
+    markup from _STATUS_STYLES, or the raw status text for unknown states.
+    Args:
+        result: The StatusResult whose services are shown.
+    """
+    out = Console()
+    out.print()
+    table = Table(title="Service Status", show_header=True, header_style="bold cyan")
+    column_specs = (
+        ("Tier", {"style": "bold", "min_width": 12}),
+        ("Model", {"min_width": 20}),
+        ("Port", {"justify": "right", "min_width": 6}),
+        ("Status", {"min_width": 10}),
+        ("Uptime", {"justify": "right", "min_width": 10}),
+    )
+    for heading, options in column_specs:
+        table.add_column(heading, **options)
+    for svc in result.services:
+        styled_status = _STATUS_STYLES.get(svc.status, svc.status)
+        table.add_row(svc.tier, svc.model, str(svc.port), styled_status, svc.uptime_display)
+    out.print(table)
+    out.print()
+def _display_json(result: StatusResult) -> None:
+    """Write the result to stdout as JSON indented by two spaces.
+    Args:
+        result: The StatusResult to serialize via status_to_dict().
+    """
+    payload = json.dumps(status_to_dict(result), indent=2)
+    click.echo(payload)
+@click.command()
+@click.option("--json", "json_output", is_flag=True, help="Output in JSON format.")
+def status(json_output: bool) -> None:
+    """Show health and status of all services.
+    Reports the current state of each managed service: healthy, degraded,
+    down, crashed, or stopped. Displays a formatted table by default, or
+    valid JSON with --json.
+    This command is read-only and safe to run concurrently with other
+    mlx-stack commands.
+    """
+    result = run_status()
+    if json_output:
+        # JSON mode emits the serialized result whether or not a stack exists.
+        _display_json(result)
+        return
+    if not result.no_stack:
+        _display_table(result)
+        return
+    # No stack configured: show the result's message, or a default hint.
+    notice = Text(
+        result.message or "No stack configured — run 'mlx-stack init'.",
+        style="yellow",
+    )
+    out = Console()
+    out.print()
+    out.print(notice)
+    out.print()

mlx_stack/cli/up.py ADDED Viewed

@@ -0,0 +1,163 @@
+"""CLI command for starting services — `mlx-stack up`.
+Starts all services defined in the active stack, or a single tier
+with --tier. Supports --dry-run to preview commands without executing.
+"""
+from __future__ import annotations
+import click
+from rich.console import Console
+from rich.table import Table
+from rich.text import Text
+from mlx_stack.core.process import LockError
+from mlx_stack.core.stack_up import UpError, UpResult, run_up
+# Error console bound to stderr; the display helpers below print regular
+# output through their own stdout Console.
+console = Console(stderr=True)
+def _display_dry_run(result: UpResult) -> None:
+    """Display dry-run commands.
+    Prints, for each entry of result.dry_run_commands, a label and the command
+    string that would be executed, without running anything. Entries of type
+    "vllm-mlx" are labelled with their service name; every other entry is
+    labelled "litellm".
+    Args:
+        result: The UpResult from the dry-run; each command entry is read via
+            the "service", "command" and "type" keys.
+    """
+    # Local import keeps this block self-contained; rich is already a
+    # dependency of this module.
+    from rich.markup import escape
+    out = Console()
+    out.print()
+    out.print(Text("Dry run — commands that would be executed:", style="bold cyan"))
+    out.print()
+    for cmd_info in result.dry_run_commands:
+        # Escape interpolated text so square brackets in a service name or
+        # command are printed literally instead of being parsed as Rich markup
+        # (which can drop them or raise MarkupError).
+        service = escape(str(cmd_info["service"]))
+        command = escape(str(cmd_info["command"]))
+        svc_type = cmd_info["type"]
+        label = f"[bold]{service}[/bold]" if svc_type == "vllm-mlx" else "[bold]litellm[/bold]"
+        out.print(f"  {label}:")
+        out.print(f"    [green]{command}[/green]")
+        out.print()
+def _display_summary(result: UpResult) -> None:
+    """Display a summary table of service statuses.
+    Prints an "already running" notice when result.already_running is set,
+    then any warnings, then a table with one row per tier (name, model, port,
+    status) plus a LiteLLM row when result.litellm is present. When at least
+    one tier is healthy or already running, the endpoint URL is printed using
+    the LiteLLM port (4000 when there is no LiteLLM entry).
+    Args:
+        result: The UpResult from startup.
+    """
+    # Local import keeps this block self-contained; rich is already a
+    # dependency of this module.
+    from rich.markup import escape
+    out = Console()
+    out.print()
+    if result.already_running:
+        out.print(
+            Text("All services are already running.", style="bold yellow")
+        )
+        out.print()
+    # Warnings
+    # NOTE(review): warnings are still rendered as Rich markup; confirm whether
+    # the core layer emits markup in them before escaping here as well.
+    for warning in result.warnings:
+        out.print(f"[yellow]⚠ {warning}[/yellow]")
+    if result.warnings:
+        out.print()
+    # Summary table
+    table = Table(
+        title="Service Summary",
+        show_header=True,
+        header_style="bold cyan",
+    )
+    table.add_column("Service", style="bold", min_width=12)
+    table.add_column("Model", min_width=20)
+    table.add_column("Port", justify="right", min_width=6)
+    table.add_column("Status", min_width=10)
+    # Status styling; unknown states fall back to their raw text.
+    status_styles = {
+        "healthy": "[bold green]healthy[/bold green]",
+        "already-running": "[bold green]already-running[/bold green]",
+        "failed": "[bold red]failed[/bold red]",
+        "skipped": "[yellow]skipped[/yellow]",
+        "dry-run": "[cyan]dry-run[/cyan]",
+    }
+    # Tier rows first, then the LiteLLM row (if any), all rendered the same way.
+    rows = list(result.tiers)
+    if result.litellm:
+        rows.append(result.litellm)
+    for svc in rows:
+        status_display = status_styles.get(svc.status, svc.status)
+        if svc.error:
+            # Error text usually comes from exceptions and may contain square
+            # brackets (paths, errno tags); escape it so Rich prints it
+            # literally instead of parsing it as markup.
+            status_display += f"\n[dim]{escape(str(svc.error))}[/dim]"
+        table.add_row(
+            svc.name,
+            svc.model,
+            str(svc.port),
+            status_display,
+        )
+    out.print(table)
+    out.print()
+    # Next steps for healthy stacks
+    any_healthy = any(
+        t.status in ("healthy", "already-running") for t in result.tiers
+    )
+    if any_healthy:
+        litellm_port = result.litellm.port if result.litellm else 4000
+        out.print(
+            f"[dim]Endpoint: http://localhost:{litellm_port}/v1[/dim]"
+        )
+        out.print()
+@click.command()
+@click.option("--dry-run", is_flag=True, help="Show commands without executing.")
+@click.option("--tier", "tier_filter", type=str, help="Start only the specified tier.")
+def up(dry_run: bool, tier_filter: str | None) -> None:
+    """Start all services in the active stack.
+    Reads the stack definition from ~/.mlx-stack/stacks/default.yaml
+    and starts one vllm-mlx subprocess per tier plus a LiteLLM proxy.
+    Use --dry-run to preview the exact commands without starting anything.
+    Use --tier to start only a specific tier (plus LiteLLM if needed).
+    """
+    try:
+        result = run_up(dry_run=dry_run, tier_filter=tier_filter)
+    except (UpError, LockError) as exc:
+        # Both failure kinds are reported identically and exit with status 1.
+        console.print(f"[bold red]Error:[/bold red] {exc}")
+        raise SystemExit(1) from None
+    if result.dry_run:
+        # A dry run never produces a failing exit status.
+        _display_dry_run(result)
+        return
+    _display_summary(result)
+    # Exit with non-zero if no tier reached a successful state.
+    ok_states = ("healthy", "already-running", "dry-run")
+    if not any(tier.status in ok_states for tier in result.tiers):
+        raise SystemExit(1)