lyceum-cli 1.0.28__py3-none-any.whl → 1.0.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1074 @@
+ """GPU selection execution commands"""
+
+ import json
+ import time
+
+ import httpx
+ import typer
+ from rich.console import Console
+ from rich.panel import Panel
+ from rich.table import Table
+
+ from ....shared.config import config
+ from ....shared.streaming import StatusLine
+ from .python import (
+     inject_script_args,
+     load_workspace_config,
+     read_code_from_source,
+     resolve_import_files,
+     resolve_requirements,
+ )
+
+ console = Console()
+
+ gpu_selection_app = typer.Typer(name="gpu-selection", help="GPU selection and profiling commands")
+
+ POLL_INTERVAL = 2.0
+ MAX_POLL_TIME = 3600  # 1 hour - A100/H100 initialization can take up to 30 min
+
+ # Cache for GPU pricing
+ _pricing_cache: dict[str, float] | None = None
+
+ # Mapping from API profile names to display names
+ GPU_DISPLAY_NAMES = {
+     "gpu": "T4",
+     "gpu.t4": "T4",
+     "gpu.t4.64gb": "T4",
+     "gpu.a100": "A100",
+     "gpu.a100.40gb": "A100 (40GB)",
+     "gpu.a100.80gb": "A100 (80GB)",
+     "gpu.h100": "H100",
+     "gpu.h200": "H200",
+     "gpu.l40s": "L40S",
+     "gpu.b200": "B200",
+     "gpu.rtx6000pro": "RTX 6000 Pro",
+ }
+
+ # VRAM in GB for each GPU profile (for showing excluded GPUs)
+ GPU_VRAM_GB = {
+     "gpu": 16,
+     "gpu.t4": 16,
+     "gpu.t4.64gb": 16,
+     "gpu.a100": 40,
+     "gpu.a100.40gb": 40,
+     "gpu.a100.80gb": 80,
+     "gpu.h100": 80,
+     "gpu.h200": 141,
+     "gpu.l40s": 48,
+     "gpu.b200": 180,
+     "gpu.rtx6000pro": 48,
+ }
+
+
+ def format_gpu_name(profile: str) -> str:
+     """Format GPU profile name for display."""
+     if profile in GPU_DISPLAY_NAMES:
+         return GPU_DISPLAY_NAMES[profile]
+     # Fallback: strip "gpu." prefix and uppercase
+     return profile.replace("gpu.", "").upper()
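For reference, the fallback branch means profiles missing from the table still render reasonably; a quick sketch against the mapping above (example inputs are illustrative, not from the package):

    format_gpu_name("gpu.a100.80gb")  # -> "A100 (80GB)" (exact match in GPU_DISPLAY_NAMES)
    format_gpu_name("gpu.l4")         # -> "L4" (fallback: strip "gpu.", uppercase)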
+
+
+ def fetch_gpu_pricing() -> dict[str, float]:
+     """Fetch GPU pricing from API. Returns dict of hardware_profile -> price_per_hour."""
+     global _pricing_cache
+     if _pricing_cache is not None:
+         return _pricing_cache
+
+     try:
+         response = httpx.get(
+             f"{config.base_url}/api/v2/external/compute/machine-types",
+             headers={"Authorization": f"Bearer {config.api_key}"},
+             timeout=10.0,
+         )
+         if response.status_code == 200:
+             data = response.json()
+             _pricing_cache = {
+                 m["hardware_profile"]: m.get("price_per_hour", 0) or 0
+                 for m in data.get("machine_types", [])
+             }
+             return _pricing_cache
+     except Exception:
+         pass
+     return {}
+
+
+ def calculate_cost(execution_time_s: float, hardware_profile: str, pricing: dict[str, float]) -> float | None:
+     """Calculate cost based on execution time and GPU pricing."""
+     price_per_hour = pricing.get(hardware_profile)
+     if price_per_hour is None or price_per_hour == 0:
+         return None
+     return execution_time_s * (price_per_hour / 3600)
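The cost estimate is linear in wall-clock time: a 90 s profiling run on a GPU priced at $1.20/hour (an illustrative price, not from the package) costs 90 × 1.20 / 3600 = $0.03. Zero or missing prices are treated as unknown rather than free:

    calculate_cost(90.0, "gpu.a100", {"gpu.a100": 1.20})  # -> 0.03
    calculate_cost(90.0, "gpu.t4", {})                    # -> None (profile not priced)
    calculate_cost(90.0, "gpu.t4", {"gpu.t4": 0})         # -> None (zero price skipped)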
+
+
+ def submit_gpu_selection(payload: dict, status: StatusLine | None = None) -> str:
+     """Submit a GPU selection request to the API and return the execution_id."""
+     if status:
+         status.update("Submitting GPU selection job...")
+
+     response = httpx.post(
+         f"{config.base_url}/api/v2/external/execution/gpu_selection/start",
+         headers={"Authorization": f"Bearer {config.api_key}"},
+         json=payload,
+         timeout=30.0,
+     )
+
+     if response.status_code != 200:
+         if status:
+             status.stop()
+         console.print(f"[red]Error: HTTP {response.status_code}[/red]")
+         if response.status_code == 401:
+             console.print("[red]Authentication failed. Your session may have expired.[/red]")
+             console.print("[yellow]Run 'lyceum auth login' to re-authenticate.[/yellow]")
+         elif response.status_code == 402:
+             console.print("[red]Insufficient credits. Please purchase more credits to continue.[/red]")
+         elif response.status_code == 403:
+             console.print("[red]You do not have access to GPU instances.[/red]")
+         else:
+             console.print(f"[red]{response.content.decode()}[/red]")
+         raise typer.Exit(1)
+
+     data = response.json()
+     return data["execution_id"]
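All four commands below build their request through this helper; a minimal sketch of the body they send, with field names taken from the payload construction later in this file and values purely illustrative:

    payload = {
        "code": "print('hello')",  # script source, with script args already injected
        "nbcode": 0,               # plain script rather than notebook code
        "timeout": 60,             # per-sub-job timeout in seconds
        # Optional keys, added only when present:
        # "file_name": "train.py",
        # "requirements_content": "torch==2.3.0",
        # "prior_imports": ["torch"],
        # "import_files": ...,
    }
    execution_id = submit_gpu_selection(payload)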
+
+
+ def poll_gpu_selection(execution_id: str, status: StatusLine | None = None) -> dict:
+     """Poll GPU selection status until a terminal state is reached."""
+     elapsed = 0.0
+
+     while elapsed < MAX_POLL_TIME:
+         try:
+             response = httpx.get(
+                 f"{config.base_url}/api/v2/external/execution/gpu_selection/{execution_id}/status",
+                 headers={"Authorization": f"Bearer {config.api_key}"},
+                 timeout=10.0,
+             )
+
+             if response.status_code != 200:
+                 if status:
+                     status.update(f"Waiting for results (status check returned {response.status_code})...")
+                 time.sleep(POLL_INTERVAL)
+                 elapsed += POLL_INTERVAL
+                 continue
+
+             data = response.json()
+             current_status = data.get("status", "unknown")
+
+             if current_status in ("completed", "failed", "aborted", "system_failure"):
+                 return data
+
+             if status:
+                 status.update(f"Status: {current_status}...")
+
+         except httpx.RequestError:
+             if status:
+                 status.update("Reconnecting...")
+
+         time.sleep(POLL_INTERVAL)
+         elapsed += POLL_INTERVAL
+
+     if status:
+         status.stop()
+     console.print("[yellow]Timed out waiting for GPU selection results.[/yellow]")
+     console.print(f"[dim]Check later: lyceum predict status {execution_id}[/dim]")
+     raise typer.Exit(1)
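Note that elapsed only accumulates sleep time, so the real wall-clock limit can exceed MAX_POLL_TIME by the total request latency. The underlying pattern, as a self-contained sketch with a hypothetical check callable:

    import time

    def poll_until_terminal(check, interval=2.0, max_time=3600.0) -> bool:
        """check() returns a status string; stop on any terminal state."""
        elapsed = 0.0
        while elapsed < max_time:
            if check() in ("completed", "failed", "aborted", "system_failure"):
                return True
            time.sleep(interval)
            elapsed += interval
        return False  # timed out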
+
+
+ ERROR_SUGGESTIONS = {
+     "No PyTorch or Hugging Face ecosystem detected": [
+         "Add [cyan]import torch[/cyan] and use PyTorch modules",
+         "Or use the Hugging Face [cyan]transformers[/cyan] library",
+     ],
+     "GPU requirement cannot be determined or is CPU-only": [
+         "Move model to GPU: [cyan]model.to('cuda')[/cyan]",
+         "Move tensors to GPU: [cyan]tensor.to('cuda')[/cyan]",
+         "Or use [cyan]device = torch.device('cuda')[/cyan]",
+     ],
+     "No model found": [
+         "Define a class that inherits from [cyan]nn.Module[/cyan]",
+         "Or use a pretrained model from [cyan]transformers[/cyan]",
+     ],
+     "No training loop detected": [
+         "Add a training loop with [cyan]loss.backward()[/cyan]",
+         "And [cyan]optimizer.step()[/cyan]",
+     ],
+ }
+
+
+ def get_suggestions(error: str) -> list[str]:
+     """Get suggestions for a given error message."""
+     for key, suggestions in ERROR_SUGGESTIONS.items():
+         if key.lower() in error.lower():
+             return suggestions
+     return []
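Matching is case-insensitive substring containment, so a backend message only needs to embed one of the keys above (example strings are illustrative):

    get_suggestions("Analysis failed: no model found in script")
    # -> ["Define a class that inherits from [cyan]nn.Module[/cyan]", ...]
    get_suggestions("unrelated error")  # -> []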
+
+
+ def display_results(data: dict, file_path: str | None = None) -> None:
+     """Display GPU selection results."""
+     if data is None:
+         console.print()
+         console.print(Panel(
+             "[red]✗[/red] No response data received",
+             title="[red]GPU Selection Failed[/red]",
+             border_style="red",
+             padding=(1, 2),
+         ))
+         return
+
+     status = data.get("status", "unknown")
+
+     # Parse metadata if it's a string
+     metadata = data.get("metadata")
+     if isinstance(metadata, str):
+         try:
+             metadata = json.loads(metadata)
+         except (json.JSONDecodeError, TypeError):
+             metadata = {}
+     metadata = metadata or {}
+
+     if status != "completed":
+         errors = data.get("system_errors") or []
+
+         # Build error content
+         error_lines = []
+         all_suggestions = []
+
+         for err in errors:
+             error_lines.append(f"[red]✗[/red] {err}")
+             all_suggestions.extend(get_suggestions(err))
+
+         if not error_lines:
+             error_lines.append(f"[red]✗[/red] Status: {status}")
+
+         error_content = "\n".join(error_lines)
+
+         # Add suggestions if available
+         if all_suggestions:
+             error_content += "\n\n[dim]Suggestions:[/dim]"
+             for suggestion in all_suggestions:
+                 error_content += f"\n → {suggestion}"
+
+         console.print()
+         console.print(Panel(
+             error_content,
+             title="[red]GPU Selection Failed[/red]",
+             border_style="red",
+             padding=(1, 2),
+         ))
+         return
+
+     profiling = metadata.get("profiling_results", [])
+     extraction = metadata.get("extraction_result", {})
+
+     # Get memory info for summary
+     mem_config = extraction.get("memory_config", {})
+     minimal_configs = mem_config.get("minimal_configs", [])
+
+     # Find the smallest/cheapest GPU option (lowest VRAM that works)
+     best_gpu = None
+     if minimal_configs:
+         # Sort by VRAM to find smallest viable option
+         sorted_configs = sorted(minimal_configs, key=lambda x: x.get("per_gpu_vram_gb", 999))
+         best_gpu = sorted_configs[0] if sorted_configs else None
+
+     # Summary panel
+     console.print()
+     summary_lines = ["[green]✓[/green] Analysis complete"]
+
+     if best_gpu:
+         gpu_name = format_gpu_name(best_gpu.get("gpu_type", "unknown"))
+         vram = best_gpu.get("per_gpu_vram_gb", "?")
+         count = best_gpu.get("min_gpu_count", 1)
+         gpu_str = f"{count}x " if count > 1 else ""
+         summary_lines.append("")
+         summary_lines.append(f"[bold]Recommended:[/bold] [cyan]{gpu_str}{gpu_name}[/cyan] ({vram} GB VRAM)")
+
+     # Add runtime info if available
+     if profiling:
+         completed = [p for p in profiling if p.get("status") == "completed"]
+         if completed:
+             fastest = min(completed, key=lambda x: x.get("execution_time") or 999)
+             time_s = fastest.get("execution_time")
+             if time_s is not None:
+                 summary_lines.append(f"[bold]Est. runtime:[/bold] {time_s:.2f}s")
+
+             report = fastest.get("runtime_report", {})
+             iters = report.get("train_iteration", {}).get("train_iterations_per_second")
+             if iters:
+                 summary_lines.append(f"[bold]Throughput:[/bold] {iters:.0f} iters/sec")
+
+     console.print(Panel(
+         "\n".join(summary_lines),
+         title="[green]GPU Selection Results[/green]",
+         border_style="green",
+         padding=(1, 2),
+     ))
+
+     # Profiling results table with cost
+     if profiling:
+         pricing = fetch_gpu_pricing()
+         console.print()
+         prof_table = Table(title="Profiling Results", show_header=True, header_style="bold")
+         prof_table.add_column("GPU", style="cyan")
+         prof_table.add_column("Status")
+         prof_table.add_column("Time", justify="right")
+         prof_table.add_column("Cost", justify="right")
+         prof_table.add_column("Iters/sec", justify="right")
+         prof_table.add_column("Peak VRAM", justify="right")
+
+         # Sort by execution time
+         sorted_profiling = sorted(profiling, key=lambda x: x.get("execution_time") or 999)
+
+         for result in sorted_profiling:
+             profile = result.get("profile", "?")
+             rst = result.get("status", "unknown")
+             style = "green" if rst in ("completed", "success") else "red"
+
+             report = result.get("runtime_report") or {}
+             train_iter = report.get("train_iteration") or {}
+
+             time_s = result.get("execution_time")
+             # Calculate cost from pricing
+             cost = result.get("cost")
+             if cost is None and time_s is not None:
+                 cost = calculate_cost(time_s, profile, pricing)
+
+             iters = train_iter.get("train_iterations_per_second")
+             vram = report.get("Peak VRAM Allocated (MB)")
+
+             prof_table.add_row(
+                 format_gpu_name(profile),
+                 f"[{style}]{rst}[/{style}]",
+                 f"{time_s:.2f}s" if time_s is not None else "-",
+                 f"${cost:.6f}" if cost is not None else "-",
+                 f"{iters:.0f}" if iters else "-",
+                 f"{vram:.1f} MB" if vram else "-",
+             )
+
+         console.print(prof_table)
+
+     # Compatible GPU configurations table
+     if minimal_configs:
+         console.print()
+         gpu_table = Table(title="Compatible GPUs", show_header=True, header_style="bold")
+         gpu_table.add_column("GPU", style="cyan")
+         gpu_table.add_column("VRAM", justify="right")
+         gpu_table.add_column("GPUs Needed", justify="right")
+         gpu_table.add_column("Utilization", justify="right")
+
+         # Sort by VRAM size for better display
+         sorted_configs = sorted(minimal_configs, key=lambda x: x.get("per_gpu_vram_gb", 0))
+         compatible_gpu_types = {cfg.get("gpu_type") for cfg in minimal_configs}
+
+         for i, cfg in enumerate(sorted_configs):
+             gpu_type = format_gpu_name(cfg.get("gpu_type", "?"))
+             vram = cfg.get("per_gpu_vram_gb", 0)
+             count = cfg.get("min_gpu_count", 1)
+             util = cfg.get("vram_utilization_percent", 0)
+
+             # Highlight the recommended (smallest) option
+             if i == 0:
+                 gpu_type = f"[green]{gpu_type}[/green] ✓"
+
+             gpu_table.add_row(
+                 gpu_type,
+                 f"{vram} GB",
+                 str(count),
+                 f"{util}%",
+             )
+
+         console.print(gpu_table)
+
+         # Show excluded GPUs (those not in minimal_configs)
+         # Get total memory required from extraction result
+         mem_reqs = mem_config.get("memory_requirements", {})
+         total_mem_gb = 0
+         if mem_reqs:
+             # Sum up memory components
+             total_mem_gb = (
+                 mem_reqs.get("model_weights", 0) +
+                 mem_reqs.get("gradients", 0) +
+                 mem_reqs.get("optimizer_states", 0) +
+                 mem_reqs.get("activations", 0)
+             )
+
+         if not total_mem_gb and sorted_configs:
+             # Estimate from minimal config utilization
+             first_cfg = sorted_configs[0]
+             vram = first_cfg.get("per_gpu_vram_gb", 0)
+             util = first_cfg.get("vram_utilization_percent", 0)
+             if vram and util:
+                 total_mem_gb = vram * (util / 100)
+
+         # Find GPUs that were excluded
+         excluded_gpus = []
+         for profile, vram in GPU_VRAM_GB.items():
+             if profile not in compatible_gpu_types and profile in ("gpu", "gpu.t4"):
+                 # Only show common GPUs that users expect to see
+                 excluded_gpus.append((profile, vram))
+
+         if excluded_gpus and total_mem_gb:
+             console.print()
+             console.print("[dim]Excluded GPUs (insufficient VRAM):[/dim]")
+             for profile, vram in sorted(excluded_gpus, key=lambda x: x[1]):
+                 gpu_name = format_gpu_name(profile)
+                 console.print(f"[dim] • {gpu_name}: {vram} GB available, ~{total_mem_gb:.1f} GB required[/dim]")
+
+     # Show run command hint if we have a best GPU and file path
+     if best_gpu and file_path:
+         machine_flag = best_gpu.get("gpu_type", "gpu")
+         console.print()
+         console.print(f"[dim]To run on optimal machine: lyceum python run {file_path} -m {machine_flag}[/dim]")
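Pieced together from the lookups above, the metadata this renderer consumes looks roughly like the following; the key names come from the code, the values are illustrative:

    metadata = {
        "profiling_results": [
            {"profile": "gpu.t4", "status": "completed", "execution_time": 42.1,
             "runtime_report": {"train_iteration": {"train_iterations_per_second": 88.0},
                                "Peak VRAM Allocated (MB)": 1523.4}},
        ],
        "extraction_result": {
            "memory_config": {
                "memory_requirements": {"model_weights": 0.5, "gradients": 0.5,
                                        "optimizer_states": 1.0, "activations": 0.3},
                "minimal_configs": [
                    {"gpu_type": "gpu.t4", "per_gpu_vram_gb": 16,
                     "min_gpu_count": 1, "vram_utilization_percent": 14},
                ],
            },
        },
    }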
+
+
+ @gpu_selection_app.command("run", context_settings={"allow_extra_args": True, "allow_interspersed_args": True})
+ def run_gpu_selection(
+     ctx: typer.Context,
+     code_or_file: str = typer.Argument(..., help="Python code to execute or path to Python file"),
+     file_name: str | None = typer.Option(None, "--file-name", "-f", help="Name for the execution"),
+     timeout: int = typer.Option(60, "--timeout", "-t", help="Timeout per sub-job in seconds (1-600)"),
+     requirements: str | None = typer.Option(
+         None, "--requirements", "-r", help="Requirements file path or pip requirements string"
+     ),
+     imports: list[str] | None = typer.Option(
+         None, "--import", help="Pre-import modules (can be used multiple times)"
+     ),
+     use_config: bool = typer.Option(
+         True, "--use-config/--no-config",
+         help="Use workspace config from .lyceum/config.json if available"
+     ),
+     debug: bool = typer.Option(
+         False, "--debug", "-d",
+         help="Show detailed debug information about config, requirements, and payload"
+     ),
+ ):
+     """Run code on multiple GPUs and select the optimal hardware.
+
+     Submits the code to run on all GPU profiles available to your account,
+     then reports which GPU performed best.
+
+     Script arguments can be passed after the file path:
+
+         lyceum predict run train.py -- --epochs 10 --lr 0.001
+     """
+     status = StatusLine()
+
+     try:
+         config.get_client()
+         status.start()
+
+         script_args = [arg for arg in (ctx.args or []) if arg != "--"]
+
+         code, file_path, detected_file_name = read_code_from_source(code_or_file, status)
+         if not file_name:
+             file_name = detected_file_name
+
+         code = inject_script_args(code, script_args, file_name)
+
+         workspace_config = None
+         if use_config:
+             status.update("Loading workspace config...")
+             workspace_config = load_workspace_config(file_path)
+             if workspace_config and debug:
+                 status.stop()
+                 console.print(f"[cyan]DEBUG: Config keys: {list(workspace_config.keys())}[/cyan]")
+                 status.start()
+
+         requirements_content = resolve_requirements(requirements, workspace_config, debug, status)
+         import_files = resolve_import_files(file_path, workspace_config, debug, status)
+
+         # Build payload matching GPUSelectionRequest schema
+         payload = {
+             "code": code,
+             "nbcode": 0,
+             "timeout": timeout,
+         }
+         if file_name:
+             payload["file_name"] = file_name
+         if requirements_content:
+             payload["requirements_content"] = requirements_content
+         if imports:
+             payload["prior_imports"] = imports
+         if import_files:
+             payload["import_files"] = import_files
+
+         if debug:
+             status.stop()
+             console.print("[cyan]DEBUG: Payload summary:[/cyan]")
+             console.print(f"[cyan] - timeout: {timeout}[/cyan]")
+             console.print(f"[cyan] - code length: {len(code)} chars[/cyan]")
+             console.print(f"[cyan] - requirements_content: {len(requirements_content or '')} chars[/cyan]")
+             console.print(f"[cyan] - import_files: {len(import_files or '')} chars[/cyan]")
+             status.start()
+
+         execution_id = submit_gpu_selection(payload, status)
+         console.print(f"[dim]Execution ID: {execution_id}[/dim]")
+
+         status.update("Waiting for GPU selection results...")
+         data = poll_gpu_selection(execution_id, status)
+         status.stop()
+
+         display_results(data, file_path=code_or_file)
+
+         if data.get("status") != "completed":
+             raise typer.Exit(1)
+
+     except typer.Exit:
+         status.stop()
+         raise
+     except Exception as e:
+         status.stop()
+         console.print(f"[red]Error: {e}[/red]")
+         raise typer.Exit(1)
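The allow_extra_args context setting is what lets script arguments ride along after the file path; unparsed tokens land in ctx.args and the command strips a literal "--" separator itself. In miniature (token values illustrative):

    # lyceum predict run train.py -- --epochs 10 --lr 0.001
    ctx_args = ["--", "--epochs", "10", "--lr", "0.001"]  # roughly what arrives in ctx.args
    script_args = [a for a in ctx_args if a != "--"]      # ["--epochs", "10", "--lr", "0.001"]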
+
+
+ @gpu_selection_app.command("status")
+ def predict_status(
+     execution_id: str = typer.Argument(..., help="Execution ID to check"),
+ ):
+     """Check the status of a GPU selection execution."""
+     try:
+         config.get_client()
+
+         response = httpx.get(
+             f"{config.base_url}/api/v2/external/execution/gpu_selection/{execution_id}/status",
+             headers={"Authorization": f"Bearer {config.api_key}"},
+             timeout=10.0,
+         )
+
+         if response.status_code == 404:
+             console.print("[red]Execution not found.[/red]")
+             raise typer.Exit(1)
+
+         if response.status_code != 200:
+             console.print(f"[red]Error: HTTP {response.status_code}[/red]")
+             console.print(f"[red]{response.content.decode()}[/red]")
+             raise typer.Exit(1)
+
+         data = response.json()
+
+         # Parse metadata if it's a string
+         if isinstance(data.get("metadata"), str):
+             try:
+                 data["metadata"] = json.loads(data["metadata"])
+             except (json.JSONDecodeError, TypeError):
+                 pass
+
+         current_status = data.get("status", "unknown")
+         console.print(f"Status: [bold]{current_status}[/bold]")
+
+         if current_status in ("completed", "failed", "aborted", "system_failure"):
+             display_results(data)
+         else:
+             console.print("[dim]Job is still running. Check again later.[/dim]")
+
+     except typer.Exit:
+         raise
+     except Exception as e:
+         console.print(f"[red]Error: {e}[/red]")
+         raise typer.Exit(1)
+
+
+ def display_memory_results(data: dict, file_path: str | None = None) -> None:
+     """Display memory analysis results."""
+     if data is None:
+         console.print("[red]No data received[/red]")
+         return
+
+     metadata = data.get("metadata")
+     if isinstance(metadata, str):
+         try:
+             metadata = json.loads(metadata)
+         except (json.JSONDecodeError, TypeError):
+             metadata = {}
+     metadata = metadata or {}
+
+     extraction = metadata.get("extraction_result", {})
+     mem_config = extraction.get("memory_config", {})
+     mem_reqs = mem_config.get("memory_requirements", {})
+     minimal_configs = mem_config.get("minimal_configs", [])
+
+     if not mem_reqs and not minimal_configs:
+         console.print("[yellow]No memory analysis data available.[/yellow]")
+         return
+
+     # Memory requirements breakdown
+     if mem_reqs:
+         console.print()
+         mem_table = Table(title="Memory Requirements", show_header=True, header_style="bold")
+         mem_table.add_column("Component", style="cyan")
+         mem_table.add_column("Size", justify="right")
+
+         def format_gb(val: float) -> str:
+             if val < 0.001:
+                 return f"{val * 1024:.2f} MB"
+             return f"{val:.3f} GB"
+
+         components = [
+             ("Model Weights", mem_reqs.get("model_weights", 0)),
+             ("Gradients", mem_reqs.get("gradients", 0)),
+             ("Optimizer States", mem_reqs.get("optimizer_states", 0)),
+             ("Activations", mem_reqs.get("activations", 0)),
+             ("Largest Layer", mem_reqs.get("largest_layer", 0)),
+         ]
+
+         total = sum(v for _, v in components if v)
+         for name, val in components:
+             if val:
+                 mem_table.add_row(name, format_gb(val))
+
+         mem_table.add_row("─" * 20, "─" * 10)
+         mem_table.add_row("[bold]Total[/bold]", f"[bold]{format_gb(total)}[/bold]")
+
+         param_count = mem_reqs.get("parameter_count", 0)
+         if param_count:
+             mem_table.add_row("", "")
+             mem_table.add_row("Parameter Count", f"{param_count:.2e}")
+
+         console.print(mem_table)
+
+     # Compatible GPUs
+     if minimal_configs:
+         console.print()
+         gpu_table = Table(title="Compatible GPUs", show_header=True, header_style="bold")
+         gpu_table.add_column("GPU", style="cyan")
+         gpu_table.add_column("VRAM", justify="right")
+         gpu_table.add_column("GPUs Needed", justify="right")
+         gpu_table.add_column("Utilization", justify="right")
+
+         sorted_configs = sorted(minimal_configs, key=lambda x: x.get("per_gpu_vram_gb", 0))
+
+         for i, cfg in enumerate(sorted_configs):
+             gpu_type = format_gpu_name(cfg.get("gpu_type", "?"))
+             vram = cfg.get("per_gpu_vram_gb", 0)
+             count = cfg.get("min_gpu_count", 1)
+             util = cfg.get("vram_utilization_percent", 0)
+
+             if i == 0:
+                 gpu_type = f"[green]{gpu_type}[/green] ✓"
+
+             gpu_table.add_row(gpu_type, f"{vram} GB", str(count), f"{util}%")
+
+         console.print(gpu_table)
+
+         # Show run command hint
+         if file_path and sorted_configs:
+             best = sorted_configs[0]
+             machine_flag = best.get("gpu_type", "gpu")
+             console.print()
+             console.print(f"[dim]To run on optimal machine: lyceum python run {file_path} -m {machine_flag}[/dim]")
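format_gb switches to megabytes below one thousandth of a gigabyte, so sub-MB components stay readable:

    format_gb(0.0005)  # -> "0.51 MB" (0.0005 * 1024)
    format_gb(1.5)     # -> "1.500 GB"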
+
+
+ @gpu_selection_app.command("memory", context_settings={"allow_extra_args": True, "allow_interspersed_args": True})
+ def predict_memory(
+     ctx: typer.Context,
+     code_or_file: str = typer.Argument(..., help="Python code or path to Python file"),
+     file_name: str | None = typer.Option(None, "--file-name", "-f", help="Name for the execution"),
+     requirements: str | None = typer.Option(
+         None, "--requirements", "-r", help="Requirements file path or pip requirements string"
+     ),
+     imports: list[str] | None = typer.Option(
+         None, "--import", help="Pre-import modules (can be used multiple times)"
+     ),
+     mixed_precision: str | None = typer.Option(
+         None, "--mixed-precision", "-mp",
+         help="Mixed precision dtype (fp16, bf16)"
+     ),
+     strategy: str | None = typer.Option(
+         None, "--strategy", "-s",
+         help="Parallelization strategy (ddp, fsdp, zero1, zero2, zero3)"
+     ),
+     use_config: bool = typer.Option(
+         True, "--use-config/--no-config",
+         help="Use workspace config from .lyceum/config.json if available"
+     ),
+ ):
+     """Estimate memory requirements for your training script.
+
+     Analyzes model architecture to predict VRAM usage without running
+     full GPU profiling. Faster than 'predict run'.
+
+     Examples:
+         lyceum predict memory train.py
+         lyceum predict memory train.py --mixed-precision fp16
+         lyceum predict memory train.py --strategy fsdp
+     """
+     status = StatusLine()
+
+     try:
+         config.get_client()
+         status.start()
+
+         script_args = [arg for arg in (ctx.args or []) if arg != "--"]
+
+         code, file_path, detected_file_name = read_code_from_source(code_or_file, status)
+         if not file_name:
+             file_name = detected_file_name
+
+         code = inject_script_args(code, script_args, file_name)
+
+         workspace_config = None
+         if use_config:
+             status.update("Loading workspace config...")
+             workspace_config = load_workspace_config(file_path)
+
+         requirements_content = resolve_requirements(requirements, workspace_config, False, status)
+         import_files = resolve_import_files(file_path, workspace_config, False, status)
+
+         # Build payload - same as gpu_selection but we'll only show memory results
+         payload = {
+             "code": code,
+             "nbcode": 0,
+             "timeout": 60,  # Memory analysis is quick
+         }
+         if file_name:
+             payload["file_name"] = file_name
+         if requirements_content:
+             payload["requirements_content"] = requirements_content
+         if imports:
+             payload["prior_imports"] = imports
+         if import_files:
+             payload["import_files"] = import_files
+
+         # TODO: When backend supports it, add mixed_precision and strategy to payload
+         if mixed_precision:
+             console.print(f"[dim]Note: --mixed-precision {mixed_precision} (backend support coming soon)[/dim]")
+         if strategy:
+             console.print(f"[dim]Note: --strategy {strategy} (backend support coming soon)[/dim]")
+
+         execution_id = submit_gpu_selection(payload, status)
+         console.print(f"[dim]Execution ID: {execution_id}[/dim]")
+
+         status.update("Analyzing memory requirements...")
+         data = poll_gpu_selection(execution_id, status)
+         status.stop()
+
+         if data.get("status") != "completed":
+             display_results(data)  # Show error with suggestions
+             raise typer.Exit(1)
+
+         display_memory_results(data, file_path=code_or_file)
+
+     except typer.Exit:
+         status.stop()
+         raise
+     except Exception as e:
+         status.stop()
+         console.print(f"[red]Error: {e}[/red]")
+         raise typer.Exit(1)
+
+
+ @gpu_selection_app.command("recommend-gpus", context_settings={"allow_extra_args": True, "allow_interspersed_args": True})
+ def recommend_gpus(
+     ctx: typer.Context,
+     code_or_file: str = typer.Argument(..., help="Python code or path to Python file"),
+     file_name: str | None = typer.Option(None, "--file-name", "-f", help="Name for the execution"),
+     requirements: str | None = typer.Option(
+         None, "--requirements", "-r", help="Requirements file path or pip requirements string"
+     ),
+     use_config: bool = typer.Option(
+         True, "--use-config/--no-config",
+         help="Use workspace config from .lyceum/config.json if available"
+     ),
+     top: int = typer.Option(3, "--top", "-n", help="Number of recommendations to show"),
+ ):
+     """Quick GPU recommendations based on memory analysis.
+
+     Analyzes your model and recommends the best GPU configurations,
+     sorted by cost-efficiency.
+
+     Examples:
+         lyceum predict recommend-gpus train.py
+         lyceum predict recommend-gpus train.py --top 5
+     """
+     status = StatusLine()
+
+     try:
+         config.get_client()
+         status.start()
+
+         script_args = [arg for arg in (ctx.args or []) if arg != "--"]
+
+         code, file_path, detected_file_name = read_code_from_source(code_or_file, status)
+         if not file_name:
+             file_name = detected_file_name
+
+         code = inject_script_args(code, script_args, file_name)
+
+         workspace_config = None
+         if use_config:
+             status.update("Loading workspace config...")
+             workspace_config = load_workspace_config(file_path)
+
+         requirements_content = resolve_requirements(requirements, workspace_config, False, status)
+         import_files = resolve_import_files(file_path, workspace_config, False, status)
+
+         payload = {
+             "code": code,
+             "nbcode": 0,
+             "timeout": 60,
+         }
+         if file_name:
+             payload["file_name"] = file_name
+         if requirements_content:
+             payload["requirements_content"] = requirements_content
+         if import_files:
+             payload["import_files"] = import_files
+
+         execution_id = submit_gpu_selection(payload, status)
+         console.print(f"[dim]Execution ID: {execution_id}[/dim]")
+
+         status.update("Analyzing model and generating recommendations...")
+         data = poll_gpu_selection(execution_id, status)
+         status.stop()
+
+         if data.get("status") != "completed":
+             display_results(data)
+             raise typer.Exit(1)
+
+         # Get pricing for cost estimates
+         pricing = fetch_gpu_pricing()
+
+         metadata = data.get("metadata")
+         if isinstance(metadata, str):
+             try:
+                 metadata = json.loads(metadata)
+             except (json.JSONDecodeError, TypeError):
+                 metadata = {}
+         metadata = metadata or {}
+
+         extraction = metadata.get("extraction_result", {})
+         mem_config = extraction.get("memory_config", {})
+         minimal_configs = mem_config.get("minimal_configs", [])
+
+         if not minimal_configs:
+             console.print("[yellow]No GPU recommendations available.[/yellow]")
+             raise typer.Exit(1)
+
+         # Sort by VRAM (smaller = likely cheaper)
+         sorted_configs = sorted(minimal_configs, key=lambda x: x.get("per_gpu_vram_gb", 0))[:top]
+
+         console.print()
+         console.print(Panel(
+             f"[green]✓[/green] Found {len(minimal_configs)} compatible GPU configurations",
+             title="[green]GPU Recommendations[/green]",
+             border_style="green",
+         ))
+
+         console.print()
+         rec_table = Table(title=f"Top {min(top, len(sorted_configs))} Recommendations", show_header=True, header_style="bold")
+         rec_table.add_column("#", style="dim", width=3)
+         rec_table.add_column("GPU", style="cyan")
+         rec_table.add_column("VRAM", justify="right")
+         rec_table.add_column("GPUs", justify="right")
+         rec_table.add_column("$/hour", justify="right")
+         rec_table.add_column("Utilization", justify="right")
+
+         for i, cfg in enumerate(sorted_configs):
+             gpu_type = cfg.get("gpu_type", "?")
+             gpu_display = format_gpu_name(gpu_type)
+             vram = cfg.get("per_gpu_vram_gb", 0)
+             count = cfg.get("min_gpu_count", 1)
+             util = cfg.get("vram_utilization_percent", 0)
+
+             price = pricing.get(gpu_type, 0)
+             price_str = f"${price:.2f}" if price else "-"
+
+             rank = f"[green]{i + 1}[/green]" if i == 0 else str(i + 1)
+
+             rec_table.add_row(
+                 rank,
+                 gpu_display,
+                 f"{vram} GB",
+                 str(count),
+                 price_str,
+                 f"{util}%",
+             )
+
+         console.print(rec_table)
+
+         # Show command hint
+         best = sorted_configs[0]
+         machine_flag = best.get("gpu_type", "gpu")
+         console.print()
+         console.print(f"[dim]Run with: lyceum python run {code_or_file} -m {machine_flag}[/dim]")
+
+     except typer.Exit:
+         status.stop()
+         raise
+     except Exception as e:
+         status.stop()
+         console.print(f"[red]Error: {e}[/red]")
+         raise typer.Exit(1)
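The $/hour column is a join of minimal_configs against the machine-type pricing on the profile key; the same logic in miniature, with illustrative prices:

    pricing = {"gpu.t4": 0.35, "gpu.a100.80gb": 2.10}
    cfgs = [
        {"gpu_type": "gpu.a100.80gb", "per_gpu_vram_gb": 80},
        {"gpu_type": "gpu.t4", "per_gpu_vram_gb": 16},
    ]
    top = sorted(cfgs, key=lambda c: c.get("per_gpu_vram_gb", 0))[:3]
    rows = [(c["gpu_type"], pricing.get(c["gpu_type"], 0)) for c in top]
    # -> [("gpu.t4", 0.35), ("gpu.a100.80gb", 2.10)]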
+
+
+ def display_runtime_results(data: dict, file_path: str | None = None) -> None:
+     """Display runtime profiling results."""
+     if data is None:
+         console.print("[red]No data received[/red]")
+         return
+
+     metadata = data.get("metadata")
+     if isinstance(metadata, str):
+         try:
+             metadata = json.loads(metadata)
+         except (json.JSONDecodeError, TypeError):
+             metadata = {}
+     metadata = metadata or {}
+
+     profiling = metadata.get("profiling_results", [])
+
+     if not profiling:
+         console.print("[yellow]No runtime profiling data available.[/yellow]")
+         return
+
+     pricing = fetch_gpu_pricing()
+
+     # Find best performers
+     completed = [p for p in profiling if p.get("status") in ("completed", "success")]
+
+     if completed:
+         fastest = min(completed, key=lambda x: x.get("execution_time") or 999)
+         fastest_profile = format_gpu_name(fastest.get("profile", "?"))
+         fastest_time = fastest.get("execution_time", 0)
+
+         console.print()
+         console.print(Panel(
+             f"[green]✓[/green] Profiling complete\n\n"
+             f"[bold]Fastest:[/bold] [cyan]{fastest_profile}[/cyan] ({fastest_time:.2f}s)",
+             title="[green]Runtime Analysis[/green]",
+             border_style="green",
+         ))
+
+     # Detailed results table
+     console.print()
+     prof_table = Table(title="Runtime Results by GPU", show_header=True, header_style="bold")
+     prof_table.add_column("GPU", style="cyan")
+     prof_table.add_column("Status")
+     prof_table.add_column("Time", justify="right")
+     prof_table.add_column("Cost", justify="right")
+     prof_table.add_column("Iters/sec", justify="right")
+     prof_table.add_column("Avg Batch (ms)", justify="right")
+     prof_table.add_column("Peak VRAM", justify="right")
+
+     sorted_profiling = sorted(profiling, key=lambda x: x.get("execution_time") or 999)
+
+     for result in sorted_profiling:
+         profile = result.get("profile", "?")
+         rst = result.get("status", "unknown")
+         style = "green" if rst in ("completed", "success") else "red"
+
+         report = result.get("runtime_report") or {}
+         train = report.get("training") or {}
+         train_iter = report.get("train_iteration") or {}
+
+         time_s = result.get("execution_time")
+         cost = result.get("cost")
+         if cost is None and time_s is not None:
+             cost = calculate_cost(time_s, profile, pricing)
+
+         iters = train_iter.get("train_iterations_per_second")
+         avg_batch = train.get("avg_train_time_ms")
+         vram = report.get("Peak VRAM Allocated (MB)")
+
+         prof_table.add_row(
+             format_gpu_name(profile),
+             f"[{style}]{rst}[/{style}]",
+             f"{time_s:.2f}s" if time_s is not None else "-",
+             f"${cost:.6f}" if cost is not None else "-",
+             f"{iters:.0f}" if iters else "-",
+             f"{avg_batch:.2f}" if avg_batch else "-",
+             f"{vram:.1f} MB" if vram else "-",
+         )
+
+     console.print(prof_table)
+
+     # Show run command hint based on fastest GPU
+     if file_path and completed:
+         fastest = min(completed, key=lambda x: x.get("execution_time") or 999)
+         machine_flag = fastest.get("profile", "gpu")
+         console.print()
+         console.print(f"[dim]To run on fastest machine: lyceum python run {file_path} -m {machine_flag}[/dim]")
+
+
+ @gpu_selection_app.command("runtime", context_settings={"allow_extra_args": True, "allow_interspersed_args": True})
+ def predict_runtime(
+     ctx: typer.Context,
+     code_or_file: str = typer.Argument(..., help="Python code or path to Python file"),
+     file_name: str | None = typer.Option(None, "--file-name", "-f", help="Name for the execution"),
+     timeout: int = typer.Option(120, "--timeout", "-t", help="Timeout per GPU in seconds (1-600)"),
+     requirements: str | None = typer.Option(
+         None, "--requirements", "-r", help="Requirements file path or pip requirements string"
+     ),
+     imports: list[str] | None = typer.Option(
+         None, "--import", help="Pre-import modules (can be used multiple times)"
+     ),
+     use_config: bool = typer.Option(
+         True, "--use-config/--no-config",
+         help="Use workspace config from .lyceum/config.json if available"
+     ),
+ ):
+     """Profile runtime performance across different GPUs.
+
+     Runs your training script on available GPUs and measures actual
+     execution time, throughput, and VRAM usage.
+
+     Examples:
+         lyceum predict runtime train.py
+         lyceum predict runtime train.py --timeout 180
+     """
+     status = StatusLine()
+
+     try:
+         config.get_client()
+         status.start()
+
+         script_args = [arg for arg in (ctx.args or []) if arg != "--"]
+
+         code, file_path, detected_file_name = read_code_from_source(code_or_file, status)
+         if not file_name:
+             file_name = detected_file_name
+
+         code = inject_script_args(code, script_args, file_name)
+
+         workspace_config = None
+         if use_config:
+             status.update("Loading workspace config...")
+             workspace_config = load_workspace_config(file_path)
+
+         requirements_content = resolve_requirements(requirements, workspace_config, False, status)
+         import_files = resolve_import_files(file_path, workspace_config, False, status)
+
+         payload = {
+             "code": code,
+             "nbcode": 0,
+             "timeout": timeout,
+         }
+         if file_name:
+             payload["file_name"] = file_name
+         if requirements_content:
+             payload["requirements_content"] = requirements_content
+         if imports:
+             payload["prior_imports"] = imports
+         if import_files:
+             payload["import_files"] = import_files
+
+         execution_id = submit_gpu_selection(payload, status)
+         console.print(f"[dim]Execution ID: {execution_id}[/dim]")
+
+         status.update("Profiling runtime across GPUs...")
+         data = poll_gpu_selection(execution_id, status)
+         status.stop()
+
+         if data.get("status") != "completed":
+             display_results(data)
+             raise typer.Exit(1)
+
+         display_runtime_results(data, file_path=code_or_file)
+
+     except typer.Exit:
+         status.stop()
+         raise
+     except Exception as e:
+         status.stop()
+         console.print(f"[red]Error: {e}[/red]")
+         raise typer.Exit(1)