lyceum-cli 1.0.28__py3-none-any.whl → 1.0.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lyceum/external/compute/execution/gpu_selection.py +1023 -0
- lyceum/external/compute/inference/batch.py +94 -304
- lyceum/external/compute/inference/chat.py +104 -189
- lyceum/external/compute/inference/infer.py +101 -0
- lyceum/external/compute/inference/models.py +26 -199
- lyceum/main.py +6 -1
- lyceum/shared/config.py +5 -9
- lyceum/shared/streaming.py +45 -17
- {lyceum_cli-1.0.28.dist-info → lyceum_cli-1.0.29.dist-info}/METADATA +1 -1
- {lyceum_cli-1.0.28.dist-info → lyceum_cli-1.0.29.dist-info}/RECORD +13 -11
- {lyceum_cli-1.0.28.dist-info → lyceum_cli-1.0.29.dist-info}/WHEEL +1 -1
- {lyceum_cli-1.0.28.dist-info → lyceum_cli-1.0.29.dist-info}/entry_points.txt +0 -0
- {lyceum_cli-1.0.28.dist-info → lyceum_cli-1.0.29.dist-info}/top_level.txt +0 -0

lyceum/external/compute/inference/chat.py
@@ -1,221 +1,136 @@
-"""
+"""Inference chat command"""

 import json
+import os
+from pathlib import Path

 import typer
 from rich.console import Console
-from rich.table import Table

 from ....shared.config import config

 console = Console()

-
-
-
-
-
-
-
-
-
-
-
-    temperature: float = typer.Option(0.7, "--temperature", "-t", help="Temperature (0.0-2.0)"),
-    system_prompt: str | None = typer.Option(None, "--system", "-s", help="System prompt"),
+def chat_cmd(
+    prompt: str = typer.Option(None, "--prompt", "-p", help="The message or path to file (.txt/.yaml/.xml)"),
+    no_stream: bool = typer.Option(False, "--no-stream", "-n", help="Disable streaming response"),
+    image: str = typer.Option(None, "--image", "-i", help="Image path or base64"),
+    image_url: str = typer.Option(None, "--url", help="Image URL"),
+    image_dir: str = typer.Option(None, "--dir", help="Directory of images"),
+    base64: bool = typer.Option(False, "--base64", help="Treat image input as base64"),
+    model: str = typer.Option("gpt-4", "--model", "-m", help="Model to use"),
+    max_tokens: int = typer.Option(1000, "--tokens", "-t", help="Max output tokens"),
+    output_type: str = typer.Option("text", "--type", help="Output type (e.g. json, markdown)"),
+    batch_file: str = typer.Option(None, "--batch", "-b", help="JSONL file for batch processing"),
 ):
-    """
+    """
+    Perform inference (Chat, Image, or Batch).
+    """
     try:
         config.get_client()
-
-        # Create the sync request payload directly
-        sync_request = {
-            "model_id": model,
-            "input": {
-                "text": message,
-                "parameters": {"system_prompt": system_prompt} if system_prompt else {}
-            },
-            "max_tokens": max_tokens,
-            "temperature": temperature,
-            "top_p": 1.0,
-            "stream": False
-        }
-
-        console.print(f"[dim]🤖 Sending message to {model}...[/dim]")
-
-        # Make the API call using httpx directly (since we don't have generated client for sync inference yet)
         import httpx

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        usage = result['usage']
-        console.print(f"[dim]📊 Tokens: {usage.get('total_tokens', 0)} | "
-                      f"Latency: {result.get('latency_ms', 0)}ms | "
-                      f"Cost: ${result.get('cost', 0):.4f}[/dim]")
-
-            elif response.status_code == 503:
-                console.print(f"[red]❌ Model {model} is not running. Please contact support to start the model.[/red]")
+        # 1. Batch Processing
+        if batch_file:
+            console.print(f"[dim]Initiating batch processing from {batch_file}...[/dim]")
+            if not os.path.exists(batch_file):
+                console.print(f"[red]File not found: {batch_file}[/red]")
+                raise typer.Exit(1)
+
+            # Upload
+            with open(batch_file, 'rb') as f:
+                files = {'file': (os.path.basename(batch_file), f, 'application/jsonl')}
+                response = httpx.post(
+                    f"{config.base_url}/api/v2/external/files",
+                    headers={"Authorization": f"Bearer {config.api_key}"},
+                    files=files,
+                    data={'purpose': 'batch'},
+                    timeout=60.0
+                )
+            if response.status_code != 200:
+                console.print(f"[red]Upload failed: {response.text}[/red]")
                 raise typer.Exit(1)
-            else:
-                console.print(f"[red]❌ Error: HTTP {response.status_code}[/red]")
-                console.print(f"[red]{response.text}[/red]")
-                raise typer.Exit(1)
-
-    except Exception as e:
-        console.print(f"[red]❌ Error: {e}[/red]")
-        raise typer.Exit(1)

+            file_id = response.json()['id']

-
-
-
-
-
-
-
-
-
-        url = f"{config.base_url}/api/v2/external/models/"
-        headers = {"Authorization": f"Bearer {config.api_key}"}
-
-        with httpx.Client() as http_client:
-            response = http_client.get(url, headers=headers, timeout=10.0)
-
-            if response.status_code == 200:
-                models = response.json()
-
-                if models:
-                    # Create a table
-                    table = Table(title="Available AI Models")
-                    table.add_column("Model", style="cyan", no_wrap=True)
-                    table.add_column("Type", style="magenta")
-                    table.add_column("Status", justify="center")
-                    table.add_column("Price/1K tokens", justify="right", style="green")
-
-                    # Sort models: running first, then by type, then by name
-                    sorted_models = sorted(models, key=lambda m: (
-                        not m.get('available', False),  # Running models first
-                        m.get('type', 'text'),  # Then by type
-                        m.get('model_id', '')  # Then by name
-                    ))
-
-                    for model in sorted_models:
-                        # Status with emoji
-                        status = "🟢 Running" if model.get('available') else "🔴 Stopped"
-
-                        # Model type with emoji
-                        model_type = model.get('type', 'text')
-                        type_emoji = {
-                            'text': 'Text',
-                            'image': 'Image',
-                            'audio': 'Audio',
-                            'multimodal': 'Multi',
-                            'embedding': 'Embed'
-                        }.get(model_type, f'❓ {model_type.title()}')
-
-                        # Price
-                        price = model.get('price_per_1k_tokens', 0)
-                        price_str = f"${price:.4f}" if price > 0 else "Free"
-
-                        table.add_row(
-                            model.get('model_id', 'Unknown'),
-                            type_emoji,
-                            status,
-                            price_str
-                        )
-
-                    console.print(table)
-
-                    # Show summary
-                    running_count = sum(1 for m in models if m.get('available'))
-                    total_count = len(models)
-                    console.print(f"\n[dim]📊 {running_count}/{total_count} models running[/dim]")
-
-                else:
-                    console.print("[yellow]No models are currently available[/yellow]")
-            else:
-                console.print(f"[red]❌ Error: HTTP {response.status_code}[/red]")
-                console.print(f"[red]{response.text}[/red]")
+            # Create Batch
+            response = httpx.post(
+                f"{config.base_url}/api/v2/external/batches",
+                headers={"Authorization": f"Bearer {config.api_key}"},
+                json={"input_file_id": file_id, "model": model},
+                timeout=30.0
+            )
+            if response.status_code != 200:
+                console.print(f"[red]Batch creation failed: {response.text}[/red]")
                 raise typer.Exit(1)

-
-
-
+            data = response.json()
+            console.print(f"[green]Batch Job Created: {data['id']}[/green]")
+            return
+
+        # 2. Image Analysis
+        if image or image_url or image_dir:
+            if image_dir:
+                console.print("[yellow]Directory processing not yet implemented[/yellow]")
+                return
+
+            console.print(f"[dim]Analyzing image with {model}...[/dim]")
+
+            img_input = image_url if image_url else image
+            payload = {
+                "model_id": model,
+                "input": {
+                    "text": prompt or "Describe this image",
+                    # Simple heuristic: if it looks like a URL, treat as URL, else file/base64 logic
+                    "image_url": img_input
+                },
+                "max_tokens": max_tokens,
+                "stream": not no_stream
+            }
+
+            url = f"{config.base_url}/api/v2/external/sync/"
+            headers = {"Authorization": f"Bearer {config.api_key}"}
+
+            with httpx.Client() as client:
+                response = client.post(url, json=payload, headers=headers, timeout=60.0)
+                if response.status_code != 200:
+                    console.print(f"[red]Error: {response.text}[/red]")
+                    raise typer.Exit(1)

+                result = response.json()
+                console.print(f"[cyan]{result.get('output', '')}[/cyan]")
+                return

-
-
-
-
-
-    raw_output: bool = typer.Option(False, "--raw", help="Return full model response instead of just content"),
-):
-    """Analyze an image with AI vision models"""
-    try:
-        config.get_client()
-
-        # Create request payload for image analysis
-        sync_request = {
-            "model_id": model,
-            "input": {
-                "text": prompt,
-                "image_url": image_url
-            },
-            "max_tokens": 1000,
-            "temperature": 0.7,
-            "raw_output": raw_output
-        }
+        # 3. Text Chat (Prompt)
+        if prompt:
+            # Check if prompt is a file
+            if os.path.exists(prompt):
+                prompt = Path(prompt).read_text()

-
+            console.print(f"[dim]Sending message to {model}...[/dim]")

-
+            payload = {
+                "model_id": model,
+                "input": {"text": prompt},
+                "max_tokens": max_tokens,
+                "stream": not no_stream
+            }

-
-
+            url = f"{config.base_url}/api/v2/external/sync/"
+            headers = {"Authorization": f"Bearer {config.api_key}"}

-
-
-
-
-
-                timeout=60.0
-            )
+            with httpx.Client() as client:
+                response = client.post(url, json=payload, headers=headers, timeout=60.0)
+                if response.status_code != 200:
+                    console.print(f"[red]Error: {response.text}[/red]")
+                    raise typer.Exit(1)

-            if response.status_code == 200:
                 result = response.json()
+                console.print(f"[cyan]{result.get('output', '')}[/cyan]")
+                return

-
-                console.print("[green]✅ Raw Response:[/green]")
-                console.print(json.dumps(result.get('raw_response', result['output']), indent=2))
-            else:
-                console.print("[green]✅ Image Analysis:[/green]")
-                console.print(f"[cyan]{result['output']}[/cyan]")
-
-            elif response.status_code == 503:
-                console.print(f"[red]❌ Vision model {model} is not running.[/red]")
-                raise typer.Exit(1)
-            else:
-                console.print(f"[red]❌ Error: HTTP {response.status_code}[/red]")
-                console.print(f"[red]{response.text}[/red]")
-                raise typer.Exit(1)
+        console.print("[yellow]Please provide input via --prompt, --image, or --batch[/yellow]")

     except Exception as e:
-        console.print(f"[red]❌ Error: {e}[/red]")
+        console.print(f"[red]Error: {e}[/red]")
         raise typer.Exit(1)
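
The rewritten chat command routes all three input modes (prompt, image, batch) through the same `/api/v2/external/sync/` endpoint with a small JSON payload. As a rough illustration of that request shape outside the CLI, here is a minimal sketch; the base URL, API key, and the `output` field of the response are assumptions taken from the diff, not documented guarantees:

# Minimal sketch of the sync inference call that chat_cmd builds above.
# BASE_URL, API_KEY, and the 'output' response field are assumptions from the diff.
import httpx

BASE_URL = "https://example.lyceum.api"  # stand-in for config.base_url
API_KEY = "YOUR_API_KEY"                 # stand-in for config.api_key

payload = {
    "model_id": "gpt-4",
    "input": {"text": "Summarize the release notes in one sentence."},
    "max_tokens": 1000,
    "stream": False,  # chat_cmd derives this from --no-stream
}

with httpx.Client() as client:
    response = client.post(
        f"{BASE_URL}/api/v2/external/sync/",
        json=payload,
        headers={"Authorization": f"Bearer {API_KEY}"},
        timeout=60.0,
    )
    response.raise_for_status()
    # The CLI prints result.get('output', ''); mirror that here.
    print(response.json().get("output", ""))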

lyceum/external/compute/inference/infer.py (new file)
@@ -0,0 +1,101 @@
+"""Inference command group"""
+import typer
+import os
+from rich.console import Console
+import httpx
+from ....shared.config import config
+from .chat import chat_cmd
+from .models import models_cmd
+from .batch import jobs_cmd
+
+console = Console()
+
+infer_app = typer.Typer(name="infer", help="Inference commands")
+
+# Mount sub-apps / commands
+infer_app.command("chat")(chat_cmd)
+infer_app.command("models")(models_cmd)
+infer_app.command("jobs")(jobs_cmd)
+
+@infer_app.command("deploy")
+def deploy_model(
+    hf_model_id: str = typer.Argument(..., help="HuggingFace model ID to deploy"),
+    vllm_config: str = typer.Option(None, "--config", "-c", help="Name of server-side vLLM config to use"),
+    hf_token: str = typer.Option(None, "--hf-token", "-t", help="HuggingFace token to use for deployment"),
+):
+    """Deploy a model for inference"""
+    # Interactive prompt for token if not provided and not in env
+    if hf_token is None and not (os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN")):
+        if typer.confirm("No HF token detected. Do you want to provide one (required for private models)?", default=False):
+            hf_token = typer.prompt("HuggingFace Token", hide_input=True)
+
+    try:
+        config.get_client()
+
+        url = f"{config.base_url}/api/v2/external/inference/deploy"
+        headers = {"Authorization": f"Bearer {config.api_key}"}
+
+        payload = {
+            "hf_model_id": hf_model_id
+        }
+        if vllm_config:
+            payload["vllm_config_name"] = vllm_config
+        if hf_token:
+            payload["hf_token"] = hf_token
+
+        console.print(f"[dim]Deploying model {hf_model_id}...[/dim]")
+
+        with httpx.Client() as client:
+            response = client.post(url, json=payload, headers=headers, timeout=30.0)
+
+        if response.status_code != 200:
+            console.print(f"[red]Error: HTTP {response.status_code}[/red]")
+            console.print(f"[red]{response.text}[/red]")
+            raise typer.Exit(1)
+
+        data = response.json()
+
+        console.print(f"[green]Deployment initiated![/green]")
+        console.print(f"Model ID: [cyan]{data.get('model_id')}[/cyan]")
+        console.print(f"Deployment ID: [cyan]{data.get('deployment_id')}[/cyan]")
+        console.print(f"Status: [yellow]{data.get('status')}[/yellow]")
+        console.print(f"Instance URL: [blue]{data.get('instance_url')}[/blue]")
+        console.print("\n[dim]Use 'lyceum infer models' to check status[/dim]")
+
+    except Exception as e:
+        console.print(f"[red]Error: {e}[/red]")
+        raise typer.Exit(1)
+
+@infer_app.command("spindown")
+def spindown_model(
+    model_id: str = typer.Argument(..., help="Model ID to spin down"),
+):
+    """Spin down a deployed model"""
+    try:
+        config.get_client()
+
+        url = f"{config.base_url}/api/v2/external/inference/spindown"
+        headers = {"Authorization": f"Bearer {config.api_key}"}
+
+        payload = {"model_id": model_id}
+
+        console.print(f"[dim]Spinning down model {model_id}...[/dim]")
+
+        with httpx.Client() as client:
+            response = client.post(url, json=payload, headers=headers, timeout=30.0)
+
+        if response.status_code != 200:
+            console.print(f"[red]Error: HTTP {response.status_code}[/red]")
+            console.print(f"[red]{response.text}[/red]")
+            raise typer.Exit(1)
+
+        data = response.json()
+
+        console.print(f"[green]Spindown initiated![/green]")
+        console.print(f"Model ID: [cyan]{data.get('model_id')}[/cyan]")
+        console.print(f"Status: [yellow]{data.get('status')}[/yellow]")
+        console.print(f"Message: {data.get('message')}")
+
+    except Exception as e:
+        console.print(f"[red]Error: {e}[/red]")
+        raise typer.Exit(1)
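
The new infer.py mostly mounts the existing chat, models, and jobs commands on an `infer` Typer group, plus the deploy and spindown commands shown above. The wiring can be confirmed locally without touching the API using Typer's test runner; the import path below follows the wheel's RECORD and is an assumption about the installed layout:

# Sketch: inspect the commands exposed by the new `infer` group without
# hitting the API. Import path assumes the package layout shown in this diff.
from typer.testing import CliRunner

from lyceum.external.compute.inference.infer import infer_app

runner = CliRunner()
result = runner.invoke(infer_app, ["--help"])
print(result.output)  # expected to list: chat, models, jobs, deploy, spindown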