@draht/pods 2026.3.2-2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/README.md +511 -0
  2. package/dist/cli.d.ts +3 -0
  3. package/dist/cli.d.ts.map +1 -0
  4. package/dist/cli.js +346 -0
  5. package/dist/cli.js.map +1 -0
  6. package/dist/commands/models.d.ts +39 -0
  7. package/dist/commands/models.d.ts.map +1 -0
  8. package/dist/commands/models.js +658 -0
  9. package/dist/commands/models.js.map +1 -0
  10. package/dist/commands/pods.d.ts +21 -0
  11. package/dist/commands/pods.d.ts.map +1 -0
  12. package/dist/commands/pods.js +175 -0
  13. package/dist/commands/pods.js.map +1 -0
  14. package/dist/commands/prompt.d.ts +7 -0
  15. package/dist/commands/prompt.d.ts.map +1 -0
  16. package/dist/commands/prompt.js +54 -0
  17. package/dist/commands/prompt.js.map +1 -0
  18. package/dist/config.d.ts +11 -0
  19. package/dist/config.d.ts.map +1 -0
  20. package/dist/config.js +74 -0
  21. package/dist/config.js.map +1 -0
  22. package/dist/index.d.ts +2 -0
  23. package/dist/index.d.ts.map +1 -0
  24. package/dist/index.js +3 -0
  25. package/dist/index.js.map +1 -0
  26. package/dist/model-configs.d.ts +22 -0
  27. package/dist/model-configs.d.ts.map +1 -0
  28. package/dist/model-configs.js +75 -0
  29. package/dist/model-configs.js.map +1 -0
  30. package/dist/models.json +295 -0
  31. package/dist/scripts/model_run.sh +83 -0
  32. package/dist/scripts/pod_setup.sh +336 -0
  33. package/dist/ssh.d.ts +24 -0
  34. package/dist/ssh.d.ts.map +1 -0
  35. package/dist/ssh.js +115 -0
  36. package/dist/ssh.js.map +1 -0
  37. package/dist/types.d.ts +23 -0
  38. package/dist/types.d.ts.map +1 -0
  39. package/dist/types.js +3 -0
  40. package/dist/types.js.map +1 -0
  41. package/package.json +40 -0
  42. package/scripts/model_run.sh +83 -0
  43. package/scripts/pod_setup.sh +336 -0
package/dist/scripts/pod_setup.sh ADDED
@@ -0,0 +1,336 @@
+ #!/usr/bin/env bash
+ # GPU pod bootstrap for vLLM deployment
+ set -euo pipefail
+
+ # Parse arguments passed from pi CLI
+ MOUNT_COMMAND=""
+ MODELS_PATH=""
+ HF_TOKEN=""
+ PI_API_KEY=""
+ VLLM_VERSION="release" # Default to release
+
+ while [[ $# -gt 0 ]]; do
+     case $1 in
+         --mount)
+             MOUNT_COMMAND="$2"
+             shift 2
+             ;;
+         --models-path)
+             MODELS_PATH="$2"
+             shift 2
+             ;;
+         --hf-token)
+             HF_TOKEN="$2"
+             shift 2
+             ;;
+         --vllm-api-key)
+             PI_API_KEY="$2"
+             shift 2
+             ;;
+         --vllm)
+             VLLM_VERSION="$2"
+             shift 2
+             ;;
+         *)
+             echo "ERROR: Unknown option: $1" >&2
+             exit 1
+             ;;
+     esac
+ done
+
+ # Validate required parameters
+ if [ -z "$HF_TOKEN" ]; then
+     echo "ERROR: HF_TOKEN is required" >&2
+     exit 1
+ fi
+
+ if [ -z "$PI_API_KEY" ]; then
+     echo "ERROR: PI_API_KEY is required" >&2
+     exit 1
+ fi
+
+ if [ -z "$MODELS_PATH" ]; then
+     echo "ERROR: MODELS_PATH is required" >&2
+     exit 1
+ fi
+
+ echo "=== Starting pod setup ==="
+
+ # Install system dependencies
+ apt update -y
+ apt install -y python3-pip python3-venv git build-essential cmake ninja-build curl wget lsb-release htop pkg-config
+
+ # --- Install matching CUDA toolkit -------------------------------------------
+ echo "Checking CUDA driver version..."
+ DRIVER_CUDA_VERSION=$(nvidia-smi | grep "CUDA Version" | awk '{print $9}')
+ echo "Driver supports CUDA: $DRIVER_CUDA_VERSION"
+
+ # Check if nvcc exists and its version
+ if command -v nvcc &> /dev/null; then
+     NVCC_VERSION=$(nvcc --version | grep "release" | awk '{print $6}' | cut -d, -f1)
+     echo "Current nvcc version: $NVCC_VERSION"
+ else
+     NVCC_VERSION="none"
+     echo "nvcc not found"
+ fi
+
+ # Install CUDA toolkit matching driver version if needed
+ if [[ "$NVCC_VERSION" != "$DRIVER_CUDA_VERSION" ]]; then
+     echo "Installing CUDA Toolkit $DRIVER_CUDA_VERSION to match driver..."
+
+     # Detect Ubuntu version
+     UBUNTU_VERSION=$(lsb_release -rs)
+     UBUNTU_CODENAME=$(lsb_release -cs)
+
+     echo "Detected Ubuntu $UBUNTU_VERSION ($UBUNTU_CODENAME)"
+
+     # Map Ubuntu version to NVIDIA repo path
+     if [[ "$UBUNTU_VERSION" == "24.04" ]]; then
+         REPO_PATH="ubuntu2404"
+     elif [[ "$UBUNTU_VERSION" == "22.04" ]]; then
+         REPO_PATH="ubuntu2204"
+     elif [[ "$UBUNTU_VERSION" == "20.04" ]]; then
+         REPO_PATH="ubuntu2004"
+     else
+         echo "Warning: Unsupported Ubuntu version $UBUNTU_VERSION, trying ubuntu2204"
+         REPO_PATH="ubuntu2204"
+     fi
+
+     # Add NVIDIA package repositories
+     wget https://developer.download.nvidia.com/compute/cuda/repos/${REPO_PATH}/x86_64/cuda-keyring_1.1-1_all.deb
+     dpkg -i cuda-keyring_1.1-1_all.deb
+     rm cuda-keyring_1.1-1_all.deb
+     apt-get update
+
+     # Install specific CUDA toolkit version
+     # Convert version format (12.9 -> 12-9)
+     CUDA_VERSION_APT=$(echo $DRIVER_CUDA_VERSION | sed 's/\./-/')
+     echo "Installing cuda-toolkit-${CUDA_VERSION_APT}..."
+     apt-get install -y cuda-toolkit-${CUDA_VERSION_APT}
+
+     # Add CUDA to PATH
+     export PATH=/usr/local/cuda-${DRIVER_CUDA_VERSION}/bin:$PATH
+     export LD_LIBRARY_PATH=/usr/local/cuda-${DRIVER_CUDA_VERSION}/lib64:${LD_LIBRARY_PATH:-}
+
+     # Verify installation
+     nvcc --version
+ else
+     echo "CUDA toolkit $NVCC_VERSION matches driver version"
+     export PATH=/usr/local/cuda-${DRIVER_CUDA_VERSION}/bin:$PATH
+     export LD_LIBRARY_PATH=/usr/local/cuda-${DRIVER_CUDA_VERSION}/lib64:${LD_LIBRARY_PATH:-}
+ fi
+
+ # --- Install uv (fast Python package manager) --------------------------------
+ curl -LsSf https://astral.sh/uv/install.sh | sh
+ export PATH="$HOME/.local/bin:$PATH"
+
+ # --- Install Python 3.12 if not available ------------------------------------
+ if ! command -v python3.12 &> /dev/null; then
+     echo "Python 3.12 not found. Installing via uv..."
+     uv python install 3.12
+ fi
+
+ # --- Clean up existing environments and caches -------------------------------
+ echo "Cleaning up existing environments and caches..."
+
+ # Remove existing venv for a clean installation
+ VENV="$HOME/venv"
+ if [ -d "$VENV" ]; then
+     echo "Removing existing virtual environment..."
+     rm -rf "$VENV"
+ fi
+
+ # Remove uv cache to ensure fresh installs
+ if [ -d "$HOME/.cache/uv" ]; then
+     echo "Clearing uv cache..."
+     rm -rf "$HOME/.cache/uv"
+ fi
+
+ # Remove vLLM cache to avoid conflicts
+ if [ -d "$HOME/.cache/vllm" ]; then
+     echo "Clearing vLLM cache..."
+     rm -rf "$HOME/.cache/vllm"
+ fi
+
+ # --- Create and activate venv ------------------------------------------------
+ echo "Creating fresh virtual environment..."
+ uv venv --python 3.12 --seed "$VENV"
+ source "$VENV/bin/activate"
+
+ # --- Install PyTorch and vLLM ------------------------------------------------
+ echo "Installing vLLM and dependencies (version: $VLLM_VERSION)..."
+ case "$VLLM_VERSION" in
+     release)
+         echo "Installing vLLM release with PyTorch..."
+         # Install vLLM with automatic PyTorch backend selection
+         # vLLM will automatically install the correct PyTorch version
+         uv pip install 'vllm>=0.10.0' --torch-backend=auto || {
+             echo "ERROR: Failed to install vLLM"
+             exit 1
+         }
+         ;;
+     nightly)
+         echo "Installing vLLM nightly with PyTorch..."
+         echo "This will install the latest nightly build of vLLM..."
+
+         # Install vLLM nightly with PyTorch
+         uv pip install -U vllm \
+             --torch-backend=auto \
+             --extra-index-url https://wheels.vllm.ai/nightly || {
+             echo "ERROR: Failed to install vLLM nightly"
+             exit 1
+         }
+
+         echo "vLLM nightly successfully installed!"
+         ;;
+     gpt-oss)
+         echo "Installing GPT-OSS special build with PyTorch nightly..."
+         echo "WARNING: This build is ONLY for GPT-OSS models!"
+         echo "Installing PyTorch nightly and cutting-edge dependencies..."
+
+         # Convert CUDA version format for PyTorch (12.4 -> cu124)
+         PYTORCH_CUDA="cu$(echo $DRIVER_CUDA_VERSION | sed 's/\.//')"
+         echo "Using PyTorch nightly with ${PYTORCH_CUDA} (driver supports ${DRIVER_CUDA_VERSION})"
+
+         # The GPT-OSS build will pull PyTorch nightly and other dependencies
+         # via the extra index URLs. We don't pre-install torch here to avoid conflicts.
+         uv pip install --pre vllm==0.10.1+gptoss \
+             --extra-index-url https://wheels.vllm.ai/gpt-oss/ \
+             --extra-index-url https://download.pytorch.org/whl/nightly/${PYTORCH_CUDA} \
+             --index-strategy unsafe-best-match || {
+             echo "ERROR: Failed to install GPT-OSS vLLM build"
+             echo "This automatically installs PyTorch nightly with ${PYTORCH_CUDA}, Triton nightly, and other dependencies"
+             exit 1
+         }
+
+         # Install gpt-oss library for tool support
+         uv pip install gpt-oss || {
+             echo "WARNING: Failed to install gpt-oss library (needed for tool use)"
+         }
+         ;;
+     *)
+         echo "ERROR: Unknown vLLM version: $VLLM_VERSION"
+         exit 1
+         ;;
+ esac
+
+ # --- Install additional packages ---------------------------------------------
+ echo "Installing additional packages..."
+ # Note: tensorrt removed temporarily due to CUDA 13.0 compatibility issues
+ # TensorRT still depends on deprecated nvidia-cuda-runtime-cu13 package
+ uv pip install huggingface-hub psutil hf_transfer
+
+ # --- FlashInfer installation (optional, improves performance) ----------------
+ echo "Attempting FlashInfer installation (optional)..."
+ if uv pip install flashinfer-python; then
+     echo "FlashInfer installed successfully"
+ else
+     echo "FlashInfer not available, using Flash Attention instead"
+ fi
+
+ # --- Mount storage if provided -----------------------------------------------
+ if [ -n "$MOUNT_COMMAND" ]; then
+     echo "Setting up mount..."
+
+     # Create mount point directory if it doesn't exist
+     mkdir -p "$MODELS_PATH"
+
+     # Execute the mount command
+     eval "$MOUNT_COMMAND" || {
+         echo "WARNING: Mount command failed, continuing without mount"
+     }
+
+     # Verify mount succeeded (optional, may not always be a mount point)
+     if mountpoint -q "$MODELS_PATH" 2>/dev/null; then
+         echo "Storage successfully mounted at $MODELS_PATH"
+     else
+         echo "Note: $MODELS_PATH is not a mount point (might be local storage)"
+     fi
+ fi
+
+ # --- Model storage setup ------------------------------------------------------
+ echo ""
+ echo "=== Setting up model storage ==="
+ echo "Storage path: $MODELS_PATH"
+
+ # Check if the path exists and is writable
+ if [ ! -d "$MODELS_PATH" ]; then
+     echo "Creating model storage directory: $MODELS_PATH"
+     mkdir -p "$MODELS_PATH"
+ fi
+
+ if [ ! -w "$MODELS_PATH" ]; then
+     echo "ERROR: Model storage path is not writable: $MODELS_PATH"
+     echo "Please check permissions"
+     exit 1
+ fi
+
+ # Create the huggingface cache directory structure in the models path
+ mkdir -p "${MODELS_PATH}/huggingface/hub"
+
+ # Remove any existing cache directory or symlink
+ if [ -e ~/.cache/huggingface ] || [ -L ~/.cache/huggingface ]; then
+     echo "Removing existing ~/.cache/huggingface..."
+     rm -rf ~/.cache/huggingface 2>/dev/null || true
+ fi
+
+ # Create parent directory if needed
+ mkdir -p ~/.cache
+
+ # Create symlink from ~/.cache/huggingface to the models path
+ ln -s "${MODELS_PATH}/huggingface" ~/.cache/huggingface
+ echo "Created symlink: ~/.cache/huggingface -> ${MODELS_PATH}/huggingface"
+
+ # Verify the symlink works
+ if [ -d ~/.cache/huggingface/hub ]; then
+     echo "✓ Model storage configured successfully"
+
+     # Check available space
+     AVAILABLE_SPACE=$(df -h "$MODELS_PATH" | awk 'NR==2 {print $4}')
+     echo "Available space: $AVAILABLE_SPACE"
+ else
+     echo "ERROR: Could not verify model storage setup"
+     echo "The symlink was created but the target directory is not accessible"
+     exit 1
+ fi
+
+ # --- Configure environment ----------------------------------------------------
+ mkdir -p ~/.config/vllm
+ touch ~/.config/vllm/do_not_track
+
+ # Write environment to .bashrc for persistence
+ cat >> ~/.bashrc << EOF
+
+ # Pi vLLM environment
+ [ -d "\$HOME/venv" ] && source "\$HOME/venv/bin/activate"
+ export PATH="/usr/local/cuda-${DRIVER_CUDA_VERSION}/bin:\$HOME/.local/bin:\$PATH"
+ export LD_LIBRARY_PATH="/usr/local/cuda-${DRIVER_CUDA_VERSION}/lib64:\${LD_LIBRARY_PATH:-}"
+ export HF_TOKEN="${HF_TOKEN}"
+ export PI_API_KEY="${PI_API_KEY}"
+ export HUGGING_FACE_HUB_TOKEN="${HF_TOKEN}"
+ export HF_HUB_ENABLE_HF_TRANSFER=1
+ export VLLM_NO_USAGE_STATS=1
+ export VLLM_DO_NOT_TRACK=1
+ export VLLM_ALLOW_LONG_MAX_MODEL_LEN=1
+ export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
+ EOF
+
+ # Create log directory for vLLM
+ mkdir -p ~/.vllm_logs
+
+ # --- Output GPU info for pi CLI to parse -------------------------------------
+ echo ""
+ echo "===GPU_INFO_START==="
+ nvidia-smi --query-gpu=index,name,memory.total --format=csv,noheader | while IFS=, read -r id name memory; do
+     # Trim whitespace
+     id=$(echo "$id" | xargs)
+     name=$(echo "$name" | xargs)
+     memory=$(echo "$memory" | xargs)
+     echo "{\"id\": $id, \"name\": \"$name\", \"memory\": \"$memory\"}"
+ done
+ echo "===GPU_INFO_END==="
+
+ echo ""
+ echo "=== Setup complete ==="
+ echo "Pod is ready for vLLM deployments"
+ echo "Models will be cached at: $MODELS_PATH"
package/dist/ssh.d.ts ADDED
@@ -0,0 +1,24 @@
+ export interface SSHResult {
+     stdout: string;
+     stderr: string;
+     exitCode: number;
+ }
+ /**
+  * Execute an SSH command and return the result
+  */
+ export declare const sshExec: (sshCmd: string, command: string, options?: {
+     keepAlive?: boolean | undefined;
+ } | undefined) => Promise<SSHResult>;
+ /**
+  * Execute an SSH command with streaming output to console
+  */
+ export declare const sshExecStream: (sshCmd: string, command: string, options?: {
+     silent?: boolean | undefined;
+     forceTTY?: boolean | undefined;
+     keepAlive?: boolean | undefined;
+ } | undefined) => Promise<number>;
+ /**
+  * Copy a file to remote via SCP
+  */
+ export declare const scpFile: (sshCmd: string, localPath: string, remotePath: string) => Promise<boolean>;
+ //# sourceMappingURL=ssh.d.ts.map
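
A quick usage sketch against these declarations; the host and command are illustrative, and the import path assumes a caller sitting next to dist/ssh.js:

// Run a one-off remote command and inspect the captured result
import { sshExec } from "./ssh.js";

const result = await sshExec("ssh root@1.2.3.4", "nvidia-smi -L");
if (result.exitCode === 0) {
    console.log(result.stdout.trim());
} else {
    console.error(`remote command failed (${result.exitCode}): ${result.stderr}`);
}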
package/dist/ssh.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"ssh.d.ts","sourceRoot":"","sources":["../src/ssh.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,SAAS;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;CACjB;AAED;;GAEG;AACH,eAAO,MAAM,OAAO;;oCAmDnB,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,aAAa;;;;iCAwCzB,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,OAAO,6EAsCnB,CAAC","sourcesContent":["import { type SpawnOptions, spawn } from \"child_process\";\n\nexport interface SSHResult {\n\tstdout: string;\n\tstderr: string;\n\texitCode: number;\n}\n\n/**\n * Execute an SSH command and return the result\n */\nexport const sshExec = async (\n\tsshCmd: string,\n\tcommand: string,\n\toptions?: { keepAlive?: boolean },\n): Promise<SSHResult> => {\n\treturn new Promise((resolve) => {\n\t\t// Parse SSH command (e.g., \"ssh root@1.2.3.4\" or \"ssh -p 22 root@1.2.3.4\")\n\t\tconst sshParts = sshCmd.split(\" \").filter((p) => p);\n\t\tconst sshBinary = sshParts[0];\n\t\tlet sshArgs = [...sshParts.slice(1)];\n\n\t\t// Add SSH keepalive options for long-running commands\n\t\tif (options?.keepAlive) {\n\t\t\t// ServerAliveInterval=30 sends keepalive every 30 seconds\n\t\t\t// ServerAliveCountMax=120 allows up to 120 failures (60 minutes total)\n\t\t\tsshArgs = [\"-o\", \"ServerAliveInterval=30\", \"-o\", \"ServerAliveCountMax=120\", ...sshArgs];\n\t\t}\n\n\t\tsshArgs.push(command);\n\n\t\tconst proc = spawn(sshBinary, sshArgs, {\n\t\t\tstdio: [\"ignore\", \"pipe\", \"pipe\"],\n\t\t});\n\n\t\tlet stdout = \"\";\n\t\tlet stderr = \"\";\n\n\t\tproc.stdout.on(\"data\", (data) => {\n\t\t\tstdout += data.toString();\n\t\t});\n\n\t\tproc.stderr.on(\"data\", (data) => {\n\t\t\tstderr += data.toString();\n\t\t});\n\n\t\tproc.on(\"close\", (code) => {\n\t\t\tresolve({\n\t\t\t\tstdout,\n\t\t\t\tstderr,\n\t\t\t\texitCode: code || 0,\n\t\t\t});\n\t\t});\n\n\t\tproc.on(\"error\", (err) => {\n\t\t\tresolve({\n\t\t\t\tstdout,\n\t\t\t\tstderr: err.message,\n\t\t\t\texitCode: 1,\n\t\t\t});\n\t\t});\n\t});\n};\n\n/**\n * Execute an SSH command with streaming output to console\n */\nexport const sshExecStream = async (\n\tsshCmd: string,\n\tcommand: string,\n\toptions?: { silent?: boolean; forceTTY?: boolean; keepAlive?: boolean },\n): Promise<number> => {\n\treturn new Promise((resolve) => {\n\t\tconst sshParts = sshCmd.split(\" \").filter((p) => p);\n\t\tconst sshBinary = sshParts[0];\n\n\t\t// Build SSH args\n\t\tlet sshArgs = [...sshParts.slice(1)];\n\n\t\t// Add -t flag if requested and not already present\n\t\tif (options?.forceTTY && !sshParts.includes(\"-t\")) {\n\t\t\tsshArgs = [\"-t\", ...sshArgs];\n\t\t}\n\n\t\t// Add SSH keepalive options for long-running commands\n\t\tif (options?.keepAlive) {\n\t\t\t// ServerAliveInterval=30 sends keepalive every 30 seconds\n\t\t\t// ServerAliveCountMax=120 allows up to 120 failures (60 minutes total)\n\t\t\tsshArgs = [\"-o\", \"ServerAliveInterval=30\", \"-o\", \"ServerAliveCountMax=120\", ...sshArgs];\n\t\t}\n\n\t\tsshArgs.push(command);\n\n\t\tconst spawnOptions: SpawnOptions = options?.silent\n\t\t\t? { stdio: [\"ignore\", \"ignore\", \"ignore\"] }\n\t\t\t: { stdio: \"inherit\" };\n\n\t\tconst proc = spawn(sshBinary, sshArgs, spawnOptions);\n\n\t\tproc.on(\"close\", (code) => {\n\t\t\tresolve(code || 0);\n\t\t});\n\n\t\tproc.on(\"error\", () => {\n\t\t\tresolve(1);\n\t\t});\n\t});\n};\n\n/**\n * Copy a file to remote via SCP\n */\nexport const scpFile = async (sshCmd: string, localPath: string, remotePath: string): Promise<boolean> => {\n\t// Extract host from SSH command\n\tconst sshParts = sshCmd.split(\" \").filter((p) => p);\n\tlet host = \"\";\n\tlet port = \"22\";\n\tlet i = 1; // Skip 'ssh'\n\n\twhile (i < sshParts.length) {\n\t\tif (sshParts[i] === \"-p\" && i + 1 < sshParts.length) {\n\t\t\tport = sshParts[i + 1];\n\t\t\ti += 2;\n\t\t} else if (!sshParts[i].startsWith(\"-\")) {\n\t\t\thost = sshParts[i];\n\t\t\tbreak;\n\t\t} else {\n\t\t\ti++;\n\t\t}\n\t}\n\n\tif (!host) {\n\t\tconsole.error(\"Could not parse host from SSH command\");\n\t\treturn false;\n\t}\n\n\t// Build SCP command\n\tconst scpArgs = [\"-P\", port, localPath, `${host}:${remotePath}`];\n\n\treturn new Promise((resolve) => {\n\t\tconst proc = spawn(\"scp\", scpArgs, { stdio: \"inherit\" });\n\n\t\tproc.on(\"close\", (code) => {\n\t\t\tresolve(code === 0);\n\t\t});\n\n\t\tproc.on(\"error\", () => {\n\t\t\tresolve(false);\n\t\t});\n\t});\n};\n"]}
package/dist/ssh.js ADDED
@@ -0,0 +1,115 @@
+ import { spawn } from "child_process";
+ /**
+  * Execute an SSH command and return the result
+  */
+ export const sshExec = async (sshCmd, command, options) => {
+     return new Promise((resolve) => {
+         // Parse SSH command (e.g., "ssh root@1.2.3.4" or "ssh -p 22 root@1.2.3.4")
+         const sshParts = sshCmd.split(" ").filter((p) => p);
+         const sshBinary = sshParts[0];
+         let sshArgs = [...sshParts.slice(1)];
+         // Add SSH keepalive options for long-running commands
+         if (options?.keepAlive) {
+             // ServerAliveInterval=30 sends keepalive every 30 seconds
+             // ServerAliveCountMax=120 allows up to 120 failures (60 minutes total)
+             sshArgs = ["-o", "ServerAliveInterval=30", "-o", "ServerAliveCountMax=120", ...sshArgs];
+         }
+         sshArgs.push(command);
+         const proc = spawn(sshBinary, sshArgs, {
+             stdio: ["ignore", "pipe", "pipe"],
+         });
+         let stdout = "";
+         let stderr = "";
+         proc.stdout.on("data", (data) => {
+             stdout += data.toString();
+         });
+         proc.stderr.on("data", (data) => {
+             stderr += data.toString();
+         });
+         proc.on("close", (code) => {
+             resolve({
+                 stdout,
+                 stderr,
+                 exitCode: code || 0,
+             });
+         });
+         proc.on("error", (err) => {
+             resolve({
+                 stdout,
+                 stderr: err.message,
+                 exitCode: 1,
+             });
+         });
+     });
+ };
+ /**
+  * Execute an SSH command with streaming output to console
+  */
+ export const sshExecStream = async (sshCmd, command, options) => {
+     return new Promise((resolve) => {
+         const sshParts = sshCmd.split(" ").filter((p) => p);
+         const sshBinary = sshParts[0];
+         // Build SSH args
+         let sshArgs = [...sshParts.slice(1)];
+         // Add -t flag if requested and not already present
+         if (options?.forceTTY && !sshParts.includes("-t")) {
+             sshArgs = ["-t", ...sshArgs];
+         }
+         // Add SSH keepalive options for long-running commands
+         if (options?.keepAlive) {
+             // ServerAliveInterval=30 sends keepalive every 30 seconds
+             // ServerAliveCountMax=120 allows up to 120 failures (60 minutes total)
+             sshArgs = ["-o", "ServerAliveInterval=30", "-o", "ServerAliveCountMax=120", ...sshArgs];
+         }
+         sshArgs.push(command);
+         const spawnOptions = options?.silent
+             ? { stdio: ["ignore", "ignore", "ignore"] }
+             : { stdio: "inherit" };
+         const proc = spawn(sshBinary, sshArgs, spawnOptions);
+         proc.on("close", (code) => {
+             resolve(code || 0);
+         });
+         proc.on("error", () => {
+             resolve(1);
+         });
+     });
+ };
+ /**
+  * Copy a file to remote via SCP
+  */
+ export const scpFile = async (sshCmd, localPath, remotePath) => {
+     // Extract host from SSH command
+     const sshParts = sshCmd.split(" ").filter((p) => p);
+     let host = "";
+     let port = "22";
+     let i = 1; // Skip 'ssh'
+     while (i < sshParts.length) {
+         if (sshParts[i] === "-p" && i + 1 < sshParts.length) {
+             port = sshParts[i + 1];
+             i += 2;
+         }
+         else if (!sshParts[i].startsWith("-")) {
+             host = sshParts[i];
+             break;
+         }
+         else {
+             i++;
+         }
+     }
+     if (!host) {
+         console.error("Could not parse host from SSH command");
+         return false;
+     }
+     // Build SCP command
+     const scpArgs = ["-P", port, localPath, `${host}:${remotePath}`];
+     return new Promise((resolve) => {
+         const proc = spawn("scp", scpArgs, { stdio: "inherit" });
+         proc.on("close", (code) => {
+             resolve(code === 0);
+         });
+         proc.on("error", () => {
+             resolve(false);
+         });
+     });
+ };
+ //# sourceMappingURL=ssh.js.map
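
The streaming variant inherits stdio rather than capturing it; a sketch with illustrative values (the log file name is made up, though pod_setup.sh does create ~/.vllm_logs):

// Stream a long-running remote command straight to the local terminal
import { sshExecStream } from "./ssh.js";

const exitCode = await sshExecStream("ssh -p 22 root@1.2.3.4", "tail -f ~/.vllm_logs/llama-8b.log", {
    forceTTY: true,  // allocate a TTY so the remote side keeps colors and line handling
    keepAlive: true, // send keepalives while the tail sits idle
});
console.log(`stream ended with exit code ${exitCode}`);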
package/dist/ssh.js.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"ssh.js","sourceRoot":"","sources":["../src/ssh.ts"],"names":[],"mappings":"AAAA,OAAO,EAAqB,KAAK,EAAE,MAAM,eAAe,CAAC;AAQzD;;GAEG;AACH,MAAM,CAAC,MAAM,OAAO,GAAG,KAAK,EAC3B,MAAc,EACd,OAAe,EACf,OAAiC,EACZ,EAAE,CAAC;IACxB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC;QAC/B,2EAA2E;QAC3E,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;QACpD,MAAM,SAAS,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;QAC9B,IAAI,OAAO,GAAG,CAAC,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QAErC,sDAAsD;QACtD,IAAI,OAAO,EAAE,SAAS,EAAE,CAAC;YACxB,0DAA0D;YAC1D,uEAAuE;YACvE,OAAO,GAAG,CAAC,IAAI,EAAE,wBAAwB,EAAE,IAAI,EAAE,yBAAyB,EAAE,GAAG,OAAO,CAAC,CAAC;QACzF,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAEtB,MAAM,IAAI,GAAG,KAAK,CAAC,SAAS,EAAE,OAAO,EAAE;YACtC,KAAK,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,MAAM,CAAC;SACjC,CAAC,CAAC;QAEH,IAAI,MAAM,GAAG,EAAE,CAAC;QAChB,IAAI,MAAM,GAAG,EAAE,CAAC;QAEhB,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC;YAChC,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;QAAA,CAC1B,CAAC,CAAC;QAEH,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC;YAChC,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;QAAA,CAC1B,CAAC,CAAC;QAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC;YAC1B,OAAO,CAAC;gBACP,MAAM;gBACN,MAAM;gBACN,QAAQ,EAAE,IAAI,IAAI,CAAC;aACnB,CAAC,CAAC;QAAA,CACH,CAAC,CAAC;QAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC;YACzB,OAAO,CAAC;gBACP,MAAM;gBACN,MAAM,EAAE,GAAG,CAAC,OAAO;gBACnB,QAAQ,EAAE,CAAC;aACX,CAAC,CAAC;QAAA,CACH,CAAC,CAAC;IAAA,CACH,CAAC,CAAC;AAAA,CACH,CAAC;AAEF;;GAEG;AACH,MAAM,CAAC,MAAM,aAAa,GAAG,KAAK,EACjC,MAAc,EACd,OAAe,EACf,OAAuE,EACrD,EAAE,CAAC;IACrB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC;QAC/B,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;QACpD,MAAM,SAAS,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;QAE9B,iBAAiB;QACjB,IAAI,OAAO,GAAG,CAAC,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QAErC,mDAAmD;QACnD,IAAI,OAAO,EAAE,QAAQ,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YACnD,OAAO,GAAG,CAAC,IAAI,EAAE,GAAG,OAAO,CAAC,CAAC;QAC9B,CAAC;QAED,sDAAsD;QACtD,IAAI,OAAO,EAAE,SAAS,EAAE,CAAC;YACxB,0DAA0D;YAC1D,uEAAuE;YACvE,OAAO,GAAG,CAAC,IAAI,EAAE,wBAAwB,EAAE,IAAI,EAAE,yBAAyB,EAAE,GAAG,OAAO,CAAC,CAAC;QACzF,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAEtB,MAAM,YAAY,GAAiB,OAAO,EAAE,MAAM;YACjD,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,QAAQ,EAAE,QAAQ,EAAE,QAAQ,CAAC,EAAE;YAC3C,CAAC,CAAC,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC;QAExB,MAAM,IAAI,GAAG,KAAK,CAAC,SAAS,EAAE,OAAO,EAAE,YAAY,CAAC,CAAC;QAErD,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC;YAC1B,OAAO,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC;QAAA,CACnB,CAAC,CAAC;QAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE,CAAC;YACtB,OAAO,CAAC,CAAC,CAAC,CAAC;QAAA,CACX,CAAC,CAAC;IAAA,CACH,CAAC,CAAC;AAAA,CACH,CAAC;AAEF;;GAEG;AACH,MAAM,CAAC,MAAM,OAAO,GAAG,KAAK,EAAE,MAAc,EAAE,SAAiB,EAAE,UAAkB,EAAoB,EAAE,CAAC;IACzG,gCAAgC;IAChC,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;IACpD,IAAI,IAAI,GAAG,EAAE,CAAC;IACd,IAAI,IAAI,GAAG,IAAI,CAAC;IAChB,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,aAAa;IAExB,OAAO,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC;QAC5B,IAAI,QAAQ,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC;YACrD,IAAI,GAAG,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YACvB,CAAC,IAAI,CAAC,CAAC;QACR,CAAC;aAAM,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YACzC,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;YACnB,MAAM;QACP,CAAC;aAAM,CAAC;YACP,CAAC,EAAE,CAAC;QACL,CAAC;IACF,CAAC;IAED,IAAI,CAAC,IAAI,EAAE,CAAC;QACX,OAAO,CAAC,KAAK,CAAC,uCAAuC,CAAC,CAAC;QACvD,OAAO,KAAK,CAAC;IACd,CAAC;IAED,oBAAoB;IACpB,MAAM,OAAO,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,GAAG,IAAI,IAAI,UAAU,EAAE,CAAC,CAAC;IAEjE,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC;QAC/B,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC,CAAC;QAEzD,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC;YAC1B,OAAO,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC;QAAA,CACpB,CAAC,CAAC;QAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE,CAAC;YACtB,OAAO,CAAC,KAAK,CAAC,CAAC;QAAA,CACf,CAAC,CAAC;IAAA,CACH,CAAC,CAAC;AAAA,CACH,CAAC","sourcesContent":["import { type SpawnOptions, spawn } from \"child_process\";\n\nexport interface SSHResult {\n\tstdout: string;\n\tstderr: string;\n\texitCode: number;\n}\n\n/**\n * Execute an SSH command and return the result\n */\nexport const sshExec = async (\n\tsshCmd: string,\n\tcommand: string,\n\toptions?: { keepAlive?: boolean },\n): Promise<SSHResult> => {\n\treturn new Promise((resolve) => {\n\t\t// Parse SSH command (e.g., \"ssh root@1.2.3.4\" or \"ssh -p 22 root@1.2.3.4\")\n\t\tconst sshParts = sshCmd.split(\" \").filter((p) => p);\n\t\tconst sshBinary = sshParts[0];\n\t\tlet sshArgs = [...sshParts.slice(1)];\n\n\t\t// Add SSH keepalive options for long-running commands\n\t\tif (options?.keepAlive) {\n\t\t\t// ServerAliveInterval=30 sends keepalive every 30 seconds\n\t\t\t// ServerAliveCountMax=120 allows up to 120 failures (60 minutes total)\n\t\t\tsshArgs = [\"-o\", \"ServerAliveInterval=30\", \"-o\", \"ServerAliveCountMax=120\", ...sshArgs];\n\t\t}\n\n\t\tsshArgs.push(command);\n\n\t\tconst proc = spawn(sshBinary, sshArgs, {\n\t\t\tstdio: [\"ignore\", \"pipe\", \"pipe\"],\n\t\t});\n\n\t\tlet stdout = \"\";\n\t\tlet stderr = \"\";\n\n\t\tproc.stdout.on(\"data\", (data) => {\n\t\t\tstdout += data.toString();\n\t\t});\n\n\t\tproc.stderr.on(\"data\", (data) => {\n\t\t\tstderr += data.toString();\n\t\t});\n\n\t\tproc.on(\"close\", (code) => {\n\t\t\tresolve({\n\t\t\t\tstdout,\n\t\t\t\tstderr,\n\t\t\t\texitCode: code || 0,\n\t\t\t});\n\t\t});\n\n\t\tproc.on(\"error\", (err) => {\n\t\t\tresolve({\n\t\t\t\tstdout,\n\t\t\t\tstderr: err.message,\n\t\t\t\texitCode: 1,\n\t\t\t});\n\t\t});\n\t});\n};\n\n/**\n * Execute an SSH command with streaming output to console\n */\nexport const sshExecStream = async (\n\tsshCmd: string,\n\tcommand: string,\n\toptions?: { silent?: boolean; forceTTY?: boolean; keepAlive?: boolean },\n): Promise<number> => {\n\treturn new Promise((resolve) => {\n\t\tconst sshParts = sshCmd.split(\" \").filter((p) => p);\n\t\tconst sshBinary = sshParts[0];\n\n\t\t// Build SSH args\n\t\tlet sshArgs = [...sshParts.slice(1)];\n\n\t\t// Add -t flag if requested and not already present\n\t\tif (options?.forceTTY && !sshParts.includes(\"-t\")) {\n\t\t\tsshArgs = [\"-t\", ...sshArgs];\n\t\t}\n\n\t\t// Add SSH keepalive options for long-running commands\n\t\tif (options?.keepAlive) {\n\t\t\t// ServerAliveInterval=30 sends keepalive every 30 seconds\n\t\t\t// ServerAliveCountMax=120 allows up to 120 failures (60 minutes total)\n\t\t\tsshArgs = [\"-o\", \"ServerAliveInterval=30\", \"-o\", \"ServerAliveCountMax=120\", ...sshArgs];\n\t\t}\n\n\t\tsshArgs.push(command);\n\n\t\tconst spawnOptions: SpawnOptions = options?.silent\n\t\t\t? { stdio: [\"ignore\", \"ignore\", \"ignore\"] }\n\t\t\t: { stdio: \"inherit\" };\n\n\t\tconst proc = spawn(sshBinary, sshArgs, spawnOptions);\n\n\t\tproc.on(\"close\", (code) => {\n\t\t\tresolve(code || 0);\n\t\t});\n\n\t\tproc.on(\"error\", () => {\n\t\t\tresolve(1);\n\t\t});\n\t});\n};\n\n/**\n * Copy a file to remote via SCP\n */\nexport const scpFile = async (sshCmd: string, localPath: string, remotePath: string): Promise<boolean> => {\n\t// Extract host from SSH command\n\tconst sshParts = sshCmd.split(\" \").filter((p) => p);\n\tlet host = \"\";\n\tlet port = \"22\";\n\tlet i = 1; // Skip 'ssh'\n\n\twhile (i < sshParts.length) {\n\t\tif (sshParts[i] === \"-p\" && i + 1 < sshParts.length) {\n\t\t\tport = sshParts[i + 1];\n\t\t\ti += 2;\n\t\t} else if (!sshParts[i].startsWith(\"-\")) {\n\t\t\thost = sshParts[i];\n\t\t\tbreak;\n\t\t} else {\n\t\t\ti++;\n\t\t}\n\t}\n\n\tif (!host) {\n\t\tconsole.error(\"Could not parse host from SSH command\");\n\t\treturn false;\n\t}\n\n\t// Build SCP command\n\tconst scpArgs = [\"-P\", port, localPath, `${host}:${remotePath}`];\n\n\treturn new Promise((resolve) => {\n\t\tconst proc = spawn(\"scp\", scpArgs, { stdio: \"inherit\" });\n\n\t\tproc.on(\"close\", (code) => {\n\t\t\tresolve(code === 0);\n\t\t});\n\n\t\tproc.on(\"error\", () => {\n\t\t\tresolve(false);\n\t\t});\n\t});\n};\n"]}
package/dist/types.d.ts ADDED
@@ -0,0 +1,23 @@
+ export interface GPU {
+     id: number;
+     name: string;
+     memory: string;
+ }
+ export interface Model {
+     model: string;
+     port: number;
+     gpu: number[];
+     pid: number;
+ }
+ export interface Pod {
+     ssh: string;
+     gpus: GPU[];
+     models: Record<string, Model>;
+     modelsPath?: string;
+     vllmVersion?: "release" | "nightly" | "gpt-oss";
+ }
+ export interface Config {
+     pods: Record<string, Pod>;
+     active?: string;
+ }
+ //# sourceMappingURL=types.d.ts.map
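
For concreteness, an illustrative Config value that satisfies these interfaces (all names and numbers are made up; the memory string mirrors the nvidia-smi CSV output that pod_setup.sh parses):

import type { Config } from "./types.js";

const config: Config = {
    active: "my-pod",
    pods: {
        "my-pod": {
            ssh: "ssh -p 22 root@1.2.3.4",
            gpus: [{ id: 0, name: "NVIDIA H100 80GB HBM3", memory: "81559 MiB" }],
            models: {
                "llama-8b": {
                    model: "meta-llama/Llama-3.1-8B-Instruct",
                    port: 8000,
                    gpu: [0], // GPU IDs; multiple entries mean a multi-GPU deployment
                    pid: 12345,
                },
            },
            modelsPath: "/workspace/models",
            vllmVersion: "release",
        },
    },
};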
package/dist/types.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,GAAG;IACnB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,KAAK;IACrB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,EAAE,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,GAAG;IACnB,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,GAAG,EAAE,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,SAAS,GAAG,SAAS,GAAG,SAAS,CAAC;CAChD;AAED,MAAM,WAAW,MAAM;IACtB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC1B,MAAM,CAAC,EAAE,MAAM,CAAC;CAChB","sourcesContent":["// Core type definitions for pi\n\nexport interface GPU {\n\tid: number;\n\tname: string;\n\tmemory: string;\n}\n\nexport interface Model {\n\tmodel: string;\n\tport: number;\n\tgpu: number[]; // Array of GPU IDs for multi-GPU deployment\n\tpid: number;\n}\n\nexport interface Pod {\n\tssh: string;\n\tgpus: GPU[];\n\tmodels: Record<string, Model>;\n\tmodelsPath?: string;\n\tvllmVersion?: \"release\" | \"nightly\" | \"gpt-oss\"; // Track which vLLM version is installed\n}\n\nexport interface Config {\n\tpods: Record<string, Pod>;\n\tactive?: string;\n}\n"]}
package/dist/types.js ADDED
@@ -0,0 +1,3 @@
+ // Core type definitions for pi
+ export {};
+ //# sourceMappingURL=types.js.map
package/dist/types.js.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,+BAA+B","sourcesContent":["// Core type definitions for pi\n\nexport interface GPU {\n\tid: number;\n\tname: string;\n\tmemory: string;\n}\n\nexport interface Model {\n\tmodel: string;\n\tport: number;\n\tgpu: number[]; // Array of GPU IDs for multi-GPU deployment\n\tpid: number;\n}\n\nexport interface Pod {\n\tssh: string;\n\tgpus: GPU[];\n\tmodels: Record<string, Model>;\n\tmodelsPath?: string;\n\tvllmVersion?: \"release\" | \"nightly\" | \"gpt-oss\"; // Track which vLLM version is installed\n}\n\nexport interface Config {\n\tpods: Record<string, Pod>;\n\tactive?: string;\n}\n"]}
package/package.json ADDED
@@ -0,0 +1,40 @@
+ {
+   "name": "@draht/pods",
+   "version": "2026.3.2-2",
+   "description": "CLI tool for managing vLLM deployments on GPU pods",
+   "type": "module",
+   "bin": {
+     "draht-pods": "dist/cli.js"
+   },
+   "scripts": {
+     "clean": "rm -rf dist",
+     "build": "tsgo -p tsconfig.build.json && cp src/models.json dist/ && cp -r scripts dist/",
+     "prepublishOnly": "bun run clean && bun run build"
+   },
+   "files": [
+     "dist",
+     "scripts"
+   ],
+   "keywords": [
+     "llm",
+     "vllm",
+     "gpu",
+     "ai",
+     "cli"
+   ],
+   "author": "Mario Zechner",
+   "license": "MIT",
+   "repository": {
+     "type": "git",
+     "url": "git+https://github.com/badlogic/pi-mono.git",
+     "directory": "packages/pods"
+   },
+   "engines": {
+     "node": ">=20.0.0"
+   },
+   "dependencies": {
+     "@draht/agent-core": "2026.3.1-7",
+     "chalk": "^5.5.0"
+   },
+   "devDependencies": {}
+ }
package/scripts/model_run.sh ADDED
@@ -0,0 +1,83 @@
+ #!/usr/bin/env bash
+ # Model runner script - runs sequentially, killed by pi stop
+ set -euo pipefail
+
+ # These values are replaced before upload by pi CLI
+ MODEL_ID="{{MODEL_ID}}"
+ NAME="{{NAME}}"
+ PORT="{{PORT}}"
+ VLLM_ARGS="{{VLLM_ARGS}}"
+
+ # Trap to ensure cleanup on exit and kill any child processes
+ cleanup() {
+     local exit_code=$?
+     echo "Model runner exiting with code $exit_code"
+     # Kill any child processes
+     pkill -P $$ 2>/dev/null || true
+     exit $exit_code
+ }
+ trap cleanup EXIT TERM INT
+
+ # Force colored output even when not a TTY
+ export FORCE_COLOR=1
+ export PYTHONUNBUFFERED=1
+ export TERM=xterm-256color
+ export RICH_FORCE_TERMINAL=1
+ export CLICOLOR_FORCE=1
+
+ # Source virtual environment
+ source /root/venv/bin/activate
+
+ echo "========================================="
+ echo "Model Run: $NAME"
+ echo "Model ID: $MODEL_ID"
+ echo "Port: $PORT"
+ if [ -n "$VLLM_ARGS" ]; then
+     echo "vLLM Args: $VLLM_ARGS"
+ fi
+ echo "========================================="
+ echo ""
+
+ # Download model (with color progress bars)
+ echo "Downloading model (will skip if cached)..."
+ HF_HUB_ENABLE_HF_TRANSFER=1 hf download "$MODEL_ID" || DOWNLOAD_STATUS=$?
+
+ if [ "${DOWNLOAD_STATUS:-0}" -ne 0 ]; then
+     echo "❌ ERROR: Failed to download model" >&2
+     exit 1
+ fi
+
+ echo ""
+ echo "✅ Model download complete"
+ echo ""
+
+ # Build vLLM command
+ VLLM_CMD="vllm serve '$MODEL_ID' --port $PORT --api-key '$PI_API_KEY'"
+ if [ -n "$VLLM_ARGS" ]; then
+     VLLM_CMD="$VLLM_CMD $VLLM_ARGS"
+ fi
+
+ echo "Starting vLLM server..."
+ echo "Command: $VLLM_CMD"
+ echo "========================================="
+ echo ""
+
+ # Run vLLM in background so we can monitor it
+ echo "Starting vLLM process..."
+ bash -c "$VLLM_CMD" &
+ VLLM_PID=$!
+
+ # Monitor the vLLM process
+ echo "Monitoring vLLM process (PID: $VLLM_PID)..."
+ VLLM_EXIT_CODE=0
+ wait $VLLM_PID || VLLM_EXIT_CODE=$?
+
+ if [ $VLLM_EXIT_CODE -ne 0 ]; then
+     echo "❌ ERROR: vLLM exited with code $VLLM_EXIT_CODE" >&2
+     # Make sure to exit the script command too
+     kill -TERM $$ 2>/dev/null || true
+     exit $VLLM_EXIT_CODE
+ fi
+
+ echo "✅ vLLM exited normally"
+ exit 0
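
The {{...}} tokens above are placeholders; per the script's header comment, the pi CLI substitutes them before upload. A minimal sketch of what that substitution could look like (the values and the exact mechanism are assumptions, not taken from the package):

// Hypothetical rendering of model_run.sh before upload (values illustrative)
import { readFileSync } from "node:fs";

const rendered = readFileSync("scripts/model_run.sh", "utf8")
    .replace("{{MODEL_ID}}", "meta-llama/Llama-3.1-8B-Instruct")
    .replace("{{NAME}}", "llama-8b")
    .replace("{{PORT}}", "8000")
    .replace("{{VLLM_ARGS}}", "--max-model-len 32768");
// rendered can now be written to a temp file and copied to the pod, e.g. via scpFile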