llmboost-hub 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,303 @@
1
+ import click
2
+ import subprocess
3
+ import time
4
+ from typing import Optional
5
+ import socket
6
+
7
+ from llmboost_hub.commands.run import do_run
8
+ from llmboost_hub.utils.container_utils import (
9
+ container_name_for_model,
10
+ is_container_running,
11
+ is_model_initializing,
12
+ is_model_ready2serve,
13
+ )
14
+ from llmboost_hub.utils import gpu_info
15
+ from llmboost_hub.commands.stop import do_stop
16
+ from llmboost_hub.commands.completions import complete_model_names
17
+ from llmboost_hub.utils.config import config
18
+
19
+
20
+ def _collect_error_logs(cname: str, max_lines: int = 200) -> str:
21
+ """
22
+ Return recent error lines from inside the container for diagnosis.
23
+
24
+ Strategy:
25
+ - First: grep -i 'error' across `worker*.log` in `config.LLMBOOST_LOGS_DIR` and tail the last `max_lines` lines.
26
+ - Fallback: tail any `*.log` in `config.LLMBOOST_LOGS_DIR`.
27
+
28
+ Args:
29
+ cname: Container name.
30
+ max_lines: Maximum lines to return.
31
+
32
+ Returns:
33
+ Joined log lines, or empty string when unavailable.
34
+ """
35
+ try:
36
+ logs_dir = config.LLMBOOST_LOGS_DIR
37
+ grep_cmd = [
38
+ "docker",
39
+ "exec",
40
+ cname,
41
+ "sh",
42
+ "-lc",
43
+ f"grep -i 'error' -r {logs_dir}/worker*.log 2>/dev/null | tail -n {max_lines}",
44
+ ]
45
+ out = subprocess.check_output(grep_cmd, text=True, stderr=subprocess.DEVNULL).strip()
46
+ if out:
47
+ return out
48
+ except subprocess.CalledProcessError:
49
+ pass
50
+ except Exception:
51
+ pass
52
+
53
+ try:
54
+ logs_dir = config.LLMBOOST_LOGS_DIR
55
+ tail_cmd = [
56
+ "docker",
57
+ "exec",
58
+ cname,
59
+ "sh",
60
+ "-lc",
61
+ f"tail -n {max_lines} {logs_dir}/*.log 2>/dev/null",
62
+ ]
63
+ out = subprocess.check_output(tail_cmd, text=True, stderr=subprocess.DEVNULL).strip()
64
+ return out
65
+ except Exception:
66
+ return ""
67
+
68
+
69
def do_serve(
    model: str,
    lbh_workspace: str | None,
    verbose: bool = False,
    host: str = "0.0.0.0",
    port: int = 8080,
    wait_timeout: float = 600.0,
    poll_interval: float = 1.0,
    detached: bool = False,
    force: bool = False,
    image: Optional[str] = None,
    model_path: Optional[str] = None,
    restart: bool = False,
) -> dict:
    """
    Start llmboost serve in the container and optionally wait for readiness.

    Args:
        model: Target model identifier.
        lbh_workspace: Optional host path to mount as /user_workspace.
        verbose: If True, echo docker exec command and progress.
        host: Bind address passed to the service.
        port: Bind port passed to the service.
        wait_timeout: Max seconds to wait for readiness (ignored if detached).
        poll_interval: Seconds between readiness checks.
        detached: If True, start and return immediately without polling.
        force: If True, bypass pre-flight GPU utilization guard.
        image: If set, force a specific docker image for the model.
        model_path: If set, local HF model directory to mount inside the container.
        restart: If True, restart the container if it is already running.

    Returns:
        Dict: {returncode: int, container_name: str, error: str|None}
    """

    # Guard: prevent accidental start if GPUs are already in use (unless forced)
    if not force and gpu_info.any_gpu_in_use():
        return {
            "returncode": 1,
            "container_name": "",
            "error": "Detected non-zero GPU utilization (compute or VRAM). Decrease GPU memory utilization or reduce GPU memory used by other processes. Use -f/--force to bypass.",
        }

    cname = container_name_for_model(model)
    if not is_container_running(cname):
        # Start container if missing
        if verbose:
            click.echo(f"[serve] No running container for {model}; starting via lbh run...")
        res = do_run(
            model,
            lbh_workspace,
            verbose=verbose,
            image=image,
            model_path=model_path,
            restart=restart,
            docker_args=(),  # no extra docker args when auto-starting
        )
        if res["returncode"] != 0:
            return {"returncode": res["returncode"], "container_name": "", "error": res["error"]}
        time.sleep(1)  # give docker a moment to report the container as running
        if not is_container_running(cname):
            return {
                "returncode": 1,
                "container_name": "",
                "error": "Failed to start container for model.",
            }

    # Launch llmboost serve detached by default; switch to interactive on verbose+attached
    exec_cmd = [
        "docker",
        "exec",
        "-d",
        cname,
        "llmboost",
        "serve",
        "--host",
        host,
        "--port",
        str(port),
        "--model_name",
        model,
    ]

    if verbose and not detached:
        # Replace -d with -i to surface logs interactively during startup
        exec_cmd = [part if part != "-d" else "-i" for part in exec_cmd]

    if verbose:
        # Tag corrected from "[tune]" — this is the serve command.
        click.echo("[serve] Executing inside container:")
        click.echo(" ".join(exec_cmd))

    try:
        subprocess.run(exec_cmd, check=True)
    except subprocess.CalledProcessError as e:
        return {
            "returncode": e.returncode,
            "container_name": cname,
            "error": f"Failed to start service inside container (exit {e.returncode})\n{_collect_error_logs(cname, max_lines=200)}",
        }

    start = time.time()
    time.sleep(3.0)  # brief pause to let process start

    # If detached, return immediately (no readiness polling)
    if detached:
        click.echo(
            "[serve] Started llmboost serve in background (detached). Not waiting for readiness."
        )
        return {"returncode": 0, "container_name": cname, "error": None}

    click.echo(
        f"[serve] Waiting for server to become ready on {host}:{port} (timeout {wait_timeout:.1f}s)..."
    )

    # Poll until it is no longer initializing or timeout is reached
    while is_model_initializing(cname) and (time.time() - start < wait_timeout):
        elapsed = int(time.time() - start)
        if elapsed % 60 == 0:  # every minute
            click.echo(f"{elapsed}s.", nl=False)
        elif elapsed % 5 == 0:  # every 5 seconds
            click.echo(".", nl=False)
        time.sleep(max(0.1, float(poll_interval)))

    elapsed = time.time() - start
    if is_model_ready2serve(cname, host=host, port=port):
        click.echo(f"[serve] Server is ready after {elapsed:.1f} seconds.")
        return {"returncode": 0, "container_name": cname, "error": None}
    else:
        # Not ready within the window: surface recent container error logs.
        error_logs = _collect_error_logs(cname, max_lines=200)
        error_msg = f"Server failed to become ready within {wait_timeout:.1f} seconds."
        if error_logs:
            error_msg += f"\nRecent error logs:\n{error_logs}"
        return {"returncode": 1, "container_name": cname, "error": error_msg}
202
+
203
+
204
@click.command(context_settings={"help_option_names": ["-h", "--help"]})
@click.argument("model", required=True, shell_complete=complete_model_names)
@click.option(
    "--lbh-workspace", type=click.Path(), help="Override workspace path mounted inside container."
)
@click.option("--host", default="0.0.0.0", show_default=True, help="Host address to bind to.")
@click.option("--port", default=8080, show_default=True, help="Port to bind to.", type=int)
@click.option(
    "--wait-timeout",
    default=600.0,
    show_default=True,
    type=float,
    help="Maximum seconds to wait for the server to become ready.",
)
@click.option(
    "--poll-interval",
    default=1.0,
    show_default=True,
    type=float,
    help="Seconds between readiness checks.",
)
@click.option(
    "-d",
    "--detached",
    is_flag=True,
    help="Do not wait for server readiness; return immediately after starting serve.",
)
@click.option(
    "-f",
    "--force",
    is_flag=True,
    help="Ignore GPU utilization checks before starting serve.",
)
@click.option(
    "-i",
    "--image",
    "forced_image",
    type=str,
    default=None,
    help="Force a specific docker image (required when multiple images match the model).",
)
@click.option(
    "-m",
    "--model_path",
    "model_path",
    type=click.Path(exists=True, file_okay=False, dir_okay=True, readable=True),
    default=None,
    # f-prefix removed: the help string has no placeholders.
    help="Local HF model directory to mount inside the container.",
)
@click.option(
    "-r",
    "--restart",
    is_flag=True,
    help="Restart the container if it is running before starting.",
)
@click.pass_context
def serve(
    ctx,
    model,
    lbh_workspace,
    host,
    port,
    wait_timeout,
    poll_interval,
    detached,
    force,
    forced_image,
    model_path,
    restart,
):
    """
    Start llmboost server inside the model container.

    Optionally stops a running container first (-r/--restart), then delegates
    to do_serve and surfaces any failure as a ClickException.
    """
    verbose = ctx.obj.get("VERBOSE", False)

    # Restart if requested. A failed stop is only fatal when the container is
    # in fact still running afterwards (e.g. it was already stopped).
    if restart:
        stop_res = do_stop(model, None, verbose=verbose)
        if stop_res["returncode"] != 0:
            if is_container_running(container_name_for_model(model)):
                raise click.ClickException(
                    stop_res.get("error") or "Failed to stop existing container"
                )

    res = do_serve(
        model,
        lbh_workspace,
        verbose=verbose,
        host=host,
        port=port,
        wait_timeout=wait_timeout,
        poll_interval=poll_interval,
        detached=detached,
        force=force,
        image=forced_image,
        model_path=model_path,
        restart=restart,
    )
    if res["returncode"] != 0:
        raise click.ClickException(res["error"] or "Serve failed")
@@ -0,0 +1,34 @@
1
+ import click
2
+ import pandas as pd
3
+ import tabulate
4
+ from llmboost_hub.commands.list import do_list
5
+ from llmboost_hub.commands.completions import complete_model_names
6
+
7
+
8
@click.command(name="status", context_settings={"help_option_names": ["-h", "--help"]})
@click.argument("model", required=False, default="", shell_complete=complete_model_names)
@click.pass_context
def status_cmd(ctx: click.Context, model: str | None):
    """
    Show a compact status table for models.
    """
    verbose = ctx.obj.get("VERBOSE", False)
    listing = do_list(query=model or "", verbose=verbose)
    images = listing.get("images_df")

    # Nothing to render when the listing is missing or empty.
    if not isinstance(images, pd.DataFrame) or images.empty:
        click.echo("Found 0 models")
        return

    # Restrict to the columns we display; bail out when neither exists.
    wanted = [name for name in ["status", "model"] if name in images.columns]
    if not wanted:
        click.echo("No status information available.")
        return

    table = images[wanted].reset_index(drop=True)
    table.index += 1  # human-friendly 1-based row numbering
    click.echo(f"Found {len(table)} models")
    rendered = tabulate.tabulate(
        table.values.tolist(),
        headers=list(table.columns),
        showindex=list(table.index),
        tablefmt="psql",
    )
    click.echo(rendered)
@@ -0,0 +1,59 @@
1
+ import click
2
+ import subprocess
3
+
4
+ from llmboost_hub.utils.container_utils import (
5
+ container_name_for_model,
6
+ is_container_running,
7
+ )
8
+ from llmboost_hub.commands.completions import complete_model_names
9
+
10
+
11
def do_stop(model: str, container: str | None, verbose: bool = False) -> dict:
    """
    Stop the model's container.

    Args:
        model: Model identifier (used when container is not directly provided).
        container: Optional explicit container name to stop.
        verbose: If True, echo the docker command.

    Returns:
        Dict: {returncode: int, container_name: str, error: str|None}
    """
    # Explicit container name wins; otherwise derive it from the model id.
    cname = container or container_name_for_model(model)

    # Fast-fail if target container is not running
    if not is_container_running(cname):
        return {
            "returncode": 1,
            "container_name": cname,
            "error": f"Container '{cname}' is not running.",
        }
    cmd = ["docker", "stop", cname]
    if verbose:
        # Log tag fixed: this is the stop command, not run.
        click.echo("[stop] " + " ".join(cmd))
    try:
        subprocess.run(cmd, check=True)
        return {"returncode": 0, "container_name": cname, "error": None}
    except subprocess.CalledProcessError as e:
        return {
            "returncode": e.returncode,
            "container_name": cname,
            "error": f"Docker stop failed (exit {e.returncode})",
        }
44
+
45
+
46
@click.command(context_settings={"help_option_names": ["-h", "--help"]})
@click.argument("model", required=True, shell_complete=complete_model_names)
@click.option(
    "-c", "--container", "container", type=str, help="Container name to stop (overrides model)."
)
@click.pass_context
def stop(ctx: click.Context, model, container):
    """
    Stops a running container for a given model (or explicit name).
    """
    # Verbosity is configured once on the root command and shared via ctx.obj.
    verbose = ctx.obj.get("VERBOSE", False)
    # Delegate to do_stop; -c/--container takes precedence over the model name.
    res = do_stop(model, container, verbose=verbose)
    # Surface any failure (container not running, docker error) as a CLI error.
    if res["returncode"] != 0:
        raise click.ClickException(res["error"] or "Stop failed")
@@ -0,0 +1,45 @@
1
+ import click
2
+ import subprocess
3
+ import json
4
+ from llmboost_hub.commands.completions import complete_model_names
5
+
6
+
7
@click.command(name="test", context_settings={"help_option_names": ["-h", "--help"]})
@click.argument("model", required=True, shell_complete=complete_model_names)
@click.option(
    "-q",
    "--query",
    "query_str",
    default="What are you?",
    show_default=True,
    help="User query prompt.",
)
@click.option(
    "-t", "--max_tokens", default=300, show_default=True, type=int, help="Max tokens in completion."
)
@click.option("--host", default="127.0.0.1", show_default=True, help="Host to call.")
@click.option("--port", default=8080, show_default=True, type=int, help="Port to call.")
@click.pass_context
def test_cmd(ctx: click.Context, model: str, query_str: str, max_tokens: int, host: str, port: int):
    """
    Call the running llmboost serve endpoint and print the raw JSON response.
    """
    # Target the OpenAI-compatible chat endpoint of the local server.
    url = f"http://{host}:{port}/v1/chat/completions"
    body = json.dumps(
        {
            "model": model,
            "messages": [{"role": "user", "content": query_str}],
            "max_tokens": max_tokens,
        }
    )

    # Delegate the HTTP request to curl (-sS keeps it quiet but surfaces errors).
    cmd = ["curl", "-sS", url, "-H", "Content-Type: application/json", "-d", body]
    if ctx.obj.get("VERBOSE", False):
        click.echo("[test] " + " ".join(cmd))

    # Run the request; convert a non-zero curl exit into a clear CLI error.
    try:
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError as e:
        raise click.ClickException(f"curl failed (exit {e.returncode})")