llmboost-hub 0.1.1__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
@@ -0,0 +1,372 @@
+ import click
+ import subprocess
+ import time
+ from typing import Optional
+ import os
+ import shutil
+ from datetime import datetime
+
+ from llmboost_hub.commands.run import do_run
+ from llmboost_hub.utils.container_utils import (
+     container_name_for_model,
+     is_container_running,
+     is_model_tuning,
+ )
+ from llmboost_hub.commands.completions import complete_model_names
+ from llmboost_hub.utils.config import config
+ from llmboost_hub.utils.gpu_info import get_gpu_count
+ from llmboost_hub.commands.stop import do_stop
+
+
+ def _collect_error_logs(cname: str, max_lines: int = 200) -> str:
+     """
+     Collect recent error log lines from the container.
+
+     Strategy:
+     - Grep case-insensitively for 'error' across `worker*.log` under
+       `config.LLMBOOST_LOGS_DIR` and tail the last `max_lines` matches.
+     - Fall back to tailing any `*.log` if the grep yields nothing.
+
+     Args:
+         cname: Target container name.
+         max_lines: Maximum number of lines to include.
+
+     Returns:
+         Concatenated recent log lines, or an empty string on failure or when
+         no logs are found.
+     """
+     try:
+         grep_cmd = [
+             "docker",
+             "exec",
+             cname,
+             "sh",
+             "-lc",
+             f"grep -i 'error' -r {config.LLMBOOST_LOGS_DIR}/worker*.log 2>/dev/null | tail -n {max_lines}",
+         ]
+         out = subprocess.check_output(grep_cmd, text=True, stderr=subprocess.DEVNULL).strip()
+         if out:
+             return out
+     except Exception:
+         # Grep failed or the shell pipeline returned non-zero; fall through to
+         # the generic tail fallback below.
+         pass
+
+     try:
+         tail_cmd = [
+             "docker",
+             "exec",
+             cname,
+             "sh",
+             "-lc",
+             f"tail -n {max_lines} {config.LLMBOOST_LOGS_DIR}/*.log 2>/dev/null",
+         ]
+         out = subprocess.check_output(tail_cmd, text=True, stderr=subprocess.DEVNULL).strip()
+         return out
+     except Exception:
+         return ""
+
+
+ def do_tune(
+     model: str,
+     lbh_workspace: Optional[str],
+     verbose: bool = False,
+     metrics: str = "latency",
+     algorithm: str = "mb_algorithm",
+     wait_timeout: float = 600.0,
+     poll_interval: float = 1.0,
+     detached: bool = False,
+     gui: bool = False,
+     image: Optional[str] = None,
+     model_path: Optional[str] = None,
+     restart: bool = False,
+     n_tuners: Optional[int] = None,
+     merge_db: bool = False,
+ ) -> dict:
+     """
+     Start the autotuner inside the model container and optionally wait for it.
+
+     Args:
+         model: Model identifier.
+         lbh_workspace: Optional override for the workspace mount path.
+         verbose: If True, echo detailed logs and commands.
+         metrics: Primary optimization metric (`latency` or `throughput`).
+         algorithm: Autotuning algorithm identifier.
+         wait_timeout: Max seconds to wait for completion (ignored when detached).
+         poll_interval: Seconds between tuning status checks.
+         detached: If True, return right after starting the tuner.
+         gui: If True, print the diagnostics GUI URL using `config.LBH_GUI_PORT`.
+         image: If set, force a specific docker image for the model.
+         model_path: If set, local HF model directory to mount inside the container.
+         restart: If True, restart the container if it is already running.
+         n_tuners: Number of parallel tuners; defaults to the GPU count when None.
+         merge_db: If True, merge the container DB into the host DB and exit
+             (no tuning occurs).
+
+     Returns:
+         Dict: {"returncode": int, "container_name": str, "error": str | None}
+     """
+     cname = container_name_for_model(model)
+     # Ensure the container is running; otherwise start it via `lbh run`.
+     if not is_container_running(cname):
+         if verbose:
+             click.echo(f"[tune] No running container for {model}; starting via lbh run...")
+         # Pass through the forced image when provided.
+         res = do_run(
+             model,
+             lbh_workspace,
+             verbose=verbose,
+             image=image,
+             model_path=model_path,
+             restart=restart,
+             docker_args=(),  # use empty docker_args
+         )
+         if res["returncode"] != 0:
+             return {"returncode": res["returncode"], "container_name": "", "error": res["error"]}
+         time.sleep(1)
+         if not is_container_running(cname):
+             return {"returncode": 1, "container_name": "", "error": "Failed to start container."}
+
+     # Optional: print a GUI URL hint.
+     if gui:
+         try:
+             gui_port = config.LBH_GUI_PORT
+             click.echo(f"[tune] Diagnostics GUI: http://localhost:{gui_port}")
+         except Exception:
+             # Ignore failures to read/format the port.
+             pass
+
+     # Default n_tuners to the GPU count if not provided.
+     if not n_tuners:
+         n_tuners = get_gpu_count()
+
+     # Launch the tuner detached (`-d`) inside the container so the host can
+     # poll for completion; `-d` is swapped for `-i` below when verbose
+     # attached output is requested.
+     exec_cmd = [
+         "docker",
+         "exec",
+         "-d",
+         cname,
+         "llmboost",
+         "tuner",
+         "--model",
+         model,
+         "--metrics",
+         metrics,
+         "--algorithm",
+         algorithm,
+         "--n-tuners",
+         f"{n_tuners}",
+     ]
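+
+     # Illustrative expansion of exec_cmd (placeholders in <...>; metric and
+     # algorithm shown at their do_tune defaults):
+     #   docker exec -d <cname> llmboost tuner --model <model> \
+     #       --metrics latency --algorithm mb_algorithm --n-tuners <gpu_count>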
+
+     # In verbose attached mode, prefer an interactive exec so logs surface on
+     # the caller's terminal: replace `-d` with `-i` in exec_cmd.
+     if verbose and not detached:
+         exec_cmd = [part if part != "-d" else "-i" for part in exec_cmd]
+
+     if merge_db:
+         # Ensure the backup directory exists on the host.
+         os.makedirs(os.path.join(config.LBH_HOME, config.TUNER_DB_BACKUPS_DIRNAME), exist_ok=True)
+         # Back up the host DB before merging,
+         # e.g. inference.db.20231123_153045.bak.
+         backup_file = os.path.join(
+             config.LBH_HOME,
+             config.TUNER_DB_BACKUPS_DIRNAME,
+             f"inference.db.{datetime.now().strftime('%Y%m%d_%H%M%S')}.bak",
+         )
+         shutil.copy2(config.LBH_TUNER_DB_PATH, backup_file)
+         click.echo(f"[tune] Backed up host tuner database to {backup_file} before merging.")
+
+         # Replace all args from `--model` onward with `--merge-db`, merging the
+         # container DB into the host DB. Run in the foreground (`-i`, not `-d`)
+         # so the success message below is accurate.
+         exec_cmd = exec_cmd[: exec_cmd.index("--model")] + [
+             "--merge-db",
+             config.LLMBOOST_TUNER_DB_BACKUP_PATH,
+         ]
+         exec_cmd = [part if part != "-d" else "-i" for part in exec_cmd]
+         if verbose:
+             click.echo(f"[tune] Merging tuner database: {' '.join(exec_cmd)}")
+         subprocess.run(exec_cmd, check=True)
+         click.echo("[tune] Merged tuner database from container into host database.")
+         # No tuning is performed in merge-db mode.
+         return {"returncode": 0, "container_name": cname, "error": None}
+
+     if verbose:
+         click.echo(f"[tune] Tuning model: {' '.join(exec_cmd)}")
+
+     # Start the tuner and handle launch failures.
+     start = time.time()
+     try:
+         # Detached by default; interactive (blocking) in verbose attached mode.
+         subprocess.run(exec_cmd, check=True)
+     except subprocess.CalledProcessError as e:
+         return {
+             "returncode": e.returncode,
+             "container_name": cname,
+             "error": f"Failed to start tuner inside container (exit {e.returncode})",
+         }
+
+     if detached:
+         # Return early when running in the background.
+         click.echo("[tune] Tuner started in background (detached).")
+         return {"returncode": 0, "container_name": cname, "error": None}
+
+     if not verbose:
+         # Poll for completion with minimal feedback.
+         click.echo(f"[tune] Waiting for tuning to complete (timeout {wait_timeout:.1f}s)...")
+         time.sleep(3.0)  # brief pause to let the process start
+     while (
+         is_container_running(cname)
+         and is_model_tuning(cname)
+         and (time.time() - start < wait_timeout)
+     ):
+         # Minimal progress feedback: elapsed time every 60s, a dot every 5s.
+         elapsed = int(time.time() - start)
+         if elapsed % 60 == 0:
+             click.echo(f"{elapsed}s.", nl=False)
+         elif elapsed % 5 == 0:
+             click.echo(".", nl=False)
+         time.sleep(max(0.1, float(poll_interval)))
+
+     # Handle the container unexpectedly stopping.
+     if not is_container_running(cname):
+         return {
+             "returncode": 1,
+             "container_name": cname,
+             "error": "Container stopped during tuning.",
+         }
+     # Timed out while still tuning: surface recent logs.
+     if is_model_tuning(cname):
+         logs = _collect_error_logs(cname, max_lines=200)
+         msg = (
+             f"Tuning did not complete within {wait_timeout:.1f} seconds.\n"
+             "NOTE: The tuning process may still be running in the background inside "
+             "the container. Increase --wait-timeout to wait longer."
+         )
+         if logs:
+             msg += f"\nRecent logs:\n{logs}"
+         return {"returncode": 0, "container_name": cname, "error": msg}
+
+     # Completed within the timeout.
+     elapsed = time.time() - start
+     click.echo(f"[tune] Tuning finished after {elapsed:.1f} seconds.")
+     return {"returncode": 0, "container_name": cname, "error": None}
+
+
+ @click.command(name="tune", context_settings={"help_option_names": ["-h", "--help"]})
+ @click.argument("model", required=True, shell_complete=complete_model_names)
+ @click.option(
+     "--lbh-workspace", type=click.Path(), help="Override workspace path mounted inside container."
+ )
+ @click.option(
+     "--metrics",
+     type=click.Choice(["throughput", "latency"]),
+     default="throughput",
+     show_default=True,
+     help="Primary optimization metric.",
+ )
+ @click.option(
+     "-a",
+     "--algorithm",
+     type=str,
+     default="mb_algorithm",
+     show_default=True,
+     help="Autotuning algorithm identifier.",
+ )
+ @click.option(
+     "--wait-timeout",
+     default=600.0,
+     show_default=True,
+     type=float,
+     help="Maximum seconds to wait for tuning to complete (ignored in detached mode).",
+ )
+ @click.option(
+     "--poll-interval",
+     default=1.0,
+     show_default=True,
+     type=float,
+     help="Seconds between tuning status checks.",
+ )
+ @click.option(
+     "-d",
+     "--detached",
+     is_flag=True,
+     help="Do not wait for tuning to complete; return immediately after starting the tuner.",
+ )
+ @click.option(
+     "-i",
+     "--image",
+     "forced_image",
+     type=str,
+     default=None,
+     help="Force a specific docker image (required when multiple images match the model).",
+ )
+ @click.option(
+     "-m",
+     "--model_path",
+     "model_path",
+     type=click.Path(exists=True, file_okay=False, dir_okay=True, readable=True),
+     default=None,
+     help="Local HF model directory to mount inside the container.",
+ )
+ @click.option(
+     "-r",
+     "--restart",
+     is_flag=True,
+     help="Stop and restart the container if it is already running.",
+ )
+ @click.option(
+     "--gui",
+     is_flag=True,
+     help="Print the localhost URL for the diagnostics GUI.",
+ )
+ @click.option(
+     "-n",
+     "--n-tuners",
+     type=int,
+     default=None,
+     help="Number of parallel tuners to run (defaults to the number of GPUs detected).",
+ )
+ @click.option(
+     "--merge-db",
+     is_flag=True,
+     help="Merge the container DB into the existing host DB (no tuning is performed when this flag is set).",
+ )
+ @click.pass_context
+ def tune(
+     ctx,
+     model,
+     lbh_workspace,
+     metrics,
+     algorithm,
+     wait_timeout,
+     poll_interval,
+     detached,
+     forced_image,
+     model_path,
+     restart,
+     gui,
+     n_tuners,
+     merge_db,
+ ):
+     """
+     Start autotuning for a given model inside its container.
+     """
+     verbose = ctx.obj.get("VERBOSE", False)
+
+     # Stop the existing container first when a restart is requested.
+     if restart:
+         stop_res = do_stop(model, None, verbose=verbose)
+         if stop_res["returncode"] != 0:
+             if is_container_running(container_name_for_model(model)):
+                 raise click.ClickException(
+                     stop_res.get("error") or "Failed to stop existing container"
+                 )
+
+     res = do_tune(
+         model=model,
+         lbh_workspace=lbh_workspace,
+         verbose=verbose,
+         metrics=metrics,
+         algorithm=algorithm,
+         wait_timeout=wait_timeout,
+         poll_interval=poll_interval,
+         detached=detached,
+         gui=gui,
+         image=forced_image,
+         model_path=model_path,
+         restart=restart,
+         n_tuners=n_tuners,
+         merge_db=merge_db,
+     )
+     if res["returncode"] != 0:
+         raise click.ClickException(res["error"] or "Tune failed")
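+
+
+ # Illustrative CLI invocations (assuming the installed entry point is `lbh`,
+ # as the runtime messages above suggest; <model> is a placeholder):
+ #   lbh tune <model> --metrics latency --wait-timeout 1200
+ #   lbh tune <model> -d           # start tuning and return immediately
+ #   lbh tune <model> --merge-db   # merge the container tuner DB into the host DB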
@@ -0,0 +1,220 @@
+ import os
+ from typing import Dict, Any
+ import logging
+ import yaml
+
+ _DEFAULT_HOME = "~/.llmboost_hub"
+ _CONFIG_FILENAME = "config.yaml"
+
+ log = logging.getLogger("CONFIG")
+
+
+ def expand_path(p: str) -> str:
+     """
+     Expand a path containing `~` to the user home directory.
+
+     Args:
+         p: Path string that may contain a leading `~`.
+
+     Returns:
+         The expanded absolute or relative path string.
+     """
+     return os.path.expanduser(p)
+
+
+ def ensure_home() -> str:
+     """
+     Ensure `LBH_HOME` exists and return its absolute path.
+
+     Resolution order:
+     - ENV `LBH_HOME` (if set)
+     - built-in default `~/.llmboost_hub`
+
+     Returns:
+         Absolute path to `LBH_HOME` (created if missing).
+     """
+     home_env = os.getenv("LBH_HOME", _DEFAULT_HOME)
+     home = expand_path(home_env)
+     os.makedirs(home, exist_ok=True)
+     return os.path.abspath(home)
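+
+ # Illustrative resolution (home directory is a placeholder):
+ #   LBH_HOME unset    -> ensure_home() == "/home/<user>/.llmboost_hub"
+ #   LBH_HOME=/opt/lbh -> ensure_home() == "/opt/lbh" (created if missing)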
+
+
+ def _get_home() -> str:
+     """Internal helper to return `LBH_HOME` (ensures existence)."""
+     return ensure_home()
+
+
+ def _config_path() -> str:
+     """Absolute path to `config.yaml` under `LBH_HOME`."""
+     return os.path.join(_get_home(), _CONFIG_FILENAME)
+
+
+ class _Constants:
+     # LLMBOOST paths are always inside the container.
+     CONTAINER_LBH_HOME = "/llmboost_hub"  # container lbh home
+     CONTAINER_USER_WORKSPACE = "/user_workspace"  # container user workspace mount point
+     LLMBOOST_WORKSPACE = "/workspace"  # container workspace dir
+     LLMBOOST_MODELS_DIR = f"{LLMBOOST_WORKSPACE}/models"  # container models dir
+     LLMBOOST_LOGS_DIR = f"{LLMBOOST_WORKSPACE}/logs"  # container logs dir
+     LLMBOOST_LICENSE_PATH = f"{LLMBOOST_WORKSPACE}/license.skm"  # container license path
+     LLMBOOST_TUNER_DB_PATH = f"{LLMBOOST_WORKSPACE}/data/inference.db"  # container tuner DB path
+     LLMBOOST_TUNER_DB_BACKUP_PATH = (
+         f"{LLMBOOST_WORKSPACE}/data/inference.db.bak"  # container tuner DB backup path
+     )
+     TUNER_DB_BACKUPS_DIRNAME = "tuner_db_backups"  # tuner DB backup dir name (host and container)
+
+
+ class _Defaults:
+     # LBH paths are always on the host.
+     LBH_HOME = _get_home()  # host lbh home
+     LBH_MODELS = os.path.join(_get_home(), "models")  # host models dir
+     LBH_MODELS_STAGING = os.path.join(_get_home(), "models", ".tmp")  # host staging dir
+     LBH_LICENSE_PATH = os.path.join(_get_home(), "license.skm")  # host license path
+     LBH_WORKSPACE = os.path.join(_get_home(), "workspace")  # host workspace dir
+     LBH_LOOKUP_URL = "https://docs.google.com/spreadsheets/d/1f8FTgGDJkI6hnJQsd-RhHtlGhYTx_p8AAvDLNbRRTV8/export?format=csv"  # lookup URL
+     LBH_LOOKUP_CACHE = os.path.join(_get_home(), "lookup_cache.csv")  # host lookup cache
+     LBH_LOOKUP_CACHE_TTL = 60  # seconds between cache refreshes
+     LBH_GUI_PORT = 8080  # GUI port
+     LBH_TUNER_DB_PATH = os.path.join(
+         _get_home(), os.path.basename(_Constants.LLMBOOST_TUNER_DB_PATH)
+     )  # host tuner DB path
+     LBH_AUTO_PREP = True  # whether to auto-prepare missing models on run
+
+
+ # Coerce env/config values to the expected type (handles bools, ints, and floats).
+ def _to_bool(v: Any) -> bool:
+     if isinstance(v, bool):
+         return v
+     if isinstance(v, (int, float)):
+         return bool(v)
+     if isinstance(v, str):
+         s = v.strip().lower()
+         if s in {"1", "true", "t", "yes", "y", "on"}:
+             return True
+         if s in {"0", "false", "f", "no", "n", "off", ""}:
+             return False
+     raise ValueError(f"Cannot parse boolean from: {v!r}")
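+
+ # Illustrative coercions:
+ #   _to_bool("Yes") -> True,  _to_bool("off") -> False,  _to_bool("maybe") -> ValueError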
+
+
+ def _coerce_to_type(value: Any, default: Any) -> Any:
+     """Coerce an env/config value to the type of `default` (bool, int, or float)."""
+     # Note: bool is a subclass of int, so handle bool before int.
+     if isinstance(default, bool):
+         try:
+             return _to_bool(value)
+         except Exception:
+             return default
+     if isinstance(default, int):
+         try:
+             return int(value)
+         except Exception:
+             return default
+     if isinstance(default, float):
+         try:
+             return float(value)
+         except Exception:
+             return default
+     return value
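+
+ # Illustrative coercions (string values as read from the environment):
+ #   _coerce_to_type("9090", 8080) -> 9090    (int default)
+ #   _coerce_to_type("0", True)    -> False   (bool default)
+ #   _coerce_to_type("oops", 60)   -> 60      (unparseable; falls back to the default)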
+
+
+ class _Config(_Defaults, _Constants):
+     _loaded_cfg = None
+
+     @staticmethod
+     def _resolve(cfg: Dict, key):
+         """
+         Resolve a config value for `key`.
+
+         Resolution order:
+         1) Environment variable (if set, even if "0"/"false")
+         2) config.yaml (if present, even if False/0)
+         3) Defaults (_Defaults), which are also written back to config.yaml
+            when the key is missing there.
+         """
+         # 1) ENV (use presence, not truthiness; coerce to the default's type).
+         v = os.getenv(key)
+         if v is not None:
+             return _coerce_to_type(v, getattr(_Defaults, key))
+
+         # 2) config.yaml (skip empty values; coerce to the default's type).
+         if key in cfg:
+             val = cfg.get(key)
+             if val is not None and val != "":
+                 return _coerce_to_type(val, getattr(_Defaults, key))
+
+         # 3) Fall back to the default, persisting it to config.yaml if missing.
+         if key not in cfg:
+             _write_config({**cfg, key: getattr(_Defaults, key)})
+         return getattr(_Defaults, key)
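+
+     # Illustrative precedence for one key (values are placeholders):
+     #   env LBH_GUI_PORT=9090          -> 9090, regardless of config.yaml
+     #   config.yaml LBH_GUI_PORT: 8081 -> 8081, when the env var is unset
+     #   neither present                -> the default 8080 (written back to config.yaml)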
+
+     def __init__(self):
+         """Load the config on instantiation and populate attributes from resolved values."""
+         loaded_cfg = _load_config(create_if_missing=True)
+         for key in dir(_Defaults):
+             if not key.startswith("_"):
+                 setattr(self, key, self._resolve(loaded_cfg, key))
+
+
+ def _write_config(cfg: Dict[str, Any]) -> None:
+     """
+     Write the given config mapping to `config.yaml` under `LBH_HOME`.
+
+     Args:
+         cfg: Mapping of key -> value to persist.
+     """
+     path = _config_path()
+     os.makedirs(os.path.dirname(path), exist_ok=True)
+     with open(path, "w", encoding="utf-8") as fh:
+         yaml.safe_dump(cfg, fh, sort_keys=True)
+
+
+ def _load_config(create_if_missing: bool = True) -> Dict[str, Any]:
+     """
+     Load `config.yaml` from `LBH_HOME` with safe defaults.
+
+     Behavior:
+     - If the file is missing and `create_if_missing=True`: create it with
+       defaults and return those defaults.
+     - If the file exists but is not a mapping: warn and rewrite defaults.
+     - On read error: warn and rewrite defaults.
+
+     Args:
+         create_if_missing: Whether to create a default config file if missing.
+
+     Returns:
+         The loaded config mapping (possibly defaults).
+     """
+     path = _config_path()
+     if not os.path.exists(path):
+         if create_if_missing:
+             # Bootstrap with defaults.
+             cfg = {key: getattr(_Defaults, key) for key in dir(_Defaults) if not key.startswith("_")}
+             _write_config(cfg)
+             log.info(f"Created default config at {path}")
+             return cfg
+         return {}
+     try:
+         with open(path, "r", encoding="utf-8") as fh:
+             data = yaml.safe_load(fh) or {}
+         if not isinstance(data, dict):
+             # Rewrite defaults to recover from invalid content.
+             log.warning(f"Config at {path} is not a mapping. Rewriting defaults.")
+             data = {key: getattr(_Defaults, key) for key in dir(_Defaults) if not key.startswith("_")}
+             _write_config(data)
+         return data
+     except Exception as e:
+         # Any error reading or parsing: recover by writing defaults.
+         log.warning(f"Failed to read config at {path}: {e}. Rewriting defaults.")
+         cfg = {key: getattr(_Defaults, key) for key in dir(_Defaults) if not key.startswith("_")}
+         _write_config(cfg)
+         return cfg
+
+
+ config = _Config()
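+
+ # Illustrative use from other modules (as in the tune command above):
+ #   from llmboost_hub.utils.config import config
+ #   config.LBH_GUI_PORT       # 8080 unless overridden via env or config.yaml
+ #   config.LLMBOOST_LOGS_DIR  # "/workspace/logs"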