opencode-llmstack 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ """Asset downloaders.
2
+
3
+ Two distinct concerns live here:
4
+
5
+ :mod:`llmstack.download.ggufs`
6
+ Background download of every GGUF named in ``models.ini`` using
7
+ ``llama-completion`` (or legacy ``llama-cli``) so the standard
8
+ llama.cpp HF cache stays the single canonical store.
9
+
10
+ :mod:`llmstack.download.binary`
11
+ One-shot installer for the ``llama-swap`` Go binary, fetched from its
12
+ GitHub release tag. Detects host OS/arch, optionally pinned via the
13
+ ``LLAMA_SWAP_VERSION`` env var.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from llmstack.download.binary import install_llama_swap
19
+ from llmstack.download.ggufs import download_all
20
+
21
+ __all__ = ["install_llama_swap", "download_all"]
@@ -0,0 +1,234 @@
1
+ """Install (or update) the ``llama-swap`` binary.
2
+
3
+ Replaces the shell ``_install_llama_swap`` helper. Resolves the latest
4
+ GitHub release tag (or honours ``$LLAMA_SWAP_VERSION``), downloads the
5
+ asset for the current OS+arch, extracts the single ``llama-swap``
6
+ executable (``llama-swap.exe`` on Windows), and atomically renames it
7
+ into place under :func:`llmstack.paths.bin_dir`.
8
+
9
+ A second call short-circuits when the installed version already matches
10
+ the resolved tag, unless ``force=True`` is passed.
11
+
12
+ Asset naming on the upstream release matches goreleaser's convention:
13
+
14
+ * POSIX: ``llama-swap_<num>_<os>_<arch>.tar.gz``
15
+ * Windows: ``llama-swap_<num>_windows_amd64.zip`` (only amd64 is published)
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import os
21
+ import platform
22
+ import re
23
+ import shutil
24
+ import subprocess
25
+ import tarfile
26
+ import tempfile
27
+ import urllib.request
28
+ import zipfile
29
+ from pathlib import Path
30
+
31
+ from llmstack._platform import EXE_SUFFIX, IS_WINDOWS, make_executable
32
+ from llmstack.paths import REPO_LLAMA_SWAP, ensure_data_dirs
33
+
34
+ GH_API = "https://api.github.com"
35
+ GH_DL = "https://github.com"
36
+ VERSION_RE = re.compile(r"version:\s*v?([0-9][\w.-]*)", re.IGNORECASE)
37
+
38
+ BINARY_NAME = f"llama-swap{EXE_SUFFIX}"
39
+
40
+
41
+ def _detect_os_arch() -> tuple[str, str, str]:
42
+ """Return ``(os_label, arch_label, archive_ext)`` for the current host.
43
+
44
+ The third element drives the asset name suffix: ``"tar.gz"`` for the
45
+ POSIX builds, ``"zip"`` for the Windows build. Goreleaser's defaults.
46
+ """
47
+ sysname = platform.system()
48
+ os_map = {"Darwin": "darwin", "Linux": "linux", "FreeBSD": "freebsd", "Windows": "windows"}
49
+ if sysname not in os_map:
50
+ raise SystemExit(f"unsupported OS: {sysname} (need Darwin/Linux/FreeBSD/Windows)")
51
+ machine = platform.machine().lower()
52
+ if machine in ("arm64", "aarch64"):
53
+ arch = "arm64"
54
+ elif machine in ("x86_64", "amd64"):
55
+ arch = "amd64"
56
+ else:
57
+ raise SystemExit(f"unsupported arch: {machine} (need arm64 or x86_64)")
58
+
59
+ os_label = os_map[sysname]
60
+ if os_label == "freebsd" and arch != "amd64":
61
+ raise SystemExit(f"no llama-swap release for {os_label}/{arch}")
62
+ if os_label == "windows":
63
+ if arch != "amd64":
64
+ raise SystemExit(
65
+ f"no llama-swap windows release for {arch} -- "
66
+ "only windows_amd64 is published upstream."
67
+ )
68
+ return os_label, arch, "zip"
69
+ return os_label, arch, "tar.gz"
70
+
71
+
72
def _resolve_latest_tag() -> str:
    """Query the GitHub API for the newest llama-swap release tag.

    Raises :exc:`SystemExit` on any network/parse failure, or when the
    API response carries no ``tag_name``.
    """
    import json

    print(f"[*] resolving latest release tag from github.com/{REPO_LLAMA_SWAP}...")
    api_url = f"{GH_API}/repos/{REPO_LLAMA_SWAP}/releases/latest"
    try:
        with urllib.request.urlopen(api_url, timeout=10) as response:
            tag = json.load(response).get("tag_name") or ""
    except Exception as exc:
        raise SystemExit(f"could not resolve latest release tag: {exc}") from None
    if not tag:
        raise SystemExit("could not resolve latest release tag (empty response)")
    print(f"[*] latest release: {tag}")
    return tag
85
+
86
+
87
+ def _installed_version_line(target: Path) -> str:
88
+ """Return the first line of ``llama-swap --version`` (or empty on error)."""
89
+ if not target.exists():
90
+ return ""
91
+ try:
92
+ proc = subprocess.run(
93
+ [str(target), "--version"],
94
+ check=False,
95
+ stdout=subprocess.PIPE,
96
+ stderr=subprocess.STDOUT,
97
+ text=True,
98
+ timeout=10,
99
+ )
100
+ except (OSError, subprocess.SubprocessError):
101
+ return ""
102
+ return (proc.stdout or "").splitlines()[0] if proc.stdout else ""
103
+
104
+
105
def latest_release_tag() -> str | None:
    """Best-effort lookup of the newest release tag; ``None`` instead of raising."""
    import json

    try:
        api_url = f"{GH_API}/repos/{REPO_LLAMA_SWAP}/releases/latest"
        with urllib.request.urlopen(api_url, timeout=5) as response:
            tag = json.load(response).get("tag_name") or ""
        return tag or None
    except Exception:
        # Deliberately swallow everything: callers treat None as "unknown".
        return None
115
+
116
+
117
def installed_version(target: Path) -> str | None:
    """Extract the bare version number (e.g. ``"211"``) from ``--version`` output.

    Returns ``None`` when the binary is missing or the output does not
    match :data:`VERSION_RE`.
    """
    match = VERSION_RE.search(_installed_version_line(target))
    return match.group(1) if match else None
122
+
123
+
124
def _extract_binary(archive: Path, dest_dir: Path, *, archive_ext: str) -> Path:
    """Pull the ``llama-swap[.exe]`` file out of ``archive`` into ``dest_dir``.

    Returns the path to the extracted executable (always directly under
    ``dest_dir``). We deliberately ignore the rest of the archive contents
    (READMEs, sample configs) -- the package only consumes the binary itself.

    The member's bytes are streamed into ``dest_dir / BINARY_NAME`` rather
    than calling ``extract()``, so a hostile member path (``../...`` or an
    absolute name) can never write outside ``dest_dir`` and a nested member
    cannot land in a surprise subdirectory.
    """
    out = dest_dir / BINARY_NAME
    if archive_ext == "zip":
        try:
            with zipfile.ZipFile(archive) as zf:
                # Skip directory entries (they end with "/") -- we want a file.
                member = next(
                    (m for m in zf.namelist()
                     if not m.endswith("/") and Path(m).name == BINARY_NAME),
                    None,
                )
                if member is None:
                    raise SystemExit(
                        f"[!] zip did not contain a top-level '{BINARY_NAME}' file"
                    )
                with zf.open(member) as src, out.open("wb") as dst:
                    shutil.copyfileobj(src, dst)
        except zipfile.BadZipFile as e:
            raise SystemExit(f"extract failed: {e}") from None
    else:
        try:
            with tarfile.open(archive, "r:gz") as tf:
                # Only regular files: symlinks/devices are never the binary.
                member = next(
                    (m for m in tf.getmembers()
                     if m.isfile() and Path(m.name).name == BINARY_NAME),
                    None,
                )
                if member is None:
                    raise SystemExit(
                        f"[!] tarball did not contain a top-level '{BINARY_NAME}' file"
                    )
                src = tf.extractfile(member)
                if src is None:
                    raise SystemExit(
                        f"[!] tarball did not contain a top-level '{BINARY_NAME}' file"
                    )
                with src, out.open("wb") as dst:
                    shutil.copyfileobj(src, dst)
        except tarfile.TarError as e:
            raise SystemExit(f"extract failed: {e}") from None

    if not out.is_file():
        raise SystemExit(f"[!] archive did not yield a '{BINARY_NAME}' file")
    return out
159
+
160
+
161
def install_llama_swap(*, force: bool = False) -> Path:
    """Download/refresh the ``llama-swap`` binary.

    Returns the absolute path to the installed binary. ``force=True``
    re-downloads even when the version matches.

    Raises :exc:`SystemExit` when the platform is unsupported, the release
    tag cannot be resolved, the download or extraction fails, or (on
    Windows) the existing binary cannot be replaced.
    """
    paths = ensure_data_dirs()
    target = paths.llama_swap_bin  # final install location

    os_name, arch, archive_ext = _detect_os_arch()
    # An explicit $LLAMA_SWAP_VERSION pin skips the GitHub API round-trip.
    tag = os.environ.get("LLAMA_SWAP_VERSION", "").strip()
    if tag:
        print(f"[*] version: {tag} (from $LLAMA_SWAP_VERSION)")
    else:
        tag = _resolve_latest_tag()

    # Goreleaser asset names carry the bare number, without the leading "v".
    num = tag.lstrip("v")
    asset = f"llama-swap_{num}_{os_name}_{arch}.{archive_ext}"
    url = f"{GH_DL}/{REPO_LLAMA_SWAP}/releases/download/{tag}/{asset}"

    if target.exists() and not force:
        # Short-circuit when the installed "version: ..." line already
        # reports the resolved number.
        line = _installed_version_line(target)
        if line and re.search(rf"version:\s*v?{re.escape(num)}\b", line, re.IGNORECASE):
            print(f"[=] already installed: {target}")
            print(f" {line}")
            print(" (re-run with --force to redownload)")
            return target
        if line:
            print(f"[*] currently installed: {line}")
            print(f" upgrading to {tag}")

    paths.bin_dir.mkdir(parents=True, exist_ok=True)
    with tempfile.TemporaryDirectory(prefix="llmstack-llama-swap-") as tmp_dir:
        tmp = Path(tmp_dir)
        archive = tmp / asset

        print(f"[*] downloading {asset}")
        print(f" from {url}")
        try:
            urllib.request.urlretrieve(url, archive)
        except Exception as e:
            raise SystemExit(f"download failed: {e}") from None

        print("[*] extracting")
        extracted = _extract_binary(archive, tmp, archive_ext=archive_ext)

        # Stage with a sibling name (NOT ``with_suffix(".new")`` -- on
        # Windows that would replace ".exe" with ".new" and lose the
        # executable extension).
        staged = target.with_name(target.name + ".new")
        if staged.exists():
            staged.unlink()
        shutil.move(str(extracted), staged)
        make_executable(staged)
        # Windows ``os.replace`` on an open / running binary fails with
        # ERROR_ACCESS_DENIED; the daemon must be stopped before
        # upgrading. We don't try to be clever about it.
        if IS_WINDOWS and target.exists():
            try:
                target.unlink()
            except OSError as e:
                # Clean up the staged copy so a retry starts fresh.
                staged.unlink(missing_ok=True)
                raise SystemExit(
                    f"[!] could not replace {target}: {e}\n"
                    " is llama-swap still running? stop the stack first: "
                    "llmstack stop"
                ) from None
        os.replace(staged, target)

    print(f"[OK] installed {target} ({os_name}/{arch})")
    line = _installed_version_line(target)
    if line:
        print(f" {line}")
    return target
@@ -0,0 +1,164 @@
1
+ """Background GGUF downloader.
2
+
3
+ Replaces the shell ``cmd_download`` action. We shell out to
4
+ ``llama-completion`` (preferred; modern llama.cpp split: chat=llama-cli,
5
+ one-shot=llama-completion) or legacy ``llama-cli`` because the standard
6
+ llama.cpp HF cache uses a resumable partial-file convention
7
+ (``.downloadInProgress``) that ``huggingface_hub.hf_hub_download`` does
8
+ not understand. Co-mixing the two would leave un-resumable partial blobs
9
+ on disk -- see ``UPGRADING.md`` "Cache management".
10
+
11
+ Every download is launched as a backgrounded subprocess with its own
12
+ log file at ``<state>/logs/dl-<tag>.log``. We do **not** wait
13
+ for them to finish; the caller decides whether to poll
14
+ :func:`running_downloads`.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import os
20
+ import shutil
21
+ import sys
22
+ import time
23
+ from dataclasses import dataclass
24
+ from pathlib import Path
25
+
26
+ from llmstack._platform import detached_popen, find_pids
27
+ from llmstack.paths import ensure_state_dirs, require_models_ini, resolve
28
+ from llmstack.tiers import iter_download_targets, load_tiers
29
+
30
+ LLAMA_BINS = ("llama-completion", "llama-cli")
31
+
32
+
33
@dataclass(frozen=True)
class DownloadJob:
    """A single backgrounded ``llama-*`` invocation.

    Frozen: a job is an immutable record of what was launched.
    """

    tag: str    # tier tag from models.ini; also names the log file (dl-<tag>.log)
    repo: str   # Hugging Face repo, passed to the llama binary via -hf
    file: str   # GGUF filename within the repo, passed via -hff
    label: str  # short tier label shown in download_all's progress output
    log: Path   # per-download log file capturing the child's stdout+stderr
    pid: int    # pid of the detached downloader subprocess
43
+
44
+
45
def _find_llama_bin() -> str:
    """Locate the preferred llama.cpp driver on PATH.

    Candidates are tried in :data:`LLAMA_BINS` order, so
    ``llama-completion`` wins over legacy ``llama-cli``.
    """
    hits = (shutil.which(name) for name in LLAMA_BINS)
    resolved = next((hit for hit in hits if hit), None)
    if resolved is None:
        raise SystemExit(
            "[!] neither llama-completion nor llama-cli found in PATH "
            "(brew install llama.cpp)"
        )
    return resolved
54
+
55
+
56
def _spawn(llama_bin: str, repo: str, file: str, log: Path, hf_token: str | None) -> int:
    """Launch a backgrounded one-shot completion that downloads ``repo/file``.

    The fixed argv requests a single-token completion against the
    HF repo/file pair, which makes llama.cpp pull the GGUF into its
    standard cache (see the module docstring). Returns the detached
    child's pid; its stdout and stderr both land in ``log``.
    """
    argv: list[str] = [
        llama_bin,
        "-hf", repo,
        "-hff", file,
        "--no-warmup",
        "-ngl", "0",
        "-c", "256",
        "-p", "ok",
        "-n", "1",
    ]
    if hf_token:
        argv += ["--hf-token", hf_token]

    log.parent.mkdir(parents=True, exist_ok=True)
    # ``with`` guarantees the log handle is closed even if
    # ``detached_popen`` raises (the previous open/close pair leaked the
    # fd on failure). The child keeps its own duplicated descriptor, so
    # closing ours immediately after the spawn is safe.
    with log.open("wb") as fp:
        proc = detached_popen(argv, stdout=fp, stderr=fp)
    return proc.pid
76
+
77
+
78
def download_all() -> list[DownloadJob]:
    """Kick off downloads for every tier file declared in models.ini.

    Returns the list of launched jobs. The list may be empty: when every
    tier in the ini is hosted (non-GGUF) there is nothing to fetch and no
    error is raised.

    Raises :exc:`SystemExit` (via the helpers) when models.ini is missing
    or when GGUF targets exist but no llama.cpp downloader is on PATH.
    """
    require_models_ini()
    paths = ensure_state_dirs()
    # HF_TOKEN is optional: unauthenticated downloads work, just rate-limited.
    hf_token = os.environ.get("HF_TOKEN") or None

    targets = list(iter_download_targets())
    hosted_tiers = sorted(t.name for t in load_tiers().values() if not t.is_gguf)

    print(f"[*] inventory: {paths.models_ini}")
    if hosted_tiers:
        print(f"[*] hosted (no download): {', '.join(hosted_tiers)}")

    if not targets:
        # All tiers in the ini are hosted (e.g. bedrock-only) -- nothing
        # to fetch. Don't fail; downloads are an optional step in a
        # cloud-only deployment.
        print("[*] no GGUF tiers configured -- nothing to download.")
        return []

    llama_bin = _find_llama_bin()
    print(f"[*] downloader: {llama_bin}")
    print("[*] cache: ~/.cache/huggingface/hub (default for llama.cpp)")
    if hf_token:
        print("[*] HF_TOKEN set (faster rate limits)")
    else:
        print("[*] no HF_TOKEN (rate-limited unauthenticated downloads)")
    print()

    jobs: list[DownloadJob] = []
    for tf in targets:
        # One log per tier: <state>/logs/dl-<tag>.log
        log = paths.log_dir / f"dl-{tf.tag}.log"
        print(f"[*] {tf.tag:<32} ({tf.label:<7}) {tf.repo} / {tf.file}")
        print(f" log -> {log}")
        pid = _spawn(llama_bin, tf.repo, tf.file, log, hf_token)
        print(f" pid -> {pid}")
        jobs.append(DownloadJob(
            tag=tf.tag, repo=tf.repo, file=tf.file, label=tf.label,
            log=log, pid=pid,
        ))

    print()
    print(f"{len(jobs)} download(s) queued in the background.")
    print()
    print("Watch progress:")
    print(f" tail -f {paths.log_dir}/dl-*.log")
    print(" llama-cli -cl # lists completed cache entries")
    print()
    print("When you want to try queued upgrade targets without committing:")
    print(" llmstack stop && llmstack start --next")
    return jobs
133
+
134
+
135
def running_downloads() -> int:
    """Count in-flight ``llama-{completion,cli}`` HF download processes.

    Process lookup is delegated to :func:`llmstack._platform.find_pids`
    (POSIX ``pgrep -f``, Windows PowerShell ``Get-CimInstance``); when no
    lookup tool is available it matches nothing and this reports 0.
    """
    pids = find_pids(r"llama-(completion|cli).*-hf ")
    return len(pids)
143
+
144
+
145
def wait_for_downloads(poll_seconds: float = 10.0, *, log_dir: Path | None = None) -> None:
    """Block until no ``llama-*`` HF download subprocesses remain.

    Polls :func:`running_downloads` every ``poll_seconds`` and prints a
    count each round so the wait is visibly alive. Ctrl-C exits with
    status 130 while the detached downloads keep running.
    """
    log = log_dir or resolve().log_dir
    print(f" (logs: {log}/dl-*.log)")
    # Small head start before the first poll.
    time.sleep(2)
    try:
        while n := running_downloads():
            print(f" {n} download(s) still running...")
            time.sleep(poll_seconds)
    except KeyboardInterrupt:
        print("\n[!] interrupted -- downloads continue in the background.", file=sys.stderr)
        raise SystemExit(130) from None
    print("[OK] all downloads complete.")
@@ -0,0 +1,37 @@
1
+ """Config generators that render the runtime configs from ``models.ini``.
2
+
3
+ Every command that mutates state runs through ``render_to`` so the file is
4
+ written atomically (tmp file in the same directory, validated, then
5
+ renamed) -- mirrors the old shell ``_render_install`` helper.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import os
11
+ import tempfile
12
+ from collections.abc import Callable
13
+ from pathlib import Path
14
+
15
+
16
def render_to(target: Path, render: Callable[[Path], None], validate: Callable[[Path], None]) -> None:
    """Render -> validate -> atomic ``mv`` into ``target``.

    ``render`` writes the candidate file; ``validate`` raises on a bad
    payload (e.g. by trying to ``yaml.safe_load`` it). The candidate is
    created in ``target``'s own directory so the final ``os.replace`` is
    atomic, and it is unlinked on any failure so a half-written config is
    never left behind.
    """
    target.parent.mkdir(parents=True, exist_ok=True)
    handle, candidate_name = tempfile.mkstemp(prefix=f".{target.name}.", dir=str(target.parent))
    os.close(handle)  # mkstemp opens the file; only the path is needed here
    candidate = Path(candidate_name)
    try:
        render(candidate)
        validate(candidate)
        os.replace(candidate, target)
        # mkstemp creates 0600 files; widen to the conventional config mode.
        target.chmod(0o644)
    except BaseException:
        candidate.unlink(missing_ok=True)
        raise
35
+
36
+
37
+ __all__ = ["render_to"]