opencode-llmstack 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llmstack/AGENTS.md +13 -0
- llmstack/__init__.py +20 -0
- llmstack/__main__.py +10 -0
- llmstack/_platform.py +420 -0
- llmstack/app.py +644 -0
- llmstack/backends/__init__.py +19 -0
- llmstack/backends/bedrock.py +790 -0
- llmstack/check_models.py +119 -0
- llmstack/cli.py +264 -0
- llmstack/commands/__init__.py +10 -0
- llmstack/commands/_helpers.py +91 -0
- llmstack/commands/activate.py +71 -0
- llmstack/commands/check.py +13 -0
- llmstack/commands/download.py +27 -0
- llmstack/commands/install.py +365 -0
- llmstack/commands/install_llama_swap.py +36 -0
- llmstack/commands/reload.py +59 -0
- llmstack/commands/restart.py +12 -0
- llmstack/commands/setup.py +146 -0
- llmstack/commands/start.py +360 -0
- llmstack/commands/status.py +260 -0
- llmstack/commands/stop.py +73 -0
- llmstack/download/__init__.py +21 -0
- llmstack/download/binary.py +234 -0
- llmstack/download/ggufs.py +164 -0
- llmstack/generators/__init__.py +37 -0
- llmstack/generators/llama_swap.py +421 -0
- llmstack/generators/opencode.py +291 -0
- llmstack/models.ini +304 -0
- llmstack/paths.py +318 -0
- llmstack/shell_env.py +927 -0
- llmstack/tiers.py +394 -0
- opencode_llmstack-0.6.0.dist-info/METADATA +693 -0
- opencode_llmstack-0.6.0.dist-info/RECORD +37 -0
- opencode_llmstack-0.6.0.dist-info/WHEEL +5 -0
- opencode_llmstack-0.6.0.dist-info/entry_points.txt +2 -0
- opencode_llmstack-0.6.0.dist-info/top_level.txt +1 -0
llmstack/check_models.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""Snapshot the currently-configured GGUFs and what's recommended.
|
|
2
|
+
|
|
3
|
+
For every tier in ``models.ini``, prints a row per file (current + upgrade
|
|
4
|
+
target if defined) with:
|
|
5
|
+
|
|
6
|
+
- filename
|
|
7
|
+
- HuggingFace size + last-modified
|
|
8
|
+
- direct URL to the GGUF on HF
|
|
9
|
+
- DRIFT marker when ``models.ini`` and ``llama-swap.yaml`` disagree about
|
|
10
|
+
the currently-configured file
|
|
11
|
+
|
|
12
|
+
Read-only -- no side effects. Invoked by ``llmstack check``.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import re
|
|
18
|
+
import sys
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
import yaml
|
|
22
|
+
from huggingface_hub import HfApi
|
|
23
|
+
|
|
24
|
+
from llmstack.paths import resolve
|
|
25
|
+
from llmstack.tiers import load_tiers
|
|
26
|
+
|
|
27
|
+
HF_RE = re.compile(r"-hf\s+(\S+/\S+)")
|
|
28
|
+
HFF_RE = re.compile(r"-hff\s+(\S+\.gguf)")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def parse_yaml(yaml_path: Path) -> dict[str, tuple[str, str]]:
    """Map tier-name -> (repo, file) by reading ``llama-swap.yaml``.

    Strips comment lines from each ``cmd:`` block before regex-matching so
    commented-out ``-hf`` examples don't pollute the result.

    Returns an empty mapping when the file is missing or empty.
    """
    if not yaml_path.exists():
        return {}
    # ``yaml.safe_load`` returns None for an empty / whitespace-only file;
    # coerce to {} so the .get() below can't raise AttributeError.
    cfg = yaml.safe_load(yaml_path.read_text()) or {}
    out: dict[str, tuple[str, str]] = {}
    for tier, m in (cfg.get("models") or {}).items():
        # Drop comment lines so a commented-out "-hf" example inside the cmd
        # block cannot shadow the active flags.
        cmd_lines = [
            line for line in (m.get("cmd") or "").splitlines()
            if not line.strip().startswith("#")
        ]
        cmd = "\n".join(cmd_lines)
        repo_m = HF_RE.search(cmd)
        file_m = HFF_RE.search(cmd)
        if repo_m and file_m:
            out[tier] = (repo_m.group(1), file_m.group(1))
    return out
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def hf_meta(api: HfApi, repo: str, fname: str) -> tuple[str, str]:
    """Fetch (size_human, last_modified_iso) from HF for a single file."""
    try:
        info = api.model_info(repo, files_metadata=True)
        size = None
        for sib in info.siblings:
            if sib.rfilename == fname:
                size = sib.size
                break
        human = f"{size / 1024 / 1024 / 1024:.1f} GB" if size else "?"
        stamp = info.last_modified.strftime("%Y-%m-%d") if info.last_modified else "?"
        return human, stamp
    except Exception as exc:  # network / 404 / auth - keep going for the next row
        return "ERR", str(exc)[:24]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def main(argv: list[str] | None = None) -> int:
    """Print one report row per configured GGUF (or Bedrock model) per tier.

    ``argv`` is accepted to satisfy the ``run(args)`` command contract but is
    not used -- the report takes no options. Always returns 0.
    """
    api = HfApi()
    tiers = load_tiers()
    yaml_cfg = parse_yaml(resolve().llama_swap_yaml)

    fmt = "{:<18} {:<8} {:<70} {:>10} {:>12} {}"
    print(fmt.format("tier", "label", "file / model-id", "size", "updated", "url / region"))
    print("-" * 165)

    # Collected (tier-name, wanted (repo, file), actual (repo, file)) triples.
    drift = []
    for tier in tiers.values():
        # Bedrock tiers have no local GGUF: show model-id + region/profile
        # scope instead and skip the HF metadata lookups entirely.
        if tier.is_bedrock and tier.bedrock is not None:
            b = tier.bedrock
            scope_parts = [p for p in (b.region, b.profile) if p]
            scope = " / ".join(scope_parts) if scope_parts else "(default chain)"
            print(fmt.format(tier.name, "bedrock", b.model_id, "-", "-", scope))
            if b.has_next:
                next_scope_parts = [p for p in (b.region_next or b.region, b.profile) if p]
                next_scope = " / ".join(next_scope_parts) if next_scope_parts else "(default chain)"
                print(fmt.format(tier.name, "next", b.model_id_next or "", "-", "-", next_scope))
            continue

        for tf in tier.files():
            size_s, mod = hf_meta(api, tf.repo, tf.file)
            url = f"https://huggingface.co/{tf.repo}/blob/main/{tf.file}"
            label = tf.label
            # Only the "current" row is compared against llama-swap.yaml;
            # other labels (e.g. upgrade targets) have no active config entry.
            if tf.label == "current":
                actual = yaml_cfg.get(tier.name)
                if actual and actual != (tf.repo, tf.file):
                    label = "DRIFT!"
                    drift.append((tier.name, (tf.repo, tf.file), actual))
            print(fmt.format(tier.name, label, tf.file, size_s, mod, url))

    if drift:
        print()
        print("[!] DRIFT detected between models.ini (recommended) and llama-swap.yaml (active):")
        for tier, want, got in drift:
            print(f" [{tier}] ini wants {want[0]} / {want[1]}")
            print(f" yaml has {got[0]} / {got[1]}")
        print(" Reconcile by editing one of the two so they match.")

    print()
    print("To look for upgrades, browse:")
    print(" https://huggingface.co/models?library=gguf&sort=trending")
    print(" https://huggingface.co/bartowski (general GGUF maintainer)")
    print(" https://huggingface.co/unsloth (Qwen + UD dynamic quants)")
    print(" https://huggingface.co/mradermacher (i1 + abliterated/heretic)")
    print()
    print("Then: see UPGRADING.md for the workflow.")
    return 0
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
if __name__ == "__main__":
    # Direct-execution entry point; ``llmstack check`` routes through
    # llmstack.commands.check instead.
    sys.exit(main(sys.argv[1:]))
|
llmstack/cli.py
ADDED
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
"""``llmstack`` console-script entry point.
|
|
2
|
+
|
|
3
|
+
This is the Python replacement for ``llmstack.sh``. It does only one
|
|
4
|
+
thing: parse the action word, look up the matching ``commands.<action>``
|
|
5
|
+
module, and call its ``run(args)`` function. Every action implements its
|
|
6
|
+
own flag parsing so help text and error messages stay close to the
|
|
7
|
+
behaviour they describe.
|
|
8
|
+
|
|
9
|
+
llmstack <action> [args...]
|
|
10
|
+
|
|
11
|
+
For machine readers / shell completions, ``llmstack help`` prints the
|
|
12
|
+
full action table; ``llmstack <action> -h`` prints per-action help.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import sys
|
|
18
|
+
from collections.abc import Callable
|
|
19
|
+
|
|
20
|
+
from llmstack import __version__
|
|
21
|
+
|
|
22
|
+
USAGE = """\
|
|
23
|
+
llmstack - multi-tier local LLM stack (llama-swap + auto-router + opencode wiring)
|
|
24
|
+
|
|
25
|
+
Does NOT touch ~/.config/opencode/opencode.json. Instead, the generated
|
|
26
|
+
opencode config lives at <work-dir>/.llmstack/opencode.json, and the
|
|
27
|
+
activate hook (`llmstack activate <shell>`) auto-exports OPENCODE_CONFIG
|
|
28
|
+
whenever you cd into a project that has a `.llmstack/`. Inside that
|
|
29
|
+
hooked shell, `opencode` picks up our config; in any other terminal,
|
|
30
|
+
opencode keeps using your global setup unchanged.
|
|
31
|
+
|
|
32
|
+
Usage:
|
|
33
|
+
llmstack <action> [options]
|
|
34
|
+
|
|
35
|
+
Actions:
|
|
36
|
+
setup [--skip-download] [--skip-wait]
|
|
37
|
+
First-time walkthrough: kick off GGUF downloads, wait for them, install
|
|
38
|
+
the llama-swap binary, print the shell activation hook, check opencode.
|
|
39
|
+
|
|
40
|
+
install [--print] [--current | --next | --external [URL]]
|
|
41
|
+
Regenerate .llmstack/opencode.json (+ AGENTS.md copy) and pin
|
|
42
|
+
the default channel for the next `start`. The source of tier
|
|
43
|
+
config depends on channel:
|
|
44
|
+
|
|
45
|
+
--current / --next (local)
|
|
46
|
+
Read <work-dir>/.llmstack/models.ini, seeding it from the
|
|
47
|
+
bundled template on first run. llama-swap.yaml is NOT
|
|
48
|
+
touched -- `start` owns that and regenerates it for the
|
|
49
|
+
chosen channel on each launch.
|
|
50
|
+
|
|
51
|
+
--external [URL] (thin client)
|
|
52
|
+
Fetch models.ini live from the router (`GET URL/models.ini`)
|
|
53
|
+
and render opencode.json against that -- no local
|
|
54
|
+
models.ini is created or kept. Re-run `install` any time to
|
|
55
|
+
pick up router-side edits. URL precedence:
|
|
56
|
+
flag arg > $LLMSTACK_REMOTE_URL > the local router
|
|
57
|
+
(http://127.0.0.1:10101).
|
|
58
|
+
The localhost default is what makes "two projects, one
|
|
59
|
+
host" work zero-config: install one project local and the
|
|
60
|
+
others --external.
|
|
61
|
+
|
|
62
|
+
$LLMSTACK_REMOTE_URL set without --external still implies
|
|
63
|
+
--external (back-compat). The activate hook re-exports this
|
|
64
|
+
var when you `cd` into an external project, so re-running
|
|
65
|
+
`install` from inside an active shell never needs the URL or
|
|
66
|
+
the flag again.
|
|
67
|
+
|
|
68
|
+
`--print` writes the rendered opencode.json to stdout instead
|
|
69
|
+
of files (still fetches the remote in external mode).
|
|
70
|
+
|
|
71
|
+
install-llama-swap [--force]
|
|
72
|
+
(Re-)download the llama-swap Go binary into $LLMSTACK_BIN_DIR (default
|
|
73
|
+
$XDG_DATA_HOME/llmstack/bin/). Setup runs this for you.
|
|
74
|
+
|
|
75
|
+
download
|
|
76
|
+
Download every GGUF named in models.ini (current + queued next) to
|
|
77
|
+
the standard llama.cpp cache, in parallel, in the background.
|
|
78
|
+
|
|
79
|
+
start [--current | --next] [--detach]
|
|
80
|
+
Generate .llmstack/llama-swap.yaml for the chosen channel, bring up
|
|
81
|
+
llama-swap (:10102) + auto-router (:10101). Default channel =
|
|
82
|
+
whatever `install` pinned, else `current`. `--next` swaps any tier
|
|
83
|
+
with hf_file_next. The yaml is regenerated on each fresh launch
|
|
84
|
+
so it always matches the live models.ini; if the daemons are
|
|
85
|
+
already up the running yaml is left alone.
|
|
86
|
+
|
|
87
|
+
Subshell behaviour: if LLMSTACK_ACTIVE is already set (i.e. the
|
|
88
|
+
activate hook has wired this shell up) `start` just brings up
|
|
89
|
+
daemons and returns. Only when the env is not set does `start`
|
|
90
|
+
drop you into a subshell with OPENCODE_CONFIG exported -- as a
|
|
91
|
+
fallback for users who haven't run the activate hook yet.
|
|
92
|
+
`--detach` skips the subshell unconditionally.
|
|
93
|
+
|
|
94
|
+
When the project is installed with channel=external (see
|
|
95
|
+
`install --external`), no daemons are launched: this just
|
|
96
|
+
verifies the pinned remote `GET /models.ini` (which doubles as
|
|
97
|
+
the router's health check -- there's no separate /health route).
|
|
98
|
+
|
|
99
|
+
activate <zsh|bash|powershell>
|
|
100
|
+
Write the auto-activation hook to ~/.<shell>_llmstack_hook and
|
|
101
|
+
print a `source` line to stdout, so
|
|
102
|
+
|
|
103
|
+
eval "$(llmstack activate zsh)"
|
|
104
|
+
|
|
105
|
+
both regenerates the file and turns the hook on in the current
|
|
106
|
+
shell. Paste the same line into your shell rc to make it stick:
|
|
107
|
+
# ~/.zshrc
|
|
108
|
+
eval "$(llmstack activate zsh)"
|
|
109
|
+
The hook walks up from $PWD on every prompt, finds the nearest
|
|
110
|
+
.llmstack/opencode.json, and exports OPENCODE_CONFIG +
|
|
111
|
+
LLMSTACK_WORK_DIR + LLMSTACK_CHANNEL accordingly. Walks back out
|
|
112
|
+
when you cd away. There is no separate `shell` action -- this is
|
|
113
|
+
the shell action.
|
|
114
|
+
|
|
115
|
+
stop
|
|
116
|
+
Stop the router + llama-swap (and any orphaned llama-server children).
|
|
117
|
+
|
|
118
|
+
restart [--current | --next] [--detach]
|
|
119
|
+
stop + start. Convenient for cycling channels.
|
|
120
|
+
|
|
121
|
+
reload
|
|
122
|
+
Emit shell commands that re-export LLMSTACK_CHANNEL +
|
|
123
|
+
OPENCODE_CONFIG and re-render the [llmstack:<project>] prompt
|
|
124
|
+
prefix for the current channel marker. Pipe through eval to
|
|
125
|
+
apply in-place (no nested subshell):
|
|
126
|
+
eval "$(llmstack reload)"
|
|
127
|
+
Useful after `start --next` switches channels in an
|
|
128
|
+
already-active shell -- the activate hook only refreshes on
|
|
129
|
+
chpwd, so without this the prompt would lag until your next cd.
|
|
130
|
+
|
|
131
|
+
status
|
|
132
|
+
Show channel, pids, /v1/models, loaded llama-server processes.
|
|
133
|
+
|
|
134
|
+
check [args]
|
|
135
|
+
Snapshot configured GGUFs + flag drift between models.ini and
|
|
136
|
+
llama-swap.yaml.
|
|
137
|
+
|
|
138
|
+
help | -h | --help
|
|
139
|
+
This message.
|
|
140
|
+
|
|
141
|
+
version | --version
|
|
142
|
+
Print the package version and exit.
|
|
143
|
+
|
|
144
|
+
Environment overrides:
|
|
145
|
+
LLMSTACK_REMOTE_URL base URL of a *remote* llmstack router (e.g.
|
|
146
|
+
`http://10.0.0.5:10101`). Picked up by
|
|
147
|
+
`install` as an alternative to passing
|
|
148
|
+
`--external <url>`; once `install` runs, the
|
|
149
|
+
channel + URL are persisted in
|
|
150
|
+
.llmstack/default-channel and that file is
|
|
151
|
+
the source of truth (the env var is only
|
|
152
|
+
re-exported by the activate hook for
|
|
153
|
+
downstream callers).
|
|
154
|
+
LLMSTACK_MODELS_INI path to models.ini (default:
|
|
155
|
+
<work-dir>/.llmstack/models.ini).
|
|
156
|
+
LLMSTACK_WORK_DIR where .llmstack/ + logs/ live (default: $PWD
|
|
157
|
+
when invoked). Auto-exported by the activate
|
|
158
|
+
hook (`llmstack activate <shell>`) and by the
|
|
159
|
+
subshell `start` spawns, set to the project
|
|
160
|
+
root -- so commands work from any subdirectory
|
|
161
|
+
of an installed project. Without the hook,
|
|
162
|
+
run from the project root (or set this var).
|
|
163
|
+
Local daemons are singleton (ports 10101/10102);
|
|
164
|
+
to consume them from a second project on the
|
|
165
|
+
same host, install that project --external.
|
|
166
|
+
LLMSTACK_DATA_DIR persistent user-data root (default:
|
|
167
|
+
$XDG_DATA_HOME/llmstack). Where the binary lives.
|
|
168
|
+
LLMSTACK_BIN_DIR override just the binary location.
|
|
169
|
+
OPENCODE_CONFIG_DIR where to write opencode.json (default: .llmstack/).
|
|
170
|
+
LLAMA_SWAP_VERSION pin a specific llama-swap release (e.g. v211).
|
|
171
|
+
HF_TOKEN authenticate model downloads (faster rate limits).
|
|
172
|
+
LLMSTACK_SHELL shell to spawn from `start` when no active env
|
|
173
|
+
is detected (default: $SHELL).
|
|
174
|
+
|
|
175
|
+
Channel labels (LLMSTACK_CHANNEL):
|
|
176
|
+
current local stack, canonical channel (steel-blue prompt prefix)
|
|
177
|
+
next local stack, queued-upgrade channel (orange prompt prefix)
|
|
178
|
+
external thin client of an llmstack router (medium-purple prompt
|
|
179
|
+
prefix; the URL is shown alongside the project name in the
|
|
180
|
+
prompt: `[llmstack:<project> <url>]`). The URL is pinned at
|
|
181
|
+
install time -- typically a remote host, but defaults to
|
|
182
|
+
the local router so two projects on one host can share a
|
|
183
|
+
single set of daemons cleanly.
|
|
184
|
+
|
|
185
|
+
Channel markers on disk (.llmstack/active-channel, .llmstack/default-channel):
|
|
186
|
+
one line, format `<channel>[ <url>]`. The URL is only present for
|
|
187
|
+
channel=external; the activate hook re-exports it as
|
|
188
|
+
LLMSTACK_REMOTE_URL when you cd into the project, so you don't have to
|
|
189
|
+
put the URL in your shell rc.
|
|
190
|
+
|
|
191
|
+
Variables exported by the activate hook (and the start fallback subshell):
|
|
192
|
+
OPENCODE_CONFIG path to the generated .llmstack/opencode.json
|
|
193
|
+
LLMSTACK_WORK_DIR absolute path to the project root (auto-detected
|
|
194
|
+
by walking up from $PWD looking for .llmstack/)
|
|
195
|
+
LLMSTACK_CHANNEL current | next | external
|
|
196
|
+
LLMSTACK_ACTIVE "1" while the env is wired up
|
|
197
|
+
LLMSTACK_REMOTE_URL set when channel == external, from the marker file
|
|
198
|
+
LLMSTACK_ROOT absolute path to the llmstack package (start only)
|
|
199
|
+
"""
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def _print_help() -> None:
    """Dump the full usage / action table to stdout."""
    print(USAGE, end="")
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def _print_version() -> None:
    """Print the installed package version to stdout."""
    sys.stdout.write(f"llmstack {__version__}\n")
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def _load_action(action: str) -> Callable[[list[str]], int]:
|
|
211
|
+
"""Resolve ``action`` to a ``run(args)`` callable, lazy-importing the module."""
|
|
212
|
+
aliases = {
|
|
213
|
+
"download-models": "download",
|
|
214
|
+
"check-models": "check",
|
|
215
|
+
}
|
|
216
|
+
name = aliases.get(action, action)
|
|
217
|
+
name = name.replace("-", "_")
|
|
218
|
+
target = f"llmstack.commands.{name}"
|
|
219
|
+
|
|
220
|
+
from importlib import import_module
|
|
221
|
+
|
|
222
|
+
try:
|
|
223
|
+
module = import_module(target)
|
|
224
|
+
except ModuleNotFoundError as e:
|
|
225
|
+
# Only swallow the error when the *action module itself* is missing.
|
|
226
|
+
# Transitive ImportErrors (e.g. an uninstalled third-party dep) must
|
|
227
|
+
# surface, otherwise we mislead the user with "unknown action".
|
|
228
|
+
if e.name == target:
|
|
229
|
+
raise SystemExit(f"[!] unknown action: {action}\n\nrun: llmstack help") from None
|
|
230
|
+
raise SystemExit(
|
|
231
|
+
f"[!] action '{action}' failed to load: missing dependency '{e.name}'\n"
|
|
232
|
+
f" hint: pip install -e . (or pipx install .) to install llmstack's deps"
|
|
233
|
+
) from e
|
|
234
|
+
|
|
235
|
+
run = getattr(module, "run", None)
|
|
236
|
+
if not callable(run):
|
|
237
|
+
raise SystemExit(f"[!] action '{action}' is missing run() -- bug in llmstack")
|
|
238
|
+
return run
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def main(argv: list[str] | None = None) -> int:
    """Dispatch ``llmstack <action> [args...]`` to the matching command module.

    Returns the action's exit code (0 when the action returns None).
    """
    argv_list = list(sys.argv[1:] if argv is None else argv)

    # Built-in pseudo-actions handled without touching the commands tree.
    if not argv_list or argv_list[0] in ("help", "-h", "--help"):
        _print_help()
        return 0
    if argv_list[0] in ("version", "-V", "--version"):
        _print_version()
        return 0

    handler = _load_action(argv_list[0])
    try:
        result = handler(argv_list[1:])
    except SystemExit:
        raise
    except KeyboardInterrupt:
        print("\n[!] interrupted", file=sys.stderr)
        return 130  # conventional SIGINT exit status
    return result if isinstance(result, int) else 0
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
if __name__ == "__main__":
    # ``python -m llmstack.cli`` fallback; the normal entry point is the
    # installed console script.
    sys.exit(main())
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""One module per CLI action.
|
|
2
|
+
|
|
3
|
+
Each module exports a single ``run(args: list[str]) -> int`` callable
|
|
4
|
+
that the dispatcher in :mod:`llmstack.cli` invokes after stripping the
|
|
5
|
+
action name. ``args`` is the rest of ``sys.argv``; modules do their own
|
|
6
|
+
argparse / manual parsing -- the commands are small enough that one
|
|
7
|
+
shared parser would be more friction than it's worth.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""Shared helpers for the start/stop/status commands.
|
|
2
|
+
|
|
3
|
+
Just process lifecycle plumbing -- pid files, port probes, daemon
|
|
4
|
+
spawning, kill-by-pattern. Kept separate from the command modules so the
|
|
5
|
+
control-flow per command stays readable. The actual platform-specific
|
|
6
|
+
process bits (POSIX signals vs Windows ``taskkill`` etc.) live in
|
|
7
|
+
:mod:`llmstack._platform` so this module stays portable.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import urllib.error
|
|
13
|
+
import urllib.request
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
from llmstack._platform import (
|
|
17
|
+
describe_matching,
|
|
18
|
+
detached_popen,
|
|
19
|
+
find_pids,
|
|
20
|
+
kill_matching,
|
|
21
|
+
pid_alive,
|
|
22
|
+
terminate_pid,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def is_running(pid_file: Path) -> bool:
    """``True`` iff ``pid_file`` exists and points at a live process."""
    if not pid_file.is_file():
        return False
    try:
        pid = int(pid_file.read_text().strip())
    except (ValueError, OSError):
        # Unreadable or non-numeric content -> treat as not running.
        return False
    return pid > 0 and pid_alive(pid)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def read_pid(pid_file: Path) -> int | None:
    """Parse the pid stored in ``pid_file``; ``None`` when absent or malformed."""
    try:
        text = pid_file.read_text()
    except OSError:
        # Missing file, directory, or permission problem.
        return None
    try:
        return int(text.strip())
    except ValueError:
        return None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def port_responds(url: str, *, timeout: float = 2.0) -> bool:
    """Probe ``url`` for any 2xx response. Used to detect external daemons."""
    try:
        resp = urllib.request.urlopen(url, timeout=timeout)
    except (urllib.error.URLError, ConnectionError, TimeoutError, OSError):
        # Refused / unreachable / timed out -> nothing is listening.
        return False
    with resp:
        return 200 <= resp.status < 300
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def spawn_daemon(
    argv: list[str],
    *,
    log: Path,
    pid_file: Path,
    env: dict[str, str] | None = None,
) -> int:
    """Spawn ``argv`` detached, redirect stdio to ``log``, write the pid.

    Args:
        argv: command line to launch.
        log: file receiving the child's combined stdout/stderr (appended).
        pid_file: file the child's pid is written to (single line).
        env: optional environment for the child; ``None`` inherits ours.

    Returns:
        The spawned child's pid.
    """
    log.parent.mkdir(parents=True, exist_ok=True)
    pid_file.parent.mkdir(parents=True, exist_ok=True)
    # Use a context manager so the log handle is closed even when
    # detached_popen raises -- the bare open()/close() pair leaked the fd
    # on the error path.
    with log.open("ab") as fp:
        proc = detached_popen(argv, stdout=fp, stderr=fp, env=env)
    pid_file.write_text(f"{proc.pid}\n")
    return proc.pid
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def kill_pid(pid: int, *, grace: float = 5.0) -> None:
    """SIGTERM (or taskkill), wait up to ``grace`` seconds, then hard-kill."""
    # Thin alias over the platform layer so command modules never import
    # llmstack._platform directly.
    terminate_pid(pid, grace=grace)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def pgrep(pattern: str) -> list[int]:
    """Return PIDs whose full command-line matches ``pattern``."""
    # Delegates to llmstack._platform.find_pids, which owns the OS-specific
    # process matching.
    return find_pids(pattern)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def pkill(pattern: str, *, grace: float = 5.0) -> int:
    """Terminate every process matching ``pattern``."""
    # Forwards kill_matching's int result -- presumably a count of processes
    # terminated; confirm against llmstack._platform.kill_matching.
    return kill_matching(pattern, grace=grace)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def pgrep_describe(pattern: str) -> str:
    """``pgrep -af``-style multi-line summary (empty when nothing matches)."""
    # Read-only delegation; formatting lives in the platform layer.
    return describe_matching(pattern)
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""``llmstack activate <shell>`` -- install + source the auto-activation hook.
|
|
2
|
+
|
|
3
|
+
Writes the hook to ``~/.<shell>_llmstack_hook`` and prints the matching
|
|
4
|
+
``source`` line to **stdout** so a one-shot
|
|
5
|
+
|
|
6
|
+
eval "$(llmstack activate zsh)"
|
|
7
|
+
|
|
8
|
+
both regenerates the file and turns on the hook in the current shell.
|
|
9
|
+
Pasting the same line into your shell rc keeps it on for every new
|
|
10
|
+
shell. All informational output goes to stderr so it doesn't get
|
|
11
|
+
captured by ``eval``.
|
|
12
|
+
|
|
13
|
+
Once the hook is installed, ``cd`` into any project with ``.llmstack/``
|
|
14
|
+
and the env (``OPENCODE_CONFIG``, ``LLMSTACK_WORK_DIR``,
|
|
15
|
+
``LLMSTACK_CHANNEL``) is set up automatically -- there is no separate
|
|
16
|
+
``llmstack shell`` action.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import sys
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
|
|
24
|
+
from llmstack.shell_env import activate_hook
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _print_help() -> None:
|
|
28
|
+
print("usage: llmstack activate <zsh|bash|powershell>", file=sys.stderr)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _hook_path(shell: str) -> Path:
|
|
32
|
+
"""``~/.<shell>_llmstack_hook`` -- ``pwsh`` is normalised to ``powershell``
|
|
33
|
+
so the user doesn't end up with two redundant files."""
|
|
34
|
+
name = "powershell" if shell in ("powershell", "pwsh") else shell
|
|
35
|
+
return Path.home() / f".{name}_llmstack_hook"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _source_line(shell: str, path: Path) -> str:
|
|
39
|
+
"""Shell-specific incantation to load the hook file."""
|
|
40
|
+
if shell in ("powershell", "pwsh"):
|
|
41
|
+
return f". '{path}'"
|
|
42
|
+
return f'source "{path}"'
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def write_hook(shell: str) -> tuple[Path, str]:
    """Render the hook for ``shell``, write it to disk, return ``(path, source_line)``.

    Shared by ``llmstack activate`` (CLI surface) and ``llmstack setup``
    (first-run walkthrough) so they install the hook the same way.
    """
    rendered = activate_hook(shell)  # raises SystemExit on unknown shell
    destination = _hook_path(shell)
    destination.write_text(rendered)
    return destination, _source_line(shell, destination)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def run(args: list[str]) -> int:
    """Install the hook for ``args[0]`` and print its source line to stdout."""
    if not args or args[0] in ("-h", "--help"):
        _print_help()
        return 0

    shell = args[0]
    path, src = write_hook(shell)
    eval_line = f'eval "$(llmstack activate {shell})"'

    # Informational output goes to stderr so `eval "$(...)"` captures only
    # the source line printed at the end.
    sys.stderr.write(f"[OK] hook written: {path}\n")
    sys.stderr.write(" activate in this shell now (and for every new shell:\n")
    sys.stderr.write(f" paste into your rc): {eval_line}\n")

    print(src)
    return 0
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""``llmstack check`` -- snapshot configured GGUFs + flag drift.
|
|
2
|
+
|
|
3
|
+
Thin wrapper around :mod:`llmstack.check_models` so the action stays in
|
|
4
|
+
the standard commands/ tree rather than special-cased in the dispatcher.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from llmstack import check_models
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def run(args: list[str]) -> int:
    # Pass the raw arg list straight through; check_models.main owns any
    # argument handling and the exit code.
    return check_models.main(args)
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""``llmstack download`` -- queue every GGUF in models.ini in the background."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from llmstack.download.ggufs import download_all
|
|
6
|
+
from llmstack.paths import is_remote, remote_url
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _print_help() -> None:
    # `download` takes no options beyond -h/--help, so usage is one line.
    print("usage: llmstack download")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def run(args: list[str]) -> int:
    """Queue background downloads for every GGUF listed in models.ini."""
    # Any argument other than a help flag is an error.
    for flag in args:
        if flag in ("-h", "--help"):
            _print_help()
            return 0
        print(f"[!] unknown arg to download: {flag}")
        return 2

    # Thin clients have no local GGUF cache -- the remote owns the models.
    if is_remote():
        print(f"[!] this project is wired as a thin client of {remote_url()} (channel: external);")
        print(" GGUFs live on the remote. `llmstack download` is a local-only command.")
        return 1

    download_all()
    return 0
|