opencode-llmstack 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ """Asset downloaders.
2
+
3
+ Two distinct concerns live here:
4
+
5
+ :mod:`llmstack.download.ggufs`
6
+ Background download of every GGUF named in ``models.ini`` using
7
+ ``llama-completion`` (or legacy ``llama-cli``) so the standard
8
+ llama.cpp HF cache stays the single canonical store.
9
+
10
+ :mod:`llmstack.download.binary`
11
+ One-shot installer for the ``llama-swap`` Go binary, fetched from its
12
+ GitHub release tag. Detects host OS/arch, optionally pinned via the
13
+ ``LLAMA_SWAP_VERSION`` env var.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from llmstack.download.binary import install_llama_swap
19
+ from llmstack.download.ggufs import download_all
20
+
21
+ __all__ = ["install_llama_swap", "download_all"]
@@ -0,0 +1,234 @@
1
+ """Install (or update) the ``llama-swap`` binary.
2
+
3
+ Replaces the shell ``_install_llama_swap`` helper. Resolves the latest
4
+ GitHub release tag (or honours ``$LLAMA_SWAP_VERSION``), downloads the
5
+ asset for the current OS+arch, extracts the single ``llama-swap``
6
+ executable (``llama-swap.exe`` on Windows), and atomically renames it
7
+ into place under :func:`llmstack.paths.bin_dir`.
8
+
9
+ A second call short-circuits when the installed version already matches
10
+ the resolved tag, unless ``force=True`` is passed.
11
+
12
+ Asset naming on the upstream release matches goreleaser's convention:
13
+
14
+ * POSIX: ``llama-swap_<num>_<os>_<arch>.tar.gz``
15
+ * Windows: ``llama-swap_<num>_windows_amd64.zip`` (only amd64 is published)
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import os
21
+ import platform
22
+ import re
23
+ import shutil
24
+ import subprocess
25
+ import tarfile
26
+ import tempfile
27
+ import urllib.request
28
+ import zipfile
29
+ from pathlib import Path
30
+
31
+ from llmstack._platform import EXE_SUFFIX, IS_WINDOWS, make_executable
32
+ from llmstack.paths import REPO_LLAMA_SWAP, ensure_data_dirs
33
+
34
+ GH_API = "https://api.github.com"
35
+ GH_DL = "https://github.com"
36
+ VERSION_RE = re.compile(r"version:\s*v?([0-9][\w.-]*)", re.IGNORECASE)
37
+
38
+ BINARY_NAME = f"llama-swap{EXE_SUFFIX}"
39
+
40
+
41
+ def _detect_os_arch() -> tuple[str, str, str]:
42
+ """Return ``(os_label, arch_label, archive_ext)`` for the current host.
43
+
44
+ The third element drives the asset name suffix: ``"tar.gz"`` for the
45
+ POSIX builds, ``"zip"`` for the Windows build. Goreleaser's defaults.
46
+ """
47
+ sysname = platform.system()
48
+ os_map = {"Darwin": "darwin", "Linux": "linux", "FreeBSD": "freebsd", "Windows": "windows"}
49
+ if sysname not in os_map:
50
+ raise SystemExit(f"unsupported OS: {sysname} (need Darwin/Linux/FreeBSD/Windows)")
51
+ machine = platform.machine().lower()
52
+ if machine in ("arm64", "aarch64"):
53
+ arch = "arm64"
54
+ elif machine in ("x86_64", "amd64"):
55
+ arch = "amd64"
56
+ else:
57
+ raise SystemExit(f"unsupported arch: {machine} (need arm64 or x86_64)")
58
+
59
+ os_label = os_map[sysname]
60
+ if os_label == "freebsd" and arch != "amd64":
61
+ raise SystemExit(f"no llama-swap release for {os_label}/{arch}")
62
+ if os_label == "windows":
63
+ if arch != "amd64":
64
+ raise SystemExit(
65
+ f"no llama-swap windows release for {arch} -- "
66
+ "only windows_amd64 is published upstream."
67
+ )
68
+ return os_label, arch, "zip"
69
+ return os_label, arch, "tar.gz"
70
+
71
+
72
def _resolve_latest_tag() -> str:
    """Query the GitHub API for the newest llama-swap release tag.

    Raises :exc:`SystemExit` on any network/parse failure, or when the
    API response carries no ``tag_name``.
    """
    import json

    print(f"[*] resolving latest release tag from github.com/{REPO_LLAMA_SWAP}...")
    api_url = f"{GH_API}/repos/{REPO_LLAMA_SWAP}/releases/latest"
    try:
        with urllib.request.urlopen(api_url, timeout=10) as response:
            tag = json.load(response).get("tag_name") or ""
    except Exception as exc:
        raise SystemExit(f"could not resolve latest release tag: {exc}") from None
    if not tag:
        raise SystemExit("could not resolve latest release tag (empty response)")
    print(f"[*] latest release: {tag}")
    return tag
85
+
86
+
87
+ def _installed_version_line(target: Path) -> str:
88
+ """Return the first line of ``llama-swap --version`` (or empty on error)."""
89
+ if not target.exists():
90
+ return ""
91
+ try:
92
+ proc = subprocess.run(
93
+ [str(target), "--version"],
94
+ check=False,
95
+ stdout=subprocess.PIPE,
96
+ stderr=subprocess.STDOUT,
97
+ text=True,
98
+ timeout=10,
99
+ )
100
+ except (OSError, subprocess.SubprocessError):
101
+ return ""
102
+ return (proc.stdout or "").splitlines()[0] if proc.stdout else ""
103
+
104
+
105
def latest_release_tag() -> str | None:
    """Best-effort lookup of the newest release tag; ``None`` instead of raising."""
    import json

    try:
        api_url = f"{GH_API}/repos/{REPO_LLAMA_SWAP}/releases/latest"
        with urllib.request.urlopen(api_url, timeout=5) as response:
            tag = json.load(response).get("tag_name") or ""
        return tag or None
    except Exception:
        # Deliberately swallow everything: callers treat None as "unknown".
        return None
115
+
116
+
117
def installed_version(target: Path) -> str | None:
    """Extract the bare version number (e.g. ``"211"``) from ``--version`` output.

    Returns ``None`` when the binary is missing or the output does not
    match :data:`VERSION_RE`.
    """
    match = VERSION_RE.search(_installed_version_line(target))
    return match.group(1) if match else None
122
+
123
+
124
def _extract_binary(archive: Path, dest_dir: Path, *, archive_ext: str) -> Path:
    """Pull the ``llama-swap[.exe]`` file out of ``archive`` into ``dest_dir``.

    Returns the path to the extracted executable (always directly under
    ``dest_dir``). We deliberately ignore the rest of the archive contents
    (READMEs, sample configs) -- the package only consumes the binary itself.

    The member's bytes are streamed into ``dest_dir / BINARY_NAME`` rather
    than calling ``extract()``, so a hostile member path (``../...`` or an
    absolute name) can never write outside ``dest_dir`` and a nested member
    cannot land in a surprise subdirectory.
    """
    out = dest_dir / BINARY_NAME
    if archive_ext == "zip":
        try:
            with zipfile.ZipFile(archive) as zf:
                # Skip directory entries (they end with "/") -- we want a file.
                member = next(
                    (m for m in zf.namelist()
                     if not m.endswith("/") and Path(m).name == BINARY_NAME),
                    None,
                )
                if member is None:
                    raise SystemExit(
                        f"[!] zip did not contain a top-level '{BINARY_NAME}' file"
                    )
                with zf.open(member) as src, out.open("wb") as dst:
                    shutil.copyfileobj(src, dst)
        except zipfile.BadZipFile as e:
            raise SystemExit(f"extract failed: {e}") from None
    else:
        try:
            with tarfile.open(archive, "r:gz") as tf:
                # Only regular files: symlinks/devices are never the binary.
                member = next(
                    (m for m in tf.getmembers()
                     if m.isfile() and Path(m.name).name == BINARY_NAME),
                    None,
                )
                if member is None:
                    raise SystemExit(
                        f"[!] tarball did not contain a top-level '{BINARY_NAME}' file"
                    )
                src = tf.extractfile(member)
                if src is None:
                    raise SystemExit(
                        f"[!] tarball did not contain a top-level '{BINARY_NAME}' file"
                    )
                with src, out.open("wb") as dst:
                    shutil.copyfileobj(src, dst)
        except tarfile.TarError as e:
            raise SystemExit(f"extract failed: {e}") from None

    if not out.is_file():
        raise SystemExit(f"[!] archive did not yield a '{BINARY_NAME}' file")
    return out
159
+
160
+
161
def install_llama_swap(*, force: bool = False) -> Path:
    """Download/refresh the ``llama-swap`` binary.

    Returns the absolute path to the installed binary. ``force=True``
    re-downloads even when the version matches.

    Raises :exc:`SystemExit` when the platform is unsupported, the release
    tag cannot be resolved, the download or extraction fails, or (on
    Windows) the existing binary cannot be replaced.
    """
    paths = ensure_data_dirs()
    target = paths.llama_swap_bin  # final install location

    os_name, arch, archive_ext = _detect_os_arch()
    # An explicit $LLAMA_SWAP_VERSION pin skips the GitHub API round-trip.
    tag = os.environ.get("LLAMA_SWAP_VERSION", "").strip()
    if tag:
        print(f"[*] version: {tag} (from $LLAMA_SWAP_VERSION)")
    else:
        tag = _resolve_latest_tag()

    # Goreleaser asset names carry the bare number, without the leading "v".
    num = tag.lstrip("v")
    asset = f"llama-swap_{num}_{os_name}_{arch}.{archive_ext}"
    url = f"{GH_DL}/{REPO_LLAMA_SWAP}/releases/download/{tag}/{asset}"

    if target.exists() and not force:
        # Short-circuit when the installed "version: ..." line already
        # reports the resolved number.
        line = _installed_version_line(target)
        if line and re.search(rf"version:\s*v?{re.escape(num)}\b", line, re.IGNORECASE):
            print(f"[=] already installed: {target}")
            print(f" {line}")
            print(" (re-run with --force to redownload)")
            return target
        if line:
            print(f"[*] currently installed: {line}")
            print(f" upgrading to {tag}")

    paths.bin_dir.mkdir(parents=True, exist_ok=True)
    with tempfile.TemporaryDirectory(prefix="llmstack-llama-swap-") as tmp_dir:
        tmp = Path(tmp_dir)
        archive = tmp / asset

        print(f"[*] downloading {asset}")
        print(f" from {url}")
        try:
            urllib.request.urlretrieve(url, archive)
        except Exception as e:
            raise SystemExit(f"download failed: {e}") from None

        print("[*] extracting")
        extracted = _extract_binary(archive, tmp, archive_ext=archive_ext)

        # Stage with a sibling name (NOT ``with_suffix(".new")`` -- on
        # Windows that would replace ".exe" with ".new" and lose the
        # executable extension).
        staged = target.with_name(target.name + ".new")
        if staged.exists():
            staged.unlink()
        shutil.move(str(extracted), staged)
        make_executable(staged)
        # Windows ``os.replace`` on an open / running binary fails with
        # ERROR_ACCESS_DENIED; the daemon must be stopped before
        # upgrading. We don't try to be clever about it.
        if IS_WINDOWS and target.exists():
            try:
                target.unlink()
            except OSError as e:
                # Clean up the staged copy so a retry starts fresh.
                staged.unlink(missing_ok=True)
                raise SystemExit(
                    f"[!] could not replace {target}: {e}\n"
                    " is llama-swap still running? stop the stack first: "
                    "llmstack stop"
                ) from None
        os.replace(staged, target)

    print(f"[OK] installed {target} ({os_name}/{arch})")
    line = _installed_version_line(target)
    if line:
        print(f" {line}")
    return target
@@ -0,0 +1,164 @@
1
+ """Background GGUF downloader.
2
+
3
+ Replaces the shell ``cmd_download`` action. We shell out to
4
+ ``llama-completion`` (preferred; modern llama.cpp split: chat=llama-cli,
5
+ one-shot=llama-completion) or legacy ``llama-cli`` because the standard
6
+ llama.cpp HF cache uses a resumable partial-file convention
7
+ (``.downloadInProgress``) that ``huggingface_hub.hf_hub_download`` does
8
+ not understand. Co-mixing the two would leave un-resumable partial blobs
9
+ on disk -- see ``UPGRADING.md`` "Cache management".
10
+
11
+ Every download is launched as a backgrounded subprocess with its own
12
+ log file at ``<state>/logs/dl-<tag>.log``. We do **not** wait
13
+ for them to finish; the caller decides whether to poll
14
+ :func:`running_downloads`.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import os
20
+ import shutil
21
+ import sys
22
+ import time
23
+ from dataclasses import dataclass
24
+ from pathlib import Path
25
+
26
+ from llmstack._platform import detached_popen, find_pids
27
+ from llmstack.paths import ensure_state_dirs, require_models_ini, resolve
28
+ from llmstack.tiers import iter_download_targets, load_tiers
29
+
30
+ LLAMA_BINS = ("llama-completion", "llama-cli")
31
+
32
+
33
@dataclass(frozen=True)
class DownloadJob:
    """A single backgrounded ``llama-*`` invocation.

    Frozen: a job is an immutable record of what was launched.
    """

    tag: str    # tier tag from models.ini; also names the log file (dl-<tag>.log)
    repo: str   # Hugging Face repo, passed to the llama binary via -hf
    file: str   # GGUF filename within the repo, passed via -hff
    label: str  # short tier label shown in download_all's progress output
    log: Path   # per-download log file capturing the child's stdout+stderr
    pid: int    # pid of the detached downloader subprocess
43
+
44
+
45
def _find_llama_bin() -> str:
    """Locate the preferred llama.cpp driver on PATH.

    Candidates are tried in :data:`LLAMA_BINS` order, so
    ``llama-completion`` wins over legacy ``llama-cli``.
    """
    hits = (shutil.which(name) for name in LLAMA_BINS)
    resolved = next((hit for hit in hits if hit), None)
    if resolved is None:
        raise SystemExit(
            "[!] neither llama-completion nor llama-cli found in PATH "
            "(brew install llama.cpp)"
        )
    return resolved
54
+
55
+
56
def _spawn(llama_bin: str, repo: str, file: str, log: Path, hf_token: str | None) -> int:
    """Launch a backgrounded one-shot completion that downloads ``repo/file``.

    The fixed argv requests a single-token completion against the
    HF repo/file pair, which makes llama.cpp pull the GGUF into its
    standard cache (see the module docstring). Returns the detached
    child's pid; its stdout and stderr both land in ``log``.
    """
    argv: list[str] = [
        llama_bin,
        "-hf", repo,
        "-hff", file,
        "--no-warmup",
        "-ngl", "0",
        "-c", "256",
        "-p", "ok",
        "-n", "1",
    ]
    if hf_token:
        argv += ["--hf-token", hf_token]

    log.parent.mkdir(parents=True, exist_ok=True)
    # ``with`` guarantees the log handle is closed even if
    # ``detached_popen`` raises (the previous open/close pair leaked the
    # fd on failure). The child keeps its own duplicated descriptor, so
    # closing ours immediately after the spawn is safe.
    with log.open("wb") as fp:
        proc = detached_popen(argv, stdout=fp, stderr=fp)
    return proc.pid
76
+
77
+
78
def download_all() -> list[DownloadJob]:
    """Kick off downloads for every tier file declared in models.ini.

    Returns the list of launched jobs. The list may be empty: when every
    tier in the ini is hosted (non-GGUF) there is nothing to fetch and no
    error is raised.

    Raises :exc:`SystemExit` (via the helpers) when models.ini is missing
    or when GGUF targets exist but no llama.cpp downloader is on PATH.
    """
    require_models_ini()
    paths = ensure_state_dirs()
    # HF_TOKEN is optional: unauthenticated downloads work, just rate-limited.
    hf_token = os.environ.get("HF_TOKEN") or None

    targets = list(iter_download_targets())
    hosted_tiers = sorted(t.name for t in load_tiers().values() if not t.is_gguf)

    print(f"[*] inventory: {paths.models_ini}")
    if hosted_tiers:
        print(f"[*] hosted (no download): {', '.join(hosted_tiers)}")

    if not targets:
        # All tiers in the ini are hosted (e.g. bedrock-only) -- nothing
        # to fetch. Don't fail; downloads are an optional step in a
        # cloud-only deployment.
        print("[*] no GGUF tiers configured -- nothing to download.")
        return []

    llama_bin = _find_llama_bin()
    print(f"[*] downloader: {llama_bin}")
    print("[*] cache: ~/.cache/huggingface/hub (default for llama.cpp)")
    if hf_token:
        print("[*] HF_TOKEN set (faster rate limits)")
    else:
        print("[*] no HF_TOKEN (rate-limited unauthenticated downloads)")
    print()

    jobs: list[DownloadJob] = []
    for tf in targets:
        # One log per tier: <state>/logs/dl-<tag>.log
        log = paths.log_dir / f"dl-{tf.tag}.log"
        print(f"[*] {tf.tag:<32} ({tf.label:<7}) {tf.repo} / {tf.file}")
        print(f" log -> {log}")
        pid = _spawn(llama_bin, tf.repo, tf.file, log, hf_token)
        print(f" pid -> {pid}")
        jobs.append(DownloadJob(
            tag=tf.tag, repo=tf.repo, file=tf.file, label=tf.label,
            log=log, pid=pid,
        ))

    print()
    print(f"{len(jobs)} download(s) queued in the background.")
    print()
    print("Watch progress:")
    print(f" tail -f {paths.log_dir}/dl-*.log")
    print(" llama-cli -cl # lists completed cache entries")
    print()
    print("When you want to try queued upgrade targets without committing:")
    print(" llmstack stop && llmstack start --next")
    return jobs
133
+
134
+
135
def running_downloads() -> int:
    """Count in-flight ``llama-{completion,cli}`` HF download processes.

    Process lookup is delegated to :func:`llmstack._platform.find_pids`
    (POSIX ``pgrep -f``, Windows PowerShell ``Get-CimInstance``); when no
    lookup tool is available it matches nothing and this reports 0.
    """
    pids = find_pids(r"llama-(completion|cli).*-hf ")
    return len(pids)
143
+
144
+
145
def wait_for_downloads(poll_seconds: float = 10.0, *, log_dir: Path | None = None) -> None:
    """Block until no ``llama-*`` HF download subprocesses remain.

    Polls :func:`running_downloads` every ``poll_seconds`` and prints a
    count each round so the wait is visibly alive. Ctrl-C exits with
    status 130 while the detached downloads keep running.
    """
    log = log_dir or resolve().log_dir
    print(f" (logs: {log}/dl-*.log)")
    # Small head start before the first poll.
    time.sleep(2)
    try:
        while n := running_downloads():
            print(f" {n} download(s) still running...")
            time.sleep(poll_seconds)
    except KeyboardInterrupt:
        print("\n[!] interrupted -- downloads continue in the background.", file=sys.stderr)
        raise SystemExit(130) from None
    print("[OK] all downloads complete.")
@@ -0,0 +1,37 @@
1
+ """Config generators that render the runtime configs from ``models.ini``.
2
+
3
+ Every command that mutates state runs through ``render_to`` so the file is
4
+ written atomically (tmp file in the same directory, validated, then
5
+ renamed) -- mirrors the old shell ``_render_install`` helper.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import os
11
+ import tempfile
12
+ from collections.abc import Callable
13
+ from pathlib import Path
14
+
15
+
16
def render_to(target: Path, render: Callable[[Path], None], validate: Callable[[Path], None]) -> None:
    """Render -> validate -> atomic ``mv`` into ``target``.

    ``render`` writes the candidate file; ``validate`` raises on a bad
    payload (e.g. by trying to ``yaml.safe_load`` it). The candidate is
    created in ``target``'s own directory so the final ``os.replace`` is
    atomic, and it is unlinked on any failure so a half-written config is
    never left behind.
    """
    target.parent.mkdir(parents=True, exist_ok=True)
    handle, candidate_name = tempfile.mkstemp(prefix=f".{target.name}.", dir=str(target.parent))
    os.close(handle)  # mkstemp opens the file; only the path is needed here
    candidate = Path(candidate_name)
    try:
        render(candidate)
        validate(candidate)
        os.replace(candidate, target)
        # mkstemp creates 0600 files; widen to the conventional config mode.
        target.chmod(0o644)
    except BaseException:
        candidate.unlink(missing_ok=True)
        raise
35
+
36
+
37
+ __all__ = ["render_to"]