opencode-llmstack 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llmstack/AGENTS.md +13 -0
- llmstack/__init__.py +20 -0
- llmstack/__main__.py +10 -0
- llmstack/_platform.py +420 -0
- llmstack/app.py +644 -0
- llmstack/backends/__init__.py +19 -0
- llmstack/backends/bedrock.py +790 -0
- llmstack/check_models.py +119 -0
- llmstack/cli.py +264 -0
- llmstack/commands/__init__.py +10 -0
- llmstack/commands/_helpers.py +91 -0
- llmstack/commands/activate.py +71 -0
- llmstack/commands/check.py +13 -0
- llmstack/commands/download.py +27 -0
- llmstack/commands/install.py +365 -0
- llmstack/commands/install_llama_swap.py +36 -0
- llmstack/commands/reload.py +59 -0
- llmstack/commands/restart.py +12 -0
- llmstack/commands/setup.py +146 -0
- llmstack/commands/start.py +360 -0
- llmstack/commands/status.py +260 -0
- llmstack/commands/stop.py +73 -0
- llmstack/download/__init__.py +21 -0
- llmstack/download/binary.py +234 -0
- llmstack/download/ggufs.py +164 -0
- llmstack/generators/__init__.py +37 -0
- llmstack/generators/llama_swap.py +421 -0
- llmstack/generators/opencode.py +291 -0
- llmstack/models.ini +304 -0
- llmstack/paths.py +318 -0
- llmstack/shell_env.py +927 -0
- llmstack/tiers.py +394 -0
- opencode_llmstack-0.6.0.dist-info/METADATA +693 -0
- opencode_llmstack-0.6.0.dist-info/RECORD +37 -0
- opencode_llmstack-0.6.0.dist-info/WHEEL +5 -0
- opencode_llmstack-0.6.0.dist-info/entry_points.txt +2 -0
- opencode_llmstack-0.6.0.dist-info/top_level.txt +1 -0

llmstack/commands/start.py
@@ -0,0 +1,360 @@
"""``llmstack start`` -- bring up the stack and enter the env-prepared subshell.

The channel is **decided at install time** and persisted to
``.llmstack/default-channel`` -- ``start`` reads that marker and never
re-derives the channel from the environment. Three channels exist:

*Local* (``current`` / ``next``)
    Generate ``llama-swap.yaml`` for the chosen channel, launch
    llama-swap + the FastAPI router locally, and drop into a subshell
    with ``OPENCODE_CONFIG`` exported. The yaml is regenerated on every
    fresh launch so it always reflects the live ``models.ini``; if the
    daemons are already up under our pid files we leave the loaded yaml
    alone. The ``--current`` / ``--next`` flags pick which of the two
    local channels to launch *for this run* (without rewriting the
    marker -- only ``install`` does that).

    Daemon state has two branches:

    (a) local pid file says daemons are up -> idempotent, channel-checked,
        no yaml regeneration.
    (b) nothing in the pid file -> regenerate yaml, launch fresh. If port
        :10102 is already in use by *another* process (typically another
        project on this host) we refuse: the user should
        ``llmstack install --external`` to wire this project as a thin
        client of those daemons, or stop them first.

*External* (``external``)
    Don't launch anything; verify the marker's URL is reachable (the
    probe hits ``GET /models.ini`` -- the router has no ``/health``
    route) and drop into the subshell with ``LLMSTACK_CHANNEL=external``.
    The URL was pinned by ``llmstack install --external [URL]``; an
    external install with no URL defaults to the local router
    (``http://127.0.0.1:10101``), which is the laptop-with-N-projects
    case where one project owns the daemons and the others are clients.
"""

from __future__ import annotations

import os
import sys
import time
from pathlib import Path

from llmstack.commands._helpers import (
    is_running,
    port_responds,
    read_pid,
    spawn_daemon,
)
from llmstack.generators import render_to
from llmstack.generators.llama_swap import render as render_yaml
from llmstack.generators.llama_swap import validate as validate_yaml
from llmstack.paths import (
    DEFAULT_REMOTE_URL,
    ROUTER_PORT,
    SWAP_PORT,
    ChannelMark,
    ensure_state_dirs,
    read_marker,
    write_marker,
)
from llmstack.shell_env import spawn_subshell
from llmstack.tiers import load_tiers


def _print_help() -> None:
    print("usage: llmstack start [--current | --next] [--detach]")


def _queued_next_tiers() -> list[str]:
    """Names of every tier that has *some* queued upgrade target.

    Backend-aware: gguf tiers with ``hf_file_next`` qualify, and so do
    bedrock tiers with ``aws_model_id_next``. Used to short-circuit
    ``--next`` when nothing's queued.
    """
    return sorted(t.name for t in load_tiers().values() if t.has_next)


def _start_remote(detach: bool, url: str) -> int:
    """Client-mode start: just verify the remote and drop into the shell.

    ``url`` is the remote-router base URL pinned by ``install --external``
    into ``default-channel``. It is *not* re-derived from the environment
    here -- the marker is canonical post-install, and silently following
    a stale env var would lie to the user about which remote opencode is
    actually wired to (the URL is baked into ``opencode.json`` at install
    time).

    The reachability probe hits ``GET /models.ini`` rather than a
    dedicated ``/health`` endpoint -- a 200 there proves both that the
    router is up and that it actually has a config worth talking to,
    which is what the thin client needs. The router has no separate
    ``/health`` route.
    """
    paths = ensure_state_dirs()

    if not paths.opencode_json.is_file():
        raise SystemExit(
            f"no .llmstack/opencode.json in {paths.work_dir} -- run: llmstack install --external\n"
            f" (or `llmstack install --external {url}` to keep this remote URL)"
        )

    print(f"[*] external llmstack: {url}")
    probe_url = f"{url}/models.ini"
    if port_responds(probe_url, timeout=5.0):
        print(f"[OK] {probe_url} responds.")
    else:
        print(f"[!] {probe_url} did not respond -- is the remote stack up?", file=sys.stderr)
        print(" proceeding anyway; opencode will surface the error on first request.", file=sys.stderr)

    write_marker(paths.active_marker, ChannelMark("external", url))

    print()
    print("[OK] client mode (channel: external).")
    print()
    print(f" router {url} (external)")
    print()
    print("Try:")
    print(f" curl -s {url}/v1/models | jq '.data[].id'")
    print(f" curl -s {url}/models.ini | head")
    print()
    print("Disconnect:")
    print(" exit # leave the subshell (daemons are external, nothing to stop)")

    if detach:
        return 0

    # Same "spawn only when no active env" rule as the local-mode path.
    if os.environ.get("LLMSTACK_ACTIVE") == "1":
        cur_chan = os.environ.get("LLMSTACK_CHANNEL", "?")
        if cur_chan == "external":
            print("[=] already active as external client -- env is up to date.")
        else:
            print(
                f"[*] switching to external client ({cur_chan} -> external); env in "
                "this shell is now stale."
            )
            print(" refresh prompt + env in this shell:")
            print(' eval "$(llmstack reload)"')
        return 0

    spawn_subshell("external")
    return 0  # unreachable


def run(args: list[str]) -> int:
    requested: str | None = None
    detach = False
    for arg in args:
        if arg == "--next":
            requested = "next"
        elif arg == "--current":
            requested = "current"
        elif arg in ("--detach", "--no-shell"):
            detach = True
        elif arg in ("-h", "--help"):
            _print_help()
            return 0
        else:
            print(f"[!] unknown arg to start: {arg} (try --next, --current, --detach, -h)")
            return 2

    paths = ensure_state_dirs()
    default = read_marker(paths.default_marker)

    # External installs short-circuit to the thin-client path. The URL
    # is taken from the marker (set by ``install --external``); we
    # never re-derive it from the env.
    if default and default.channel == "external":
        if requested is not None:
            print(
                "[!] --current / --next have no effect for external installs "
                "(no daemons to launch).",
                file=sys.stderr,
            )
        url = (default.url or "").rstrip("/") or DEFAULT_REMOTE_URL
        return _start_remote(detach, url)

    # Local mode -- decide which of current/next to launch.
    if requested is not None:
        channel = requested
    elif default and default.channel in ("current", "next"):
        channel = default.channel
    else:
        channel = "current"

    if not paths.llama_swap_bin.exists() or not os.access(paths.llama_swap_bin, os.X_OK):
        raise SystemExit(f"missing {paths.llama_swap_bin} (run: llmstack setup)")
    if not paths.opencode_json.is_file():
        raise SystemExit(f"no .llmstack/opencode.json in {paths.work_dir} -- run: llmstack install")

    if is_running(paths.swap_pid):
        launch_daemons = False
        live_mark = read_marker(paths.active_marker)
        live = live_mark.channel if live_mark else channel
        if live != channel:
            print(
                f"[!] llama-swap is already running in '{live}' channel; "
                f"refusing to also start '{channel}'. Stop the stack first:",
                file=sys.stderr,
            )
            print("\n llmstack stop", file=sys.stderr)
            print(f" llmstack start --{channel}\n", file=sys.stderr)
            return 1
    elif port_responds(f"http://127.0.0.1:{SWAP_PORT}/health"):
        # Something is already listening on :10102, but it isn't ours
        # (no pid file in this project's state dir). The pre-flag flow
        # silently joined as "shared", which was a footgun: a `stop`
        # from this project would tear down the other project's
        # daemons and we couldn't bring them back without local
        # tooling. Instead, refuse and tell the user how to wire this
        # project as a proper thin client.
        print(
            f"[!] port :{SWAP_PORT} is already in use (daemons started by "
            "another project on this host).",
            file=sys.stderr,
        )
        print(" This project is installed for local mode -- it expects to own", file=sys.stderr)
        print(" those daemons. To run as a thin client of the running stack:", file=sys.stderr)
        print("", file=sys.stderr)
        print("   llmstack install --external", file=sys.stderr)
        print("", file=sys.stderr)
        print(" (--external defaults to http://127.0.0.1:10101, the local router.)", file=sys.stderr)
        print(" To take over instead, stop the running daemons first:", file=sys.stderr)
        print("", file=sys.stderr)
        print("   llmstack stop && llmstack start", file=sys.stderr)
        return 1
    else:
        launch_daemons = True

    if launch_daemons:
        if channel == "next":
            queued = _queued_next_tiers()
            if not queued:
                print(
                    "[!] no tiers have hf_file_next or aws_model_id_next set in models.ini -- "
                    "nothing to do.",
                    file=sys.stderr,
                )
                print(
                    " add a *_next line to a tier and re-run, or use --current.",
                    file=sys.stderr,
                )
                return 1
            print(f"[*] generating next-channel yaml -> {paths.llama_swap_yaml}")
            print(f" queued upgrade tiers: {' '.join(queued)}")
        else:
            print(f"[*] generating yaml -> {paths.llama_swap_yaml}")
        render_to(
            paths.llama_swap_yaml,
            render=lambda p: Path(p).write_text(render_yaml(use_next=(channel == "next"))),
            validate=validate_yaml,
        )

    print(f"[*] channel: {channel} ({paths.llama_swap_yaml.name})")

    if launch_daemons:
        print(f"[*] starting llama-swap on :{SWAP_PORT}")
        spawn_daemon(
            [
                str(paths.llama_swap_bin),
                "--config", str(paths.llama_swap_yaml),
                "--listen", f"127.0.0.1:{SWAP_PORT}",
            ],
            log=paths.log_dir / "llama-swap.log",
            pid_file=paths.swap_pid,
        )
        write_marker(paths.active_marker, ChannelMark(channel))
        time.sleep(1)
        if not is_running(paths.swap_pid):
            print(f"[!] llama-swap failed to start. Check {paths.log_dir}/llama-swap.log")
            paths.swap_pid.unlink(missing_ok=True)
            paths.active_marker.unlink(missing_ok=True)
            return 1
        print(f" pid {read_pid(paths.swap_pid)}")

        print(f"[*] starting router on :{ROUTER_PORT}")
        env = os.environ.copy()
        env.setdefault("LLAMA_SWAP_URL", f"http://127.0.0.1:{SWAP_PORT}")
        env.setdefault("ROUTER_HOST", "127.0.0.1")
        env.setdefault("ROUTER_PORT", str(ROUTER_PORT))
        # Lock-step with the gguf --use-next swap: bedrock tiers in the
        # router pick aws_model_id_next when this flag is set.
        if channel == "next":
            env["LLMSTACK_USE_NEXT"] = "1"
        else:
            env.pop("LLMSTACK_USE_NEXT", None)
        spawn_daemon(
            [sys.executable, "-m", "llmstack.app"],
            log=paths.log_dir / "router.log",
            pid_file=paths.router_pid,
            env=env,
        )
        time.sleep(1)
        if not is_running(paths.router_pid):
            print(f"[!] router failed to start. Check {paths.log_dir}/router.log")
            paths.router_pid.unlink(missing_ok=True)
            return 1
        print(f" pid {read_pid(paths.router_pid)}")
    else:
        print(f"[=] llama-swap already running (pid {read_pid(paths.swap_pid)}, channel {channel})")
        if is_running(paths.router_pid):
            print(f"[=] router already running (pid {read_pid(paths.router_pid)})")

    other = "next" if channel == "current" else "current"
    print()
    print(f"[OK] stack is up (channel: {channel}).")
    print()
    print(f' router http://127.0.0.1:{ROUTER_PORT} (OpenAI-compatible, "auto" routing)')
    print(f" llama-swap http://127.0.0.1:{SWAP_PORT} (raw model endpoints + UI)")
    print()
    print("Try:")
    print(f" curl -s http://127.0.0.1:{ROUTER_PORT}/v1/models | jq '.data[].id'")
    print(f" curl -s http://127.0.0.1:{ROUTER_PORT}/models.ini | head")
    print()
    print("Logs:")
    print(f" tail -f {paths.log_dir}/llama-swap.log")
    print(f" tail -f {paths.log_dir}/router.log")
    print()
    print("Switch channel (requires stop first):")
    print(f" llmstack restart --{other}")
    print()
    print("Stop:")
    print(" llmstack stop")

    if detach:
        return 0

    # Only spawn a subshell when the env isn't already wired up. Two cases:
    #   - Hook installed + sourced: cd-ing into a project sets
    #     LLMSTACK_ACTIVE=1 and friends. start just brings up daemons --
    #     no need to nest another shell.
    #   - Inside a previously-spawned llmstack shell: same deal.
    # For users who haven't run `eval "$(llmstack activate <shell>)"`,
    # spawn so they at least get OPENCODE_CONFIG / channel exports for
    # this terminal.
    if os.environ.get("LLMSTACK_ACTIVE") == "1":
        cur_chan = os.environ.get("LLMSTACK_CHANNEL", "?")
        if cur_chan == channel:
            print(f"[=] already active in '{channel}' -- env is up to date.")
        else:
            # Daemons + active-channel marker are already on the new
            # channel. The current shell's env + PROMPT lag behind --
            # `llmstack reload` emits the eval-able snippet to fix that
            # without nesting a subshell.
            print(
                f"[*] channel switched ({cur_chan} -> {channel}); env in this shell "
                "is now stale."
            )
            print(" refresh prompt + env in this shell:")
            print(' eval "$(llmstack reload)"')
        return 0

    spawn_subshell(channel)
    return 0  # unreachable: spawn_subshell execvps
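
Both `start` and the `status` module below lean on a `port_responds(url, timeout=...)` helper from `llmstack/commands/_helpers.py`, whose body is not part of this diff -- only its call signature is visible. A minimal sketch of what such a probe might look like, assuming it only needs a boolean "did the HTTP GET succeed" answer (names and behaviour here are assumptions, not the package's actual implementation):

```python
# Hypothetical sketch -- the real helper lives in llmstack/commands/_helpers.py,
# which this diff section does not show.
from __future__ import annotations

import urllib.error
import urllib.request


def port_responds(url: str, timeout: float = 3.0) -> bool:
    """Return True if an HTTP GET to ``url`` yields a 2xx/3xx response within ``timeout``."""
    try:
        with urllib.request.urlopen(url, timeout=timeout) as resp:
            return 200 <= resp.status < 400
    except (urllib.error.URLError, ConnectionError, TimeoutError, OSError):
        return False


if __name__ == "__main__":
    # The same probes the commands above issue:
    print(port_responds("http://127.0.0.1:10102/health"))               # llama-swap
    print(port_responds("http://127.0.0.1:10101/models.ini", timeout=5.0))  # router
```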

llmstack/commands/status.py
@@ -0,0 +1,260 @@
"""``llmstack status`` -- show channel, pids, ``/v1/models``, llama-server load.

The channel comes from ``.llmstack/default-channel`` (pinned by
``install``). Two top-level reporting paths:

* ``current`` / ``next`` -- local install. Check pid files + port probes
  for our daemons. If port :10102 responds without a pid file in *this*
  project's ``.llmstack/``, the daemons belong to another project on
  this host; we report that as "(other)" so the user knows the local
  daemons aren't ours -- it's not an error, but also not something this
  project can ``stop`` cleanly.
* ``external`` -- thin-client install. Skip all local checks; probe the
  remote-router URL from the marker.
"""

from __future__ import annotations

import json
import os
import subprocess
import urllib.error
import urllib.request

import yaml

from llmstack._platform import IS_WINDOWS
from llmstack.commands._helpers import is_running, pgrep, port_responds, read_pid
from llmstack.paths import (
    DEFAULT_REMOTE_URL,
    ROUTER_PORT,
    SWAP_PORT,
    read_marker,
    resolve,
)


def _print_help() -> None:
    print("usage: llmstack status")


def _check_local(name: str, url: str) -> None:
    """Report on a local daemon (router/llama-swap).

    ``alive`` (we own the process via pid file) is the happy path.
    ``responds`` without ``alive`` means the port is in use but the
    process isn't ours -- another project on this host owns it. We
    surface that as ``(other)`` rather than ``shared`` because there's
    no special "shared" mode anymore: a local install can't manage
    daemons it didn't spawn. ``llmstack install --external`` is the
    documented way to consume those daemons cleanly.
    """
    paths = resolve()
    pid_file = paths.state_dir / f"{name}.pid"
    pid = read_pid(pid_file) if pid_file.is_file() else None
    alive = pid is not None and is_running(pid_file)
    responds = port_responds(url, timeout=3.0)

    if alive:
        status = f"pid {pid:<7}"
    elif responds:
        status = "(other)"
    else:
        status = "DOWN"
    suffix = f"OK {url}" if responds else f"no response @ {url}"
    print(f" {name:<12} {status:<11} {suffix}")


def _print_process_table(pids: list[int]) -> None:
    """Render ``pid / rss_mb / command`` for each pid (cross-OS).

    POSIX: ``ps -o pid,rss,command`` (rss is in KB, we humanise to MB).
    Windows: ``tasklist /FI "PID eq ..." /FO CSV`` (image name + memory
    usage). Both branches print a header row.
    """
    if IS_WINDOWS:
        rows: list[tuple[str, str, str]] = []
        for pid in pids:
            try:
                proc = subprocess.run(
                    ["tasklist", "/FI", f"PID eq {pid}", "/FO", "CSV", "/NH"],
                    check=False,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.DEVNULL,
                    text=True,
                    timeout=10,
                )
            except (OSError, subprocess.SubprocessError):
                continue
            if proc.returncode != 0 or not proc.stdout.strip():
                continue
            import csv
            for fields in csv.reader(proc.stdout.splitlines()):
                if len(fields) < 5:
                    continue
                image, pid_str, _session, _sid, mem = fields[0], fields[1], fields[2], fields[3], fields[4]
                if not pid_str.isdigit():
                    continue
                rss_mb = mem.replace(",", "").replace(" K", "").strip()
                try:
                    rss_mb = f"{int(rss_mb) // 1024} MB"
                except ValueError:
                    pass
                rows.append((pid_str, rss_mb, image))
        if not rows:
            print(" (tasklist returned nothing)")
            return
        print(f" {'PID':<8} {'RSS':<10} COMMAND")
        for pid_str, rss, cmd in rows:
            print(f" {pid_str:<8} {rss:<10} {cmd}")
        return

    try:
        ps = subprocess.run(
            ["ps", "-o", "pid,rss,command", "-p", ",".join(str(p) for p in pids)],
            check=False,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            text=True,
        )
    except (OSError, subprocess.SubprocessError):
        print(" (ps failed)")
        return
    for i, line in enumerate(ps.stdout.splitlines()):
        if i == 0:
            print(line)
            continue
        cols = line.split()
        if len(cols) >= 3:
            try:
                rss_mb = int(cols[1]) // 1024
                cols[1] = f"{rss_mb} MB"
            except ValueError:
                pass
        print(" ".join(cols))


def _list_models(base: str) -> None:
    print()
    print("current models in /v1/models:")
    try:
        with urllib.request.urlopen(f"{base}/v1/models", timeout=5) as resp:
            data = json.load(resp)
        for m in data.get("data", []):
            print(f" - {m.get('id')}")
    except (urllib.error.URLError, ConnectionError, TimeoutError, OSError, json.JSONDecodeError):
        print(f" (no response @ {base}/v1/models)")


def _print_remote_status(paths, url: str) -> int:
    print(f"stack status (channel: external -- remote {url}):")
    print(f" work dir {paths.work_dir}")
    probe = f"{url}/models.ini"
    responds = port_responds(probe, timeout=3.0)
    suffix = f"OK {probe}" if responds else f"no response @ {probe}"
    status = "external" if responds else "DOWN"
    print(f" {'router':<12} {status:<11} {suffix}")

    print()
    if paths.opencode_json.is_file():
        print(f" opencode {paths.opencode_json}")
        if paths.agents_local.is_file():
            print(f" instructions {paths.agents_local}")
    else:
        print(" opencode (not generated for this work dir; run: llmstack install)")

    if os.environ.get("LLMSTACK_ACTIVE") == "1":
        cfg = os.environ.get("OPENCODE_CONFIG", "?")
        chan = os.environ.get("LLMSTACK_CHANNEL", "?")
        print(f" in-shell OPENCODE_CONFIG={cfg}, LLMSTACK_CHANNEL={chan}")

    if responds:
        _list_models(url)
    return 0


def run(args: list[str]) -> int:
    for arg in args:
        if arg in ("-h", "--help"):
            _print_help()
            return 0
        print(f"[!] unknown arg to status: {arg}")
        return 2

    paths = resolve()

    # Channel decision is pinned at install time; status just reads it.
    # active-channel (set by `start`) takes precedence over default-channel
    # (set by `install`) so a `start --next` run is reflected immediately.
    default = read_marker(paths.default_marker)
    active = read_marker(paths.active_marker)
    persisted = active or default

    if persisted and persisted.channel == "external":
        url = (persisted.url or "").rstrip("/") or DEFAULT_REMOTE_URL
        return _print_remote_status(paths, url)

    if active:
        channel = active.channel
    elif default and default.channel in ("current", "next"):
        channel = f"{default.channel} (or stopped)"
    elif port_responds(f"http://127.0.0.1:{SWAP_PORT}/health"):
        channel = "(other) -- daemons running on :10102 are not ours"
    else:
        channel = "current (or stopped)"

    print(f"stack status (channel: {channel}):")
    print(f" work dir {paths.work_dir}")
    # Router has no /health route (dropped in v3.x); /v1/models always
    # 200s on a live router. llama-swap is a separate binary with its
    # own /health endpoint -- leave that one alone.
    _check_local("router", f"http://127.0.0.1:{ROUTER_PORT}/v1/models")
    _check_local("llama-swap", f"http://127.0.0.1:{SWAP_PORT}/health")

    print()
    if paths.opencode_json.is_file():
        print(f" opencode {paths.opencode_json}")
        if paths.agents_local.is_file():
            print(f" instructions {paths.agents_local}")
    else:
        print(" opencode (not generated for this work dir; run: llmstack install)")

    if os.environ.get("LLMSTACK_ACTIVE") == "1":
        cfg = os.environ.get("OPENCODE_CONFIG", "?")
        chan = os.environ.get("LLMSTACK_CHANNEL", "?")
        print(f" in-shell OPENCODE_CONFIG={cfg}, LLMSTACK_CHANNEL={chan}")

    _list_models(f"http://127.0.0.1:{ROUTER_PORT}")

    print()
    print("loaded llama-server processes:")
    pids = pgrep(r"llama-server.*--alias")
    if pids:
        _print_process_table(pids)
    else:
        print(" (none loaded)")

    if channel.split()[0] == "next" and paths.llama_swap_yaml.is_file():
        print()
        print(f"next-channel swaps (from {paths.llama_swap_yaml.name}):")
        try:
            cfg = yaml.safe_load(paths.llama_swap_yaml.read_text())
            for name, m in (cfg.get("models") or {}).items():
                md = m.get("metadata") or {}
                if md.get("channel") != "next":
                    continue
                hff = "?"
                for line in (m.get("cmd") or "").splitlines():
                    s = line.strip()
                    if s.startswith("-hff ") and not s.lstrip().startswith("#"):
                        hff = s[len("-hff "):].strip()
                        break
                print(f" {name:<18} -> {hff} ({md.get('quant', '?')}, {md.get('size_gb', '?')} GB)")
        except (OSError, yaml.YAMLError):
            pass
    return 0

llmstack/commands/stop.py
@@ -0,0 +1,73 @@
"""``llmstack stop`` -- tear down the singleton router + llama-swap daemons.

Three layers, in order:

1. SIGTERM/SIGKILL the pids in ``<state>/router.pid`` and
   ``<state>/llama-swap.pid`` (if any).
2. ``pkill`` by pattern as a cross-project safety net for daemons that
   were started from another project's ``.llmstack/``.
3. ``pkill`` any orphaned ``llama-server`` children spawned by
   llama-swap.

In **external mode** (channel pinned to ``external`` by ``install``)
there are no local daemons to tear down -- we just clear the
active-channel marker so ``status`` no longer reports the connection.
"""

from __future__ import annotations

from llmstack.commands._helpers import (
    kill_pid,
    pgrep_describe,
    pkill,
    read_pid,
)
from llmstack.paths import is_remote, remote_url, resolve


def _print_help() -> None:
    print("usage: llmstack stop")


def run(args: list[str]) -> int:
    for arg in args:
        if arg in ("-h", "--help"):
            _print_help()
            return 0
        print(f"[!] unknown arg to stop: {arg}")
        return 2

    paths = resolve()

    if is_remote():
        url = remote_url()
        if paths.active_marker.is_file():
            paths.active_marker.unlink(missing_ok=True)
            print(f"[OK] disconnected from {url} (active-channel cleared).")
        else:
            print(f"[=] not connected to any remote llmstack. (external URL: {url})")
        print(" note: nothing local was running. To stop the *remote* daemons, run")
        print(" 'llmstack stop' on the host that started them.")
        return 0

    for name, pid_file in (("router", paths.router_pid), ("llama-swap", paths.swap_pid)):
        pid = read_pid(pid_file)
        if pid is not None:
            print(f"[*] stopping {name} (pid {pid})")
            kill_pid(pid)
        pid_file.unlink(missing_ok=True)

    cross_project = pgrep_describe(r"llama-swap --config|llmstack\.app")
    if cross_project.strip():
        print("[*] stopping daemons by name (no local pid files, started elsewhere):")
        for line in cross_project.splitlines():
            print(f" {line}")
        pkill(r"llama-swap --config")
        pkill(r"llmstack\.app")

    # Orphaned llama-server children (shouldn't happen, but cheap insurance)
    pkill(r"llama-server.*--alias (code-fast|code-smart|plan|plan-uncensored)")

    paths.active_marker.unlink(missing_ok=True)
    print("[OK] stopped.")
    return 0
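
The stop module's first teardown layer is described as "SIGTERM/SIGKILL the pids", delegated to a `kill_pid` helper from `llmstack/commands/_helpers.py` that is not part of this diff. A minimal POSIX-only sketch of that term-then-kill escalation, under the assumption that the helper takes a bare pid and a grace period (the real helper presumably also covers Windows, since the package ships `llmstack/_platform.py` with an `IS_WINDOWS` flag):

```python
# Hypothetical sketch -- not the package's actual kill_pid; POSIX signals only.
from __future__ import annotations

import os
import signal
import time


def kill_pid(pid: int, grace: float = 5.0) -> None:
    """SIGTERM the process, then SIGKILL it if it is still alive after ``grace`` seconds."""
    try:
        os.kill(pid, signal.SIGTERM)
    except ProcessLookupError:
        return  # already gone
    deadline = time.monotonic() + grace
    while time.monotonic() < deadline:
        try:
            os.kill(pid, 0)  # signal 0 probes existence without sending anything
        except ProcessLookupError:
            return           # exited within the grace period
        time.sleep(0.1)
    try:
        os.kill(pid, signal.SIGKILL)
    except ProcessLookupError:
        pass
```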