opencode_llmstack-0.6.0-py3-none-any.whl

@@ -0,0 +1,360 @@
+ """``llmstack start`` -- bring up the stack and enter the env-prepared subshell.
+
+ The channel is **decided at install time** and persisted to
+ ``.llmstack/default-channel`` -- ``start`` reads that marker and never
+ re-derives the channel from the environment. Three channels exist:
+
+ *Local* (``current`` / ``next``)
+     Generate ``llama-swap.yaml`` for the chosen channel, launch
+     llama-swap + the FastAPI router locally, and drop into a subshell
+     with ``OPENCODE_CONFIG`` exported. The yaml is regenerated on every
+     fresh launch so it always reflects the live ``models.ini``; if the
+     daemons are already up under our pid files we leave the loaded yaml
+     alone. The ``--current`` / ``--next`` flags pick which of the two
+     local channels to launch *for this run* (without rewriting the
+     marker -- only ``install`` does that).
+
+     Daemon state has two branches:
+
+     (a) local pid file says daemons are up -> idempotent, channel-checked,
+         no yaml regeneration.
+     (b) nothing in the pid file -> regenerate yaml, launch fresh. If port
+         :10102 is already in use by *another* process (typically another
+         project on this host) we refuse: the user should run
+         ``llmstack install --external`` to wire this project as a thin
+         client of those daemons, or stop them first.
+
+ *External* (``external``)
+     Don't launch anything; verify the pinned URL is reachable (via
+     ``GET /models.ini`` -- the router has no ``/health`` route) and drop
+     into the subshell with ``LLMSTACK_CHANNEL=external``. The URL was
+     pinned by ``llmstack install --external [URL]``; an external install
+     with no URL defaults to the local router
+     (``http://127.0.0.1:10101``), which is the laptop-with-N-projects
+     case where one project owns the daemons and the others are clients.
+ """
+
+ from __future__ import annotations
+
+ import os
+ import sys
+ import time
+ from pathlib import Path
+
+ from llmstack.commands._helpers import (
+     is_running,
+     port_responds,
+     read_pid,
+     spawn_daemon,
+ )
+ from llmstack.generators import render_to
+ from llmstack.generators.llama_swap import render as render_yaml
+ from llmstack.generators.llama_swap import validate as validate_yaml
+ from llmstack.paths import (
+     DEFAULT_REMOTE_URL,
+     ROUTER_PORT,
+     SWAP_PORT,
+     ChannelMark,
+     ensure_state_dirs,
+     read_marker,
+     write_marker,
+ )
+ from llmstack.shell_env import spawn_subshell
+ from llmstack.tiers import load_tiers
+
+
+ def _print_help() -> None:
+     print("usage: llmstack start [--current | --next] [--detach]")
+
+
+ def _queued_next_tiers() -> list[str]:
+     """Names of every tier that has *some* queued upgrade target.
+
+     Backend-aware: gguf tiers with ``hf_file_next`` qualify, and so do
+     bedrock tiers with ``aws_model_id_next``. Used to short-circuit
+     ``--next`` when nothing's queued.
+     """
+     return sorted(t.name for t in load_tiers().values() if t.has_next)
+
+
+ def _start_remote(detach: bool, url: str) -> int:
+     """Client-mode start: just verify the remote and drop into the shell.
+
+     ``url`` is the remote-router base URL pinned by ``install --external``
+     into ``default-channel``. It is *not* re-derived from the environment
+     here -- the marker is canonical post-install, and silently following
+     a stale env var would lie to the user about which remote opencode is
+     actually wired to (the URL is baked into ``opencode.json`` at install
+     time).
+
+     The reachability probe hits ``GET /models.ini`` rather than a
+     dedicated ``/health`` endpoint -- a 200 there proves both that the
+     router is up and that it actually has a config worth talking to,
+     which is what the thin client needs. The router has no separate
+     ``/health`` route.
+     """
+     paths = ensure_state_dirs()
+
+     if not paths.opencode_json.is_file():
+         raise SystemExit(
+             f"no .llmstack/opencode.json in {paths.work_dir} -- run: llmstack install --external\n"
+             f"    (or `llmstack install --external {url}` to keep this remote URL)"
+         )
+
+     print(f"[*] external llmstack: {url}")
+     probe_url = f"{url}/models.ini"
+     if port_responds(probe_url, timeout=5.0):
+         print(f"[OK] {probe_url} responds.")
+     else:
+         print(f"[!] {probe_url} did not respond -- is the remote stack up?", file=sys.stderr)
+         print("    proceeding anyway; opencode will surface the error on first request.", file=sys.stderr)
+
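+     # Record the live connection: `status` reports it and `stop` clears it.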
+     write_marker(paths.active_marker, ChannelMark("external", url))
+
+     print()
+     print("[OK] client mode (channel: external).")
+     print()
+     print(f"  router       {url}  (external)")
+     print()
+     print("Try:")
+     print(f"  curl -s {url}/v1/models | jq '.data[].id'")
+     print(f"  curl -s {url}/models.ini | head")
+     print()
+     print("Disconnect:")
+     print("  exit   # leave the subshell (daemons are external, nothing to stop)")
+
+     if detach:
+         return 0
+
+     # Same "spawn only when no active env" rule as the local-mode path.
+     if os.environ.get("LLMSTACK_ACTIVE") == "1":
+         cur_chan = os.environ.get("LLMSTACK_CHANNEL", "?")
+         if cur_chan == "external":
+             print("[=] already active as external client -- env is up to date.")
+         else:
+             print(
+                 f"[*] switching to external client ({cur_chan} -> external); env in "
+                 "this shell is now stale."
+             )
+             print("    refresh prompt + env in this shell:")
+             print('      eval "$(llmstack reload)"')
+         return 0
+
+     spawn_subshell("external")
+     return 0  # unreachable
+
+
+ def run(args: list[str]) -> int:
+     requested: str | None = None
+     detach = False
+     for arg in args:
+         if arg == "--next":
+             requested = "next"
+         elif arg == "--current":
+             requested = "current"
+         elif arg in ("--detach", "--no-shell"):
+             detach = True
+         elif arg in ("-h", "--help"):
+             _print_help()
+             return 0
+         else:
+             print(f"[!] unknown arg to start: {arg} (try --next, --current, --detach, -h)")
+             return 2
+
+     paths = ensure_state_dirs()
+     default = read_marker(paths.default_marker)
+
+     # External installs short-circuit to the thin-client path. The URL
+     # is taken from the marker (set by ``install --external``); we
+     # never re-derive it from the env.
+     if default and default.channel == "external":
+         if requested is not None:
+             print(
+                 "[!] --current / --next have no effect for external installs "
+                 "(no daemons to launch).",
+                 file=sys.stderr,
+             )
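+         # Normalise the pinned URL: a trailing slash would double up in
+         # probe paths like f"{url}/models.ini", and an empty marker URL
+         # falls back to the local-router default.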
+         url = (default.url or "").rstrip("/") or DEFAULT_REMOTE_URL
+         return _start_remote(detach, url)
+
+     # Local mode -- decide which of current/next to launch.
+     if requested is not None:
+         channel = requested
+     elif default and default.channel in ("current", "next"):
+         channel = default.channel
+     else:
+         channel = "current"
+
+     if not paths.llama_swap_bin.exists() or not os.access(paths.llama_swap_bin, os.X_OK):
+         raise SystemExit(f"missing {paths.llama_swap_bin} (run: llmstack setup)")
+     if not paths.opencode_json.is_file():
+         raise SystemExit(f"no .llmstack/opencode.json in {paths.work_dir} -- run: llmstack install")
+
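+     # Daemon-state branches from the module docstring: (a) our pid file is
+     # live -> idempotent re-entry; (b) no pid file -> launch fresh, unless
+     # a foreign process already holds the port.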
+     if is_running(paths.swap_pid):
+         launch_daemons = False
+         live_mark = read_marker(paths.active_marker)
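+         # No active-channel marker (e.g. deleted by hand): assume the live
+         # stack matches the request rather than refusing spuriously.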
+         live = live_mark.channel if live_mark else channel
+         if live != channel:
+             print(
+                 f"[!] llama-swap is already running in '{live}' channel; "
+                 f"refusing to also start '{channel}'. Stop the stack first:",
+                 file=sys.stderr,
+             )
+             print("\n    llmstack stop", file=sys.stderr)
+             print(f"    llmstack start --{channel}\n", file=sys.stderr)
+             return 1
+     elif port_responds(f"http://127.0.0.1:{SWAP_PORT}/health"):
+         # Something is already listening on :10102, but it isn't ours
+         # (no pid file in this project's state dir). The pre-flag flow
+         # silently joined as "shared", which was a footgun: a `stop`
+         # from this project would tear down the other project's
+         # daemons and we couldn't bring them back without local
+         # tooling. Instead, refuse and tell the user how to wire this
+         # project as a proper thin client.
+         print(
+             f"[!] port :{SWAP_PORT} is already in use (daemons started by "
+             "another project on this host).",
+             file=sys.stderr,
+         )
+         print("    This project is installed for local mode -- it expects to own", file=sys.stderr)
+         print("    those daemons. To run as a thin client of the running stack:", file=sys.stderr)
+         print("", file=sys.stderr)
+         print("        llmstack install --external", file=sys.stderr)
+         print("", file=sys.stderr)
+         print("    (--external defaults to http://127.0.0.1:10101, the local router.)", file=sys.stderr)
+         print("    To take over instead, stop the running daemons first:", file=sys.stderr)
+         print("", file=sys.stderr)
+         print("        llmstack stop && llmstack start", file=sys.stderr)
+         return 1
+     else:
+         launch_daemons = True
+
+     if launch_daemons:
+         if channel == "next":
+             queued = _queued_next_tiers()
+             if not queued:
+                 print(
+                     "[!] no tiers have hf_file_next or aws_model_id_next set in models.ini -- "
+                     "nothing to do.",
+                     file=sys.stderr,
+                 )
+                 print(
+                     "    add a *_next line to a tier and re-run, or use --current.",
+                     file=sys.stderr,
+                 )
+                 return 1
+             print(f"[*] generating next-channel yaml -> {paths.llama_swap_yaml}")
+             print(f"    queued upgrade tiers: {' '.join(queued)}")
+         else:
+             print(f"[*] generating yaml -> {paths.llama_swap_yaml}")
+         render_to(
+             paths.llama_swap_yaml,
+             render=lambda p: Path(p).write_text(render_yaml(use_next=(channel == "next"))),
+             validate=validate_yaml,
+         )
+
+     print(f"[*] channel: {channel} ({paths.llama_swap_yaml.name})")
+
+     if launch_daemons:
+         print(f"[*] starting llama-swap on :{SWAP_PORT}")
+         spawn_daemon(
+             [
+                 str(paths.llama_swap_bin),
+                 "--config", str(paths.llama_swap_yaml),
+                 "--listen", f"127.0.0.1:{SWAP_PORT}",
+             ],
+             log=paths.log_dir / "llama-swap.log",
+             pid_file=paths.swap_pid,
+         )
+         write_marker(paths.active_marker, ChannelMark(channel))
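+         # Give the daemon a moment to fail fast (bad yaml, port clash) so
+         # the liveness check below catches startup errors instead of
+         # reporting success for a process that dies immediately.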
+         time.sleep(1)
+         if not is_running(paths.swap_pid):
+             print(f"[!] llama-swap failed to start. Check {paths.log_dir}/llama-swap.log")
+             paths.swap_pid.unlink(missing_ok=True)
+             paths.active_marker.unlink(missing_ok=True)
+             return 1
+         print(f"    pid {read_pid(paths.swap_pid)}")
+
+         print(f"[*] starting router on :{ROUTER_PORT}")
+         env = os.environ.copy()
+         env.setdefault("LLAMA_SWAP_URL", f"http://127.0.0.1:{SWAP_PORT}")
+         env.setdefault("ROUTER_HOST", "127.0.0.1")
+         env.setdefault("ROUTER_PORT", str(ROUTER_PORT))
+         # Lock-step with the gguf --use-next swap: bedrock tiers in the
+         # router pick aws_model_id_next when this flag is set.
+         if channel == "next":
+             env["LLMSTACK_USE_NEXT"] = "1"
+         else:
+             env.pop("LLMSTACK_USE_NEXT", None)
+         spawn_daemon(
+             [sys.executable, "-m", "llmstack.app"],
+             log=paths.log_dir / "router.log",
+             pid_file=paths.router_pid,
+             env=env,
+         )
+         time.sleep(1)
+         if not is_running(paths.router_pid):
+             print(f"[!] router failed to start. Check {paths.log_dir}/router.log")
+             paths.router_pid.unlink(missing_ok=True)
+             return 1
+         print(f"    pid {read_pid(paths.router_pid)}")
+     else:
+         print(f"[=] llama-swap already running (pid {read_pid(paths.swap_pid)}, channel {channel})")
+         if is_running(paths.router_pid):
+             print(f"[=] router already running (pid {read_pid(paths.router_pid)})")
+
+     other = "next" if channel == "current" else "current"
+     print()
+     print(f"[OK] stack is up (channel: {channel}).")
+     print()
+     print(f'  router       http://127.0.0.1:{ROUTER_PORT}   (OpenAI-compatible, "auto" routing)')
+     print(f"  llama-swap   http://127.0.0.1:{SWAP_PORT}   (raw model endpoints + UI)")
+     print()
+     print("Try:")
+     print(f"  curl -s http://127.0.0.1:{ROUTER_PORT}/v1/models | jq '.data[].id'")
+     print(f"  curl -s http://127.0.0.1:{ROUTER_PORT}/models.ini | head")
+     print()
+     print("Logs:")
+     print(f"  tail -f {paths.log_dir}/llama-swap.log")
+     print(f"  tail -f {paths.log_dir}/router.log")
+     print()
+     print("Switch channel (requires stop first):")
+     print(f"  llmstack restart --{other}")
+     print()
+     print("Stop:")
+     print("  llmstack stop")
+
+     if detach:
+         return 0
+
+     # Only spawn a subshell when the env isn't already wired up. Two cases:
+     #  - Hook installed + sourced: cd-ing into a project sets
+     #    LLMSTACK_ACTIVE=1 and friends. start just brings up daemons --
+     #    no need to nest another shell.
+     #  - Inside a previously-spawned llmstack shell: same deal.
+     # For users who haven't run `eval "$(llmstack activate <shell>)"`,
+     # spawn so they at least get OPENCODE_CONFIG / channel exports for
+     # this terminal.
+     if os.environ.get("LLMSTACK_ACTIVE") == "1":
+         cur_chan = os.environ.get("LLMSTACK_CHANNEL", "?")
+         if cur_chan == channel:
+             print(f"[=] already active in '{channel}' -- env is up to date.")
+         else:
+             # Daemons + active-channel marker are already on the new
+             # channel. The current shell's env + PROMPT lag behind --
+             # `llmstack reload` emits the eval-able snippet to fix that
+             # without nesting a subshell.
+             print(
+                 f"[*] channel switched ({cur_chan} -> {channel}); env in this shell "
+                 "is now stale."
+             )
+             print("    refresh prompt + env in this shell:")
+             print('      eval "$(llmstack reload)"')
+         return 0
+
+     spawn_subshell(channel)
+     return 0  # unreachable: spawn_subshell execvps
@@ -0,0 +1,260 @@
+ """``llmstack status`` -- show channel, pids, ``/v1/models``, llama-server load.
+
+ The channel comes from ``.llmstack/default-channel`` (pinned by
+ ``install``). Two top-level reporting paths:
+
+ * ``current`` / ``next`` -- local install. Check pid files + port probes
+   for our daemons. If port :10102 responds without a pid file in *this*
+   project's ``.llmstack/``, the daemons belong to another project on
+   this host; we report that as "(other)" so the user knows the local
+   daemons aren't ours -- it's not an error, but also not something this
+   project can ``stop`` cleanly.
+ * ``external`` -- thin-client install. Skip all local checks; probe the
+   remote-router URL from the marker.
+ """
+
+ from __future__ import annotations
+
+ import csv
+ import json
+ import os
+ import subprocess
+ import urllib.error
+ import urllib.request
+
+ import yaml
+
+ from llmstack._platform import IS_WINDOWS
+ from llmstack.commands._helpers import is_running, pgrep, port_responds, read_pid
+ from llmstack.paths import (
+     DEFAULT_REMOTE_URL,
+     ROUTER_PORT,
+     SWAP_PORT,
+     read_marker,
+     resolve,
+ )
+
+
+ def _print_help() -> None:
+     print("usage: llmstack status")
+
+
+ def _check_local(name: str, url: str) -> None:
+     """Report on a local daemon (router/llama-swap).
+
+     ``alive`` (we own the process via pid file) is the happy path.
+     ``responds`` without ``alive`` means the port is in use but the
+     process isn't ours -- another project on this host owns it. We
+     surface that as ``(other)`` rather than ``shared`` because there's
+     no special "shared" mode anymore: a local install can't manage
+     daemons it didn't spawn. ``llmstack install --external`` is the
+     documented way to consume those daemons cleanly.
+     """
+     paths = resolve()
+     pid_file = paths.state_dir / f"{name}.pid"
+     pid = read_pid(pid_file) if pid_file.is_file() else None
+     alive = pid is not None and is_running(pid_file)
+     responds = port_responds(url, timeout=3.0)
+
+     if alive:
+         status = f"pid {pid:<7}"
+     elif responds:
+         status = "(other)"
+     else:
+         status = "DOWN"
+     suffix = f"OK {url}" if responds else f"no response @ {url}"
+     print(f"  {name:<12} {status:<11} {suffix}")
+
+
+ def _print_process_table(pids: list[int]) -> None:
+     """Render ``pid / rss_mb / command`` for each pid (cross-OS).
+
+     POSIX: ``ps -o pid,rss,command`` (rss is in KB, we humanise to MB).
+     Windows: ``tasklist /FI "PID eq ..." /FO CSV`` (image name + memory
+     usage). Both branches print a header row.
+     """
+     if IS_WINDOWS:
+         rows: list[tuple[str, str, str]] = []
+         for pid in pids:
+             try:
+                 proc = subprocess.run(
+                     ["tasklist", "/FI", f"PID eq {pid}", "/FO", "CSV", "/NH"],
+                     check=False,
+                     stdout=subprocess.PIPE,
+                     stderr=subprocess.DEVNULL,
+                     text=True,
+                     timeout=10,
+                 )
+             except (OSError, subprocess.SubprocessError):
+                 continue
+             if proc.returncode != 0 or not proc.stdout.strip():
+                 continue
+             for fields in csv.reader(proc.stdout.splitlines()):
+                 if len(fields) < 5:
+                     continue
+                 image, pid_str, _session, _sid, mem = fields[:5]
+                 if not pid_str.isdigit():
+                     continue
+                 # Mem Usage comes back like "12,345 K"; normalise to MB.
+                 rss_mb = mem.replace(",", "").replace(" K", "").strip()
+                 try:
+                     rss_mb = f"{int(rss_mb) // 1024} MB"
+                 except ValueError:
+                     pass
+                 rows.append((pid_str, rss_mb, image))
+         if not rows:
+             print("  (tasklist returned nothing)")
+             return
+         print(f"  {'PID':<8} {'RSS':<10} COMMAND")
+         for pid_str, rss, cmd in rows:
+             print(f"  {pid_str:<8} {rss:<10} {cmd}")
+         return
+
+     try:
+         ps = subprocess.run(
+             ["ps", "-o", "pid,rss,command", "-p", ",".join(str(p) for p in pids)],
+             check=False,
+             stdout=subprocess.PIPE,
+             stderr=subprocess.DEVNULL,
+             text=True,
+         )
+     except (OSError, subprocess.SubprocessError):
+         print("  (ps failed)")
+         return
+     for i, line in enumerate(ps.stdout.splitlines()):
+         if i == 0:
+             print(f"  {line}")
+             continue
+         cols = line.split()
+         if len(cols) >= 3:
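+             # ps reports rss in kilobytes; humanise to MB in place.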
+             try:
+                 rss_mb = int(cols[1]) // 1024
+                 cols[1] = f"{rss_mb} MB"
+             except ValueError:
+                 pass
+         print("  " + " ".join(cols))
+
+
+ def _list_models(base: str) -> None:
+     print()
+     print("current models in /v1/models:")
+     try:
+         with urllib.request.urlopen(f"{base}/v1/models", timeout=5) as resp:
+             data = json.load(resp)
+         for m in data.get("data", []):
+             print(f"  - {m.get('id')}")
+     except (urllib.error.URLError, ConnectionError, TimeoutError, OSError, json.JSONDecodeError):
+         print(f"  (no response @ {base}/v1/models)")
+
+
+ def _print_remote_status(paths, url: str) -> int:
+     print(f"stack status (channel: external -- remote {url}):")
+     print(f"  work dir     {paths.work_dir}")
+     probe = f"{url}/models.ini"
+     responds = port_responds(probe, timeout=3.0)
+     suffix = f"OK {probe}" if responds else f"no response @ {probe}"
+     status = "external" if responds else "DOWN"
+     print(f"  {'router':<12} {status:<11} {suffix}")
+
+     print()
+     if paths.opencode_json.is_file():
+         print(f"  opencode     {paths.opencode_json}")
+         if paths.agents_local.is_file():
+             print(f"  instructions {paths.agents_local}")
+     else:
+         print("  opencode     (not generated for this work dir; run: llmstack install)")
+
+     if os.environ.get("LLMSTACK_ACTIVE") == "1":
+         cfg = os.environ.get("OPENCODE_CONFIG", "?")
+         chan = os.environ.get("LLMSTACK_CHANNEL", "?")
+         print(f"  in-shell     OPENCODE_CONFIG={cfg}, LLMSTACK_CHANNEL={chan}")
+
+     if responds:
+         _list_models(url)
+     return 0
+
+
+ def run(args: list[str]) -> int:
+     for arg in args:
+         if arg in ("-h", "--help"):
+             _print_help()
+             return 0
+         print(f"[!] unknown arg to status: {arg}")
+         return 2
+
+     paths = resolve()
+
+     # Channel decision is pinned at install time; status just reads it.
+     # active-channel (set by `start`) takes precedence over default-channel
+     # (set by `install`) so a `start --next` run is reflected immediately.
+     default = read_marker(paths.default_marker)
+     active = read_marker(paths.active_marker)
+     persisted = active or default
+
+     if persisted and persisted.channel == "external":
+         url = (persisted.url or "").rstrip("/") or DEFAULT_REMOTE_URL
+         return _print_remote_status(paths, url)
+
+     if active:
+         channel = active.channel
+     elif default and default.channel in ("current", "next"):
+         channel = f"{default.channel} (or stopped)"
+     elif port_responds(f"http://127.0.0.1:{SWAP_PORT}/health"):
+         channel = "(other) -- daemons running on :10102 are not ours"
+     else:
+         channel = "current (or stopped)"
+
+     print(f"stack status (channel: {channel}):")
+     print(f"  work dir     {paths.work_dir}")
+     # Router has no /health route (dropped in v3.x); /v1/models always
+     # 200s on a live router. llama-swap is a separate binary with its
+     # own /health endpoint -- leave that one alone.
+     _check_local("router", f"http://127.0.0.1:{ROUTER_PORT}/v1/models")
+     _check_local("llama-swap", f"http://127.0.0.1:{SWAP_PORT}/health")
+
+     print()
+     if paths.opencode_json.is_file():
+         print(f"  opencode     {paths.opencode_json}")
+         if paths.agents_local.is_file():
+             print(f"  instructions {paths.agents_local}")
+     else:
+         print("  opencode     (not generated for this work dir; run: llmstack install)")
+
+     if os.environ.get("LLMSTACK_ACTIVE") == "1":
+         cfg = os.environ.get("OPENCODE_CONFIG", "?")
+         chan = os.environ.get("LLMSTACK_CHANNEL", "?")
+         print(f"  in-shell     OPENCODE_CONFIG={cfg}, LLMSTACK_CHANNEL={chan}")
+
+     _list_models(f"http://127.0.0.1:{ROUTER_PORT}")
+
+     print()
+     print("loaded llama-server processes:")
+     pids = pgrep(r"llama-server.*--alias")
+     if pids:
+         _print_process_table(pids)
+     else:
+         print("  (none loaded)")
+
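+     # `channel` may carry a human suffix (e.g. "current (or stopped)"),
+     # so compare only the first token.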
+     if channel.split()[0] == "next" and paths.llama_swap_yaml.is_file():
+         print()
+         print(f"next-channel swaps (from {paths.llama_swap_yaml.name}):")
+         try:
+             cfg = yaml.safe_load(paths.llama_swap_yaml.read_text())
+             for name, m in (cfg.get("models") or {}).items():
+                 md = m.get("metadata") or {}
+                 if md.get("channel") != "next":
+                     continue
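+                 # The -hff (--hf-file) flag in the entry's cmd names the
+                 # GGUF file this next-channel model will load.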
+                 hff = "?"
+                 for line in (m.get("cmd") or "").splitlines():
+                     s = line.strip()
+                     # s is already stripped, so a "#" comment line can never
+                     # start with "-hff " -- no separate comment guard needed.
+                     if s.startswith("-hff "):
+                         hff = s[len("-hff "):].strip()
+                         break
+                 print(f"  {name:<18} -> {hff} ({md.get('quant', '?')}, {md.get('size_gb', '?')} GB)")
+         except (OSError, yaml.YAMLError):
+             pass
+     return 0
@@ -0,0 +1,73 @@
+ """``llmstack stop`` -- tear down the singleton router + llama-swap daemons.
+
+ Three layers, in order:
+
+ 1. SIGTERM/SIGKILL the pids in ``<state>/router.pid`` and
+    ``<state>/llama-swap.pid`` (if any).
+ 2. ``pkill`` by pattern as a cross-project safety net for daemons that
+    were started from another project's ``.llmstack/``.
+ 3. ``pkill`` any orphaned ``llama-server`` children spawned by
+    llama-swap.
+
+ In **external mode** (channel pinned to ``external`` by ``install``)
+ there are no local daemons to tear down -- we just clear the
+ active-channel marker so ``status`` no longer reports the connection.
+ """
+
+ from __future__ import annotations
+
+ from llmstack.commands._helpers import (
+     kill_pid,
+     pgrep_describe,
+     pkill,
+     read_pid,
+ )
+ from llmstack.paths import is_remote, remote_url, resolve
+
+
+ def _print_help() -> None:
+     print("usage: llmstack stop")
+
+
+ def run(args: list[str]) -> int:
+     for arg in args:
+         if arg in ("-h", "--help"):
+             _print_help()
+             return 0
+         print(f"[!] unknown arg to stop: {arg}")
+         return 2
+
+     paths = resolve()
+
+     if is_remote():
+         url = remote_url()
+         if paths.active_marker.is_file():
+             paths.active_marker.unlink(missing_ok=True)
+             print(f"[OK] disconnected from {url} (active-channel cleared).")
+         else:
+             print(f"[=] not connected to any remote llmstack. (external URL: {url})")
+             print("    note: nothing local was running. To stop the *remote* daemons, run")
+             print("    'llmstack stop' on the host that started them.")
+         return 0
+
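+     # Layer 1: SIGTERM/SIGKILL the pids recorded in this project's pid files.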
+     for name, pid_file in (("router", paths.router_pid), ("llama-swap", paths.swap_pid)):
+         pid = read_pid(pid_file)
+         if pid is not None:
+             print(f"[*] stopping {name} (pid {pid})")
+             kill_pid(pid)
+             pid_file.unlink(missing_ok=True)
+
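+     # Layer 2: pkill by command pattern -- catches daemons started from
+     # another project's .llmstack/ (no pid files in this state dir).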
+     cross_project = pgrep_describe(r"llama-swap --config|llmstack\.app")
+     if cross_project.strip():
+         print("[*] stopping daemons by name (no local pid files, started elsewhere):")
+         for line in cross_project.splitlines():
+             print(f"    {line}")
+         pkill(r"llama-swap --config")
+         pkill(r"llmstack\.app")
+
+     # Layer 3: orphaned llama-server children (shouldn't happen, but cheap
+     # insurance).
+     pkill(r"llama-server.*--alias (code-fast|code-smart|plan|plan-uncensored)")
+
+     paths.active_marker.unlink(missing_ok=True)
+     print("[OK] stopped.")
+     return 0