opencode-llmstack 0.6.0__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -54,6 +54,7 @@ from llmstack.commands._helpers import (
 from llmstack.generators import render_to
 from llmstack.generators.llama_swap import render as render_yaml
 from llmstack.generators.llama_swap import validate as validate_yaml
+from llmstack.tiers import load_tiers
 from llmstack.paths import (
     DEFAULT_REMOTE_URL,
     ROUTER_PORT,
@@ -194,47 +195,55 @@ def run(args: list[str]) -> int:
     if not paths.opencode_json.is_file():
         raise SystemExit(f"no .llmstack/opencode.json in {paths.work_dir} -- run: llmstack install")
 
-    if is_running(paths.swap_pid):
-        launch_daemons = False
-        live_mark = read_marker(paths.active_marker)
-        live = live_mark.channel if live_mark else channel
-        if live != channel:
+    tiers = load_tiers()
+    has_gguf = any(t.is_gguf for t in tiers.values())
+
+    if has_gguf:
+        if is_running(paths.swap_pid):
+            launch_daemons = False
+            live_mark = read_marker(paths.active_marker)
+            live = live_mark.channel if live_mark else channel
+            if live != channel:
+                print(
+                    f"[!] llama-swap is already running in '{live}' channel; "
+                    f"refusing to also start '{channel}'. Stop the stack first:",
+                    file=sys.stderr,
+                )
+                print("\n llmstack stop", file=sys.stderr)
+                print(f" llmstack start --{channel}\n", file=sys.stderr)
+                return 1
+        elif port_responds(f"http://127.0.0.1:{SWAP_PORT}/health"):
+            # Something is already listening on :10102, but it isn't ours
+            # (no pid file in this project's state dir). The pre-flag flow
+            # silently joined as "shared", which was a footgun: a `stop`
+            # from this project would tear down the other project's
+            # daemons and we couldn't bring them back without local
+            # tooling. Instead, refuse and tell the user how to wire this
+            # project as a proper thin client.
             print(
-                f"[!] llama-swap is already running in '{live}' channel; "
-                f"refusing to also start '{channel}'. Stop the stack first:",
+                f"[!] port :{SWAP_PORT} is already in use (daemons started by "
+                "another project on this host).",
                 file=sys.stderr,
             )
-            print("\n llmstack stop", file=sys.stderr)
-            print(f" llmstack start --{channel}\n", file=sys.stderr)
+            print(" This project is installed for local mode -- it expects to own", file=sys.stderr)
+            print(" those daemons. To run as a thin client of the running stack:", file=sys.stderr)
+            print("", file=sys.stderr)
+            print(" llmstack install --external", file=sys.stderr)
+            print("", file=sys.stderr)
+            print(" (--external defaults to http://127.0.0.1:10101, the local router.)", file=sys.stderr)
+            print(" To take over instead, stop the running daemons first:", file=sys.stderr)
+            print("", file=sys.stderr)
+            print(" llmstack stop && llmstack start", file=sys.stderr)
             return 1
-    elif port_responds(f"http://127.0.0.1:{SWAP_PORT}/health"):
-        # Something is already listening on :10102, but it isn't ours
-        # (no pid file in this project's state dir). The pre-flag flow
-        # silently joined as "shared", which was a footgun: a `stop`
-        # from this project would tear down the other project's
-        # daemons and we couldn't bring them back without local
-        # tooling. Instead, refuse and tell the user how to wire this
-        # project as a proper thin client.
-        print(
-            f"[!] port :{SWAP_PORT} is already in use (daemons started by "
-            "another project on this host).",
-            file=sys.stderr,
-        )
-        print(" This project is installed for local mode -- it expects to own", file=sys.stderr)
-        print(" those daemons. To run as a thin client of the running stack:", file=sys.stderr)
-        print("", file=sys.stderr)
-        print(" llmstack install --external", file=sys.stderr)
-        print("", file=sys.stderr)
-        print(" (--external defaults to http://127.0.0.1:10101, the local router.)", file=sys.stderr)
-        print(" To take over instead, stop the running daemons first:", file=sys.stderr)
-        print("", file=sys.stderr)
-        print(" llmstack stop && llmstack start", file=sys.stderr)
-        return 1
+        else:
+            launch_daemons = True
     else:
         launch_daemons = True
 
     if launch_daemons:
-        if channel == "next":
+        if not has_gguf:
+            print("[*] bedrock-only config -- skipping llama-swap")
+        elif channel == "next":
             queued = _queued_next_tiers()
             if not queued:
                 print(
@@ -251,37 +260,40 @@ def run(args: list[str]) -> int:
             print(f" queued upgrade tiers: {' '.join(queued)}")
         else:
             print(f"[*] generating yaml -> {paths.llama_swap_yaml}")
-            render_to(
-                paths.llama_swap_yaml,
-                render=lambda p: Path(p).write_text(render_yaml(use_next=(channel == "next"))),
-                validate=validate_yaml,
-            )
+            if has_gguf:
+                render_to(
+                    paths.llama_swap_yaml,
+                    render=lambda p: Path(p).write_text(render_yaml(use_next=(channel == "next"))),
+                    validate=validate_yaml,
+                )
 
     print(f"[*] channel: {channel} ({paths.llama_swap_yaml.name})")
 
     if launch_daemons:
-        print(f"[*] starting llama-swap on :{SWAP_PORT}")
-        spawn_daemon(
-            [
-                str(paths.llama_swap_bin),
-                "--config", str(paths.llama_swap_yaml),
-                "--listen", f"127.0.0.1:{SWAP_PORT}",
-            ],
-            log=paths.log_dir / "llama-swap.log",
-            pid_file=paths.swap_pid,
-        )
-        write_marker(paths.active_marker, ChannelMark(channel))
-        time.sleep(1)
-        if not is_running(paths.swap_pid):
-            print(f"[!] llama-swap failed to start. Check {paths.log_dir}/llama-swap.log")
-            paths.swap_pid.unlink(missing_ok=True)
-            paths.active_marker.unlink(missing_ok=True)
-            return 1
-        print(f" pid {read_pid(paths.swap_pid)}")
+        if has_gguf:
+            print(f"[*] starting llama-swap on :{SWAP_PORT}")
+            spawn_daemon(
+                [
+                    str(paths.llama_swap_bin),
+                    "--config", str(paths.llama_swap_yaml),
+                    "--listen", f"127.0.0.1:{SWAP_PORT}",
+                ],
+                log=paths.log_dir / "llama-swap.log",
+                pid_file=paths.swap_pid,
+            )
+            write_marker(paths.active_marker, ChannelMark(channel))
+            time.sleep(1)
+            if not is_running(paths.swap_pid):
+                print(f"[!] llama-swap failed to start. Check {paths.log_dir}/llama-swap.log")
+                paths.swap_pid.unlink(missing_ok=True)
+                paths.active_marker.unlink(missing_ok=True)
+                return 1
+            print(f" pid {read_pid(paths.swap_pid)}")
 
         print(f"[*] starting router on :{ROUTER_PORT}")
         env = os.environ.copy()
-        env.setdefault("LLAMA_SWAP_URL", f"http://127.0.0.1:{SWAP_PORT}")
+        if has_gguf:
+            env.setdefault("LLAMA_SWAP_URL", f"http://127.0.0.1:{SWAP_PORT}")
         env.setdefault("ROUTER_HOST", "127.0.0.1")
         env.setdefault("ROUTER_PORT", str(ROUTER_PORT))
         # Lock-step with the gguf --use-next swap: bedrock tiers in the
@@ -303,7 +315,10 @@ def run(args: list[str]) -> int:
             return 1
         print(f" pid {read_pid(paths.router_pid)}")
     else:
-        print(f"[=] llama-swap already running (pid {read_pid(paths.swap_pid)}, channel {channel})")
+        if has_gguf:
+            print(f"[=] llama-swap already running (pid {read_pid(paths.swap_pid)}, channel {channel})")
+        else:
+            print("[=] bedrock-only config -- llama-swap not used")
         if is_running(paths.router_pid):
             print(f"[=] router already running (pid {read_pid(paths.router_pid)})")
 
@@ -312,14 +327,16 @@ def run(args: list[str]) -> int:
     print(f"[OK] stack is up (channel: {channel}).")
     print()
     print(f' router http://127.0.0.1:{ROUTER_PORT} (OpenAI-compatible, "auto" routing)')
-    print(f" llama-swap http://127.0.0.1:{SWAP_PORT} (raw model endpoints + UI)")
+    if has_gguf:
+        print(f" llama-swap http://127.0.0.1:{SWAP_PORT} (raw model endpoints + UI)")
     print()
     print("Try:")
     print(f" curl -s http://127.0.0.1:{ROUTER_PORT}/v1/models | jq '.data[].id'")
    print(f" curl -s http://127.0.0.1:{ROUTER_PORT}/models.ini | head")
     print()
     print("Logs:")
-    print(f" tail -f {paths.log_dir}/llama-swap.log")
+    if has_gguf:
+        print(f" tail -f {paths.log_dir}/llama-swap.log")
     print(f" tail -f {paths.log_dir}/router.log")
     print()
     print("Switch channel (requires stop first):")
@@ -37,6 +37,7 @@ from llmstack.paths import (
     read_marker,
     resolve,
 )
+from llmstack.tiers import load_tiers
 
 
 def _print_help() -> None:
@@ -208,13 +209,17 @@ def run(args: list[str]) -> int:
     else:
         channel = "current (or stopped)"
 
+    tiers = load_tiers()
+    has_gguf = any(t.is_gguf for t in tiers.values())
+
     print(f"stack status (channel: {channel}):")
     print(f" work dir {paths.work_dir}")
     # Router has no /health route (dropped in v3.x); /v1/models always
     # 200s on a live router. llama-swap is a separate binary with its
     # own /health endpoint -- leave that one alone.
     _check_local("router", f"http://127.0.0.1:{ROUTER_PORT}/v1/models")
-    _check_local("llama-swap", f"http://127.0.0.1:{SWAP_PORT}/health")
+    if has_gguf:
+        _check_local("llama-swap", f"http://127.0.0.1:{SWAP_PORT}/health")
 
     print()
     if paths.opencode_json.is_file():
@@ -229,17 +234,29 @@ def run(args: list[str]) -> int:
         chan = os.environ.get("LLMSTACK_CHANNEL", "?")
         print(f" in-shell OPENCODE_CONFIG={cfg}, LLMSTACK_CHANNEL={chan}")
 
-    _list_models(f"http://127.0.0.1:{ROUTER_PORT}")
-
-    print()
-    print("loaded llama-server processes:")
-    pids = pgrep(r"llama-server.*--alias")
-    if pids:
-        _print_process_table(pids)
+    if has_gguf:
+        _list_models(f"http://127.0.0.1:{ROUTER_PORT}")
     else:
-        print(" (none loaded)")
-
-    if channel.split()[0] == "next" and paths.llama_swap_yaml.is_file():
+        print()
+        print("current models in /v1/models:")
+        try:
+            with urllib.request.urlopen(f"{f'http://127.0.0.1:{ROUTER_PORT}'}/v1/models", timeout=5) as resp:
+                data = json.load(resp)
+                for m in data.get("data", []):
+                    print(f" - {m.get('id')}")
+        except (urllib.error.URLError, ConnectionError, TimeoutError, OSError, json.JSONDecodeError):
+            print(f" (no response @ http://127.0.0.1:{ROUTER_PORT}/v1/models)")
+
+    if has_gguf:
+        print()
+        print("loaded llama-server processes:")
+        pids = pgrep(r"llama-server.*--alias")
+        if pids:
+            _print_process_table(pids)
+        else:
+            print(" (none loaded)")
+
+    if channel.split()[0] == "next" and has_gguf and paths.llama_swap_yaml.is_file():
         print()
         print(f"next-channel swaps (from {paths.llama_swap_yaml.name}):")
         try:
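For bedrock-only configs, status.py now probes the router's /v1/models route inline instead of calling _list_models. A standalone sketch of that probe, assuming the router listens on 127.0.0.1:10101 (the port this diff's install hint calls the local router; adjust if your ROUTER_PORT differs):

import json
import urllib.error
import urllib.request

ROUTER_URL = "http://127.0.0.1:10101"  # assumed router address; not confirmed by this diff


def list_router_models(base_url: str = ROUTER_URL) -> list[str]:
    # Query the OpenAI-compatible /v1/models route and return the model ids,
    # or an empty list when nothing answers (stack stopped, wrong port, etc.).
    try:
        with urllib.request.urlopen(f"{base_url}/v1/models", timeout=5) as resp:
            data = json.load(resp)
    except (urllib.error.URLError, TimeoutError, OSError, json.JSONDecodeError):
        return []
    return [m.get("id", "?") for m in data.get("data", [])]


if __name__ == "__main__":
    models = list_router_models()
    if models:
        for model_id in models:
            print(f" - {model_id}")
    else:
        print(f" (no response @ {ROUTER_URL}/v1/models)")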
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: opencode-llmstack
-Version: 0.6.0
+Version: 0.7.1
 Summary: Multi-tier local LLM stack: llama-swap + FastAPI auto-router + opencode wiring.
 Author: llmstack
 License: MIT
@@ -21,8 +21,8 @@ llmstack/commands/install_llama_swap.py,sha256=c6iedl-DjnOc7jMVzy_M0aIWSgygzAgYU
 llmstack/commands/reload.py,sha256=Z7ceZQX2fkHpZiWxov8YwidR72Xw0-qMFFV_RRXpkwI,2016
 llmstack/commands/restart.py,sha256=Bp6lSAnLhR2Nd7eA5BlD9J_TeGlzRfWS_Z3DdxP-eq4,294
 llmstack/commands/setup.py,sha256=o8In30TMflxDn4ahbodLrHeaIBwWScWuXhi6_78DQrQ,4825
-llmstack/commands/start.py,sha256=5X50TAQbIp72qThvc_TOGDLt0CtAm4yMUYfqpaYK0pg,14963
-llmstack/commands/status.py,sha256=9SKn7u7nUNYS53Bb76RXxsFO3SVBktqbAXsWQ3nOmVM,9680
+llmstack/commands/start.py,sha256=V9BDZeCQS_NL2bJmJANHVE2J1rqoYBUDYcjK9O_PNYM,15693
+llmstack/commands/status.py,sha256=TOHoDSyu04lZtepJH4bFmIk694RyaUYeFMpUejyUPe0,10403
 llmstack/commands/stop.py,sha256=vntZ1n8wpY9zgix1xGHDNJqEacaUpw9haSKgOnMg73k,2474
 llmstack/download/__init__.py,sha256=lpGmxsE4zxSp0fQViNJZHzbCL_V8zy6IHn71MP31538,695
 llmstack/download/binary.py,sha256=xpv15wF4viv8uFC5UqfSIf36CIoPpmaNUaVtjF-vTWA,8737
@@ -30,8 +30,8 @@ llmstack/download/ggufs.py,sha256=2hCr-svUiPIV2I3ruwTbXo6lPn9m-VBOqa3DFbvdIcA,54
 llmstack/generators/__init__.py,sha256=LfbcReuyYBCdVuT9J5RKo7-f8n585YBU3Hus6DsxqTs,1189
 llmstack/generators/llama_swap.py,sha256=KdYH9N6TJECotZvyxvAjaa3kRyzn4YOi2T6D2UdyVKw,14785
 llmstack/generators/opencode.py,sha256=If7opOQyMWSSbHTj7M9dndsA3BmskSTUsTggMKV0VWM,10669
-opencode_llmstack-0.6.0.dist-info/METADATA,sha256=VaudMhiKVFtKk_-JhBOPbqjhkQJlr5Ljh5taK_Rsis0,34508
-opencode_llmstack-0.6.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
-opencode_llmstack-0.6.0.dist-info/entry_points.txt,sha256=soomjpqvl4KzFScgpQbu96vgcLriOtkB9MbiSC0rvZ8,47
-opencode_llmstack-0.6.0.dist-info/top_level.txt,sha256=tMv9sDWp8RW_DNNY8cuM4Uy4sND-KwTLcsScl5gdcEQ,9
-opencode_llmstack-0.6.0.dist-info/RECORD,,
+opencode_llmstack-0.7.1.dist-info/METADATA,sha256=5ZLU7BozHlxueU01Jykw3BLZLp-Ww2jP8vjQ24kItrQ,34508
+opencode_llmstack-0.7.1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
+opencode_llmstack-0.7.1.dist-info/entry_points.txt,sha256=soomjpqvl4KzFScgpQbu96vgcLriOtkB9MbiSC0rvZ8,47
+opencode_llmstack-0.7.1.dist-info/top_level.txt,sha256=tMv9sDWp8RW_DNNY8cuM4Uy4sND-KwTLcsScl5gdcEQ,9
+opencode_llmstack-0.7.1.dist-info/RECORD,,