opencode-llmstack 0.6.0__tar.gz → 0.7.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/PKG-INFO +1 -1
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/start.py +77 -60
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/status.py +28 -11
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/opencode_llmstack.egg-info/PKG-INFO +1 -1
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/pyproject.toml +1 -1
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/README.md +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/AGENTS.md +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/__init__.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/__main__.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/_platform.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/app.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/backends/__init__.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/backends/bedrock.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/check_models.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/cli.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/__init__.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/_helpers.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/activate.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/check.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/download.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/install.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/install_llama_swap.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/reload.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/restart.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/setup.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/stop.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/download/__init__.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/download/binary.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/download/ggufs.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/generators/__init__.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/generators/llama_swap.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/generators/opencode.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/models.ini +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/paths.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/shell_env.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/tiers.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/opencode_llmstack.egg-info/SOURCES.txt +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/opencode_llmstack.egg-info/dependency_links.txt +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/opencode_llmstack.egg-info/entry_points.txt +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/opencode_llmstack.egg-info/requires.txt +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/opencode_llmstack.egg-info/top_level.txt +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/setup.cfg +0 -0
{opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/start.py

@@ -54,6 +54,7 @@ from llmstack.commands._helpers import (
 from llmstack.generators import render_to
 from llmstack.generators.llama_swap import render as render_yaml
 from llmstack.generators.llama_swap import validate as validate_yaml
+from llmstack.tiers import load_tiers
 from llmstack.paths import (
     DEFAULT_REMOTE_URL,
     ROUTER_PORT,

@@ -194,47 +195,55 @@ def run(args: list[str]) -> int:
     if not paths.opencode_json.is_file():
         raise SystemExit(f"no .llmstack/opencode.json in {paths.work_dir} -- run: llmstack install")
 
-
-
-
-
-    if
+    tiers = load_tiers()
+    has_gguf = any(t.is_gguf for t in tiers.values())
+
+    if has_gguf:
+        if is_running(paths.swap_pid):
+            launch_daemons = False
+            live_mark = read_marker(paths.active_marker)
+            live = live_mark.channel if live_mark else channel
+            if live != channel:
+                print(
+                    f"[!] llama-swap is already running in '{live}' channel; "
+                    f"refusing to also start '{channel}'. Stop the stack first:",
+                    file=sys.stderr,
+                )
+                print("\n llmstack stop", file=sys.stderr)
+                print(f" llmstack start --{channel}\n", file=sys.stderr)
+                return 1
+        elif port_responds(f"http://127.0.0.1:{SWAP_PORT}/health"):
+            # Something is already listening on :10102, but it isn't ours
+            # (no pid file in this project's state dir). The pre-flag flow
+            # silently joined as "shared", which was a footgun: a `stop`
+            # from this project would tear down the other project's
+            # daemons and we couldn't bring them back without local
+            # tooling. Instead, refuse and tell the user how to wire this
+            # project as a proper thin client.
             print(
-                f"[!]
-
+                f"[!] port :{SWAP_PORT} is already in use (daemons started by "
+                "another project on this host).",
                 file=sys.stderr,
             )
-            print("
-            print(
+            print(" This project is installed for local mode -- it expects to own", file=sys.stderr)
+            print(" those daemons. To run as a thin client of the running stack:", file=sys.stderr)
+            print("", file=sys.stderr)
+            print(" llmstack install --external", file=sys.stderr)
+            print("", file=sys.stderr)
+            print(" (--external defaults to http://127.0.0.1:10101, the local router.)", file=sys.stderr)
+            print(" To take over instead, stop the running daemons first:", file=sys.stderr)
+            print("", file=sys.stderr)
+            print(" llmstack stop && llmstack start", file=sys.stderr)
             return 1
-
-
-        # (no pid file in this project's state dir). The pre-flag flow
-        # silently joined as "shared", which was a footgun: a `stop`
-        # from this project would tear down the other project's
-        # daemons and we couldn't bring them back without local
-        # tooling. Instead, refuse and tell the user how to wire this
-        # project as a proper thin client.
-        print(
-            f"[!] port :{SWAP_PORT} is already in use (daemons started by "
-            "another project on this host).",
-            file=sys.stderr,
-        )
-        print(" This project is installed for local mode -- it expects to own", file=sys.stderr)
-        print(" those daemons. To run as a thin client of the running stack:", file=sys.stderr)
-        print("", file=sys.stderr)
-        print(" llmstack install --external", file=sys.stderr)
-        print("", file=sys.stderr)
-        print(" (--external defaults to http://127.0.0.1:10101, the local router.)", file=sys.stderr)
-        print(" To take over instead, stop the running daemons first:", file=sys.stderr)
-        print("", file=sys.stderr)
-        print(" llmstack stop && llmstack start", file=sys.stderr)
-        return 1
+        else:
+            launch_daemons = True
     else:
         launch_daemons = True
 
     if launch_daemons:
-        if
+        if not has_gguf:
+            print("[*] bedrock-only config -- skipping llama-swap")
+        elif channel == "next":
             queued = _queued_next_tiers()
             if not queued:
                 print(

@@ -251,37 +260,40 @@ def run(args: list[str]) -> int:
             print(f" queued upgrade tiers: {' '.join(queued)}")
         else:
             print(f"[*] generating yaml -> {paths.llama_swap_yaml}")
-
-
-
-
-
+        if has_gguf:
+            render_to(
+                paths.llama_swap_yaml,
+                render=lambda p: Path(p).write_text(render_yaml(use_next=(channel == "next"))),
+                validate=validate_yaml,
+            )
 
     print(f"[*] channel: {channel} ({paths.llama_swap_yaml.name})")
 
     if launch_daemons:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        if has_gguf:
+            print(f"[*] starting llama-swap on :{SWAP_PORT}")
+            spawn_daemon(
+                [
+                    str(paths.llama_swap_bin),
+                    "--config", str(paths.llama_swap_yaml),
+                    "--listen", f"127.0.0.1:{SWAP_PORT}",
+                ],
+                log=paths.log_dir / "llama-swap.log",
+                pid_file=paths.swap_pid,
+            )
+            write_marker(paths.active_marker, ChannelMark(channel))
+            time.sleep(1)
+            if not is_running(paths.swap_pid):
+                print(f"[!] llama-swap failed to start. Check {paths.log_dir}/llama-swap.log")
+                paths.swap_pid.unlink(missing_ok=True)
+                paths.active_marker.unlink(missing_ok=True)
+                return 1
+            print(f" pid {read_pid(paths.swap_pid)}")
 
         print(f"[*] starting router on :{ROUTER_PORT}")
         env = os.environ.copy()
-
+        if has_gguf:
+            env.setdefault("LLAMA_SWAP_URL", f"http://127.0.0.1:{SWAP_PORT}")
         env.setdefault("ROUTER_HOST", "127.0.0.1")
         env.setdefault("ROUTER_PORT", str(ROUTER_PORT))
         # Lock-step with the gguf --use-next swap: bedrock tiers in the

@@ -303,7 +315,10 @@ def run(args: list[str]) -> int:
             return 1
         print(f" pid {read_pid(paths.router_pid)}")
     else:
-
+        if has_gguf:
+            print(f"[=] llama-swap already running (pid {read_pid(paths.swap_pid)}, channel {channel})")
+        else:
+            print("[=] bedrock-only config -- llama-swap not used")
         if is_running(paths.router_pid):
             print(f"[=] router already running (pid {read_pid(paths.router_pid)})")
 

@@ -312,14 +327,16 @@ def run(args: list[str]) -> int:
     print(f"[OK] stack is up (channel: {channel}).")
     print()
     print(f' router http://127.0.0.1:{ROUTER_PORT} (OpenAI-compatible, "auto" routing)')
-
+    if has_gguf:
+        print(f" llama-swap http://127.0.0.1:{SWAP_PORT} (raw model endpoints + UI)")
     print()
     print("Try:")
     print(f" curl -s http://127.0.0.1:{ROUTER_PORT}/v1/models | jq '.data[].id'")
     print(f" curl -s http://127.0.0.1:{ROUTER_PORT}/models.ini | head")
     print()
     print("Logs:")
-
+    if has_gguf:
+        print(f" tail -f {paths.log_dir}/llama-swap.log")
     print(f" tail -f {paths.log_dir}/router.log")
     print()
     print("Switch channel (requires stop first):")

{opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/status.py

@@ -37,6 +37,7 @@ from llmstack.paths import (
     read_marker,
     resolve,
 )
+from llmstack.tiers import load_tiers
 
 
 def _print_help() -> None:

@@ -208,13 +209,17 @@ def run(args: list[str]) -> int:
     else:
         channel = "current (or stopped)"
 
+    tiers = load_tiers()
+    has_gguf = any(t.is_gguf for t in tiers.values())
+
     print(f"stack status (channel: {channel}):")
     print(f" work dir {paths.work_dir}")
     # Router has no /health route (dropped in v3.x); /v1/models always
     # 200s on a live router. llama-swap is a separate binary with its
     # own /health endpoint -- leave that one alone.
     _check_local("router", f"http://127.0.0.1:{ROUTER_PORT}/v1/models")
-
+    if has_gguf:
+        _check_local("llama-swap", f"http://127.0.0.1:{SWAP_PORT}/health")
 
     print()
     if paths.opencode_json.is_file():

@@ -229,17 +234,29 @@ def run(args: list[str]) -> int:
     chan = os.environ.get("LLMSTACK_CHANNEL", "?")
     print(f" in-shell OPENCODE_CONFIG={cfg}, LLMSTACK_CHANNEL={chan}")
 
-
-
-    print()
-    print("loaded llama-server processes:")
-    pids = pgrep(r"llama-server.*--alias")
-    if pids:
-        _print_process_table(pids)
+    if has_gguf:
+        _list_models(f"http://127.0.0.1:{ROUTER_PORT}")
     else:
-        print(
-
-
+        print()
+        print("current models in /v1/models:")
+        try:
+            with urllib.request.urlopen(f"{f'http://127.0.0.1:{ROUTER_PORT}'}/v1/models", timeout=5) as resp:
+                data = json.load(resp)
+            for m in data.get("data", []):
+                print(f" - {m.get('id')}")
+        except (urllib.error.URLError, ConnectionError, TimeoutError, OSError, json.JSONDecodeError):
+            print(f" (no response @ http://127.0.0.1:{ROUTER_PORT}/v1/models)")
+
+    if has_gguf:
+        print()
+        print("loaded llama-server processes:")
+        pids = pgrep(r"llama-server.*--alias")
+        if pids:
+            _print_process_table(pids)
+        else:
+            print(" (none loaded)")
+
+    if channel.split()[0] == "next" and has_gguf and paths.llama_swap_yaml.is_file():
         print()
         print(f"next-channel swaps (from {paths.llama_swap_yaml.name}):")
         try:

{opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "opencode-llmstack"
-version = "0.6.0"
+version = "0.7.1"
 description = "Multi-tier local LLM stack: llama-swap + FastAPI auto-router + opencode wiring."
 readme = "README.md"
 requires-python = ">=3.11"