opencode-llmstack 0.6.0__tar.gz → 0.7.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/PKG-INFO +1 -1
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/start.py +77 -60
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/status.py +28 -11
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/opencode_llmstack.egg-info/PKG-INFO +1 -1
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/pyproject.toml +1 -1
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/README.md +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/AGENTS.md +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/__init__.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/__main__.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/_platform.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/app.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/backends/__init__.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/backends/bedrock.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/check_models.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/cli.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/__init__.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/_helpers.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/activate.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/check.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/download.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/install.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/install_llama_swap.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/reload.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/restart.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/setup.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/stop.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/download/__init__.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/download/binary.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/download/ggufs.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/generators/__init__.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/generators/llama_swap.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/generators/opencode.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/models.ini +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/paths.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/shell_env.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/tiers.py +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/opencode_llmstack.egg-info/SOURCES.txt +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/opencode_llmstack.egg-info/dependency_links.txt +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/opencode_llmstack.egg-info/entry_points.txt +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/opencode_llmstack.egg-info/requires.txt +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/opencode_llmstack.egg-info/top_level.txt +0 -0
- {opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/setup.cfg +0 -0
{opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/start.py

@@ -54,6 +54,7 @@ from llmstack.commands._helpers import (
 from llmstack.generators import render_to
 from llmstack.generators.llama_swap import render as render_yaml
 from llmstack.generators.llama_swap import validate as validate_yaml
+from llmstack.tiers import load_tiers
 from llmstack.paths import (
     DEFAULT_REMOTE_URL,
     ROUTER_PORT,

@@ -194,47 +195,55 @@ def run(args: list[str]) -> int:
     if not paths.opencode_json.is_file():
         raise SystemExit(f"no .llmstack/opencode.json in {paths.work_dir} -- run: llmstack install")
 
-
-
-
-
-    if
+    tiers = load_tiers()
+    has_gguf = any(t.is_gguf for t in tiers.values())
+
+    if has_gguf:
+        if is_running(paths.swap_pid):
+            launch_daemons = False
+            live_mark = read_marker(paths.active_marker)
+            live = live_mark.channel if live_mark else channel
+            if live != channel:
+                print(
+                    f"[!] llama-swap is already running in '{live}' channel; "
+                    f"refusing to also start '{channel}'. Stop the stack first:",
+                    file=sys.stderr,
+                )
+                print("\n llmstack stop", file=sys.stderr)
+                print(f" llmstack start --{channel}\n", file=sys.stderr)
+                return 1
+        elif port_responds(f"http://127.0.0.1:{SWAP_PORT}/health"):
+            # Something is already listening on :10102, but it isn't ours
+            # (no pid file in this project's state dir). The pre-flag flow
+            # silently joined as "shared", which was a footgun: a `stop`
+            # from this project would tear down the other project's
+            # daemons and we couldn't bring them back without local
+            # tooling. Instead, refuse and tell the user how to wire this
+            # project as a proper thin client.
             print(
-                f"[!]
-
+                f"[!] port :{SWAP_PORT} is already in use (daemons started by "
+                "another project on this host).",
                 file=sys.stderr,
             )
-            print("
-            print(
+            print(" This project is installed for local mode -- it expects to own", file=sys.stderr)
+            print(" those daemons. To run as a thin client of the running stack:", file=sys.stderr)
+            print("", file=sys.stderr)
+            print(" llmstack install --external", file=sys.stderr)
+            print("", file=sys.stderr)
+            print(" (--external defaults to http://127.0.0.1:10101, the local router.)", file=sys.stderr)
+            print(" To take over instead, stop the running daemons first:", file=sys.stderr)
+            print("", file=sys.stderr)
+            print(" llmstack stop && llmstack start", file=sys.stderr)
             return 1
-
-
-        # (no pid file in this project's state dir). The pre-flag flow
-        # silently joined as "shared", which was a footgun: a `stop`
-        # from this project would tear down the other project's
-        # daemons and we couldn't bring them back without local
-        # tooling. Instead, refuse and tell the user how to wire this
-        # project as a proper thin client.
-        print(
-            f"[!] port :{SWAP_PORT} is already in use (daemons started by "
-            "another project on this host).",
-            file=sys.stderr,
-        )
-        print(" This project is installed for local mode -- it expects to own", file=sys.stderr)
-        print(" those daemons. To run as a thin client of the running stack:", file=sys.stderr)
-        print("", file=sys.stderr)
-        print(" llmstack install --external", file=sys.stderr)
-        print("", file=sys.stderr)
-        print(" (--external defaults to http://127.0.0.1:10101, the local router.)", file=sys.stderr)
-        print(" To take over instead, stop the running daemons first:", file=sys.stderr)
-        print("", file=sys.stderr)
-        print(" llmstack stop && llmstack start", file=sys.stderr)
-        return 1
+        else:
+            launch_daemons = True
     else:
         launch_daemons = True
 
     if launch_daemons:
-        if
+        if not has_gguf:
+            print("[*] bedrock-only config -- skipping llama-swap")
+        elif channel == "next":
             queued = _queued_next_tiers()
             if not queued:
                 print(

@@ -251,37 +260,40 @@ def run(args: list[str]) -> int:
             print(f" queued upgrade tiers: {' '.join(queued)}")
         else:
             print(f"[*] generating yaml -> {paths.llama_swap_yaml}")
-
-
-
-
-
+        if has_gguf:
+            render_to(
+                paths.llama_swap_yaml,
+                render=lambda p: Path(p).write_text(render_yaml(use_next=(channel == "next"))),
+                validate=validate_yaml,
+            )
 
     print(f"[*] channel: {channel} ({paths.llama_swap_yaml.name})")
 
     if launch_daemons:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        if has_gguf:
+            print(f"[*] starting llama-swap on :{SWAP_PORT}")
+            spawn_daemon(
+                [
+                    str(paths.llama_swap_bin),
+                    "--config", str(paths.llama_swap_yaml),
+                    "--listen", f"127.0.0.1:{SWAP_PORT}",
+                ],
+                log=paths.log_dir / "llama-swap.log",
+                pid_file=paths.swap_pid,
+            )
+            write_marker(paths.active_marker, ChannelMark(channel))
+            time.sleep(1)
+            if not is_running(paths.swap_pid):
+                print(f"[!] llama-swap failed to start. Check {paths.log_dir}/llama-swap.log")
+                paths.swap_pid.unlink(missing_ok=True)
+                paths.active_marker.unlink(missing_ok=True)
+                return 1
+            print(f" pid {read_pid(paths.swap_pid)}")
 
         print(f"[*] starting router on :{ROUTER_PORT}")
         env = os.environ.copy()
-
+        if has_gguf:
+            env.setdefault("LLAMA_SWAP_URL", f"http://127.0.0.1:{SWAP_PORT}")
         env.setdefault("ROUTER_HOST", "127.0.0.1")
         env.setdefault("ROUTER_PORT", str(ROUTER_PORT))
         # Lock-step with the gguf --use-next swap: bedrock tiers in the

@@ -303,7 +315,10 @@ def run(args: list[str]) -> int:
             return 1
         print(f" pid {read_pid(paths.router_pid)}")
     else:
-
+        if has_gguf:
+            print(f"[=] llama-swap already running (pid {read_pid(paths.swap_pid)}, channel {channel})")
+        else:
+            print("[=] bedrock-only config -- llama-swap not used")
         if is_running(paths.router_pid):
             print(f"[=] router already running (pid {read_pid(paths.router_pid)})")
 

@@ -312,14 +327,16 @@ def run(args: list[str]) -> int:
     print(f"[OK] stack is up (channel: {channel}).")
     print()
     print(f' router http://127.0.0.1:{ROUTER_PORT} (OpenAI-compatible, "auto" routing)')
-
+    if has_gguf:
+        print(f" llama-swap http://127.0.0.1:{SWAP_PORT} (raw model endpoints + UI)")
     print()
     print("Try:")
     print(f" curl -s http://127.0.0.1:{ROUTER_PORT}/v1/models | jq '.data[].id'")
     print(f" curl -s http://127.0.0.1:{ROUTER_PORT}/models.ini | head")
     print()
     print("Logs:")
-
+    if has_gguf:
+        print(f" tail -f {paths.log_dir}/llama-swap.log")
     print(f" tail -f {paths.log_dir}/router.log")
     print()
     print("Switch channel (requires stop first):")

{opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/llmstack/commands/status.py

@@ -37,6 +37,7 @@ from llmstack.paths import (
     read_marker,
     resolve,
 )
+from llmstack.tiers import load_tiers
 
 
 def _print_help() -> None:

@@ -208,13 +209,17 @@ def run(args: list[str]) -> int:
     else:
         channel = "current (or stopped)"
 
+    tiers = load_tiers()
+    has_gguf = any(t.is_gguf for t in tiers.values())
+
     print(f"stack status (channel: {channel}):")
     print(f" work dir {paths.work_dir}")
     # Router has no /health route (dropped in v3.x); /v1/models always
     # 200s on a live router. llama-swap is a separate binary with its
     # own /health endpoint -- leave that one alone.
     _check_local("router", f"http://127.0.0.1:{ROUTER_PORT}/v1/models")
-
+    if has_gguf:
+        _check_local("llama-swap", f"http://127.0.0.1:{SWAP_PORT}/health")
 
     print()
     if paths.opencode_json.is_file():

@@ -229,17 +234,29 @@ def run(args: list[str]) -> int:
     chan = os.environ.get("LLMSTACK_CHANNEL", "?")
     print(f" in-shell OPENCODE_CONFIG={cfg}, LLMSTACK_CHANNEL={chan}")
 
-
-
-    print()
-    print("loaded llama-server processes:")
-    pids = pgrep(r"llama-server.*--alias")
-    if pids:
-        _print_process_table(pids)
+    if has_gguf:
+        _list_models(f"http://127.0.0.1:{ROUTER_PORT}")
     else:
-        print(
-
-
+        print()
+        print("current models in /v1/models:")
+        try:
+            with urllib.request.urlopen(f"{f'http://127.0.0.1:{ROUTER_PORT}'}/v1/models", timeout=5) as resp:
+                data = json.load(resp)
+            for m in data.get("data", []):
+                print(f" - {m.get('id')}")
+        except (urllib.error.URLError, ConnectionError, TimeoutError, OSError, json.JSONDecodeError):
+            print(f" (no response @ http://127.0.0.1:{ROUTER_PORT}/v1/models)")
+
+    if has_gguf:
+        print()
+        print("loaded llama-server processes:")
+        pids = pgrep(r"llama-server.*--alias")
+        if pids:
+            _print_process_table(pids)
+        else:
+            print(" (none loaded)")
+
+    if channel.split()[0] == "next" and has_gguf and paths.llama_swap_yaml.is_file():
         print()
         print(f"next-channel swaps (from {paths.llama_swap_yaml.name}):")
         try:

{opencode_llmstack-0.6.0 → opencode_llmstack-0.7.1}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "opencode-llmstack"
-version = "0.6.0"
+version = "0.7.1"
 description = "Multi-tier local LLM stack: llama-swap + FastAPI auto-router + opencode wiring."
 readme = "README.md"
 requires-python = ">=3.11"