PyPI - opencode-llmstack - Versions diffs - 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl - Mend

opencode-llmstack 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

llmstack/app.py CHANGED Viewed

@@ -90,16 +90,13 @@ Routing decision tree (first match wins):
      ("reasonable context still being built")         -> code-ultra
                                                          (else code-smart)
   5. Estimated input tokens <= MID_FIDELITY_CEILING   -> code-smart
-  6. Otherwise (long context, top-tier becomes
-     expensive/slow, fast tier's 128k window is the
-     best fit and it's free)                          -> code-fast
+   6. Otherwise (long context, top-tier becomes
+      expensive/slow, fast tier's 128k window is the
+      best fit and it's free)                          -> code-fast
                                                          (floored at
                                                           code-smart when
-                                                          ``tools[]`` is set
-                                                          or n_turns >=
-                                                          MULTI_TURN_THRESHOLD,
-                                                          since 3B models
-                                                          tool-call unreliably)
+                                                          n_turns >=
+                                                          MULTI_TURN_THRESHOLD)
 The auto router's effective max context window is
 ``[code-fast].ctx_size`` -- fast is the bottom of the step-down
@@ -356,7 +353,6 @@ def classify(body: dict[str, Any]) -> tuple[str, str]:
                     ULTRA_MODEL, AGENT_MODEL)
         return AGENT_MODEL, f"ultra-trigger->agent ({ULTRA_MODEL} unavailable)"
-    has_tools = bool(body.get("tools"))
     n_turns = len(messages) if messages else 0
     has_code_signal = (
         _matches(CODE_BLOCK, messages, prompt)
@@ -368,14 +364,14 @@ def classify(body: dict[str, Any]) -> tuple[str, str]:
     # Plan track is orthogonal to the code fidelity ladder: ``plan`` is a
     # chat-tuned model meant for design / "should we" discussions. Only
     # take it when nothing about the request says "I'm about to write
-    # code" (no triple-backticks, no agent verbs, no tool calls). And
-    # only if the input fits in the planner's ctx_size -- past that we'd
-    # be sending a request the planner can't hold, so we fall through
-    # to the coding ladder, which has tiers (smart, fast) explicitly
-    # sized for larger contexts.
+    # code" (no triple-backticks, no agent verbs). Tools are stripped
+    # from the request body before dispatch (see ``_handle_completion``),
+    # so their presence here does not block plan routing.
+    # Only route to plan if the input fits in the planner's ctx_size --
+    # past that we fall through to the coding ladder which has tiers
+    # (smart, fast) explicitly sized for larger contexts.
     if (
-        not has_tools
-        and not has_code_signal
+        not has_code_signal
         and _matches(PLAN_SIGNALS, messages, prompt)
     ):
         plan_tier = TIER_BY_ALIAS.get(PLAN_MODEL)
@@ -401,12 +397,12 @@ def classify(body: dict[str, Any]) -> tuple[str, str]:
     if est <= MID_FIDELITY_CEILING:
         return AGENT_MODEL, f"mid-fidelity tokens~{est}<={MID_FIDELITY_CEILING}"
-    # Rung 3: long context -- step down to fast (128k YaRN, free,
-    # always-resident). Floor at smart when tools/agent loop is in
-    # play; the 3B coder doesn't tool-call reliably.
-    if has_tools or n_turns >= MULTI_TURN_THRESHOLD:
-        why = "tools" if has_tools else f"turns={n_turns}"
-        return AGENT_MODEL, f"long-context tokens~{est}>{MID_FIDELITY_CEILING} ({why} floor)"
+    # Rung 3: long context -- step down to fast. Floor at smart only
+    # when the multi-turn threshold is hit; tools alone no longer
+    # prevent the step-down (plan tiers strip tools before dispatch,
+    # and code-fast is a hosted model that tool-calls reliably).
+    if n_turns >= MULTI_TURN_THRESHOLD:
+        return AGENT_MODEL, f"long-context tokens~{est}>{MID_FIDELITY_CEILING} (turns={n_turns} floor)"
     return FAST_MODEL, f"long-context tokens~{est}>{MID_FIDELITY_CEILING}"
@@ -626,6 +622,11 @@ async def _handle_completion(req: Request, path: str) -> Response:
         mutated = True
     chosen_name = body.get("model")
+    if chosen_name in {PLAN_MODEL, UNCENSORED_MODEL} and body.get("tools"):
+        log.info("plan tier %s: stripping tools from request", chosen_name)
+        body.pop("tools")
+        body.pop("tool_choice", None)
+        mutated = True
     tier = _resolve_tier(chosen_name)
     if tier is not None and _inject_sampler(body, tier):
         mutated = True

{opencode_llmstack-0.8.0.dist-info → opencode_llmstack-0.9.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: opencode-llmstack
-Version: 0.8.0
+Version: 0.9.0
 Summary: Multi-tier local LLM stack: llama-swap + FastAPI auto-router + opencode wiring.
 Author: llmstack
 License: MIT

{opencode_llmstack-0.8.0.dist-info → opencode_llmstack-0.9.0.dist-info}/RECORD RENAMED Viewed

@@ -2,7 +2,7 @@ llmstack/AGENTS.md,sha256=4DVUkqJ1-EP-cDNRCpznzghOOX6dAMbVWdcwyfFCALw,528
 llmstack/__init__.py,sha256=EKHybZtPxLqFWkgkIoYBameu5_Tf9j4UewpANKm0fMU,855
 llmstack/__main__.py,sha256=wXHd5-BmCCHUfNEmy2rbilBSyVhi4KD1dSIO_4NlxuE,199
 llmstack/_platform.py,sha256=eDY3T9krkaBigG5xXxqzIbH3MhdZqX3BWe7bozOsAso,13099
-llmstack/app.py,sha256=YfglFlzrp58mh8K1srQA6KNqc9cF41w1xnWnUrLW0IE,27839
+llmstack/app.py,sha256=Fha6Ivb-lsnoWVAK3ekzRlaLqQ1bIEavipgPP9W_TuQ,27888
 llmstack/check_models.py,sha256=WvTS2Td4acp-Q0-yWXUgXAgAgFOmpxiaeSDuAoivirw,4559
 llmstack/cli.py,sha256=Om70PzHrmU81y2Mw1sB6eeUs1fRHP0PnsCEVNC0UNvI,11341
 llmstack/models.ini,sha256=wWAmbfKUCacjLXpBpH7tcgasHgMyOrhF_AmDLsmzptI,20339
@@ -30,8 +30,8 @@ llmstack/download/ggufs.py,sha256=2hCr-svUiPIV2I3ruwTbXo6lPn9m-VBOqa3DFbvdIcA,54
 llmstack/generators/__init__.py,sha256=LfbcReuyYBCdVuT9J5RKo7-f8n585YBU3Hus6DsxqTs,1189
 llmstack/generators/llama_swap.py,sha256=KdYH9N6TJECotZvyxvAjaa3kRyzn4YOi2T6D2UdyVKw,14785
 llmstack/generators/opencode.py,sha256=s_FrLXUBnLzRGQovl1PcAEs7V_P52wT1vnvvxMcKfs4,11203
-opencode_llmstack-0.8.0.dist-info/METADATA,sha256=kskFW_TAESnhrsu3ims1bMeLgANnnfWK8YDaaSlbnGQ,34914
-opencode_llmstack-0.8.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
-opencode_llmstack-0.8.0.dist-info/entry_points.txt,sha256=soomjpqvl4KzFScgpQbu96vgcLriOtkB9MbiSC0rvZ8,47
-opencode_llmstack-0.8.0.dist-info/top_level.txt,sha256=tMv9sDWp8RW_DNNY8cuM4Uy4sND-KwTLcsScl5gdcEQ,9
-opencode_llmstack-0.8.0.dist-info/RECORD,,
+opencode_llmstack-0.9.0.dist-info/METADATA,sha256=WSRM1_jNIIwH9zBhb41tvEiHDPSbdara_FoHqFLgWj4,34914
+opencode_llmstack-0.9.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
+opencode_llmstack-0.9.0.dist-info/entry_points.txt,sha256=soomjpqvl4KzFScgpQbu96vgcLriOtkB9MbiSC0rvZ8,47
+opencode_llmstack-0.9.0.dist-info/top_level.txt,sha256=tMv9sDWp8RW_DNNY8cuM4Uy4sND-KwTLcsScl5gdcEQ,9
+opencode_llmstack-0.9.0.dist-info/RECORD,,

{opencode_llmstack-0.8.0.dist-info → opencode_llmstack-0.9.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{opencode_llmstack-0.8.0.dist-info → opencode_llmstack-0.9.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{opencode_llmstack-0.8.0.dist-info → opencode_llmstack-0.9.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

opencode-llmstack 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

opencode-llmstack 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl