PyPI - opencode-llmstack - Versions diffs - 0.9.2__py3-none-any.whl → 0.9.4__py3-none-any.whl - Mend

opencode-llmstack 0.9.2__py3-none-any.whl → 0.9.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

llmstack/__init__.py CHANGED Viewed

@@ -16,5 +16,5 @@ organised by concern:
 from __future__ import annotations
-__version__ = "0.9.2"
+__version__ = "0.9.4"
 __all__ = ["__version__"]

llmstack/app.py CHANGED Viewed

@@ -355,9 +355,10 @@ def classify(body: dict[str, Any]) -> tuple[str, str]:
         return AGENT_MODEL, f"ultra-trigger->agent ({ULTRA_MODEL} unavailable)"
     n_turns = sum(1 for m in (messages or []) if m.get("role") == "user")
+    _last_msgs = [{"role": "user", "content": last_user}] if last_user else None
     has_code_signal = (
-        _matches(CODE_BLOCK, messages, prompt)
-        or _matches(AGENT_SIGNALS, messages, prompt)
+        _matches(CODE_BLOCK, _last_msgs, prompt)
+        or _matches(AGENT_SIGNALS, _last_msgs, prompt)
     )
     est = _estimate_tokens(messages, prompt)

llmstack/backends/bedrock.py CHANGED Viewed

@@ -400,7 +400,7 @@ def _tool_config(tools: list[dict[str, Any]] | None) -> dict[str, Any] | None:
     return {"tools": specs}
-def _inference_config(body: dict[str, Any]) -> dict[str, Any]:
+def _inference_config(body: dict[str, Any], max_output_tokens: int | None = None) -> dict[str, Any]:
     # We forward only what the Converse `inferenceConfig` schema accepts:
     # `temperature`, `topP`, `maxTokens`, `stopSequences`. Other sampler
     # knobs (`top_k`, `min_p`, `repetition_penalty`) have no Converse-
@@ -415,28 +415,34 @@ def _inference_config(body: dict[str, Any]) -> dict[str, Any]:
     # forward. Configure Bedrock tiers in models.ini accordingly: omit
     # the `sampler =` line for Opus 4.7+, and pick the one allowed knob
     # for Sonnet 4.5 / Haiku 4.5.
-    cfg: dict[str, Any] = {}
+    #
+    # `max_output_tokens` is the per-tier cap from models.ini
+    # (`aws_max_output_tokens`). When set, the client-requested value is
+    # silently clamped to this ceiling -- useful for models like
+    # Llama 3.1 405B whose Bedrock deployment rejects values above 4096.
+    icfg: dict[str, Any] = {}
     if "temperature" in body:
         try:
-            cfg["temperature"] = float(body["temperature"])
+            icfg["temperature"] = float(body["temperature"])
         except (TypeError, ValueError):
             pass
     if "top_p" in body:
         try:
-            cfg["topP"] = float(body["top_p"])
+            icfg["topP"] = float(body["top_p"])
         except (TypeError, ValueError):
             pass
     if "max_tokens" in body or "max_completion_tokens" in body:
         try:
-            cfg["maxTokens"] = int(body.get("max_tokens") or body.get("max_completion_tokens"))
+            requested = int(body.get("max_tokens") or body.get("max_completion_tokens"))
+            icfg["maxTokens"] = min(requested, max_output_tokens) if max_output_tokens else requested
         except (TypeError, ValueError):
             pass
     stop = body.get("stop")
     if isinstance(stop, str):
-        cfg["stopSequences"] = [stop]
+        icfg["stopSequences"] = [stop]
     elif isinstance(stop, list):
-        cfg["stopSequences"] = [s for s in stop if isinstance(s, str)]
-    return cfg
+        icfg["stopSequences"] = [s for s in stop if isinstance(s, str)]
+    return icfg
 def _build_converse_kwargs(tier: Tier, body: dict[str, Any], cfg: BedrockConfig) -> dict[str, Any]:
@@ -463,7 +469,7 @@ def _build_converse_kwargs(tier: Tier, body: dict[str, Any], cfg: BedrockConfig)
     if sys_blocks:
         converse_kwargs["system"] = sys_blocks
-    inference = _inference_config(body)
+    inference = _inference_config(body, max_output_tokens=cfg.max_output_tokens)
     if inference:
         converse_kwargs["inferenceConfig"] = inference

llmstack/models.ini CHANGED Viewed

@@ -215,9 +215,10 @@ description  = Mistral-Small 3.2 24B Heretic - no-filter planning
 ; tier         = chat
 ; role         = plan-uncensored
 ; backend      = bedrock
-; aws_model_id = meta.llama3-1-405b-instruct-v1:0
-; aws_region   = us-west-2     ; Llama 405B has no EU deployment; keep on US
-; aws_profile  = bedrock-prod
+; aws_model_id         = meta.llama3-1-405b-instruct-v1:0
+; aws_region           = us-west-2     ; Llama 405B has no EU deployment; keep on US
+; aws_profile          = bedrock-prod
+; aws_max_output_tokens = 4096         ; Llama 3.1 405B Bedrock hard cap
 ; ctx_size     = 128000
 ; sampler      = temp=0.85, top_p=0.95   ; max exploration
 ; description  = Llama 3.1 405B on Bedrock - no-filter planning
@@ -229,10 +230,11 @@ description  = Mistral-Small 3.2 24B Heretic - no-filter planning
 ; tier             = chat
 ; role             = plan-uncensored
 ; backend          = bedrock
-; aws_model_id     = meta.llama3-1-405b-instruct-v1:0
-; aws_region       = us-west-2     ; Llama 405B has no EU deployment
-; aws_profile      = bedrock-prod
-; aws_endpoint_url = https://bedrock-runtime.us-west-2.vpce.amazonaws.com
+; aws_model_id         = meta.llama3-1-405b-instruct-v1:0
+; aws_region           = us-west-2     ; Llama 405B has no EU deployment
+; aws_profile          = bedrock-prod
+; aws_endpoint_url     = https://bedrock-runtime.us-west-2.vpce.amazonaws.com
+; aws_max_output_tokens = 4096         ; Llama 3.1 405B Bedrock hard cap
 ; ctx_size         = 128000
 ; sampler          = temp=0.85, top_p=0.95
 ; description      = Llama 3.1 405B on Bedrock (VPC) - no-filter planning

llmstack/tiers.py CHANGED Viewed

@@ -139,6 +139,7 @@ class BedrockConfig:
     endpoint_url: str | None = None
     model_id_next: str | None = None
     region_next: str | None = None
+    max_output_tokens: int | None = None
     @property
     def has_next(self) -> bool:
@@ -298,6 +299,7 @@ def _build_bedrock(section) -> BedrockConfig:
         endpoint_url=_opt(section.get("aws_endpoint_url")),
         model_id_next=_opt(section.get("aws_model_id_next")),
         region_next=_opt(section.get("aws_region_next")),
+        max_output_tokens=_int(section.get("aws_max_output_tokens", "")) or None,
     )

{opencode_llmstack-0.9.2.data → opencode_llmstack-0.9.4.data}/data/CHANGELOG.md RENAMED Viewed

@@ -4,6 +4,21 @@ All notable changes to `opencode-llmstack` are documented here.
 ---
+## [0.9.4] — 2026-05-11
+### Fixed
+- `classify()` now scopes `has_code_signal` to the **last user message only**
+  (was scanning the full conversation history). Previously, any prior coding
+  exchange in the session (code blocks, agent verbs) would permanently block
+  plan routing for the rest of the conversation — e.g. "explain why these
+  changes are important?" after a refactor request would never reach `plan`.
+- Added regression test:
+  `test_plan_signal_after_prior_coding_exchange_routes_to_plan`.
+- `__version__` corrected from `"0.9.2"` to `"0.9.4"` (was skewed vs
+  `pyproject.toml` since 0.9.3).
+---
 ## [0.9.2] — 2026-05-11
 ### Fixed

{opencode_llmstack-0.9.2.dist-info → opencode_llmstack-0.9.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: opencode-llmstack
-Version: 0.9.2
+Version: 0.9.4
 Summary: Multi-tier local LLM stack: llama-swap + FastAPI auto-router + opencode wiring.
 Author: llmstack
 License: MIT License

{opencode_llmstack-0.9.2.dist-info → opencode_llmstack-0.9.4.dist-info}/RECORD RENAMED Viewed

@@ -1,16 +1,16 @@
 llmstack/AGENTS.md,sha256=4DVUkqJ1-EP-cDNRCpznzghOOX6dAMbVWdcwyfFCALw,528
-llmstack/__init__.py,sha256=Qs9d58V8cJWmJvu4QLvO7_panKa8UkGRXAurHYgKDyU,855
+llmstack/__init__.py,sha256=C56mGWt0YsaDyU0C7R_PpsIPw67G0jm4nRAqGKLVh5s,855
 llmstack/__main__.py,sha256=wXHd5-BmCCHUfNEmy2rbilBSyVhi4KD1dSIO_4NlxuE,199
 llmstack/_platform.py,sha256=eDY3T9krkaBigG5xXxqzIbH3MhdZqX3BWe7bozOsAso,13099
-llmstack/app.py,sha256=3Qt_bveLS13rPEucyX0P6QDf9o9O68amnJIvwMqoTQQ,28469
+llmstack/app.py,sha256=FpxEWpfvnRdRJg3IQdCOSdSuqPQG6iYKpBNGcfUR9wA,28554
 llmstack/check_models.py,sha256=WvTS2Td4acp-Q0-yWXUgXAgAgFOmpxiaeSDuAoivirw,4559
 llmstack/cli.py,sha256=Om70PzHrmU81y2Mw1sB6eeUs1fRHP0PnsCEVNC0UNvI,11341
-llmstack/models.ini,sha256=USyEOekc3tWw-m375K-zDc3EyaQjrJGEnG_Hf5ZxdWw,20370
+llmstack/models.ini,sha256=U38Z6wfGCpmgQTCNbnp1zu80rUrhNGWFTvI2nhVx1Mo,20556
 llmstack/paths.py,sha256=A8q4-tpwIt5UMGG5ZDESKSuViMGLbPIAL1VoONopJqU,11512
 llmstack/shell_env.py,sha256=MJSW0PP15q-fsppIZ98WZ7XoqYMZmDy4k8N0gzEA6wU,39362
-llmstack/tiers.py,sha256=et738dWftsc74ZElZ3Vt9eEF_SzgJCDuH9kBhzH-scI,14697
+llmstack/tiers.py,sha256=yl5xEhECe-GHiVXBRvlNoFtH_9y4uNSASpfHlZ4Ja74,14820
 llmstack/backends/__init__.py,sha256=-85sQz0R94OdbM2bUHGyyA5WaMnI9bHywPOaELeQHX0,777
-llmstack/backends/bedrock.py,sha256=eVYuAWyX7NeRi0X55H7kVBTJAF7hmutyTS2lrCzBZeY,30179
+llmstack/backends/bedrock.py,sha256=Nb9sV45aH0RQHie_AkQwcpX5pkio5EAqnsphZM5P_nQ,30638
 llmstack/commands/__init__.py,sha256=eVO-YUxh1fSfdq72KggC-NrTYMtN6zIykgjyRgOCAt4,406
 llmstack/commands/_helpers.py,sha256=UKADaNXrnuoDi_JG0W2Tph7rWFB0cXvQh8YknZBw56I,2660
 llmstack/commands/activate.py,sha256=zCdEmyVv5qZUdhfez6hZ5Y46N_yjPwfKbPTwCJXnA3o,3663
@@ -30,12 +30,12 @@ llmstack/download/ggufs.py,sha256=2hCr-svUiPIV2I3ruwTbXo6lPn9m-VBOqa3DFbvdIcA,54
 llmstack/generators/__init__.py,sha256=LfbcReuyYBCdVuT9J5RKo7-f8n585YBU3Hus6DsxqTs,1189
 llmstack/generators/llama_swap.py,sha256=KdYH9N6TJECotZvyxvAjaa3kRyzn4YOi2T6D2UdyVKw,14785
 llmstack/generators/opencode.py,sha256=s_FrLXUBnLzRGQovl1PcAEs7V_P52wT1vnvvxMcKfs4,11203
-opencode_llmstack-0.9.2.data/data/CHANGELOG.md,sha256=2Ok5sn4aA_N5UMaUJ4jRbuTeWC1pt7gdFgYnxj2JdKU,5217
-opencode_llmstack-0.9.2.data/data/LICENSE,sha256=6G-Otw6BHIM1WJSBlJ04P1rDVCqbDEzKpdOlSr5CqIY,1078
-opencode_llmstack-0.9.2.data/data/UPGRADING.md,sha256=0XSNZ9trCviFLH5EL3Jz02fO2_8AfqB8_9aX0-o1bik,24927
-opencode_llmstack-0.9.2.dist-info/licenses/LICENSE,sha256=6G-Otw6BHIM1WJSBlJ04P1rDVCqbDEzKpdOlSr5CqIY,1078
-opencode_llmstack-0.9.2.dist-info/METADATA,sha256=4HqjCNQ363c6DSkSuOZWSYxDvIujqhEc-DmRFNPhfNI,36323
-opencode_llmstack-0.9.2.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
-opencode_llmstack-0.9.2.dist-info/entry_points.txt,sha256=soomjpqvl4KzFScgpQbu96vgcLriOtkB9MbiSC0rvZ8,47
-opencode_llmstack-0.9.2.dist-info/top_level.txt,sha256=tMv9sDWp8RW_DNNY8cuM4Uy4sND-KwTLcsScl5gdcEQ,9
-opencode_llmstack-0.9.2.dist-info/RECORD,,
+opencode_llmstack-0.9.4.data/data/CHANGELOG.md,sha256=58feU0rA9bBYvecDoFaLcwwgezLPkD3MSt0vRUVjdF8,5837
+opencode_llmstack-0.9.4.data/data/LICENSE,sha256=6G-Otw6BHIM1WJSBlJ04P1rDVCqbDEzKpdOlSr5CqIY,1078
+opencode_llmstack-0.9.4.data/data/UPGRADING.md,sha256=0XSNZ9trCviFLH5EL3Jz02fO2_8AfqB8_9aX0-o1bik,24927
+opencode_llmstack-0.9.4.dist-info/licenses/LICENSE,sha256=6G-Otw6BHIM1WJSBlJ04P1rDVCqbDEzKpdOlSr5CqIY,1078
+opencode_llmstack-0.9.4.dist-info/METADATA,sha256=hHxDa4-Iqb7saWESfg82n4DEhFpM0J9uY9hAosnuJAQ,36323
+opencode_llmstack-0.9.4.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
+opencode_llmstack-0.9.4.dist-info/entry_points.txt,sha256=soomjpqvl4KzFScgpQbu96vgcLriOtkB9MbiSC0rvZ8,47
+opencode_llmstack-0.9.4.dist-info/top_level.txt,sha256=tMv9sDWp8RW_DNNY8cuM4Uy4sND-KwTLcsScl5gdcEQ,9
+opencode_llmstack-0.9.4.dist-info/RECORD,,

{opencode_llmstack-0.9.2.data → opencode_llmstack-0.9.4.data}/data/LICENSE RENAMED Viewed

File without changes

{opencode_llmstack-0.9.2.data → opencode_llmstack-0.9.4.data}/data/UPGRADING.md RENAMED Viewed

File without changes

{opencode_llmstack-0.9.2.dist-info → opencode_llmstack-0.9.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{opencode_llmstack-0.9.2.dist-info → opencode_llmstack-0.9.4.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{opencode_llmstack-0.9.2.dist-info → opencode_llmstack-0.9.4.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{opencode_llmstack-0.9.2.dist-info → opencode_llmstack-0.9.4.dist-info}/top_level.txt RENAMED Viewed

File without changes

opencode-llmstack 0.9.2__py3-none-any.whl → 0.9.4__py3-none-any.whl

opencode-llmstack 0.9.2__py3-none-any.whl → 0.9.4__py3-none-any.whl