opencode-llmstack 0.9.2__py3-none-any.whl → 0.9.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llmstack/__init__.py CHANGED
@@ -16,5 +16,5 @@ organised by concern:
16
16
 
17
17
  from __future__ import annotations
18
18
 
19
- __version__ = "0.9.2"
19
+ __version__ = "0.9.4"
20
20
  __all__ = ["__version__"]
llmstack/app.py CHANGED
@@ -355,9 +355,10 @@ def classify(body: dict[str, Any]) -> tuple[str, str]:
355
355
  return AGENT_MODEL, f"ultra-trigger->agent ({ULTRA_MODEL} unavailable)"
356
356
 
357
357
  n_turns = sum(1 for m in (messages or []) if m.get("role") == "user")
358
+ _last_msgs = [{"role": "user", "content": last_user}] if last_user else None
358
359
  has_code_signal = (
359
- _matches(CODE_BLOCK, messages, prompt)
360
- or _matches(AGENT_SIGNALS, messages, prompt)
360
+ _matches(CODE_BLOCK, _last_msgs, prompt)
361
+ or _matches(AGENT_SIGNALS, _last_msgs, prompt)
361
362
  )
362
363
 
363
364
  est = _estimate_tokens(messages, prompt)
@@ -400,7 +400,7 @@ def _tool_config(tools: list[dict[str, Any]] | None) -> dict[str, Any] | None:
400
400
  return {"tools": specs}
401
401
 
402
402
 
403
- def _inference_config(body: dict[str, Any]) -> dict[str, Any]:
403
+ def _inference_config(body: dict[str, Any], max_output_tokens: int | None = None) -> dict[str, Any]:
404
404
  # We forward only what the Converse `inferenceConfig` schema accepts:
405
405
  # `temperature`, `topP`, `maxTokens`, `stopSequences`. Other sampler
406
406
  # knobs (`top_k`, `min_p`, `repetition_penalty`) have no Converse-
@@ -415,28 +415,34 @@ def _inference_config(body: dict[str, Any]) -> dict[str, Any]:
415
415
  # forward. Configure Bedrock tiers in models.ini accordingly: omit
416
416
  # the `sampler =` line for Opus 4.7+, and pick the one allowed knob
417
417
  # for Sonnet 4.5 / Haiku 4.5.
418
- cfg: dict[str, Any] = {}
418
+ #
419
+ # `max_output_tokens` is the per-tier cap from models.ini
420
+ # (`aws_max_output_tokens`). When set, the client-requested value is
421
+ # silently clamped to this ceiling -- useful for models like
422
+ # Llama 3.1 405B whose Bedrock deployment rejects values above 4096.
423
+ icfg: dict[str, Any] = {}
419
424
  if "temperature" in body:
420
425
  try:
421
- cfg["temperature"] = float(body["temperature"])
426
+ icfg["temperature"] = float(body["temperature"])
422
427
  except (TypeError, ValueError):
423
428
  pass
424
429
  if "top_p" in body:
425
430
  try:
426
- cfg["topP"] = float(body["top_p"])
431
+ icfg["topP"] = float(body["top_p"])
427
432
  except (TypeError, ValueError):
428
433
  pass
429
434
  if "max_tokens" in body or "max_completion_tokens" in body:
430
435
  try:
431
- cfg["maxTokens"] = int(body.get("max_tokens") or body.get("max_completion_tokens"))
436
+ requested = int(body.get("max_tokens") or body.get("max_completion_tokens"))
437
+ icfg["maxTokens"] = min(requested, max_output_tokens) if max_output_tokens else requested
432
438
  except (TypeError, ValueError):
433
439
  pass
434
440
  stop = body.get("stop")
435
441
  if isinstance(stop, str):
436
- cfg["stopSequences"] = [stop]
442
+ icfg["stopSequences"] = [stop]
437
443
  elif isinstance(stop, list):
438
- cfg["stopSequences"] = [s for s in stop if isinstance(s, str)]
439
- return cfg
444
+ icfg["stopSequences"] = [s for s in stop if isinstance(s, str)]
445
+ return icfg
440
446
 
441
447
 
442
448
  def _build_converse_kwargs(tier: Tier, body: dict[str, Any], cfg: BedrockConfig) -> dict[str, Any]:
@@ -463,7 +469,7 @@ def _build_converse_kwargs(tier: Tier, body: dict[str, Any], cfg: BedrockConfig)
463
469
  if sys_blocks:
464
470
  converse_kwargs["system"] = sys_blocks
465
471
 
466
- inference = _inference_config(body)
472
+ inference = _inference_config(body, max_output_tokens=cfg.max_output_tokens)
467
473
  if inference:
468
474
  converse_kwargs["inferenceConfig"] = inference
469
475
 
llmstack/models.ini CHANGED
@@ -215,9 +215,10 @@ description = Mistral-Small 3.2 24B Heretic - no-filter planning
215
215
  ; tier = chat
216
216
  ; role = plan-uncensored
217
217
  ; backend = bedrock
218
- ; aws_model_id = meta.llama3-1-405b-instruct-v1:0
219
- ; aws_region = us-west-2 ; Llama 405B has no EU deployment; keep on US
220
- ; aws_profile = bedrock-prod
218
+ ; aws_model_id = meta.llama3-1-405b-instruct-v1:0
219
+ ; aws_region = us-west-2 ; Llama 405B has no EU deployment; keep on US
220
+ ; aws_profile = bedrock-prod
221
+ ; aws_max_output_tokens = 4096 ; Llama 3.1 405B Bedrock hard cap
221
222
  ; ctx_size = 128000
222
223
  ; sampler = temp=0.85, top_p=0.95 ; max exploration
223
224
  ; description = Llama 3.1 405B on Bedrock - no-filter planning
@@ -229,10 +230,11 @@ description = Mistral-Small 3.2 24B Heretic - no-filter planning
229
230
  ; tier = chat
230
231
  ; role = plan-uncensored
231
232
  ; backend = bedrock
232
- ; aws_model_id = meta.llama3-1-405b-instruct-v1:0
233
- ; aws_region = us-west-2 ; Llama 405B has no EU deployment
234
- ; aws_profile = bedrock-prod
235
- ; aws_endpoint_url = https://bedrock-runtime.us-west-2.vpce.amazonaws.com
233
+ ; aws_model_id = meta.llama3-1-405b-instruct-v1:0
234
+ ; aws_region = us-west-2 ; Llama 405B has no EU deployment
235
+ ; aws_profile = bedrock-prod
236
+ ; aws_endpoint_url = https://bedrock-runtime.us-west-2.vpce.amazonaws.com
237
+ ; aws_max_output_tokens = 4096 ; Llama 3.1 405B Bedrock hard cap
236
238
  ; ctx_size = 128000
237
239
  ; sampler = temp=0.85, top_p=0.95
238
240
  ; description = Llama 3.1 405B on Bedrock (VPC) - no-filter planning
llmstack/tiers.py CHANGED
@@ -139,6 +139,7 @@ class BedrockConfig:
139
139
  endpoint_url: str | None = None
140
140
  model_id_next: str | None = None
141
141
  region_next: str | None = None
142
+ max_output_tokens: int | None = None
142
143
 
143
144
  @property
144
145
  def has_next(self) -> bool:
@@ -298,6 +299,7 @@ def _build_bedrock(section) -> BedrockConfig:
298
299
  endpoint_url=_opt(section.get("aws_endpoint_url")),
299
300
  model_id_next=_opt(section.get("aws_model_id_next")),
300
301
  region_next=_opt(section.get("aws_region_next")),
302
+ max_output_tokens=_int(section.get("aws_max_output_tokens", "")) or None,
301
303
  )
302
304
 
303
305
 
@@ -4,6 +4,21 @@ All notable changes to `opencode-llmstack` are documented here.
4
4
 
5
5
  ---
6
6
 
7
+ ## [0.9.4] — 2026-05-11
8
+
9
+ ### Fixed
10
+ - `classify()` now scopes `has_code_signal` to the **last user message only**
11
+ (was scanning the full conversation history). Previously, any prior coding
12
+ exchange in the session (code blocks, agent verbs) would permanently block
13
+ plan routing for the rest of the conversation — e.g. "explain why these
14
+ changes are important?" after a refactor request would never reach `plan`.
15
+ - Added regression test:
16
+ `test_plan_signal_after_prior_coding_exchange_routes_to_plan`.
17
+ - `__version__` corrected from `"0.9.2"` to `"0.9.4"` (was skewed vs
18
+ `pyproject.toml` since 0.9.3).
19
+
20
+ ---
21
+
7
22
  ## [0.9.2] — 2026-05-11
8
23
 
9
24
  ### Fixed
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: opencode-llmstack
3
- Version: 0.9.2
3
+ Version: 0.9.4
4
4
  Summary: Multi-tier local LLM stack: llama-swap + FastAPI auto-router + opencode wiring.
5
5
  Author: llmstack
6
6
  License: MIT License
@@ -1,16 +1,16 @@
1
1
  llmstack/AGENTS.md,sha256=4DVUkqJ1-EP-cDNRCpznzghOOX6dAMbVWdcwyfFCALw,528
2
- llmstack/__init__.py,sha256=Qs9d58V8cJWmJvu4QLvO7_panKa8UkGRXAurHYgKDyU,855
2
+ llmstack/__init__.py,sha256=C56mGWt0YsaDyU0C7R_PpsIPw67G0jm4nRAqGKLVh5s,855
3
3
  llmstack/__main__.py,sha256=wXHd5-BmCCHUfNEmy2rbilBSyVhi4KD1dSIO_4NlxuE,199
4
4
  llmstack/_platform.py,sha256=eDY3T9krkaBigG5xXxqzIbH3MhdZqX3BWe7bozOsAso,13099
5
- llmstack/app.py,sha256=3Qt_bveLS13rPEucyX0P6QDf9o9O68amnJIvwMqoTQQ,28469
5
+ llmstack/app.py,sha256=FpxEWpfvnRdRJg3IQdCOSdSuqPQG6iYKpBNGcfUR9wA,28554
6
6
  llmstack/check_models.py,sha256=WvTS2Td4acp-Q0-yWXUgXAgAgFOmpxiaeSDuAoivirw,4559
7
7
  llmstack/cli.py,sha256=Om70PzHrmU81y2Mw1sB6eeUs1fRHP0PnsCEVNC0UNvI,11341
8
- llmstack/models.ini,sha256=USyEOekc3tWw-m375K-zDc3EyaQjrJGEnG_Hf5ZxdWw,20370
8
+ llmstack/models.ini,sha256=U38Z6wfGCpmgQTCNbnp1zu80rUrhNGWFTvI2nhVx1Mo,20556
9
9
  llmstack/paths.py,sha256=A8q4-tpwIt5UMGG5ZDESKSuViMGLbPIAL1VoONopJqU,11512
10
10
  llmstack/shell_env.py,sha256=MJSW0PP15q-fsppIZ98WZ7XoqYMZmDy4k8N0gzEA6wU,39362
11
- llmstack/tiers.py,sha256=et738dWftsc74ZElZ3Vt9eEF_SzgJCDuH9kBhzH-scI,14697
11
+ llmstack/tiers.py,sha256=yl5xEhECe-GHiVXBRvlNoFtH_9y4uNSASpfHlZ4Ja74,14820
12
12
  llmstack/backends/__init__.py,sha256=-85sQz0R94OdbM2bUHGyyA5WaMnI9bHywPOaELeQHX0,777
13
- llmstack/backends/bedrock.py,sha256=eVYuAWyX7NeRi0X55H7kVBTJAF7hmutyTS2lrCzBZeY,30179
13
+ llmstack/backends/bedrock.py,sha256=Nb9sV45aH0RQHie_AkQwcpX5pkio5EAqnsphZM5P_nQ,30638
14
14
  llmstack/commands/__init__.py,sha256=eVO-YUxh1fSfdq72KggC-NrTYMtN6zIykgjyRgOCAt4,406
15
15
  llmstack/commands/_helpers.py,sha256=UKADaNXrnuoDi_JG0W2Tph7rWFB0cXvQh8YknZBw56I,2660
16
16
  llmstack/commands/activate.py,sha256=zCdEmyVv5qZUdhfez6hZ5Y46N_yjPwfKbPTwCJXnA3o,3663
@@ -30,12 +30,12 @@ llmstack/download/ggufs.py,sha256=2hCr-svUiPIV2I3ruwTbXo6lPn9m-VBOqa3DFbvdIcA,54
30
30
  llmstack/generators/__init__.py,sha256=LfbcReuyYBCdVuT9J5RKo7-f8n585YBU3Hus6DsxqTs,1189
31
31
  llmstack/generators/llama_swap.py,sha256=KdYH9N6TJECotZvyxvAjaa3kRyzn4YOi2T6D2UdyVKw,14785
32
32
  llmstack/generators/opencode.py,sha256=s_FrLXUBnLzRGQovl1PcAEs7V_P52wT1vnvvxMcKfs4,11203
33
- opencode_llmstack-0.9.2.data/data/CHANGELOG.md,sha256=2Ok5sn4aA_N5UMaUJ4jRbuTeWC1pt7gdFgYnxj2JdKU,5217
34
- opencode_llmstack-0.9.2.data/data/LICENSE,sha256=6G-Otw6BHIM1WJSBlJ04P1rDVCqbDEzKpdOlSr5CqIY,1078
35
- opencode_llmstack-0.9.2.data/data/UPGRADING.md,sha256=0XSNZ9trCviFLH5EL3Jz02fO2_8AfqB8_9aX0-o1bik,24927
36
- opencode_llmstack-0.9.2.dist-info/licenses/LICENSE,sha256=6G-Otw6BHIM1WJSBlJ04P1rDVCqbDEzKpdOlSr5CqIY,1078
37
- opencode_llmstack-0.9.2.dist-info/METADATA,sha256=4HqjCNQ363c6DSkSuOZWSYxDvIujqhEc-DmRFNPhfNI,36323
38
- opencode_llmstack-0.9.2.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
39
- opencode_llmstack-0.9.2.dist-info/entry_points.txt,sha256=soomjpqvl4KzFScgpQbu96vgcLriOtkB9MbiSC0rvZ8,47
40
- opencode_llmstack-0.9.2.dist-info/top_level.txt,sha256=tMv9sDWp8RW_DNNY8cuM4Uy4sND-KwTLcsScl5gdcEQ,9
41
- opencode_llmstack-0.9.2.dist-info/RECORD,,
33
+ opencode_llmstack-0.9.4.data/data/CHANGELOG.md,sha256=58feU0rA9bBYvecDoFaLcwwgezLPkD3MSt0vRUVjdF8,5837
34
+ opencode_llmstack-0.9.4.data/data/LICENSE,sha256=6G-Otw6BHIM1WJSBlJ04P1rDVCqbDEzKpdOlSr5CqIY,1078
35
+ opencode_llmstack-0.9.4.data/data/UPGRADING.md,sha256=0XSNZ9trCviFLH5EL3Jz02fO2_8AfqB8_9aX0-o1bik,24927
36
+ opencode_llmstack-0.9.4.dist-info/licenses/LICENSE,sha256=6G-Otw6BHIM1WJSBlJ04P1rDVCqbDEzKpdOlSr5CqIY,1078
37
+ opencode_llmstack-0.9.4.dist-info/METADATA,sha256=hHxDa4-Iqb7saWESfg82n4DEhFpM0J9uY9hAosnuJAQ,36323
38
+ opencode_llmstack-0.9.4.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
39
+ opencode_llmstack-0.9.4.dist-info/entry_points.txt,sha256=soomjpqvl4KzFScgpQbu96vgcLriOtkB9MbiSC0rvZ8,47
40
+ opencode_llmstack-0.9.4.dist-info/top_level.txt,sha256=tMv9sDWp8RW_DNNY8cuM4Uy4sND-KwTLcsScl5gdcEQ,9
41
+ opencode_llmstack-0.9.4.dist-info/RECORD,,