opencode-llmstack 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
llmstack/app.py CHANGED
@@ -90,16 +90,13 @@ Routing decision tree (first match wins):
90
90
  ("reasonable context still being built") -> code-ultra
91
91
  (else code-smart)
92
92
  5. Estimated input tokens <= MID_FIDELITY_CEILING -> code-smart
93
- 6. Otherwise (long context, top-tier becomes
94
- expensive/slow, fast tier's 128k window is the
95
- best fit and it's free) -> code-fast
93
+ 6. Otherwise (long context, top-tier becomes
94
+ expensive/slow, fast tier's 128k window is the
95
+ best fit and it's free) -> code-fast
96
96
  (floored at
97
97
  code-smart when
98
- ``tools[]`` is set
99
- or n_turns >=
100
- MULTI_TURN_THRESHOLD,
101
- since 3B models
102
- tool-call unreliably)
98
+ n_turns >=
99
+ MULTI_TURN_THRESHOLD)
103
100
 
104
101
  The auto router's effective max context window is
105
102
  ``[code-fast].ctx_size`` -- fast is the bottom of the step-down
@@ -356,7 +353,6 @@ def classify(body: dict[str, Any]) -> tuple[str, str]:
356
353
  ULTRA_MODEL, AGENT_MODEL)
357
354
  return AGENT_MODEL, f"ultra-trigger->agent ({ULTRA_MODEL} unavailable)"
358
355
 
359
- has_tools = bool(body.get("tools"))
360
356
  n_turns = len(messages) if messages else 0
361
357
  has_code_signal = (
362
358
  _matches(CODE_BLOCK, messages, prompt)
@@ -368,14 +364,14 @@ def classify(body: dict[str, Any]) -> tuple[str, str]:
368
364
  # Plan track is orthogonal to the code fidelity ladder: ``plan`` is a
369
365
  # chat-tuned model meant for design / "should we" discussions. Only
370
366
  # take it when nothing about the request says "I'm about to write
371
- # code" (no triple-backticks, no agent verbs, no tool calls). And
372
- # only if the input fits in the planner's ctx_size -- past that we'd
373
- # be sending a request the planner can't hold, so we fall through
374
- # to the coding ladder, which has tiers (smart, fast) explicitly
375
- # sized for larger contexts.
367
+ # code" (no triple-backticks, no agent verbs). Tools are stripped
368
+ # from the request body before dispatch (see ``_handle_completion``),
369
+ # so their presence here does not block plan routing.
370
+ # Only route to plan if the input fits in the planner's ctx_size --
371
+ # past that we fall through to the coding ladder which has tiers
372
+ # (smart, fast) explicitly sized for larger contexts.
376
373
  if (
377
- not has_tools
378
- and not has_code_signal
374
+ not has_code_signal
379
375
  and _matches(PLAN_SIGNALS, messages, prompt)
380
376
  ):
381
377
  plan_tier = TIER_BY_ALIAS.get(PLAN_MODEL)
@@ -401,12 +397,12 @@ def classify(body: dict[str, Any]) -> tuple[str, str]:
401
397
  if est <= MID_FIDELITY_CEILING:
402
398
  return AGENT_MODEL, f"mid-fidelity tokens~{est}<={MID_FIDELITY_CEILING}"
403
399
 
404
- # Rung 3: long context -- step down to fast (128k YaRN, free,
405
- # always-resident). Floor at smart when tools/agent loop is in
406
- # play; the 3B coder doesn't tool-call reliably.
407
- if has_tools or n_turns >= MULTI_TURN_THRESHOLD:
408
- why = "tools" if has_tools else f"turns={n_turns}"
409
- return AGENT_MODEL, f"long-context tokens~{est}>{MID_FIDELITY_CEILING} ({why} floor)"
400
+ # Rung 3: long context -- step down to fast. Floor at smart only
401
+ # when the multi-turn threshold is hit; tools alone no longer
402
+ # prevent the step-down (plan tiers strip tools before dispatch,
403
+ # and code-fast is a hosted model that tool-calls reliably).
404
+ if n_turns >= MULTI_TURN_THRESHOLD:
405
+ return AGENT_MODEL, f"long-context tokens~{est}>{MID_FIDELITY_CEILING} (turns={n_turns} floor)"
410
406
  return FAST_MODEL, f"long-context tokens~{est}>{MID_FIDELITY_CEILING}"
411
407
 
412
408
 
@@ -626,6 +622,11 @@ async def _handle_completion(req: Request, path: str) -> Response:
626
622
  mutated = True
627
623
 
628
624
  chosen_name = body.get("model")
625
+ if chosen_name in {PLAN_MODEL, UNCENSORED_MODEL} and body.get("tools"):
626
+ log.info("plan tier %s: stripping tools from request", chosen_name)
627
+ body.pop("tools")
628
+ body.pop("tool_choice", None)
629
+ mutated = True
629
630
  tier = _resolve_tier(chosen_name)
630
631
  if tier is not None and _inject_sampler(body, tier):
631
632
  mutated = True
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: opencode-llmstack
3
- Version: 0.8.0
3
+ Version: 0.9.0
4
4
  Summary: Multi-tier local LLM stack: llama-swap + FastAPI auto-router + opencode wiring.
5
5
  Author: llmstack
6
6
  License: MIT
@@ -2,7 +2,7 @@ llmstack/AGENTS.md,sha256=4DVUkqJ1-EP-cDNRCpznzghOOX6dAMbVWdcwyfFCALw,528
2
2
  llmstack/__init__.py,sha256=EKHybZtPxLqFWkgkIoYBameu5_Tf9j4UewpANKm0fMU,855
3
3
  llmstack/__main__.py,sha256=wXHd5-BmCCHUfNEmy2rbilBSyVhi4KD1dSIO_4NlxuE,199
4
4
  llmstack/_platform.py,sha256=eDY3T9krkaBigG5xXxqzIbH3MhdZqX3BWe7bozOsAso,13099
5
- llmstack/app.py,sha256=YfglFlzrp58mh8K1srQA6KNqc9cF41w1xnWnUrLW0IE,27839
5
+ llmstack/app.py,sha256=Fha6Ivb-lsnoWVAK3ekzRlaLqQ1bIEavipgPP9W_TuQ,27888
6
6
  llmstack/check_models.py,sha256=WvTS2Td4acp-Q0-yWXUgXAgAgFOmpxiaeSDuAoivirw,4559
7
7
  llmstack/cli.py,sha256=Om70PzHrmU81y2Mw1sB6eeUs1fRHP0PnsCEVNC0UNvI,11341
8
8
  llmstack/models.ini,sha256=wWAmbfKUCacjLXpBpH7tcgasHgMyOrhF_AmDLsmzptI,20339
@@ -30,8 +30,8 @@ llmstack/download/ggufs.py,sha256=2hCr-svUiPIV2I3ruwTbXo6lPn9m-VBOqa3DFbvdIcA,54
30
30
  llmstack/generators/__init__.py,sha256=LfbcReuyYBCdVuT9J5RKo7-f8n585YBU3Hus6DsxqTs,1189
31
31
  llmstack/generators/llama_swap.py,sha256=KdYH9N6TJECotZvyxvAjaa3kRyzn4YOi2T6D2UdyVKw,14785
32
32
  llmstack/generators/opencode.py,sha256=s_FrLXUBnLzRGQovl1PcAEs7V_P52wT1vnvvxMcKfs4,11203
33
- opencode_llmstack-0.8.0.dist-info/METADATA,sha256=kskFW_TAESnhrsu3ims1bMeLgANnnfWK8YDaaSlbnGQ,34914
34
- opencode_llmstack-0.8.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
35
- opencode_llmstack-0.8.0.dist-info/entry_points.txt,sha256=soomjpqvl4KzFScgpQbu96vgcLriOtkB9MbiSC0rvZ8,47
36
- opencode_llmstack-0.8.0.dist-info/top_level.txt,sha256=tMv9sDWp8RW_DNNY8cuM4Uy4sND-KwTLcsScl5gdcEQ,9
37
- opencode_llmstack-0.8.0.dist-info/RECORD,,
33
+ opencode_llmstack-0.9.0.dist-info/METADATA,sha256=WSRM1_jNIIwH9zBhb41tvEiHDPSbdara_FoHqFLgWj4,34914
34
+ opencode_llmstack-0.9.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
35
+ opencode_llmstack-0.9.0.dist-info/entry_points.txt,sha256=soomjpqvl4KzFScgpQbu96vgcLriOtkB9MbiSC0rvZ8,47
36
+ opencode_llmstack-0.9.0.dist-info/top_level.txt,sha256=tMv9sDWp8RW_DNNY8cuM4Uy4sND-KwTLcsScl5gdcEQ,9
37
+ opencode_llmstack-0.9.0.dist-info/RECORD,,