opencode-llmstack 0.8.0__tar.gz → 0.9.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/PKG-INFO +1 -1
  2. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/app.py +25 -24
  3. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/models.ini +1 -1
  4. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/opencode_llmstack.egg-info/PKG-INFO +1 -1
  5. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/pyproject.toml +1 -1
  6. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/README.md +0 -0
  7. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/AGENTS.md +0 -0
  8. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/__init__.py +0 -0
  9. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/__main__.py +0 -0
  10. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/_platform.py +0 -0
  11. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/backends/__init__.py +0 -0
  12. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/backends/bedrock.py +0 -0
  13. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/check_models.py +0 -0
  14. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/cli.py +0 -0
  15. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/commands/__init__.py +0 -0
  16. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/commands/_helpers.py +0 -0
  17. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/commands/activate.py +0 -0
  18. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/commands/check.py +0 -0
  19. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/commands/download.py +0 -0
  20. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/commands/install.py +0 -0
  21. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/commands/install_llama_swap.py +0 -0
  22. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/commands/reload.py +0 -0
  23. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/commands/restart.py +0 -0
  24. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/commands/setup.py +0 -0
  25. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/commands/start.py +0 -0
  26. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/commands/status.py +0 -0
  27. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/commands/stop.py +0 -0
  28. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/download/__init__.py +0 -0
  29. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/download/binary.py +0 -0
  30. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/download/ggufs.py +0 -0
  31. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/generators/__init__.py +0 -0
  32. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/generators/llama_swap.py +0 -0
  33. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/generators/opencode.py +0 -0
  34. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/paths.py +0 -0
  35. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/shell_env.py +0 -0
  36. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/tiers.py +0 -0
  37. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/opencode_llmstack.egg-info/SOURCES.txt +0 -0
  38. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/opencode_llmstack.egg-info/dependency_links.txt +0 -0
  39. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/opencode_llmstack.egg-info/entry_points.txt +0 -0
  40. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/opencode_llmstack.egg-info/requires.txt +0 -0
  41. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/opencode_llmstack.egg-info/top_level.txt +0 -0
  42. {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/setup.cfg +0 -0
{opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: opencode-llmstack
- Version: 0.8.0
+ Version: 0.9.1
  Summary: Multi-tier local LLM stack: llama-swap + FastAPI auto-router + opencode wiring.
  Author: llmstack
  License: MIT
{opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/app.py
@@ -90,16 +90,13 @@ Routing decision tree (first match wins):
  ("reasonable context still being built") -> code-ultra
  (else code-smart)
  5. Estimated input tokens <= MID_FIDELITY_CEILING -> code-smart
- 6. Otherwise (long context, top-tier becomes
- expensive/slow, fast tier's 128k window is the
- best fit and it's free) -> code-fast
+ 6. Otherwise (long context, top-tier becomes
+ expensive/slow, fast tier's 128k window is the
+ best fit and it's free) -> code-fast
  (floored at
  code-smart when
- ``tools[]`` is set
- or n_turns >=
- MULTI_TURN_THRESHOLD,
- since 3B models
- tool-call unreliably)
+ n_turns >=
+ MULTI_TURN_THRESHOLD)

  The auto router's effective max context window is
  ``[code-fast].ctx_size`` -- fast is the bottom of the step-down
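For orientation, the rewritten rungs condense to roughly the sketch below. It is an illustrative reduction, not the package's code: route_long_context is a made-up name, and the bare integers stand in for the MID_FIDELITY_CEILING and MULTI_TURN_THRESHOLD constants that app.py actually reads from the environment.

    def route_long_context(est_tokens: int, n_user_turns: int) -> str:
        # Rungs 2-3 of the 0.9.1 ladder, condensed (hypothetical helper).
        if est_tokens <= 32000:        # MID_FIDELITY_CEILING default
            return "code-smart"        # mid-fidelity sweet spot
        if n_user_turns >= 10:         # MULTI_TURN_THRESHOLD default (was 6)
            return "code-smart"        # floor; tools[] alone no longer floors
        return "code-fast"             # long context: 128k window, free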
@@ -173,7 +170,7 @@ MID_FIDELITY_CEILING = int(os.getenv("ROUTER_MID_FIDELITY_CEILING", "32000"))
  # Floor the long-context rung at code-smart whenever a tool-call
  # protocol is in play -- 3B models tool-call unreliably regardless of
  # how big their context window is.
- MULTI_TURN_THRESHOLD = int(os.getenv("ROUTER_MULTI_TURN", "6"))
+ MULTI_TURN_THRESHOLD = int(os.getenv("ROUTER_MULTI_TURN", "10"))
  AUTO_ALIASES = {"auto", "", None}

  UNCENSORED_TRIGGERS = re.compile(
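The threshold stays overridable through the same environment variable, so the 0.8.0 behaviour can be pinned if wanted. A minimal sketch, assuming the variable is set before app.py is imported (the constant is read at import time); the variable name comes from the diff, the value is just the old default:

    import os

    os.environ["ROUTER_MULTI_TURN"] = "6"  # restore the 0.8.0 floor threshold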
@@ -356,8 +353,7 @@ def classify(body: dict[str, Any]) -> tuple[str, str]:
  ULTRA_MODEL, AGENT_MODEL)
  return AGENT_MODEL, f"ultra-trigger->agent ({ULTRA_MODEL} unavailable)"

- has_tools = bool(body.get("tools"))
- n_turns = len(messages) if messages else 0
+ n_turns = sum(1 for m in (messages or []) if m.get("role") == "user")
  has_code_signal = (
  _matches(CODE_BLOCK, messages, prompt)
  or _matches(AGENT_SIGNALS, messages, prompt)
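The n_turns rewrite fixes an overcount: an agent transcript interleaves assistant and tool messages, so len(messages) grows several times faster than the number of actual user turns. A quick illustration with a made-up transcript:

    messages = [
        {"role": "system", "content": "..."},
        {"role": "user", "content": "fix the bug"},
        {"role": "assistant", "content": "", "tool_calls": []},
        {"role": "tool", "content": "pytest output ..."},
        {"role": "assistant", "content": "done"},
        {"role": "user", "content": "now add a regression test"},
    ]
    old_turns = len(messages) if messages else 0                             # 6
    new_turns = sum(1 for m in (messages or []) if m.get("role") == "user")  # 2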
@@ -368,14 +364,14 @@ def classify(body: dict[str, Any]) -> tuple[str, str]:
  # Plan track is orthogonal to the code fidelity ladder: ``plan`` is a
  # chat-tuned model meant for design / "should we" discussions. Only
  # take it when nothing about the request says "I'm about to write
- # code" (no triple-backticks, no agent verbs, no tool calls). And
- # only if the input fits in the planner's ctx_size -- past that we'd
- # be sending a request the planner can't hold, so we fall through
- # to the coding ladder, which has tiers (smart, fast) explicitly
- # sized for larger contexts.
+ # code" (no triple-backticks, no agent verbs). Tools are stripped
+ # from the request body before dispatch (see ``_handle_completion``),
+ # so their presence here does not block plan routing.
+ # Only route to plan if the input fits in the planner's ctx_size --
+ # past that we fall through to the coding ladder which has tiers
+ # (smart, fast) explicitly sized for larger contexts.
  if (
- not has_tools
- and not has_code_signal
+ not has_code_signal
  and _matches(PLAN_SIGNALS, messages, prompt)
  ):
  plan_tier = TIER_BY_ALIAS.get(PLAN_MODEL)
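Condensed, the relaxed plan gate is roughly the predicate below; plan_gate and its parameters are hypothetical stand-ins (the real code matches PLAN_SIGNALS against the messages and looks up the planner tier's ctx_size):

    def plan_gate(has_code_signal: bool, has_plan_signal: bool,
                  est_tokens: int, planner_ctx: int) -> bool:
        # Tools no longer veto plan routing -- they are stripped later in
        # _handle_completion -- so only code signals and the planner's
        # context budget gate the plan track.
        return (not has_code_signal
                and has_plan_signal
                and est_tokens <= planner_ctx)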
@@ -401,12 +397,12 @@ def classify(body: dict[str, Any]) -> tuple[str, str]:
  if est <= MID_FIDELITY_CEILING:
  return AGENT_MODEL, f"mid-fidelity tokens~{est}<={MID_FIDELITY_CEILING}"

- # Rung 3: long context -- step down to fast (128k YaRN, free,
- # always-resident). Floor at smart when tools/agent loop is in
- # play; the 3B coder doesn't tool-call reliably.
- if has_tools or n_turns >= MULTI_TURN_THRESHOLD:
- why = "tools" if has_tools else f"turns={n_turns}"
- return AGENT_MODEL, f"long-context tokens~{est}>{MID_FIDELITY_CEILING} ({why} floor)"
+ # Rung 3: long context -- step down to fast. Floor at smart only
+ # when the multi-turn threshold is hit; tools alone no longer
+ # prevent the step-down (plan tiers strip tools before dispatch,
+ # and code-fast is a hosted model that tool-calls reliably).
+ if n_turns >= MULTI_TURN_THRESHOLD:
+ return AGENT_MODEL, f"long-context tokens~{est}>{MID_FIDELITY_CEILING} (user-turns={n_turns}>={MULTI_TURN_THRESHOLD} floor)"
  return FAST_MODEL, f"long-context tokens~{est}>{MID_FIDELITY_CEILING}"

@@ -626,6 +622,11 @@ async def _handle_completion(req: Request, path: str) -> Response:
  mutated = True

  chosen_name = body.get("model")
+ if chosen_name in {PLAN_MODEL, UNCENSORED_MODEL} and body.get("tools"):
+ log.info("plan tier %s: stripping tools from request", chosen_name)
+ body.pop("tools")
+ body.pop("tool_choice", None)
+ mutated = True
  tier = _resolve_tier(chosen_name)
  if tier is not None and _inject_sampler(body, tier):
  mutated = True
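The effect of the new strip on a plan-tier request, sketched with an illustrative payload (the real PLAN_MODEL alias comes from models.ini):

    body = {
        "model": "plan",
        "messages": [{"role": "user", "content": "compare options"}],
        "tools": [{"type": "function", "function": {"name": "edit_file"}}],
        "tool_choice": "auto",
    }
    if body.get("tools"):
        body.pop("tools")
        body.pop("tool_choice", None)
    # body now carries only model + messages, so the chat-tuned planner
    # never sees a tool-call protocol it handles poorly.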
{opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/models.ini
@@ -300,7 +300,7 @@ description = Mistral-Small 3.2 24B Heretic - no-filter planning
  ;
  high_fidelity_ceiling = 12000 ; tokens; below this, top-tier model is still cheap+fast (and ultra ctx_size = 2 * this)
  mid_fidelity_ceiling = 32000 ; tokens; smart's sweet spot up to here, then step down to fast (smart ctx_size = 2 * this)
- multi_turn = 6 ; turn count that floors the long-context rung at code-smart
+ multi_turn = 10 ; turn count that floors the long-context rung at code-smart
  agent_signal_words = implement, fix bug, write a function, refactor, edit, patch, debug, run tests, build it
  plan_signal_words = design, architect, approach, trade-off, should we, how would you, explain why, think through, compare options, brainstorm, root cause
  uncensored_triggers = [nofilter], [uncensored], [heretic], "uncensored:", "nofilter:" (line start)
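For reference, values like this parse with the standard library as long as inline ';' comments are enabled; the [router] section name below is a placeholder, since the hunk does not show which models.ini section these keys live in:

    import configparser

    cfg = configparser.ConfigParser(inline_comment_prefixes=(";",))
    cfg.read_string(
        "[router]\n"
        "multi_turn = 10 ; turn count that floors the long-context rung at code-smart\n"
    )
    print(cfg.getint("router", "multi_turn"))  # -> 10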
{opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/opencode_llmstack.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: opencode-llmstack
- Version: 0.8.0
+ Version: 0.9.1
  Summary: Multi-tier local LLM stack: llama-swap + FastAPI auto-router + opencode wiring.
  Author: llmstack
  License: MIT
{opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "opencode-llmstack"
- version = "0.8.0"
+ version = "0.9.1"
  description = "Multi-tier local LLM stack: llama-swap + FastAPI auto-router + opencode wiring."
  readme = "README.md"
  requires-python = ">=3.11"