opencode-llmstack 0.8.0__tar.gz → 0.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/PKG-INFO +1 -1
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/app.py +23 -22
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/opencode_llmstack.egg-info/PKG-INFO +1 -1
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/pyproject.toml +1 -1
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/README.md +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/AGENTS.md +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/__init__.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/__main__.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/_platform.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/backends/__init__.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/backends/bedrock.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/check_models.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/cli.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/commands/__init__.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/commands/_helpers.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/commands/activate.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/commands/check.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/commands/download.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/commands/install.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/commands/install_llama_swap.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/commands/reload.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/commands/restart.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/commands/setup.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/commands/start.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/commands/status.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/commands/stop.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/download/__init__.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/download/binary.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/download/ggufs.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/generators/__init__.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/generators/llama_swap.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/generators/opencode.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/models.ini +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/paths.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/shell_env.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/llmstack/tiers.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/opencode_llmstack.egg-info/SOURCES.txt +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/opencode_llmstack.egg-info/dependency_links.txt +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/opencode_llmstack.egg-info/entry_points.txt +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/opencode_llmstack.egg-info/requires.txt +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/opencode_llmstack.egg-info/top_level.txt +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.0}/setup.cfg +0 -0
llmstack/app.py

@@ -90,16 +90,13 @@ Routing decision tree (first match wins):
        ("reasonable context still being built") -> code-ultra
        (else code-smart)
     5. Estimated input tokens <= MID_FIDELITY_CEILING -> code-smart
-
-
-
+    6. Otherwise (long context, top-tier becomes
+       expensive/slow, fast tier's 128k window is the
+       best fit and it's free) -> code-fast
        (floored at
        code-smart when
-
-
-       MULTI_TURN_THRESHOLD,
-       since 3B models
-       tool-call unreliably)
+       n_turns >=
+       MULTI_TURN_THRESHOLD)

 The auto router's effective max context window is
 ``[code-fast].ctx_size`` -- fast is the bottom of the step-down
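The new rung 6 turns the long-context step-down into a two-way split on estimated input tokens and turn count. The sketch below restates that ladder as standalone Python; `MID_FIDELITY_CEILING` and `MULTI_TURN_THRESHOLD` are real names from `app.py`, but the numeric values, the chars/4 token estimate, and the `pick_code_tier` helper are assumptions made for illustration, not the package's actual implementation.

```python
# Illustrative sketch only -- values and helpers below are assumed,
# not taken from opencode-llmstack itself.
from typing import Any

MID_FIDELITY_CEILING = 32_000   # assumed ceiling for the smart tier
MULTI_TURN_THRESHOLD = 6        # assumed floor trigger for long chats


def estimate_tokens(messages: list[dict[str, Any]]) -> int:
    """Rough chars/4 heuristic -- an assumption, not app.py's estimator."""
    chars = sum(len(str(m.get("content", ""))) for m in messages)
    return chars // 4


def pick_code_tier(messages: list[dict[str, Any]]) -> str:
    """Walk rungs 5-6 of the docstring's decision tree."""
    est = estimate_tokens(messages)
    n_turns = len(messages)
    if est <= MID_FIDELITY_CEILING:
        return "code-smart"   # rung 5: mid-fidelity input
    if n_turns >= MULTI_TURN_THRESHOLD:
        return "code-smart"   # rung 6 floor: long multi-turn session
    return "code-fast"        # rung 6: long context fits the 128k window
```

The 0.9.0 change is visible in the floor condition: turn count alone decides it, where 0.8.0 also floored whenever tools were attached.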
@@ -356,7 +353,6 @@ def classify(body: dict[str, Any]) -> tuple[str, str]:
                           ULTRA_MODEL, AGENT_MODEL)
             return AGENT_MODEL, f"ultra-trigger->agent ({ULTRA_MODEL} unavailable)"

-    has_tools = bool(body.get("tools"))
     n_turns = len(messages) if messages else 0
     has_code_signal = (
         _matches(CODE_BLOCK, messages, prompt)
@@ -368,14 +364,14 @@ def classify(body: dict[str, Any]) -> tuple[str, str]:
     # Plan track is orthogonal to the code fidelity ladder: ``plan`` is a
     # chat-tuned model meant for design / "should we" discussions. Only
     # take it when nothing about the request says "I'm about to write
-    # code" (no triple-backticks, no agent verbs
-    #
-    #
-    # to the
-    #
+    # code" (no triple-backticks, no agent verbs). Tools are stripped
+    # from the request body before dispatch (see ``_handle_completion``),
+    # so their presence here does not block plan routing.
+    # Only route to plan if the input fits in the planner's ctx_size --
+    # past that we fall through to the coding ladder which has tiers
+    # (smart, fast) explicitly sized for larger contexts.
     if (
-        not has_tools
-        and not has_code_signal
+        not has_code_signal
         and _matches(PLAN_SIGNALS, messages, prompt)
     ):
         plan_tier = TIER_BY_ALIAS.get(PLAN_MODEL)
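The net effect of dropping `has_tools` from the gate: a planning question now reaches the plan tier even when the client attaches tools, since those are removed later in `_handle_completion`. Below is a simplified stand-in for the gate; the regexes and the `wants_plan` wrapper are invented for the example, and only the `_matches`/`PLAN_SIGNALS`/`CODE_BLOCK` names come from `app.py`.

```python
import re

# Toy stand-ins for app.py's PLAN_SIGNALS / CODE_BLOCK patterns.
PLAN_SIGNALS = re.compile(r"\b(should we|design|approach|trade-?offs?)\b", re.I)
CODE_BLOCK = re.compile(r"```")


def wants_plan(prompt: str, has_tools: bool) -> bool:
    has_code_signal = bool(CODE_BLOCK.search(prompt))
    # 0.8.0 gate (reconstructed): tool presence blocked the plan track.
    # return not has_tools and not has_code_signal and bool(PLAN_SIGNALS.search(prompt))
    # 0.9.0 gate: only a code signal blocks it; tools are stripped later.
    return not has_code_signal and bool(PLAN_SIGNALS.search(prompt))


# With tools attached, 0.8.0 would have returned False here; 0.9.0 routes to plan.
assert wants_plan("Should we split the service into two processes?", has_tools=True)
```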
@@ -401,12 +397,12 @@ def classify(body: dict[str, Any]) -> tuple[str, str]:
     if est <= MID_FIDELITY_CEILING:
         return AGENT_MODEL, f"mid-fidelity tokens~{est}<={MID_FIDELITY_CEILING}"

-    # Rung 3: long context -- step down to fast
-    #
-    #
-
-
-    return AGENT_MODEL, f"long-context tokens~{est}>{MID_FIDELITY_CEILING} ({
+    # Rung 3: long context -- step down to fast. Floor at smart only
+    # when the multi-turn threshold is hit; tools alone no longer
+    # prevent the step-down (plan tiers strip tools before dispatch,
+    # and code-fast is a hosted model that tool-calls reliably).
+    if n_turns >= MULTI_TURN_THRESHOLD:
+        return AGENT_MODEL, f"long-context tokens~{est}>{MID_FIDELITY_CEILING} (turns={n_turns} floor)"
     return FAST_MODEL, f"long-context tokens~{est}>{MID_FIDELITY_CEILING}"


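Reusing the hypothetical `pick_code_tier` sketch from earlier, the rung-3 behavior can be spot-checked: a huge single-turn request steps down to fast, while a conversation past the turn threshold stays floored at smart. The message shapes and sizes below are arbitrary test values, not anything shipped in the package.

```python
# Arbitrary test inputs; only the relative sizes matter.
long_single_turn = [{"role": "user", "content": "x" * 200_000}]    # ~50k est. tokens, 1 turn
long_multi_turn = [{"role": "user", "content": "x" * 40_000}] * 8  # ~80k est. tokens, 8 turns

assert pick_code_tier(long_single_turn) == "code-fast"    # step-down, no floor
assert pick_code_tier(long_multi_turn) == "code-smart"    # multi-turn floor holds
```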
@@ -626,6 +622,11 @@ async def _handle_completion(req: Request, path: str) -> Response:
         mutated = True

     chosen_name = body.get("model")
+    if chosen_name in {PLAN_MODEL, UNCENSORED_MODEL} and body.get("tools"):
+        log.info("plan tier %s: stripping tools from request", chosen_name)
+        body.pop("tools")
+        body.pop("tool_choice", None)
+        mutated = True
     tier = _resolve_tier(chosen_name)
     if tier is not None and _inject_sampler(body, tier):
         mutated = True
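Extracted into a standalone function, the new stripping step looks like the sketch below. The `pop` sequence mirrors the added lines; the logger setup and the alias strings bound to `PLAN_MODEL`/`UNCENSORED_MODEL` are assumptions, since the diff does not show their definitions.

```python
import logging

log = logging.getLogger("llmstack")
PLAN_MODEL = "plan"              # assumed alias string
UNCENSORED_MODEL = "uncensored"  # assumed alias string


def strip_tools_for_plan(body: dict) -> bool:
    """Drop tools/tool_choice for chat-only tiers; True if body mutated."""
    chosen_name = body.get("model")
    if chosen_name in {PLAN_MODEL, UNCENSORED_MODEL} and body.get("tools"):
        log.info("plan tier %s: stripping tools from request", chosen_name)
        body.pop("tools")
        body.pop("tool_choice", None)  # remove the paired selector too
        return True
    return False


body = {"model": "plan", "tools": [{"type": "function"}], "tool_choice": "auto"}
assert strip_tools_for_plan(body) and "tools" not in body and "tool_choice" not in body
```

Popping `tool_choice` alongside `tools` matters: some OpenAI-compatible servers reject a request that specifies `tool_choice` without a `tools` array.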
pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "opencode-llmstack"
-version = "0.8.0"
+version = "0.9.0"
 description = "Multi-tier local LLM stack: llama-swap + FastAPI auto-router + opencode wiring."
 readme = "README.md"
 requires-python = ">=3.11"