opencode-llmstack 0.8.0__tar.gz → 0.9.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/PKG-INFO +1 -1
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/app.py +25 -24
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/models.ini +1 -1
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/opencode_llmstack.egg-info/PKG-INFO +1 -1
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/pyproject.toml +1 -1
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/README.md +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/AGENTS.md +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/__init__.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/__main__.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/_platform.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/backends/__init__.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/backends/bedrock.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/check_models.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/cli.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/commands/__init__.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/commands/_helpers.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/commands/activate.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/commands/check.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/commands/download.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/commands/install.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/commands/install_llama_swap.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/commands/reload.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/commands/restart.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/commands/setup.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/commands/start.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/commands/status.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/commands/stop.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/download/__init__.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/download/binary.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/download/ggufs.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/generators/__init__.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/generators/llama_swap.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/generators/opencode.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/paths.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/shell_env.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/llmstack/tiers.py +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/opencode_llmstack.egg-info/SOURCES.txt +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/opencode_llmstack.egg-info/dependency_links.txt +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/opencode_llmstack.egg-info/entry_points.txt +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/opencode_llmstack.egg-info/requires.txt +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/opencode_llmstack.egg-info/top_level.txt +0 -0
- {opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/setup.cfg +0 -0
|
@@ -90,16 +90,13 @@ Routing decision tree (first match wins):
|
|
|
90
90
|
("reasonable context still being built") -> code-ultra
|
|
91
91
|
(else code-smart)
|
|
92
92
|
5. Estimated input tokens <= MID_FIDELITY_CEILING -> code-smart
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
93
|
+
6. Otherwise (long context, top-tier becomes
|
|
94
|
+
expensive/slow, fast tier's 128k window is the
|
|
95
|
+
best fit and it's free) -> code-fast
|
|
96
96
|
(floored at
|
|
97
97
|
code-smart when
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
MULTI_TURN_THRESHOLD,
|
|
101
|
-
since 3B models
|
|
102
|
-
tool-call unreliably)
|
|
98
|
+
n_turns >=
|
|
99
|
+
MULTI_TURN_THRESHOLD)
|
|
103
100
|
|
|
104
101
|
The auto router's effective max context window is
|
|
105
102
|
``[code-fast].ctx_size`` -- fast is the bottom of the step-down
|
|
@@ -173,7 +170,7 @@ MID_FIDELITY_CEILING = int(os.getenv("ROUTER_MID_FIDELITY_CEILING", "32000"))
|
|
|
173
170
|
# Floor the long-context rung at code-smart once a conversation
|
|
174
171
|
# reaches this many user turns -- the presence of tools alone no
|
|
175
172
|
# longer prevents the step-down to code-fast.
|
|
176
|
-
MULTI_TURN_THRESHOLD = int(os.getenv("ROUTER_MULTI_TURN", "
|
|
173
|
+
MULTI_TURN_THRESHOLD = int(os.getenv("ROUTER_MULTI_TURN", "10"))
|
|
177
174
|
AUTO_ALIASES = {"auto", "", None}
|
|
178
175
|
|
|
179
176
|
UNCENSORED_TRIGGERS = re.compile(
|
|
@@ -356,8 +353,7 @@ def classify(body: dict[str, Any]) -> tuple[str, str]:
|
|
|
356
353
|
ULTRA_MODEL, AGENT_MODEL)
|
|
357
354
|
return AGENT_MODEL, f"ultra-trigger->agent ({ULTRA_MODEL} unavailable)"
|
|
358
355
|
|
|
359
|
-
|
|
360
|
-
n_turns = len(messages) if messages else 0
|
|
356
|
+
n_turns = sum(1 for m in (messages or []) if m.get("role") == "user")
|
|
361
357
|
has_code_signal = (
|
|
362
358
|
_matches(CODE_BLOCK, messages, prompt)
|
|
363
359
|
or _matches(AGENT_SIGNALS, messages, prompt)
|
|
@@ -368,14 +364,14 @@ def classify(body: dict[str, Any]) -> tuple[str, str]:
|
|
|
368
364
|
# Plan track is orthogonal to the code fidelity ladder: ``plan`` is a
|
|
369
365
|
# chat-tuned model meant for design / "should we" discussions. Only
|
|
370
366
|
# take it when nothing about the request says "I'm about to write
|
|
371
|
-
# code" (no triple-backticks, no agent verbs
|
|
372
|
-
#
|
|
373
|
-
#
|
|
374
|
-
# to the
|
|
375
|
-
#
|
|
367
|
+
# code" (no triple-backticks, no agent verbs). Tools are stripped
|
|
368
|
+
# from the request body before dispatch (see ``_handle_completion``),
|
|
369
|
+
# so their presence here does not block plan routing.
|
|
370
|
+
# Only route to plan if the input fits in the planner's ctx_size --
|
|
371
|
+
# past that we fall through to the coding ladder which has tiers
|
|
372
|
+
# (smart, fast) explicitly sized for larger contexts.
|
|
376
373
|
if (
|
|
377
|
-
not
|
|
378
|
-
and not has_code_signal
|
|
374
|
+
not has_code_signal
|
|
379
375
|
and _matches(PLAN_SIGNALS, messages, prompt)
|
|
380
376
|
):
|
|
381
377
|
plan_tier = TIER_BY_ALIAS.get(PLAN_MODEL)
|
|
@@ -401,12 +397,12 @@ def classify(body: dict[str, Any]) -> tuple[str, str]:
|
|
|
401
397
|
if est <= MID_FIDELITY_CEILING:
|
|
402
398
|
return AGENT_MODEL, f"mid-fidelity tokens~{est}<={MID_FIDELITY_CEILING}"
|
|
403
399
|
|
|
404
|
-
# Rung 3: long context -- step down to fast
|
|
405
|
-
#
|
|
406
|
-
#
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
return AGENT_MODEL, f"long-context tokens~{est}>{MID_FIDELITY_CEILING} ({
|
|
400
|
+
# Rung 3: long context -- step down to fast. Floor at smart only
|
|
401
|
+
# when the multi-turn threshold is hit; tools alone no longer
|
|
402
|
+
# prevent the step-down (plan tiers strip tools before dispatch,
|
|
403
|
+
# and code-fast is a hosted model that tool-calls reliably).
|
|
404
|
+
if n_turns >= MULTI_TURN_THRESHOLD:
|
|
405
|
+
return AGENT_MODEL, f"long-context tokens~{est}>{MID_FIDELITY_CEILING} (user-turns={n_turns}>={MULTI_TURN_THRESHOLD} floor)"
|
|
410
406
|
return FAST_MODEL, f"long-context tokens~{est}>{MID_FIDELITY_CEILING}"
|
|
411
407
|
|
|
412
408
|
|
|
@@ -626,6 +622,11 @@ async def _handle_completion(req: Request, path: str) -> Response:
|
|
|
626
622
|
mutated = True
|
|
627
623
|
|
|
628
624
|
chosen_name = body.get("model")
|
|
625
|
+
if chosen_name in {PLAN_MODEL, UNCENSORED_MODEL} and body.get("tools"):
|
|
626
|
+
log.info("plan tier %s: stripping tools from request", chosen_name)
|
|
627
|
+
body.pop("tools")
|
|
628
|
+
body.pop("tool_choice", None)
|
|
629
|
+
mutated = True
|
|
629
630
|
tier = _resolve_tier(chosen_name)
|
|
630
631
|
if tier is not None and _inject_sampler(body, tier):
|
|
631
632
|
mutated = True
|
|
@@ -300,7 +300,7 @@ description = Mistral-Small 3.2 24B Heretic - no-filter planning
|
|
|
300
300
|
;
|
|
301
301
|
high_fidelity_ceiling = 12000 ; tokens; below this, top-tier model is still cheap+fast (and ultra ctx_size = 2 * this)
|
|
302
302
|
mid_fidelity_ceiling = 32000 ; tokens; smart's sweet spot up to here, then step down to fast (smart ctx_size = 2 * this)
|
|
303
|
-
multi_turn =
|
|
303
|
+
multi_turn = 10 ; user-turn count at or above which the long-context rung is floored at code-smart
|
|
304
304
|
agent_signal_words = implement, fix bug, write a function, refactor, edit, patch, debug, run tests, build it
|
|
305
305
|
plan_signal_words = design, architect, approach, trade-off, should we, how would you, explain why, think through, compare options, brainstorm, root cause
|
|
306
306
|
uncensored_triggers = [nofilter], [uncensored], [heretic], "uncensored:", "nofilter:" (line start)
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "opencode-llmstack"
|
|
7
|
-
version = "0.
|
|
7
|
+
version = "0.9.1"
|
|
8
8
|
description = "Multi-tier local LLM stack: llama-swap + FastAPI auto-router + opencode wiring."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/opencode_llmstack.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/opencode_llmstack.egg-info/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|
{opencode_llmstack-0.8.0 → opencode_llmstack-0.9.1}/opencode_llmstack.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|