legion-llm 0.14.8 → 0.14.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/lib/legion/llm/inference/executor/routing.rb +6 -9
- data/lib/legion/llm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f67a1a83e7213a2e50437b07e71afd74302c2b1da8229708ec8d99506d45354e
|
|
4
|
+
data.tar.gz: fa1dcc7ae0d4b64dd2716271dd5c50778ff178fa3987704b16302c00b75029ab
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b3c1802c7bc32daa15d15c0fc8b16923d8fac48531adc5bcfb2997eec6234534166b85c43e9262774dedb63e28a7a62c148614bdac53e00c15fe44598770823b
|
|
7
|
+
data.tar.gz: cc6ae96b0bfc1e1e54298b028334d1bd1f19e18147ac0d7a7d61bb849c86e157ff26b986082be3027ba4199570481d91b960ce6eb1d82b2ec5aa66d787d3a9b7
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,18 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.14.9] - 2026-06-25
|
|
4
|
+
|
|
5
|
+
### Fixed
|
|
6
|
+
|
|
7
|
+
- Routing token estimate (`estimate_request_tokens`) now includes the system prompt, tool definitions, and thinking config — matching what `enforce_final_context_budget!` checks at dispatch time. Previously it counted only messages, so large-context requests (e.g. Copilot Chat with 81 tools + system prompt) were routed to small-context models and failed with ContextOverflow at dispatch.
|
|
8
|
+
- `resolve_routing_state` always consults `request_lane` when Inventory has lanes, removing the gate that required explicit routing flags. Enables weight-based lane selection for all callers including `Legion::LLM.chat`.
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
|
|
12
|
+
- `:subcall` profile in `Inference::Profile` — skips all pipeline steps except routing, provider_call, and metering. Used for in-pipeline LLM sub-calls (debate, lex-knowledge, lex-apollo) to avoid recursive full-pipeline execution.
|
|
13
|
+
- `Executor.new(request, skip_pipeline: true)` kwarg forces `:subcall` profile without requiring caller identity manipulation.
|
|
14
|
+
- `dispatch_chat` unified path — builds a `Request` via `from_chat_args` and runs the Executor directly, detecting in-pipeline context via `Thread.current[:legion_llm_in_pipeline]`.
|
|
15
|
+
|
|
3
16
|
## [0.14.8] - 2026-06-25
|
|
4
17
|
|
|
5
18
|
### Fixed
|
|
data/lib/legion/llm/inference/executor/routing.rb
CHANGED
|
@@ -199,14 +199,15 @@ module Legion
|
|
|
199
199
|
end
|
|
200
200
|
|
|
201
201
|
def estimate_request_tokens
|
|
202
|
-
# Estimate total tokens from current request messages + conversation history.
|
|
203
|
-
# This is used by the router to exclude models whose context window can't fit.
|
|
204
202
|
all_messages = []
|
|
205
203
|
all_messages.concat(@enrichments['context:conversation_history'] || [])
|
|
206
204
|
all_messages.concat(@request.messages || [])
|
|
207
|
-
return 0 if all_messages.empty?
|
|
208
205
|
|
|
209
|
-
estimate_message_tokens(all_messages)
|
|
206
|
+
estimated = all_messages.empty? ? 0 : estimate_message_tokens(all_messages)
|
|
207
|
+
estimated += ((@request.system || '').length / 4.0).ceil
|
|
208
|
+
estimated += estimate_tool_token_budget if @request.tools&.any?
|
|
209
|
+
estimated += (Legion::JSON.dump(@request.thinking).length / 3.5).ceil if @request.thinking.is_a?(Hash) && @request.thinking.any?
|
|
210
|
+
estimated
|
|
210
211
|
end
|
|
211
212
|
|
|
212
213
|
def chain_required_capabilities
|
|
@@ -367,11 +368,7 @@ module Legion
|
|
|
367
368
|
|
|
368
369
|
def resolve_routing_state(state)
|
|
369
370
|
return state unless defined?(Router)
|
|
370
|
-
|
|
371
|
-
explicit_route = state[:provider_explicit] || state[:instance_explicit] || state[:tier_explicit]
|
|
372
|
-
auto_route = state[:auto_route] == true
|
|
373
|
-
intent_route = state[:intent_explicit] && state[:intent] && Router.routing_enabled?
|
|
374
|
-
return state unless explicit_route || auto_route || intent_route
|
|
371
|
+
return state unless Legion::LLM::Inventory.lanes.any?
|
|
375
372
|
|
|
376
373
|
resolution = routing_resolution_for(state)
|
|
377
374
|
return state unless resolution
|
data/lib/legion/llm/version.rb
CHANGED