RubyGems - legion-llm - Versions diffs - 0.14.8 → 0.14.9 - Mend

legion-llm 0.14.8 → 0.14.9

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (5)

checksums.yaml +4 -4
data/CHANGELOG.md +13 -0
data/lib/legion/llm/inference/executor/routing.rb +6 -9
data/lib/legion/llm/version.rb +1 -1
metadata +1 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: b12fc37520233aba6bc8b063e0334e5e1abb74970c5ba6a2efd9094bbd3414cf
-  data.tar.gz: f52cdb8f4cc7c7c2e67e0a6c8caa55fb842f736418955c4ec20d89a0b677212a
+  metadata.gz: f67a1a83e7213a2e50437b07e71afd74302c2b1da8229708ec8d99506d45354e
+  data.tar.gz: fa1dcc7ae0d4b64dd2716271dd5c50778ff178fa3987704b16302c00b75029ab
 SHA512:
-  metadata.gz: 598310ad96bad4d54ac02802a6d981294d5fb1faec31390fa6375e378f936f47f2336027255e0ca5c49f6db199b606528c8e96b70112325e41ccbb346245db11
-  data.tar.gz: 68c89f512cea238a856fdc5930155a7018cb095573f0de5de50bce8f03bb662c2b67abdd0f653a7f201139e3412806beef631f1f46dd80f065c18c0a496c489b
+  metadata.gz: b3c1802c7bc32daa15d15c0fc8b16923d8fac48531adc5bcfb2997eec6234534166b85c43e9262774dedb63e28a7a62c148614bdac53e00c15fe44598770823b
+  data.tar.gz: cc6ae96b0bfc1e1e54298b028334d1bd1f19e18147ac0d7a7d61bb849c86e157ff26b986082be3027ba4199570481d91b960ce6eb1d82b2ec5aa66d787d3a9b7

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,18 @@
 # Legion LLM Changelog
+## [0.14.9] - 2026-06-25
+### Fixed
+- Routing token estimate (`estimate_request_tokens`) now includes system prompt, tool definitions, and thinking config — matching what `enforce_final_context_budget!` checks at dispatch time. Previously only counted messages, causing large-context requests (e.g. Copilot Chat with 81 tools + system prompt) to route to small-context models and fail with ContextOverflow at dispatch.
+- `resolve_routing_state` always consults `request_lane` when Inventory has lanes, removing the gate that required explicit routing flags. Enables weight-based lane selection for all callers including `Legion::LLM.chat`.
+### Added
+- `:subcall` profile in `Inference::Profile` — skips all pipeline steps except routing, provider_call, and metering. Used for in-pipeline LLM sub-calls (debate, lex-knowledge, lex-apollo) to avoid recursive full-pipeline execution.
+- `Executor.new(request, skip_pipeline: true)` kwarg forces `:subcall` profile without requiring caller identity manipulation.
+- `dispatch_chat` unified path — builds a `Request` via `from_chat_args` and runs the Executor directly, detecting in-pipeline context via `Thread.current[:legion_llm_in_pipeline]`.
 ## [0.14.8] - 2026-06-25
 ### Fixed

data/lib/legion/llm/inference/executor/routing.rb CHANGED Viewed

@@ -199,14 +199,15 @@ module Legion
           end
           def estimate_request_tokens
-            # Estimate total tokens from current request messages + conversation history.
-            # This is used by the router to exclude models whose context window can't fit.
             all_messages = []
             all_messages.concat(@enrichments['context:conversation_history'] || [])
             all_messages.concat(@request.messages || [])
-            return 0 if all_messages.empty?
-            estimate_message_tokens(all_messages)
+            estimated = all_messages.empty? ? 0 : estimate_message_tokens(all_messages)
+            estimated += ((@request.system || '').length / 4.0).ceil
+            estimated += estimate_tool_token_budget if @request.tools&.any?
+            estimated += (Legion::JSON.dump(@request.thinking).length / 3.5).ceil if @request.thinking.is_a?(Hash) && @request.thinking.any?
+            estimated
           end
           def chain_required_capabilities
@@ -367,11 +368,7 @@ module Legion
           def resolve_routing_state(state)
             return state unless defined?(Router)
-            explicit_route = state[:provider_explicit] || state[:instance_explicit] || state[:tier_explicit]
-            auto_route = state[:auto_route] == true
-            intent_route = state[:intent_explicit] && state[:intent] && Router.routing_enabled?
-            return state unless explicit_route || auto_route || intent_route
+            return state unless Legion::LLM::Inventory.lanes.any?
             resolution = routing_resolution_for(state)
             return state unless resolution

data/lib/legion/llm/version.rb CHANGED Viewed

@@ -2,6 +2,6 @@
 module Legion
   module LLM
-    VERSION = '0.14.8'
+    VERSION = '0.14.9'
   end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: legion-llm
 version: !ruby/object:Gem::Version
-  version: 0.14.8
+  version: 0.14.9
 platform: ruby
 authors:
 - Esity