RubyGems - legion-llm - Versions diffs - 0.14.2 → 0.14.3 - Mend

legion-llm 0.14.2 → 0.14.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +18 -0
data/lib/legion/llm/api/shared_helpers.rb +23 -0
data/lib/legion/llm/inference/executor/context_window.rb +21 -5
data/lib/legion/llm/inference/executor/escalation.rb +22 -1
data/lib/legion/llm/settings.rb +17 -19
data/lib/legion/llm/version.rb +1 -1
metadata +1 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 8ab1822ba6aa5df945cd99b3bb2ee5e735080f97a517d4de694725e38f44bf71
-  data.tar.gz: 276fd55c3fabce052c0c27a3cd1e84e050b502446a4b047ff8ec88fe829c7f47
+  metadata.gz: dac2aadf427ccf4c6e902242ed392c3f9f0f5a3e0ddaeeb6d6c262e3926c87b1
+  data.tar.gz: 48ccd139e47330107147d7745ec7e34c7a1de879cc8939d2d1fd33322d04b336
 SHA512:
-  metadata.gz: f1a2ca486fe605683c14a8847ed209a6041c8a557d90b4e6e01676218d3bced83e776ae4352a6f19c09ab19e2f98e129cfb947dcd435a490a6df5c7b95240b31
-  data.tar.gz: aa8cbe3c10d72b28d9a31a3e8f3114faaa2e016860e4520532d5872ab01a38e53084184cb22dbb0b7943691819291c17770fb7292da392460200057ba980e638
+  metadata.gz: 27c3269042cf0b060f0e402fad967f048319a2a98a425d4b68f39569d07e41842505bf947f32ae62a52125a0c81143e4d1fcf076bee53e247fd0cfe76a9e7f6a
+  data.tar.gz: 1fcd99c1a3007b858bc033f57a1cd3ada1fd96e2729d597fd65f8ac8eeb406295f1b73a2d4bcea31793ca6669b0bba545c32ac86474e2c9f1a5adcc60f9550f2

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,23 @@
 # Legion LLM Changelog
+## [0.14.3] - 2026-06-22
+### Fixed
+- Context window enforcement now accounts for tool definition tokens when deciding whether to compact messages. Previously, 235 injected tools (~50k tokens) were invisible to the threshold check, causing context overflow on dispatch.
+- Context overflow errors are no longer unconditionally terminal during escalation. When a lane with a larger context window exists, the executor retries on that lane instead of raising immediately.
+- `X-Legion-Format: canonical` debug surface enabled by default (was gated to dev/lite mode only). The canonical format is a first-class output format, not a debug feature.
+- Completion log now includes `context_tokens_saved=N` showing total tokens saved by curation, archival, thinking-strip, and context window compaction per request.
+- Embedding dispatch now sends chunks individually instead of as an array, fixing intermittent `ContextOverflow` on RAG embedding queries with ollama.
+- Truncated tool results now instruct the model to make smaller targeted requests instead of silently chopping content.
+- `context_window_threshold` moved from hardcoded 0.90 to `context_curation_defaults` setting.
+### Changed
+- `tool_result_max_dispatch_chars` lowered from 10k to 5k (dispatch-time truncation of oversized tool results before provider call).
+- `context_curation.tool_result_max_chars` lowered from 10k to 2k (curator distills tool results earlier, reducing context growth).
+- Default `tier_weights` rebalanced: direct=105, local=110, fleet=110, cloud=120, frontier=150 (prefer higher-capability tiers when lane_weight tie-breaks).
 ## [0.14.2] - 2026-06-20
 ### Fixed

data/lib/legion/llm/api/shared_helpers.rb CHANGED Viewed

@@ -179,6 +179,8 @@ module Legion
           provider_latency_ms = api_provider_latency_ms(pipeline_response, routing)
           conversation_id = pipeline_response.respond_to?(:conversation_id) ? pipeline_response.conversation_id : nil
+          context_stats = api_context_stats(pipeline_response)
           parts = {
             request_id:          request_id || 'unknown',
             conversation_id:     conversation_id || 'none',
@@ -199,6 +201,7 @@ module Legion
             tool_executions:     api_tool_execution_count(pipeline_response),
             stop_reason:         resolved_stop_reason
           }
+          parts.merge!(context_stats) if context_stats
           log.info("[llm][api][#{namespace}] action=completed " \
                    "#{parts.map { |key, value| "#{key}=#{value}" }.join(' ')}")
@@ -228,6 +231,26 @@ module Legion
           nil
         end
+        def api_context_stats(pipeline_response)
+          return nil unless pipeline_response.respond_to?(:audit)
+          accounting = pipeline_response.audit&.dig(:context_accounting)
+          return nil unless accounting.is_a?(Hash)
+          tokens = accounting[:tokens] || {}
+          curated = tokens[:curation_saved_estimated_tokens].to_i
+          archived = tokens[:archive_saved_estimated_tokens].to_i
+          stripped = tokens[:stripped_thinking_estimated_tokens].to_i
+          compacted = tokens[:context_window_saved_estimated_tokens].to_i
+          total_saved = curated + archived + stripped + compacted
+          return nil unless total_saved.positive?
+          { context_tokens_saved: total_saved }
+        rescue StandardError => e
+          handle_exception(e, level: :warn, handled: true, operation: 'llm.api.context_stats')
+          nil
+        end
         def api_hash_value(hash, key)
           return nil unless hash.respond_to?(:[])

data/lib/legion/llm/inference/executor/context_window.rb CHANGED Viewed

@@ -26,18 +26,21 @@ module Legion
             @context_accounting[:component_status][:context_window] = :observed
             return messages unless context_window&.positive?
-            threshold = (context_window * 0.90).to_i
+            threshold = (context_window * Legion::Settings[:llm][:context_curation][:context_window_threshold]).to_i
+            tool_budget = estimate_tool_token_budget
+            available_for_messages = threshold - tool_budget
             estimated = estimate_message_tokens(messages)
-            return messages if estimated <= threshold
+            return messages if estimated <= available_for_messages
             log.warn "[llm][executor] action=context_compaction request_id=#{@request.id} " \
-                     "estimated_tokens=#{estimated} context_window=#{context_window} threshold=#{threshold}"
+                     "estimated_tokens=#{estimated} context_window=#{context_window} " \
+                     "threshold=#{threshold} tool_budget=#{tool_budget} available=#{available_for_messages}"
             preserve_after = last_user_message_index(messages)
             recent = messages[preserve_after..]
             older = messages[0...preserve_after]
-            target_tokens = threshold - estimate_message_tokens(recent)
+            target_tokens = available_for_messages - estimate_message_tokens(recent)
             compacted = compact_to_fit(older, target_tokens)
             result = compacted + recent
@@ -94,6 +97,17 @@ module Legion
             messages.sum { |m| ((m[:content] || m['content']).to_s.length / 4.0).ceil }
           end
+          def estimate_tool_token_budget
+            tools = @request.tools
+            return 0 if tools.nil? || tools.empty?
+            tool_list = tools.is_a?(Hash) ? tools.values : Array(tools)
+            tool_list.sum do |tool|
+              json_repr = tool.respond_to?(:to_h) ? Legion::JSON.dump(tool.to_h) : tool.to_s
+              (json_repr.length / 3.5).ceil
+            end
+          end
           def strip_thinking_from_history(messages)
             before_tokens = ContextAccounting.estimate_message_tokens(messages)
             preserve_after = last_user_message_index(messages)
@@ -158,7 +172,9 @@ module Legion
               next msg unless content.is_a?(String) && content.length > max_chars
               trimmed_count += 1
-              msg.merge(content: "#{content[0, max_chars]}\n[truncated — #{content.length} chars total]")
+              msg.merge(content: "#{content[0, max_chars]}\n\n[TRUNCATED: showing first #{max_chars} of #{content.length} chars. " \
+                                 'If you need more content, make multiple smaller targeted requests ' \
+                                 '(e.g. read specific line ranges, grep for specific patterns, or request smaller sections).]')
             end
             if trimmed_count.positive?

data/lib/legion/llm/inference/executor/escalation.rb CHANGED Viewed

@@ -419,6 +419,22 @@ module Legion
             err.is_a?(::NoMethodError) || err.is_a?(::ArgumentError)
           end
+          def larger_context_lane_available?(lane:, payload:, **)
+            current_context = lane.dig(:limits, :context_window).to_i
+            return false unless current_context.positive?
+            Legion::LLM::Inventory.lanes.any? do |candidate|
+              next false if payload[:tried_lanes].include?(candidate[:id])
+              next false if candidate[:lane_weight].to_i <= 0
+              candidate_context = candidate.dig(:limits, :context_window).to_i
+              candidate_context > current_context
+            end
+          rescue StandardError => e
+            handle_exception(e, level: :warn, operation: 'llm.pipeline.larger_context_lane_available')
+            false
+          end
           # Classify error for the while remaining.positive? loop (G26 / D-C).
           # internal_error MUST be checked BEFORE account_specific (G25 / B-H):
           # a daemon NoMethodError must never be treated as a retriable account-scoped failure.
@@ -439,7 +455,12 @@ module Legion
           # Terminal errors re-raise immediately so the caller's while loop stops.
           def classify_and_accumulate_exclusions(error:, lane:, payload:, **)
             case classify_error(error: error)
-            when :internal_error, :context_overflow, :payload_error, :policy_denied
+            when :context_overflow
+              raise error unless larger_context_lane_available?(lane: lane, payload: payload)
+              payload[:tried_lanes] << lane[:id]
+            when :internal_error, :payload_error, :policy_denied
               raise error
             when :account_specific
               # Account/instance-scoped failure: trip the per-instance circuit.

data/lib/legion/llm/settings.rb CHANGED Viewed

@@ -30,7 +30,7 @@ module Legion
           max_tool_rounds:                200,
           max_tool_calls_per_turn:        100,
           tool_error_log_chars:           500,
-          tool_result_max_dispatch_chars: 10_000,
+          tool_result_max_dispatch_chars: 5_000,
           default_model:                  model_override,
           default_temperature:            0.9,
           default_provider:               nil,
@@ -218,7 +218,7 @@ module Legion
           # Multiplicative tier weights for lane_weight computation (P1 SSOT RANKING v2).
           # Defaults favor higher-capability tiers (direct lowest, frontier highest). Operators can override per-tier to bias routing.
           # Used by Inventory.write_lane to compute lane_weight = tier_w * provider_w * instance_w * model_w * health_mult.
-          tier_weights:               { direct: 100, local: 100, fleet: 100, cloud: 100, frontier: 100 },
+          tier_weights:               { direct: 105, local: 110, fleet: 110, cloud: 120, frontier: 150 },
           max_attempts:               3,
           # Body-level routing hints are gated by this flag. Auto-routing aliases
           # like legionio/auto are still accepted as "you pick" intent.
@@ -399,19 +399,20 @@ module Legion
       def self.context_curation_defaults
         {
-          enabled:                 true,
-          mode:                    'heuristic',
-          llm_assisted:            false,
-          llm_model:               nil,
-          tool_result_max_chars:   10_000,
-          thinking_eviction:       true,
-          exchange_folding:        true,
-          superseded_eviction:     true,
-          dedup_enabled:           true,
-          dedup_threshold:         0.85,
-          target_context_tokens:   60_000,
-          archive_dropped_turns:   true,
-          archive_preserve_recent: 10
+          enabled:                  true,
+          mode:                     'heuristic',
+          llm_assisted:             false,
+          llm_model:                nil,
+          tool_result_max_chars:    2_000,
+          thinking_eviction:        true,
+          exchange_folding:         true,
+          superseded_eviction:      true,
+          dedup_enabled:            true,
+          dedup_threshold:          0.85,
+          target_context_tokens:    60_000,
+          context_window_threshold: 0.90,
+          archive_dropped_turns:    true,
+          archive_preserve_recent:  10
         }
       end
@@ -482,10 +483,7 @@ module Legion
       end
       def self.debug_formats_default_enabled
-        return true if defined?(Legion::Mode) && Legion::Mode.respond_to?(:lite?) && Legion::Mode.lite?
-        env = (ENV.fetch('LEGION_ENV', nil) || ENV.fetch('RACK_ENV', nil)).to_s.downcase
-        %w[development dev test].include?(env)
+        true
       end
       def self.streaming_defaults

data/lib/legion/llm/version.rb CHANGED Viewed

@@ -2,6 +2,6 @@
 module Legion
   module LLM
-    VERSION = '0.14.2'
+    VERSION = '0.14.3'
   end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: legion-llm
 version: !ruby/object:Gem::Version
-  version: 0.14.2
+  version: 0.14.3
 platform: ruby
 authors:
 - Esity