legion-llm 0.3.12 → 0.3.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +19 -0
- data/lib/legion/llm/arbitrage.rb +105 -0
- data/lib/legion/llm/batch.rb +115 -0
- data/lib/legion/llm/hooks/rag_guard.rb +72 -0
- data/lib/legion/llm/hooks/response_guard.rb +47 -0
- data/lib/legion/llm/hooks.rb +3 -0
- data/lib/legion/llm/scheduling.rb +99 -0
- data/lib/legion/llm/settings.rb +32 -1
- data/lib/legion/llm/version.rb +1 -1
- data/lib/legion/llm.rb +26 -3
- metadata +6 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: fcbccb6df1f7211cd57464dd7a0393d9c90caa73fd504fd8e066e3642ff6f564
|
|
4
|
+
data.tar.gz: 3a103e1db523cdf97cbc0ce2d57981a5f81ffa643ea694b2a7a1e290d1f6971d
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8fc88260d2aaa5d3f6cb78e21e6b102bb44950f20bdca257a82284c8a3a9afb2246a72bf2c223b20663271a29d4c2d63617550de535e5d4cc3538583ab51842c
|
|
7
|
+
data.tar.gz: 5e29c883a22532c9e873126924502d89b6e97ea89cf0b4921960e155c9aa665a8690eba712632b0c7221419382c5a8eaa7442e843518274c186069d50438ff42
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,24 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.3.14] - 2026-03-21
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- `Legion::LLM::Arbitrage` module for cost-aware model selection: configurable cost table (per-1M-token input/output prices), `cheapest_for(capability:, max_cost:)` filters eligible models and returns the cheapest, `estimated_cost` for per-request USD estimates, settings-defined cost_table overrides, quality_floor and capability-tier filtering
|
|
7
|
+
- `Legion::LLM::Batch` module for non-urgent request batching: `enqueue` stores requests in an in-process queue with UUID tracking, `flush` groups by provider/model and invokes callbacks, configurable window_seconds and max_batch_size, `reset!` for test isolation
|
|
8
|
+
- `Legion::LLM::Scheduling` module for off-peak deferral: `should_defer?(intent:, urgency:)` checks configurable peak hours and intent eligibility, `peak_hours?` evaluates UTC hour against configurable range, `next_off_peak` returns next off-peak window capped at max_defer_hours
|
|
9
|
+
- Default settings for all three features under `llm.arbitrage`, `llm.batch`, `llm.scheduling` — all disabled by default (opt-in)
|
|
10
|
+
- 3 new spec files: `arbitrage_spec.rb` (18 examples), `batch_spec.rb` (16 examples), `scheduling_spec.rb` (24 examples)
|
|
11
|
+
|
|
12
|
+
## [0.3.13] - 2026-03-21
|
|
13
|
+
|
|
14
|
+
### Added
|
|
15
|
+
- `Legion::LLM::Hooks::RagGuard` module with `check_rag_faithfulness` for post-generation RAG faithfulness evaluation via lex-eval
|
|
16
|
+
- `Legion::LLM::Hooks::ResponseGuard` module with `guard_response` as the central dispatch point for post-generation safety checks
|
|
17
|
+
- Response guard wired into `_dispatch_chat`: fires when `Legion::Settings[:llm][:response_guards][:enabled]` is true, attaches `_guard_result` metadata to the response hash without blocking
|
|
18
|
+
- RAG guard skips gracefully when lex-eval is unavailable (returns `reason: :eval_unavailable`) or context is not provided (returns `reason: :no_context`)
|
|
19
|
+
- Settings keys: `llm.rag_guard.enabled`, `llm.rag_guard.threshold` (default 0.7), `llm.rag_guard.evaluators` (default `[:faithfulness, :rag_relevancy]`)
|
|
20
|
+
- 19 new specs in `spec/legion/llm/hooks/rag_guard_spec.rb` and `spec/legion/llm/hooks/response_guard_spec.rb`
|
|
21
|
+
|
|
3
22
|
## [0.3.12] - 2026-03-19
|
|
4
23
|
|
|
5
24
|
### Added
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Legion
  module LLM
    # Cost-aware model selection. Given a capability tier and an optional
    # per-request cost ceiling, picks the cheapest model from a per-1M-token
    # price table. The table can be extended/overridden through settings
    # (llm.arbitrage.cost_table); the whole feature is opt-in via
    # llm.arbitrage.enabled.
    module Arbitrage
      # Default cost table: per-1M-token input/output prices in USD.
      # Overridable via settings: llm.arbitrage.cost_table
      DEFAULT_COST_TABLE = {
        'claude-sonnet-4-6' => { input: 3.0, output: 15.0 },
        'us.anthropic.claude-sonnet-4-6-v1' => { input: 3.0, output: 15.0 },
        'gpt-4o' => { input: 2.5, output: 10.0 },
        'gpt-4o-mini' => { input: 0.15, output: 0.60 },
        'gemini-2.0-flash' => { input: 0.10, output: 0.40 },
        'llama3' => { input: 0.0, output: 0.0 }
      }.freeze

      class << self
        # True only when settings explicitly set llm.arbitrage.enabled to true;
        # any other value (or missing settings) counts as disabled.
        def enabled?
          settings[:enabled] == true
        end

        # Estimates the USD cost of one request against the price table.
        #
        # @param model [String] model identifier
        # @param input_tokens [Integer] estimated number of input tokens
        # @param output_tokens [Integer] estimated number of output tokens
        # @return [Float, nil] estimated cost in USD, or nil if model not in table
        def estimated_cost(model:, input_tokens: 1000, output_tokens: 500)
          prices = cost_table[model.to_s]
          return nil unless prices

          input_cost = prices[:input] * input_tokens
          output_cost = prices[:output] * output_tokens
          (input_cost + output_cost) / 1_000_000.0
        end

        # Selects the cheapest model that satisfies the capability tier and
        # (optionally) a per-request cost ceiling. Returns nil when arbitrage
        # is disabled or nothing qualifies.
        #
        # @param capability [String, Symbol] required capability tier (e.g., :basic, :moderate, :reasoning)
        # @param max_cost [Float, nil] maximum acceptable cost per typical request (USD); nil means no limit
        # @param input_tokens [Integer] estimated input tokens for cost calculation
        # @param output_tokens [Integer] estimated output tokens for cost calculation
        # @return [String, nil] cheapest eligible model ID, or nil if none qualify
        def cheapest_for(capability: :moderate, max_cost: nil, input_tokens: 1000, output_tokens: 500)
          return nil unless enabled?

          floor = settings.fetch(:quality_floor, 0.7)
          candidates = eligible_models(capability: capability, _quality_floor: floor)

          # Price every candidate, dropping unknown models and anything over budget.
          priced = candidates.each_with_object({}) do |candidate, acc|
            price = estimated_cost(model: candidate,
                                   input_tokens: input_tokens,
                                   output_tokens: output_tokens)
            next if price.nil?
            next if max_cost && price > max_cost

            acc[candidate] = price
          end

          priced.min_by { |_name, price| price }&.first
        end

        # Returns the effective cost table: DEFAULT_COST_TABLE plus any
        # settings-defined overrides (keys normalized to strings, price keys
        # to symbols).
        def cost_table
          extra = settings.fetch(:cost_table, {})
          return DEFAULT_COST_TABLE if extra.nil? || extra.empty?

          extra.each_with_object(DEFAULT_COST_TABLE.dup) do |(name, prices), table|
            table[name.to_s] = prices.transform_keys(&:to_sym)
          end
        end

        private

        # Fetches the llm.arbitrage settings subtree with symbolized keys;
        # returns {} when settings are absent, malformed, or raise.
        def settings
          root = Legion::Settings[:llm]
          return {} unless root.is_a?(Hash)

          section = root[:arbitrage] || root['arbitrage'] || {}
          section.is_a?(Hash) ? section.transform_keys(&:to_sym) : {}
        rescue StandardError
          {}
        end

        # Returns models eligible for the given capability tier.
        # :reasoning excludes the cheap/local tier; every other tier allows
        # the full table. _quality_floor is reserved for future scoring
        # integration and is currently unused.
        def eligible_models(capability:, _quality_floor: 0.7)
          tier = capability.to_sym
          non_frontier = %w[gpt-4o-mini gemini-2.0-flash llama3]

          cost_table.keys.select do |model|
            tier != :reasoning || !non_frontier.include?(model)
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# frozen_string_literal: true

require 'securerandom'

module Legion
  module LLM
    # In-process queue for deferring non-urgent LLM requests so they can be
    # flushed together later. NOTE(review): the queue is a plain Array with
    # no locking — callers must serialize access if using it from multiple
    # threads.
    module Batch
      class << self
        # Returns true when request batching is enabled in settings.
        def enabled?
          settings.fetch(:enabled, false) == true
        end

        # Enqueues a request for deferred batch processing.
        #
        # @param messages [Array<Hash>] chat messages array
        # @param model [String] model to use
        # @param provider [Symbol, nil] provider override
        # @param callback [Proc, nil] called with result hash when batch is flushed
        # @param priority [Symbol] :normal or :low (informational only)
        # @param opts [Hash] additional options forwarded to provider
        # @return [String] batch_request_id
        def enqueue(messages:, model:, callback: nil, provider: nil, priority: :normal, **opts)
          request_id = SecureRandom.uuid

          queue << {
            id: request_id,
            messages: messages,
            model: model,
            provider: provider,
            callback: callback,
            priority: priority,
            opts: opts,
            queued_at: Time.now.utc
          }

          log_debug "Legion::LLM::Batch enqueued #{request_id} (queue size: #{queue.size})"
          request_id
        end

        # Flushes accumulated requests up to max_size.
        # Groups entries by provider+model and invokes callbacks with a stub result.
        # In production this would submit to provider batch APIs; here it logs and
        # returns per-request result hashes for callback delivery.
        #
        # @param max_size [Integer] maximum number of requests to flush in one pass
        # @param max_wait [Integer] only flush entries older than this many seconds (0 = all)
        # @return [Array<Hash>] array of { id:, status:, result: } hashes
        def flush(max_size: nil, max_wait: nil)
          effective_max = max_size || settings.fetch(:max_batch_size, 100)
          effective_wait = max_wait || settings.fetch(:window_seconds, 300)

          cutoff = Time.now.utc - effective_wait
          to_flush = queue.select { |e| e[:queued_at] <= cutoff }.first(effective_max)

          return [] if to_flush.empty?

          # Remove flushed entries in one pass by id (entries carry unique UUIDs),
          # instead of repeated Array#delete scans.
          flushed_ids = to_flush.map { |e| e[:id] }
          queue.reject! { |e| flushed_ids.include?(e[:id]) }
          log_debug "Legion::LLM::Batch flushing #{to_flush.size} request(s)"

          groups = to_flush.group_by { |e| [e[:provider], e[:model]] }
          results = []

          groups.each do |(provider, model), entries|
            entries.each do |entry|
              result = submit_single(entry, provider: provider, model: model)
              entry[:callback]&.call(result)
              results << { id: entry[:id], status: result[:status], result: result }
            end
          end

          results
        end

        # Returns the current number of requests in the queue.
        def queue_size
          queue.size
        end

        # Clears the queue (useful for testing).
        def reset!
          @queue = []
        end

        private

        def queue
          @queue ||= []
        end

        # Logs via Legion::Logging when it is loaded; no-ops otherwise so the
        # module stays usable in minimal environments (same guard style the
        # RagGuard/ResponseGuard hooks use before touching Legion::Logging).
        def log_debug(message)
          Legion::Logging.debug(message) if defined?(Legion::Logging)
        end

        # Fetches the llm.batch settings subtree with symbolized keys;
        # returns {} when settings are absent, malformed, or raise.
        def settings
          llm = Legion::Settings[:llm]
          return {} unless llm.is_a?(Hash)

          b = llm[:batch] || llm['batch'] || {}
          b.is_a?(Hash) ? b.transform_keys(&:to_sym) : {}
        rescue StandardError
          {}
        end

        # Builds the stub per-request result delivered to callbacks.
        # Real provider batch submission would replace this.
        def submit_single(entry, provider:, model:)
          {
            status: :batched,
            model: model,
            provider: provider,
            id: entry[:id],
            response: nil,
            meta: { batched: true, queued_at: entry[:queued_at] }
          }
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Legion
  module LLM
    module Hooks
      # Post-generation RAG faithfulness evaluation backed by lex-eval.
      # Fails open by design: when the evaluator is unavailable or errors,
      # the response is treated as faithful and a :reason is returned.
      module RagGuard
        class << self
          # Scores a response against its retrieval context with each
          # configured evaluator and flags those scoring below the threshold.
          #
          # @return [Hash] { faithful:, scores:, flagged_evaluators:, details: }
          #   or { faithful: true, reason: ... } when evaluation is skipped.
          def check_rag_faithfulness(response:, context:, threshold: nil, evaluators: nil, **)
            return { faithful: true, reason: :eval_unavailable } unless eval_available?

            limit = threshold || settings_threshold
            names = evaluators || settings_evaluators

            scores = names.to_h do |name|
              [name, run_evaluator(name, response: response, context: context)]
            end
            flagged = scores.select { |_name, score| score < limit }.keys
            passed = flagged.empty?

            {
              faithful: passed,
              scores: scores,
              flagged_evaluators: flagged,
              details: build_details(scores, limit, passed)
            }
          rescue StandardError => e
            Legion::Logging.warn "RagGuard evaluation error: #{e.message}" if logging_available?
            { faithful: true, reason: :eval_error }
          end

          private

          # Truthy when the lex-eval client class has been loaded.
          def eval_available?
            defined?(Legion::Extensions::Eval::Client)
          end

          def logging_available?
            Legion.const_defined?('Logging')
          end

          # Threshold from llm.rag_guard.threshold, defaulting to 0.7.
          def settings_threshold
            configured = Legion::Settings.dig(:llm, :rag_guard, :threshold) if Legion.const_defined?('Settings')
            configured || 0.7
          end

          # Evaluator list from llm.rag_guard.evaluators, defaulting to
          # faithfulness + rag_relevancy.
          def settings_evaluators
            configured = Legion::Settings.dig(:llm, :rag_guard, :evaluators) if Legion.const_defined?('Settings')
            configured || %i[faithfulness rag_relevancy]
          end

          # Runs one evaluator and returns its average score; any failure
          # (including a missing summary) scores 0.0, which will flag the
          # evaluator against any positive threshold.
          def run_evaluator(evaluator_name, response:, context:)
            payload = [{ input: context.to_s, output: response.to_s, expected: nil }]
            outcome = Legion::Extensions::Eval::Client.new.run_evaluation(
              evaluator_name: evaluator_name,
              inputs: payload
            )
            outcome.dig(:summary, :avg_score) || 0.0
          rescue StandardError
            0.0
          end

          # Human-readable one-line summary of the evaluation outcome.
          def build_details(scores, threshold, faithful)
            rendered = scores.map { |name, score| "#{name}=#{score.round(3)}" }.join(', ')
            "RAG faithfulness check #{faithful ? 'passed' : 'failed'} (threshold=#{threshold}): #{rendered}"
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Legion
  module LLM
    module Hooks
      # Central dispatch point for post-generation safety checks.
      # Fails open: any error yields { passed: true, guards: {} } so guards
      # never block a response.
      module ResponseGuard
        # Known guard modules, keyed by guard name.
        GUARD_REGISTRY = {
          rag: RagGuard
        }.freeze

        class << self
          # Runs the requested guards over a response and aggregates the
          # per-guard results. Unknown guard names are skipped silently.
          #
          # @return [Hash] { passed: Boolean, guards: {name => result} }
          def guard_response(response:, context: nil, guards: [:rag], **)
            outcomes = guards.each_with_object({}) do |name, acc|
              handler = GUARD_REGISTRY[name.to_sym]
              next unless handler

              acc[name] = dispatch_guard(handler, name, response: response, context: context)
            end

            # A guard only fails the response when it explicitly reports
            # faithful: false; skipped/indeterminate results count as passing.
            all_ok = outcomes.values.none? { |outcome| outcome[:faithful] == false }

            { passed: all_ok, guards: outcomes }
          rescue StandardError => e
            Legion::Logging.warn "ResponseGuard error: #{e.message}" if Legion.const_defined?('Logging')
            { passed: true, guards: {} }
          end

          private

          # Invokes one guard. The RAG guard is skipped (treated as passing)
          # when no retrieval context is supplied; other guards are expected
          # to expose a generic #check entry point.
          def dispatch_guard(guard_mod, guard_name, response:, context:)
            if guard_name.to_sym == :rag
              return { faithful: true, reason: :no_context } if context.nil?

              guard_mod.check_rag_faithfulness(response: response, context: context)
            else
              guard_mod.check(response: response, context: context)
            end
          end
        end
      end
    end
  end
end
|
data/lib/legion/llm/scheduling.rb
CHANGED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Legion
  module LLM
    # Defers eligible, non-urgent LLM work out of a configured peak-hours
    # window (evaluated in UTC). Opt-in via llm.scheduling.enabled.
    module Scheduling
      # Default peak hours in UTC, inclusive of both endpoints
      # (14:00 through 22:59 UTC — roughly US business hours).
      DEFAULT_PEAK_RANGE = (14..22)

      # Intents that are eligible for deferral during peak hours.
      DEFAULT_DEFER_INTENTS = %i[batch background maintenance].freeze

      class << self
        # Returns true when off-peak scheduling is enabled in settings.
        def enabled?
          settings.fetch(:enabled, false) == true
        end

        # Determines whether a request should be deferred to off-peak hours.
        #
        # @param intent [Symbol, String] the request intent
        # @param urgency [Symbol] :immediate bypasses deferral regardless of settings
        # @return [Boolean]
        def should_defer?(intent: :normal, urgency: :normal)
          return false unless enabled?
          return false if urgency.to_sym == :immediate

          eligible_for_deferral?(intent.to_sym) && peak_hours?
        end

        # Returns true if the current UTC hour falls within the configured peak window.
        def peak_hours?
          hour = Time.now.utc.hour
          peak_range.cover?(hour)
        end

        # Returns the next off-peak time as a Time object (UTC).
        # Off-peak begins at the hour after the peak window ends, wrapping
        # past midnight when the window ends at hour 23. The result is
        # capped at max_defer_hours from now.
        #
        # @return [Time] next off-peak start time
        def next_off_peak
          now = Time.now.utc
          max_defer = settings.fetch(:max_defer_hours, 8)

          next_time = if now.hour < peak_range.first
                        # Before peak — off-peak is now
                        now
                      else
                        # During or after peak — next off-peak is the hour after
                        # peak ends. Wrap 23 -> 0 instead of passing hour 24 to
                        # Time.utc, which raises ArgumentError.
                        off_peak_hour = (peak_range.last + 1) % 24
                        candidate = Time.utc(now.year, now.month, now.day, off_peak_hour, 0, 0)
                        candidate += 86_400 if candidate <= now
                        candidate
                      end

          # Cap at max_defer_hours from now
          cap = now + (max_defer * 3600)
          [next_time, cap].min
        end

        private

        # Fetches the llm.scheduling settings subtree with symbolized keys;
        # returns {} when settings are absent, malformed, or raise.
        def settings
          llm = Legion::Settings[:llm]
          return {} unless llm.is_a?(Hash)

          s = llm[:scheduling] || llm['scheduling'] || {}
          s.is_a?(Hash) ? s.transform_keys(&:to_sym) : {}
        rescue StandardError
          {}
        end

        # Parses llm.scheduling.peak_hours_utc ("START-END") into an inclusive
        # hour range; falls back to DEFAULT_PEAK_RANGE on any malformed value.
        def peak_range
          raw = settings[:peak_hours_utc]
          return DEFAULT_PEAK_RANGE unless raw.is_a?(String) && raw.include?('-')

          parts = raw.split('-')
          return DEFAULT_PEAK_RANGE unless parts.size == 2

          start_h = Integer(parts[0], 10)
          end_h = Integer(parts[1], 10)
          (start_h..end_h)
        rescue ArgumentError
          DEFAULT_PEAK_RANGE
        end

        # Deferral-eligible intents from settings, defaulting to
        # DEFAULT_DEFER_INTENTS; entries are normalized to symbols.
        def defer_intents
          raw = settings[:defer_intents]
          return DEFAULT_DEFER_INTENTS unless raw.is_a?(Array)

          raw.map { |i| i.to_s.to_sym }
        end

        def eligible_for_deferral?(intent)
          defer_intents.include?(intent)
        end
      end
    end
  end
end
|
data/lib/legion/llm/settings.rb
CHANGED
|
@@ -15,7 +15,10 @@ module Legion
|
|
|
15
15
|
discovery: discovery_defaults,
|
|
16
16
|
gateway: gateway_defaults,
|
|
17
17
|
daemon: daemon_defaults,
|
|
18
|
-
prompt_caching: prompt_caching_defaults
|
|
18
|
+
prompt_caching: prompt_caching_defaults,
|
|
19
|
+
arbitrage: arbitrage_defaults,
|
|
20
|
+
batch: batch_defaults,
|
|
21
|
+
scheduling: scheduling_defaults
|
|
19
22
|
}
|
|
20
23
|
end
|
|
21
24
|
|
|
@@ -81,6 +84,34 @@ module Legion
|
|
|
81
84
|
}
|
|
82
85
|
end
|
|
83
86
|
|
|
87
|
+
# Default settings for Legion::LLM::Arbitrage (cost-aware model selection).
# Disabled by default (opt-in). cost_table entries here override
# Arbitrage::DEFAULT_COST_TABLE; quality_floor feeds eligibility filtering.
# NOTE(review): prefer_cheapest and cost_table_refresh are not read by the
# Arbitrage module visible in this release — presumably reserved; confirm.
def self.arbitrage_defaults
  {
    enabled: false,
    prefer_cheapest: true,
    quality_floor: 0.7,
    cost_table_refresh: 86_400,
    cost_table: {}
  }
end
|
|
96
|
+
|
|
97
|
+
# Default settings for Legion::LLM::Batch (request batching).
# Disabled by default (opt-in). window_seconds and max_batch_size are the
# fallbacks used by Batch.flush when no explicit arguments are given.
# NOTE(review): eligible_intents is not read by the Batch module visible in
# this release — presumably consumed by dispatch callers; confirm.
def self.batch_defaults
  {
    enabled: false,
    window_seconds: 300,
    max_batch_size: 100,
    eligible_intents: %w[batch background low_priority]
  }
end
|
|
105
|
+
|
|
106
|
+
# Default settings for Legion::LLM::Scheduling (off-peak deferral).
# Disabled by default (opt-in). peak_hours_utc is an inclusive "START-END"
# hour range in UTC; defer_intents lists intents eligible for deferral;
# max_defer_hours caps how far next_off_peak may push work into the future.
def self.scheduling_defaults
  {
    enabled: false,
    peak_hours_utc: '14-22',
    defer_intents: %w[batch background],
    max_defer_hours: 8
  }
end
|
|
114
|
+
|
|
84
115
|
def self.providers
|
|
85
116
|
{
|
|
86
117
|
bedrock: {
|
data/lib/legion/llm/version.rb
CHANGED
data/lib/legion/llm.rb
CHANGED
|
@@ -12,6 +12,9 @@ require 'legion/llm/hooks'
|
|
|
12
12
|
require 'legion/llm/cache'
|
|
13
13
|
require_relative 'llm/response_cache'
|
|
14
14
|
require_relative 'llm/daemon_client'
|
|
15
|
+
require_relative 'llm/arbitrage'
|
|
16
|
+
require_relative 'llm/batch'
|
|
17
|
+
require_relative 'llm/scheduling'
|
|
15
18
|
|
|
16
19
|
begin
|
|
17
20
|
require 'legion/extensions/llm/gateway'
|
|
@@ -184,7 +187,7 @@ module Legion
|
|
|
184
187
|
|
|
185
188
|
private
|
|
186
189
|
|
|
187
|
-
def _dispatch_chat(model:, provider:, intent:, tier:, escalate:, max_escalations:, quality_check:, message:, **)
|
|
190
|
+
def _dispatch_chat(model:, provider:, intent:, tier:, escalate:, max_escalations:, quality_check:, message:, **kwargs)
|
|
188
191
|
messages = message.is_a?(Array) ? message : [{ role: 'user', content: message.to_s }]
|
|
189
192
|
resolved_model = model || settings[:default_model]
|
|
190
193
|
|
|
@@ -196,11 +199,11 @@ module Legion
|
|
|
196
199
|
result = if gateway_loaded? && message
|
|
197
200
|
gateway_chat(model: model, provider: provider, intent: intent,
|
|
198
201
|
tier: tier, message: message, escalate: escalate,
|
|
199
|
-
max_escalations: max_escalations, quality_check: quality_check, **)
|
|
202
|
+
max_escalations: max_escalations, quality_check: quality_check, **kwargs)
|
|
200
203
|
else
|
|
201
204
|
chat_direct(model: model, provider: provider, intent: intent, tier: tier,
|
|
202
205
|
escalate: escalate, max_escalations: max_escalations,
|
|
203
|
-
quality_check: quality_check, message: message, **)
|
|
206
|
+
quality_check: quality_check, message: message, **kwargs)
|
|
204
207
|
end
|
|
205
208
|
|
|
206
209
|
if defined?(Legion::LLM::Hooks)
|
|
@@ -208,6 +211,8 @@ module Legion
|
|
|
208
211
|
return blocked[:response] if blocked
|
|
209
212
|
end
|
|
210
213
|
|
|
214
|
+
result = apply_response_guards(result, kwargs) if response_guards_enabled? && result.is_a?(Hash)
|
|
215
|
+
|
|
211
216
|
result
|
|
212
217
|
end
|
|
213
218
|
|
|
@@ -370,6 +375,24 @@ module Legion
|
|
|
370
375
|
nil
|
|
371
376
|
end
|
|
372
377
|
|
|
378
|
+
# True only when llm.response_guards.enabled is explicitly set to true;
# a missing key or any other value counts as disabled.
def response_guards_enabled?
  settings.dig(:response_guards, :enabled) == true
end
|
|
381
|
+
|
|
382
|
+
# Runs the configured post-generation guards over a chat result and attaches
# their verdict as :_guard_result metadata. Never blocks: a failed guard is
# only logged (when Legion::Logging is loaded), and any error during guard
# evaluation returns the original result untouched.
def apply_response_guards(result, kwargs)
  verdict = Hooks::ResponseGuard.guard_response(
    response: result[:response] || result[:content],
    context: kwargs[:context]
  )

  if !verdict[:passed] && Legion.const_defined?('Logging')
    Legion::Logging.warn "Response guard failed: #{verdict.inspect}"
  end

  result.merge(_guard_result: verdict)
rescue StandardError
  result
end
|
|
395
|
+
|
|
373
396
|
def cacheable?(cache_opt, temperature, message)
|
|
374
397
|
cache_opt != false && temperature.to_f.zero? && message && Cache.enabled?
|
|
375
398
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: legion-llm
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.12
|
|
4
|
+
version: 0.3.14
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Esity
|
|
@@ -130,6 +130,8 @@ files:
|
|
|
130
130
|
- docs/plans/2026-03-15-ollama-discovery-implementation.md
|
|
131
131
|
- legion-llm.gemspec
|
|
132
132
|
- lib/legion/llm.rb
|
|
133
|
+
- lib/legion/llm/arbitrage.rb
|
|
134
|
+
- lib/legion/llm/batch.rb
|
|
133
135
|
- lib/legion/llm/bedrock_bearer_auth.rb
|
|
134
136
|
- lib/legion/llm/cache.rb
|
|
135
137
|
- lib/legion/llm/claude_config_loader.rb
|
|
@@ -141,6 +143,8 @@ files:
|
|
|
141
143
|
- lib/legion/llm/escalation_history.rb
|
|
142
144
|
- lib/legion/llm/helpers/llm.rb
|
|
143
145
|
- lib/legion/llm/hooks.rb
|
|
146
|
+
- lib/legion/llm/hooks/rag_guard.rb
|
|
147
|
+
- lib/legion/llm/hooks/response_guard.rb
|
|
144
148
|
- lib/legion/llm/providers.rb
|
|
145
149
|
- lib/legion/llm/quality_checker.rb
|
|
146
150
|
- lib/legion/llm/response_cache.rb
|
|
@@ -150,6 +154,7 @@ files:
|
|
|
150
154
|
- lib/legion/llm/router/health_tracker.rb
|
|
151
155
|
- lib/legion/llm/router/resolution.rb
|
|
152
156
|
- lib/legion/llm/router/rule.rb
|
|
157
|
+
- lib/legion/llm/scheduling.rb
|
|
153
158
|
- lib/legion/llm/settings.rb
|
|
154
159
|
- lib/legion/llm/shadow_eval.rb
|
|
155
160
|
- lib/legion/llm/structured_output.rb
|