legion-llm 0.3.13 → 0.3.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f6dc45bc6e985a3a6399ba3ed860bfb1ac9d3d9a0f31dda55a2f812d3c46e7cb
4
- data.tar.gz: c3db21154b0b43de08e3e23b24416d9a7dc26a58eb10beb19835845b6ad83500
3
+ metadata.gz: 16ae90179fe84f5fdef3459c5463517048a71481e53c778c6be53ef8a0e4f078
4
+ data.tar.gz: 6bc1cdebbf9807443e057748abd11e6fb41694a68666bbc1da2dee3fa4ead10a
5
5
  SHA512:
6
- metadata.gz: 6bd0700aee69aab3d7dad4e3266855d6ddf28de1574a9b1e48e972b653f4af509720e53b2d8c34e84ac9599a325b539c5fc6c7ac765e6c62a846a40e2b6b9519
7
- data.tar.gz: c2ffe0842728637165668508a68a690eb0a00596710108b4685f47e4fa8b78f24e634ec652e11d7f86ace856f0166299c6827e7bb7a4f1e9ed6e491ed97ca559
6
+ metadata.gz: 1a5a14010e8b18f19f38d94a64ebfbc5a1d0f0ad589aa6c87d2155ee2705f1369c68e56f3697075bab2a4a43a2fc67cbf3797845f32a8214e156ca2003f64ccb
7
+ data.tar.gz: 6251af13334ead29cb2d53cd76137d3627d28f23fce0787816b037718611f1f9ca26bc4b48ea49c33ca0747e9a1c875c2641ae577f4e557b65f68b9d1adfe32b
data/CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
1
1
  # Legion LLM Changelog
2
2
 
3
+ ## [0.3.15] - 2026-03-21
4
+
5
+ ### Changed
6
+ - Pin ruby_llm dependency from `>= 1.0` to `~> 1.13` to prevent breaking changes from a future 2.0 release
7
+
8
+ ## [0.3.14] - 2026-03-21
9
+
10
+ ### Added
11
+ `Legion::LLM::Arbitrage` module for cost-aware model selection: configurable cost table (per-1M-token input/output prices); `cheapest_for(capability:, max_cost:)` filters eligible models and returns the cheapest; `estimated_cost` for per-request USD estimates; settings-defined cost_table overrides; quality_floor and capability-tier filtering
12
+ - `Legion::LLM::Batch` module for non-urgent request batching: `enqueue` stores requests in an in-process queue with UUID tracking, `flush` groups by provider/model and invokes callbacks, configurable window_seconds and max_batch_size, `reset!` for test isolation
13
+ - `Legion::LLM::Scheduling` module for off-peak deferral: `should_defer?(intent:, urgency:)` checks configurable peak hours and intent eligibility, `peak_hours?` evaluates UTC hour against configurable range, `next_off_peak` returns next off-peak window capped at max_defer_hours
14
+ - Default settings for all three features under `llm.arbitrage`, `llm.batch`, `llm.scheduling` — all disabled by default (opt-in)
15
+ - 3 new spec files: `arbitrage_spec.rb` (18 examples), `batch_spec.rb` (16 examples), `scheduling_spec.rb` (24 examples)
16
+
3
17
  ## [0.3.13] - 2026-03-21
4
18
 
5
19
  ### Added
data/CODEOWNERS ADDED
@@ -0,0 +1 @@
1
+ * @Esity
data/legion-llm.gemspec CHANGED
@@ -30,6 +30,6 @@ Gem::Specification.new do |spec|
30
30
  spec.add_dependency 'lex-claude'
31
31
  spec.add_dependency 'lex-gemini'
32
32
  spec.add_dependency 'lex-openai'
33
- spec.add_dependency 'ruby_llm', '>= 1.0'
33
+ spec.add_dependency 'ruby_llm', '~> 1.13'
34
34
  spec.add_dependency 'tzinfo', '>= 2.0'
35
35
  end
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module LLM
5
+ module Arbitrage
6
+ # Default cost table: per-1M-token input/output prices in USD.
7
+ # Overridable via settings: llm.arbitrage.cost_table
8
+ DEFAULT_COST_TABLE = {
9
+ 'claude-sonnet-4-6' => { input: 3.0, output: 15.0 },
10
+ 'us.anthropic.claude-sonnet-4-6-v1' => { input: 3.0, output: 15.0 },
11
+ 'gpt-4o' => { input: 2.5, output: 10.0 },
12
+ 'gpt-4o-mini' => { input: 0.15, output: 0.60 },
13
+ 'gemini-2.0-flash' => { input: 0.10, output: 0.40 },
14
+ 'llama3' => { input: 0.0, output: 0.0 }
15
+ }.freeze
16
+
17
+ class << self
18
+ # Returns true when arbitrage is enabled in settings.
19
+ def enabled?
20
+ settings.fetch(:enabled, false) == true
21
+ end
22
+
23
+ # Returns the estimated cost for a request with the given token counts.
24
+ #
25
+ # @param model [String] model identifier
26
+ # @param input_tokens [Integer] estimated number of input tokens
27
+ # @param output_tokens [Integer] estimated number of output tokens
28
+ # @return [Float, nil] estimated cost in USD, or nil if model not in table
29
+ def estimated_cost(model:, input_tokens: 1000, output_tokens: 500)
30
+ entry = cost_table[model.to_s]
31
+ return nil if entry.nil?
32
+
33
+ ((entry[:input] * input_tokens) + (entry[:output] * output_tokens)) / 1_000_000.0
34
+ end
35
+
36
+ # Selects the cheapest model that meets the capability and quality floor requirements.
37
+ #
38
+ # @param capability [String, Symbol] required capability tier (e.g., :basic, :moderate, :reasoning)
39
+ # @param max_cost [Float, nil] maximum acceptable cost per typical request (USD); nil means no limit
40
+ # @param input_tokens [Integer] estimated input tokens for cost calculation
41
+ # @param output_tokens [Integer] estimated output tokens for cost calculation
42
+ # @return [String, nil] cheapest eligible model ID, or nil if none qualify
43
+ def cheapest_for(capability: :moderate, max_cost: nil, input_tokens: 1000, output_tokens: 500)
44
+ return nil unless enabled?
45
+
46
+ quality_floor = settings.fetch(:quality_floor, 0.7)
47
+ eligible = eligible_models(capability: capability, _quality_floor: quality_floor)
48
+
49
+ scored = eligible.filter_map do |model|
50
+ cost = estimated_cost(model: model, input_tokens: input_tokens, output_tokens: output_tokens)
51
+ next if cost.nil?
52
+ next if max_cost && cost > max_cost
53
+
54
+ [model, cost]
55
+ end
56
+
57
+ return nil if scored.empty?
58
+
59
+ scored.min_by { |_model, cost| cost }&.first
60
+ end
61
+
62
+ # Returns the merged cost table: defaults overridden by any settings-defined entries.
63
+ def cost_table
64
+ overrides = settings.fetch(:cost_table, {})
65
+ return DEFAULT_COST_TABLE if overrides.nil? || overrides.empty?
66
+
67
+ merged = DEFAULT_COST_TABLE.dup
68
+ overrides.each do |model, costs|
69
+ entry = costs.transform_keys(&:to_sym)
70
+ merged[model.to_s] = entry
71
+ end
72
+ merged
73
+ end
74
+
75
+ private
76
+
77
+ def settings
78
+ llm = Legion::Settings[:llm]
79
+ return {} unless llm.is_a?(Hash)
80
+
81
+ arb = llm[:arbitrage] || llm['arbitrage'] || {}
82
+ arb.is_a?(Hash) ? arb.transform_keys(&:to_sym) : {}
83
+ rescue StandardError
84
+ {}
85
+ end
86
+
87
+ # Returns models eligible for the given capability tier based on quality floor.
88
+ # The quality floor maps capability tiers to minimum acceptable quality scores (0.0-1.0).
89
+ # Models that are local (cost 0) always qualify for :basic capability.
90
+ def eligible_models(capability:, _quality_floor: 0.7)
91
+ cap = capability.to_sym
92
+
93
+ # Capability tiers determine which models are semantically appropriate.
94
+ # :reasoning requires frontier models; :basic allows cheap/local models.
95
+ # _quality_floor reserved for future scoring integration.
96
+ disqualified_for_reasoning = %w[gpt-4o-mini gemini-2.0-flash llama3]
97
+
98
+ cost_table.keys.reject do |model|
99
+ cap == :reasoning && disqualified_for_reasoning.include?(model)
100
+ end
101
+ end
102
+ end
103
+ end
104
+ end
105
+ end
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'securerandom'
4
+
5
+ module Legion
6
+ module LLM
7
+ module Batch
8
+ class << self
9
+ # Returns true when request batching is enabled in settings.
10
+ def enabled?
11
+ settings.fetch(:enabled, false) == true
12
+ end
13
+
14
+ # Enqueues a request for deferred batch processing.
15
+ #
16
+ # @param messages [Array<Hash>] chat messages array
17
+ # @param model [String] model to use
18
+ # @param provider [Symbol, nil] provider override
19
+ # @param callback [Proc, nil] called with result hash when batch is flushed
20
+ # @param priority [Symbol] :normal or :low (informational only)
21
+ # @param opts [Hash] additional options forwarded to provider
22
+ # @return [String] batch_request_id
23
+ def enqueue(messages:, model:, callback: nil, provider: nil, priority: :normal, **opts)
24
+ request_id = SecureRandom.uuid
25
+
26
+ entry = {
27
+ id: request_id,
28
+ messages: messages,
29
+ model: model,
30
+ provider: provider,
31
+ callback: callback,
32
+ priority: priority,
33
+ opts: opts,
34
+ queued_at: Time.now.utc
35
+ }
36
+
37
+ queue << entry
38
+ Legion::Logging.debug "Legion::LLM::Batch enqueued #{request_id} (queue size: #{queue.size})"
39
+ request_id
40
+ end
41
+
42
+ # Flushes accumulated requests up to max_size.
43
+ # Groups entries by provider+model and invokes callbacks with a stub result.
44
+ # In production this would submit to provider batch APIs; here it logs and returns
45
+ # per-request result hashes for callback delivery.
46
+ #
47
+ # @param max_size [Integer] maximum number of requests to flush in one pass
48
+ # @param max_wait [Integer] only flush entries older than this many seconds (0 = all)
49
+ # @return [Array<Hash>] array of { id:, status:, result: } hashes
50
+ def flush(max_size: nil, max_wait: nil)
51
+ effective_max = max_size || settings.fetch(:max_batch_size, 100)
52
+ effective_wait = max_wait || settings.fetch(:window_seconds, 300)
53
+
54
+ cutoff = Time.now.utc - effective_wait
55
+ to_flush = queue.select { |e| e[:queued_at] <= cutoff }.first(effective_max)
56
+
57
+ return [] if to_flush.empty?
58
+
59
+ to_flush.each { |e| queue.delete(e) }
60
+ Legion::Logging.debug "Legion::LLM::Batch flushing #{to_flush.size} request(s)"
61
+
62
+ groups = to_flush.group_by { |e| [e[:provider], e[:model]] }
63
+ results = []
64
+
65
+ groups.each do |(provider, model), entries|
66
+ entries.each do |entry|
67
+ result = submit_single(entry, provider: provider, model: model)
68
+ entry[:callback]&.call(result)
69
+ results << { id: entry[:id], status: result[:status], result: result }
70
+ end
71
+ end
72
+
73
+ results
74
+ end
75
+
76
+ # Returns the current number of requests in the queue.
77
+ def queue_size
78
+ queue.size
79
+ end
80
+
81
+ # Clears the queue (useful for testing).
82
+ def reset!
83
+ @queue = []
84
+ end
85
+
86
+ private
87
+
88
+ def queue
89
+ @queue ||= []
90
+ end
91
+
92
+ def settings
93
+ llm = Legion::Settings[:llm]
94
+ return {} unless llm.is_a?(Hash)
95
+
96
+ b = llm[:batch] || llm['batch'] || {}
97
+ b.is_a?(Hash) ? b.transform_keys(&:to_sym) : {}
98
+ rescue StandardError
99
+ {}
100
+ end
101
+
102
+ def submit_single(entry, provider:, model:)
103
+ {
104
+ status: :batched,
105
+ model: model,
106
+ provider: provider,
107
+ id: entry[:id],
108
+ response: nil,
109
+ meta: { batched: true, queued_at: entry[:queued_at] }
110
+ }
111
+ end
112
+ end
113
+ end
114
+ end
115
+ end
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module LLM
5
+ module Scheduling
6
+ # Default peak hours in UTC: hours 14-22 inclusive, i.e. 14:00 through 22:59 (roughly 9 AM - 5 PM Central during daylight saving time)
7
+ DEFAULT_PEAK_RANGE = (14..22)
8
+
9
+ # Intents that are eligible for deferral during peak hours.
10
+ DEFAULT_DEFER_INTENTS = %i[batch background maintenance].freeze
11
+
12
+ class << self
13
+ # Returns true when off-peak scheduling is enabled in settings.
14
+ def enabled?
15
+ settings.fetch(:enabled, false) == true
16
+ end
17
+
18
+ # Determines whether a request should be deferred to off-peak hours.
19
+ #
20
+ # @param intent [Symbol, String] the request intent
21
+ # @param urgency [Symbol] :immediate bypasses deferral regardless of settings
22
+ # @return [Boolean]
23
+ def should_defer?(intent: :normal, urgency: :normal)
24
+ return false unless enabled?
25
+ return false if urgency.to_sym == :immediate
26
+
27
+ eligible_for_deferral?(intent.to_sym) && peak_hours?
28
+ end
29
+
30
+ # Returns true if the current UTC hour falls within the configured peak window.
31
+ def peak_hours?
32
+ hour = Time.now.utc.hour
33
+ peak_range.cover?(hour)
34
+ end
35
+
36
+ # Returns the next off-peak time as a Time object (UTC).
37
+ # Off-peak begins at the hour after the peak window ends.
38
+ #
39
+ # @return [Time] next off-peak start time
40
+ def next_off_peak
41
+ now = Time.now.utc
42
+ peak_end = peak_range.last
43
+ max_defer = settings.fetch(:max_defer_hours, 8)
44
+
45
+ next_time = if now.hour < peak_range.first
46
+ # Before peak — off-peak is now
47
+ now
48
+ else
49
+ # During or after peak — next off-peak is at peak_end + 1
50
+ candidate = Time.utc(now.year, now.month, now.day, peak_end + 1, 0, 0)
51
+ candidate += 86_400 if candidate <= now
52
+ candidate
53
+ end
54
+
55
+ # Cap at max_defer_hours from now
56
+ cap = now + (max_defer * 3600)
57
+ [next_time, cap].min
58
+ end
59
+
60
+ private
61
+
62
+ def settings
63
+ llm = Legion::Settings[:llm]
64
+ return {} unless llm.is_a?(Hash)
65
+
66
+ s = llm[:scheduling] || llm['scheduling'] || {}
67
+ s.is_a?(Hash) ? s.transform_keys(&:to_sym) : {}
68
+ rescue StandardError
69
+ {}
70
+ end
71
+
72
+ def peak_range
73
+ raw = settings[:peak_hours_utc]
74
+ return DEFAULT_PEAK_RANGE unless raw.is_a?(String) && raw.include?('-')
75
+
76
+ parts = raw.split('-')
77
+ return DEFAULT_PEAK_RANGE unless parts.size == 2
78
+
79
+ start_h = Integer(parts[0], 10)
80
+ end_h = Integer(parts[1], 10)
81
+ (start_h..end_h)
82
+ rescue ArgumentError
83
+ DEFAULT_PEAK_RANGE
84
+ end
85
+
86
+ def defer_intents
87
+ raw = settings[:defer_intents]
88
+ return DEFAULT_DEFER_INTENTS unless raw.is_a?(Array)
89
+
90
+ raw.map { |i| i.to_s.to_sym }
91
+ end
92
+
93
+ def eligible_for_deferral?(intent)
94
+ defer_intents.include?(intent)
95
+ end
96
+ end
97
+ end
98
+ end
99
+ end
@@ -15,7 +15,10 @@ module Legion
15
15
  discovery: discovery_defaults,
16
16
  gateway: gateway_defaults,
17
17
  daemon: daemon_defaults,
18
- prompt_caching: prompt_caching_defaults
18
+ prompt_caching: prompt_caching_defaults,
19
+ arbitrage: arbitrage_defaults,
20
+ batch: batch_defaults,
21
+ scheduling: scheduling_defaults
19
22
  }
20
23
  end
21
24
 
@@ -81,6 +84,34 @@ module Legion
81
84
  }
82
85
  end
83
86
 
87
+ def self.arbitrage_defaults
88
+ {
89
+ enabled: false,
90
+ prefer_cheapest: true,
91
+ quality_floor: 0.7,
92
+ cost_table_refresh: 86_400,
93
+ cost_table: {}
94
+ }
95
+ end
96
+
97
+ def self.batch_defaults
98
+ {
99
+ enabled: false,
100
+ window_seconds: 300,
101
+ max_batch_size: 100,
102
+ eligible_intents: %w[batch background low_priority]
103
+ }
104
+ end
105
+
106
+ def self.scheduling_defaults
107
+ {
108
+ enabled: false,
109
+ peak_hours_utc: '14-22',
110
+ defer_intents: %w[batch background],
111
+ max_defer_hours: 8
112
+ }
113
+ end
114
+
84
115
  def self.providers
85
116
  {
86
117
  bedrock: {
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module LLM
5
- VERSION = '0.3.13'
5
+ VERSION = '0.3.15'
6
6
  end
7
7
  end
data/lib/legion/llm.rb CHANGED
@@ -12,6 +12,9 @@ require 'legion/llm/hooks'
12
12
  require 'legion/llm/cache'
13
13
  require_relative 'llm/response_cache'
14
14
  require_relative 'llm/daemon_client'
15
+ require_relative 'llm/arbitrage'
16
+ require_relative 'llm/batch'
17
+ require_relative 'llm/scheduling'
15
18
 
16
19
  begin
17
20
  require 'legion/extensions/llm/gateway'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.13
4
+ version: 0.3.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity
@@ -83,16 +83,16 @@ dependencies:
83
83
  name: ruby_llm
84
84
  requirement: !ruby/object:Gem::Requirement
85
85
  requirements:
86
- - - ">="
86
+ - - "~>"
87
87
  - !ruby/object:Gem::Version
88
- version: '1.0'
88
+ version: '1.13'
89
89
  type: :runtime
90
90
  prerelease: false
91
91
  version_requirements: !ruby/object:Gem::Requirement
92
92
  requirements:
93
- - - ">="
93
+ - - "~>"
94
94
  - !ruby/object:Gem::Version
95
- version: '1.0'
95
+ version: '1.13'
96
96
  - !ruby/object:Gem::Dependency
97
97
  name: tzinfo
98
98
  requirement: !ruby/object:Gem::Requirement
@@ -123,6 +123,7 @@ files:
123
123
  - ".rubocop.yml"
124
124
  - CHANGELOG.md
125
125
  - CLAUDE.md
126
+ - CODEOWNERS
126
127
  - Gemfile
127
128
  - LICENSE
128
129
  - README.md
@@ -130,6 +131,8 @@ files:
130
131
  - docs/plans/2026-03-15-ollama-discovery-implementation.md
131
132
  - legion-llm.gemspec
132
133
  - lib/legion/llm.rb
134
+ - lib/legion/llm/arbitrage.rb
135
+ - lib/legion/llm/batch.rb
133
136
  - lib/legion/llm/bedrock_bearer_auth.rb
134
137
  - lib/legion/llm/cache.rb
135
138
  - lib/legion/llm/claude_config_loader.rb
@@ -152,6 +155,7 @@ files:
152
155
  - lib/legion/llm/router/health_tracker.rb
153
156
  - lib/legion/llm/router/resolution.rb
154
157
  - lib/legion/llm/router/rule.rb
158
+ - lib/legion/llm/scheduling.rb
155
159
  - lib/legion/llm/settings.rb
156
160
  - lib/legion/llm/shadow_eval.rb
157
161
  - lib/legion/llm/structured_output.rb