legion-llm 0.5.7 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3c8ab6d42537aec5471648490e91968d41395206396b69f4cf48d73ac48cf217
4
- data.tar.gz: 49ac7e085f62e08b3391e5926e774b920539a1bef03f64b9c7531bc697646ca4
3
+ metadata.gz: 2ce29d10392a53a933f031e6a19d4437ea57ee6f78710946624275c5c4ac083d
4
+ data.tar.gz: b372503d1dd95713e3a8b1471715fb10d05aae541e8a9631e6f35fc59b812bb5
5
5
  SHA512:
6
- metadata.gz: eb1f7f99f9edf09cc108a1d3643c9372ca9e07669dc4f89ff3d2c9d1c5305d1a0e26a8d02316f7edebcb5a9bfe3112a4c8a061f19d2af7bce2ba5649d0e7dccc
7
- data.tar.gz: 22983edc5b03faa7692d9c71d5a73ca8d3eccadbf6730b1fd0e7ff9edfb5f2e0b1ea8c926045d3ad0bd6cc4d5fa96900c021964472076b0fce71a5a8fb128be8
6
+ metadata.gz: 66c5ee8b6f787d69706e32691603b83c895144bc439732810789761680fcb197511a0c0ce31aa210286ef82e279e4d08d44c21b1a7a5cac9c0bd95dfe8573fbf
7
+ data.tar.gz: ca65b36532cb559c9d0b53ffacc2dfdb404a19e7470f53e933aee6dd686fd97e87e904bd89f10b3fdd593fc78622646c5a0cabe87a1631416af207c2e88dcb50
data/CHANGELOG.md CHANGED
@@ -1,8 +1,19 @@
1
1
  # Legion LLM Changelog
2
2
 
3
- ## [Unreleased]
3
+ ## [0.5.8] - 2026-03-25
4
+
5
+ ### Added
6
+ - Wire shadow evaluation sampling into `chat_single` dispatch path (closes #3)
7
+ - ToolRegistry spec coverage: 8 examples covering register, dedup, clear, thread safety (closes #4)
8
+ - Arbitrage as router fallback: `Router.resolve` consults `Arbitrage.cheapest_for` when no rules match (closes #5)
9
+ - Batch thread safety: Mutex around queue, priority-sorted flush, auto-flush via `Concurrent::TimerTask` (closes #6)
10
+ - Scheduling deferral in `chat_direct`: defers to Batch during peak hours when scheduling is enabled (closes #7)
11
+ - `publish_escalation_event` now publishes to `Legion::Events` and AMQP transport (closes #8)
12
+ - Arbitrage `quality_floor` filtering via `QualityChecker.model_score` when available (closes #9)
4
13
 
5
14
  ### Fixed
15
+ - `OffPeak.should_defer?` now checks `Scheduling.enabled?` before returning true (closes #9)
16
+ - Pre-existing ordering-dependent spec failure in `llm_spec.rb` (ToolRegistry bleed)
6
17
  - Fix namespace collision: use `::Data.define` instead of `Data.define` in Pipeline Request and Response to prevent resolution to `Legion::Data`
7
18
 
8
19
  ## [0.5.6] - 2026-03-24
@@ -44,7 +44,7 @@ module Legion
44
44
  return nil unless enabled?
45
45
 
46
46
  quality_floor = settings.fetch(:quality_floor, 0.7)
47
- eligible = eligible_models(capability: capability, _quality_floor: quality_floor)
47
+ eligible = eligible_models(capability: capability, quality_floor: quality_floor)
48
48
 
49
49
  scored = eligible.filter_map do |model|
50
50
  cost = estimated_cost(model: model, input_tokens: input_tokens, output_tokens: output_tokens)
@@ -90,17 +90,21 @@ module Legion
90
90
  # Returns models eligible for the given capability tier based on quality floor.
91
91
  # The quality floor maps capability tiers to minimum acceptable quality scores (0.0-1.0).
92
92
  # Models that are local (cost 0) always qualify for :basic capability.
93
- def eligible_models(capability:, _quality_floor: 0.7)
93
+ def eligible_models(capability:, quality_floor: 0.7)
94
94
  cap = capability.to_sym
95
95
 
96
- # Capability tiers determine which models are semantically appropriate.
97
- # :reasoning requires frontier models; :basic allows cheap/local models.
98
- # _quality_floor reserved for future scoring integration.
99
96
  disqualified_for_reasoning = %w[gpt-4o-mini gemini-2.0-flash llama3]
100
97
 
101
- cost_table.keys.reject do |model|
98
+ models = cost_table.keys.reject do |model|
102
99
  cap == :reasoning && disqualified_for_reasoning.include?(model)
103
100
  end
101
+
102
+ return models unless defined?(Legion::LLM::QualityChecker) && QualityChecker.respond_to?(:model_score)
103
+
104
+ models.select do |model|
105
+ score = QualityChecker.model_score(model)
106
+ score.nil? || score >= quality_floor
107
+ end
104
108
  end
105
109
  end
106
110
  end
@@ -5,27 +5,21 @@ require 'securerandom'
5
5
  module Legion
6
6
  module LLM
7
7
  module Batch
8
+ @mutex = Mutex.new
9
+ @flush_timer = nil
10
+
8
11
  class << self
9
- # Returns true when request batching is enabled in settings.
10
12
  def enabled?
11
13
  settings.fetch(:enabled, false) == true
12
14
  end
13
15
 
14
- # Enqueues a request for deferred batch processing.
15
- #
16
- # @param messages [Array<Hash>] chat messages array
17
- # @param model [String] model to use
18
- # @param provider [Symbol, nil] provider override
19
- # @param callback [Proc, nil] called with result hash when batch is flushed
20
- # @param priority [Symbol] :normal or :low (informational only)
21
- # @param opts [Hash] additional options forwarded to provider
22
- # @return [String] batch_request_id
23
- def enqueue(messages:, model:, callback: nil, provider: nil, priority: :normal, **opts)
16
+ def enqueue(messages: nil, model: nil, message: nil, callback: nil, provider: nil, priority: :normal, **opts)
24
17
  request_id = SecureRandom.uuid
18
+ msgs = messages || (message ? [{ role: 'user', content: message }] : [])
25
19
 
26
20
  entry = {
27
21
  id: request_id,
28
- messages: messages,
22
+ messages: msgs,
29
23
  model: model,
30
24
  provider: provider,
31
25
  callback: callback,
@@ -34,30 +28,28 @@ module Legion
34
28
  queued_at: Time.now.utc
35
29
  }
36
30
 
37
- queue << entry
38
- Legion::Logging.debug "Legion::LLM::Batch enqueued #{request_id} (queue size: #{queue.size})"
31
+ @mutex.synchronize { queue << entry }
32
+ ensure_flush_timer
33
+ Legion::Logging.debug "Legion::LLM::Batch enqueued #{request_id} (queue size: #{queue_size})" if defined?(Legion::Logging)
39
34
  request_id
40
35
  end
41
36
 
42
- # Flushes accumulated requests up to max_size.
43
- # Groups entries by provider+model and invokes callbacks with a stub result.
44
- # In production this would submit to provider batch APIs; here it logs and returns
45
- # per-request result hashes for callback delivery.
46
- #
47
- # @param max_size [Integer] maximum number of requests to flush in one pass
48
- # @param max_wait [Integer] only flush entries older than this many seconds (0 = all)
49
- # @return [Array<Hash>] array of { id:, status:, result: } hashes
50
37
  def flush(max_size: nil, max_wait: nil)
51
38
  effective_max = max_size || settings.fetch(:max_batch_size, 100)
52
39
  effective_wait = max_wait || settings.fetch(:window_seconds, 300)
53
-
54
40
  cutoff = Time.now.utc - effective_wait
55
- to_flush = queue.select { |e| e[:queued_at] <= cutoff }.first(effective_max)
41
+
42
+ to_flush = @mutex.synchronize do
43
+ ready = queue.select { |e| e[:queued_at] <= cutoff }
44
+ .sort_by { |e| priority_rank(e[:priority]) }
45
+ .first(effective_max)
46
+ ready.each { |e| queue.delete(e) }
47
+ ready
48
+ end
56
49
 
57
50
  return [] if to_flush.empty?
58
51
 
59
- to_flush.each { |e| queue.delete(e) }
60
- Legion::Logging.debug "Legion::LLM::Batch flushing #{to_flush.size} request(s)"
52
+ Legion::Logging.debug "Legion::LLM::Batch flushing #{to_flush.size} request(s)" if defined?(Legion::Logging)
61
53
 
62
54
  groups = to_flush.group_by { |e| [e[:provider], e[:model]] }
63
55
  results = []
@@ -73,14 +65,12 @@ module Legion
73
65
  results
74
66
  end
75
67
 
76
- # Returns the current number of requests in the queue.
77
68
  def queue_size
78
- queue.size
69
+ @mutex.synchronize { queue.size }
79
70
  end
80
71
 
81
- # Returns a summary of current batch queue state.
82
72
  def status
83
- entries = queue.dup
73
+ entries = @mutex.synchronize { queue.dup }
84
74
  oldest = entries.min_by { |e| e[:queued_at] }
85
75
  {
86
76
  enabled: enabled?,
@@ -92,9 +82,14 @@ module Legion
92
82
  }
93
83
  end
94
84
 
95
- # Clears the queue (useful for testing).
96
85
  def reset!
97
- @queue = []
86
+ @mutex.synchronize { @queue = [] }
87
+ stop_flush_timer
88
+ end
89
+
90
+ def stop_flush_timer
91
+ @flush_timer&.shutdown if @flush_timer.respond_to?(:shutdown)
92
+ @flush_timer = nil
98
93
  end
99
94
 
100
95
  private
@@ -103,6 +98,30 @@ module Legion
103
98
  @queue ||= []
104
99
  end
105
100
 
101
+ def priority_rank(priority)
102
+ case priority.to_sym
103
+ when :urgent then 0
104
+ when :normal then 1
105
+ when :low then 2
106
+ else 3
107
+ end
108
+ end
109
+
110
+ def ensure_flush_timer
111
+ return if @flush_timer
112
+ return unless defined?(Concurrent::TimerTask)
113
+
114
+ interval = settings.fetch(:window_seconds, 300)
115
+ return if interval <= 0
116
+
117
+ @flush_timer = Concurrent::TimerTask.new(execution_interval: interval) do
118
+ flush(max_wait: 0)
119
+ rescue StandardError => e
120
+ Legion::Logging.warn("Batch auto-flush failed: #{e.message}") if defined?(Legion::Logging)
121
+ end
122
+ @flush_timer.execute
123
+ end
124
+
106
125
  def settings
107
126
  llm = Legion::Settings[:llm]
108
127
  return {} unless llm.is_a?(Hash)
@@ -14,6 +14,7 @@ module Legion
14
14
 
15
15
  def should_defer?(priority: :normal)
16
16
  return false if priority.to_sym == :urgent
17
+ return false unless Scheduling.enabled?
17
18
 
18
19
  peak_hour?
19
20
  end
@@ -38,7 +38,7 @@ module Legion
38
38
  Legion::Logging.debug('Router: no rules matched, resolution is nil')
39
39
  end
40
40
 
41
- resolution
41
+ resolution || arbitrage_fallback(intent)
42
42
  end
43
43
 
44
44
  def resolve_chain(intent: nil, tier: nil, model: nil, provider: nil, max_escalations: nil)
@@ -80,6 +80,27 @@ module Legion
80
80
 
81
81
  private
82
82
 
83
+ def arbitrage_fallback(intent)
84
+ return nil unless defined?(Arbitrage) && Arbitrage.enabled?
85
+
86
+ capability = intent&.dig(:capability) || :moderate
87
+ model = Arbitrage.cheapest_for(capability: capability)
88
+ return nil unless model
89
+
90
+ provider = Arbitrage.cost_table[model] ? infer_provider(model) : nil
91
+ Legion::Logging.debug("Router: arbitrage fallback selected model=#{model}") if defined?(Legion::Logging)
92
+ Resolution.new(tier: :cloud, provider: provider || :bedrock, model: model, rule: 'arbitrage_fallback')
93
+ end
94
+
95
+ def infer_provider(model)
96
+ return :ollama if model.include?('llama')
97
+ return :bedrock if model.start_with?('us.')
98
+ return :openai if model.start_with?('gpt')
99
+ return :google if model.start_with?('gemini')
100
+
101
+ :anthropic if model.start_with?('claude')
102
+ end
103
+
83
104
  def explicit_resolution(tier, provider, model)
84
105
  resolved_provider = provider ? provider.to_sym : default_provider_for_tier(tier)
85
106
  resolved_model = model || default_model_for_tier(tier)
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module LLM
5
- VERSION = '0.5.7'
5
+ VERSION = '0.5.8'
6
6
  end
7
7
  end
data/lib/legion/llm.rb CHANGED
@@ -121,6 +121,10 @@ module Legion
121
121
  end
122
122
  end
123
123
 
124
+ urgency = kwargs.delete(:urgency) { :normal }
125
+ deferred = try_defer(intent: intent, urgency: urgency, model: model, provider: provider, message: message, **kwargs)
126
+ return deferred if deferred
127
+
124
128
  if defined?(Legion::Logging)
125
129
  Legion::Logging.debug "[LLM] chat_direct escalate=#{escalate} message_present=#{!message.nil?} model=#{model} provider=#{provider}"
126
130
  end
@@ -321,9 +325,37 @@ module Legion
321
325
  Legion::Logging.debug '[LLM] chat_single calling session.ask' if defined?(Legion::Logging)
322
326
  response = block ? session.ask(message, &block) : session.ask(message)
323
327
  Legion::Logging.debug "[LLM] chat_single response_class=#{response.class} response_nil=#{response.nil?}" if defined?(Legion::Logging)
328
+
329
+ if response && !block && ShadowEval.enabled?
330
+ msgs = session.respond_to?(:messages) ? session.messages : nil
331
+ maybe_shadow_evaluate(response, msgs, opts[:model])
332
+ end
333
+
324
334
  response
325
335
  end
326
336
 
337
+ def try_defer(intent:, urgency:, model:, provider:, message:, **)
338
+ return nil unless Scheduling.enabled? && Scheduling.should_defer?(intent: intent || :normal, urgency: urgency)
339
+ return nil unless defined?(Batch) && Batch.enabled?
340
+
341
+ entry_id = Batch.enqueue(model: model, provider: provider, message: message, priority: urgency, **)
342
+ { deferred: true, batch_id: entry_id, next_off_peak: Scheduling.next_off_peak.iso8601 }
343
+ end
344
+
345
+ def maybe_shadow_evaluate(response, messages, primary_model)
346
+ return unless ShadowEval.enabled? && ShadowEval.should_sample?
347
+
348
+ Thread.new do
349
+ ShadowEval.evaluate(
350
+ primary_response: { content: response.respond_to?(:content) ? response.content : response.to_s,
351
+ model: primary_model, usage: {} },
352
+ messages: messages
353
+ )
354
+ rescue StandardError => e
355
+ Legion::Logging.debug("shadow evaluation failed: #{e.message}") if defined?(Legion::Logging)
356
+ end
357
+ end
358
+
327
359
  def chat_with_escalation(model:, provider:, intent:, tier:, max_escalations:, quality_check:, message:, **kwargs)
328
360
  chain = Router.resolve_chain(
329
361
  intent: intent, tier: tier, model: model, provider: provider,
@@ -390,9 +422,20 @@ module Legion
390
422
  end
391
423
 
392
424
  def publish_escalation_event(history, final_outcome)
393
- return unless defined?(Legion::Transport)
425
+ payload = {
426
+ outcome: final_outcome,
427
+ attempts: history.size,
428
+ history: history,
429
+ timestamp: Time.now.utc.iso8601
430
+ }
394
431
 
395
- Legion::Logging.debug("Escalation event: #{final_outcome}, #{history.size} attempts") if Legion.const_defined?('Logging')
432
+ Legion::Events.emit('llm.escalation', **payload) if defined?(Legion::Events) && Legion::Events.respond_to?(:emit)
433
+
434
+ Legion::Logging.info("Escalation event: #{final_outcome}, #{history.size} attempts") if defined?(Legion::Logging)
435
+
436
+ if defined?(Legion::Transport) && Legion::Transport.respond_to?(:connected?) && Legion::Transport.connected?
437
+ Transport::Messages::EscalationEvent.new(payload).publish
438
+ end
396
439
  rescue StandardError => e
397
440
  Legion::Logging.warn("publish_escalation_event failed: #{e.message}") if defined?(Legion::Logging)
398
441
  nil
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.7
4
+ version: 0.5.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity