legion-llm 0.5.7 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -1
- data/lib/legion/llm/arbitrage.rb +10 -6
- data/lib/legion/llm/batch.rb +51 -32
- data/lib/legion/llm/off_peak.rb +1 -0
- data/lib/legion/llm/router.rb +22 -1
- data/lib/legion/llm/version.rb +1 -1
- data/lib/legion/llm.rb +45 -2
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2ce29d10392a53a933f031e6a19d4437ea57ee6f78710946624275c5c4ac083d
|
|
4
|
+
data.tar.gz: b372503d1dd95713e3a8b1471715fb10d05aae541e8a9631e6f35fc59b812bb5
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 66c5ee8b6f787d69706e32691603b83c895144bc439732810789761680fcb197511a0c0ce31aa210286ef82e279e4d08d44c21b1a7a5cac9c0bd95dfe8573fbf
|
|
7
|
+
data.tar.gz: ca65b36532cb559c9d0b53ffacc2dfdb404a19e7470f53e933aee6dd686fd97e87e904bd89f10b3fdd593fc78622646c5a0cabe87a1631416af207c2e88dcb50
|
data/CHANGELOG.md
CHANGED
|
@@ -1,8 +1,19 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
-
## [
|
|
3
|
+
## [0.5.8] - 2026-03-25
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- Wire shadow evaluation sampling into `chat_single` dispatch path (closes #3)
|
|
7
|
+
- ToolRegistry spec coverage: 8 examples covering register, dedup, clear, thread safety (closes #4)
|
|
8
|
+
- Arbitrage as router fallback: `Router.resolve` consults `Arbitrage.cheapest_for` when no rules match (closes #5)
|
|
9
|
+
- Batch thread safety: Mutex around queue, priority-sorted flush, auto-flush via `Concurrent::TimerTask` (closes #6)
|
|
10
|
+
- Scheduling deferral in `chat_direct`: defers to Batch during peak hours when scheduling is enabled (closes #7)
|
|
11
|
+
- `publish_escalation_event` now publishes to `Legion::Events` and AMQP transport (closes #8)
|
|
12
|
+
- Arbitrage `quality_floor` filtering via `QualityChecker.model_score` when available (closes #9)
|
|
4
13
|
|
|
5
14
|
### Fixed
|
|
15
|
+
- `OffPeak.should_defer?` now checks `Scheduling.enabled?` before returning true (closes #9)
|
|
16
|
+
- Pre-existing ordering-dependent spec failure in `llm_spec.rb` (ToolRegistry bleed)
|
|
6
17
|
- Fix namespace collision: use `::Data.define` instead of `Data.define` in Pipeline Request and Response to prevent resolution to `Legion::Data`
|
|
7
18
|
|
|
8
19
|
## [0.5.6] - 2026-03-24
|
data/lib/legion/llm/arbitrage.rb
CHANGED
|
@@ -44,7 +44,7 @@ module Legion
|
|
|
44
44
|
return nil unless enabled?
|
|
45
45
|
|
|
46
46
|
quality_floor = settings.fetch(:quality_floor, 0.7)
|
|
47
|
-
eligible = eligible_models(capability: capability, _quality_floor: quality_floor)
|
|
47
|
+
eligible = eligible_models(capability: capability, quality_floor: quality_floor)
|
|
48
48
|
|
|
49
49
|
scored = eligible.filter_map do |model|
|
|
50
50
|
cost = estimated_cost(model: model, input_tokens: input_tokens, output_tokens: output_tokens)
|
|
@@ -90,17 +90,21 @@ module Legion
|
|
|
90
90
|
# Returns models eligible for the given capability tier based on quality floor.
|
|
91
91
|
# The quality floor maps capability tiers to minimum acceptable quality scores (0.0-1.0).
|
|
92
92
|
# Models that are local (cost 0) always qualify for :basic capability.
|
|
93
|
-
def eligible_models(capability:,
|
|
93
|
+
def eligible_models(capability:, quality_floor: 0.7)
|
|
94
94
|
cap = capability.to_sym
|
|
95
95
|
|
|
96
|
-
# Capability tiers determine which models are semantically appropriate.
|
|
97
|
-
# :reasoning requires frontier models; :basic allows cheap/local models.
|
|
98
|
-
# _quality_floor reserved for future scoring integration.
|
|
99
96
|
disqualified_for_reasoning = %w[gpt-4o-mini gemini-2.0-flash llama3]
|
|
100
97
|
|
|
101
|
-
cost_table.keys.reject do |model|
|
|
98
|
+
models = cost_table.keys.reject do |model|
|
|
102
99
|
cap == :reasoning && disqualified_for_reasoning.include?(model)
|
|
103
100
|
end
|
|
101
|
+
|
|
102
|
+
return models unless defined?(Legion::LLM::QualityChecker) && QualityChecker.respond_to?(:model_score)
|
|
103
|
+
|
|
104
|
+
models.select do |model|
|
|
105
|
+
score = QualityChecker.model_score(model)
|
|
106
|
+
score.nil? || score >= quality_floor
|
|
107
|
+
end
|
|
104
108
|
end
|
|
105
109
|
end
|
|
106
110
|
end
|
data/lib/legion/llm/batch.rb
CHANGED
|
@@ -5,27 +5,21 @@ require 'securerandom'
|
|
|
5
5
|
module Legion
|
|
6
6
|
module LLM
|
|
7
7
|
module Batch
|
|
8
|
+
@mutex = Mutex.new
|
|
9
|
+
@flush_timer = nil
|
|
10
|
+
|
|
8
11
|
class << self
|
|
9
|
-
# Returns true when request batching is enabled in settings.
|
|
10
12
|
def enabled?
|
|
11
13
|
settings.fetch(:enabled, false) == true
|
|
12
14
|
end
|
|
13
15
|
|
|
14
|
-
|
|
15
|
-
#
|
|
16
|
-
# @param messages [Array<Hash>] chat messages array
|
|
17
|
-
# @param model [String] model to use
|
|
18
|
-
# @param provider [Symbol, nil] provider override
|
|
19
|
-
# @param callback [Proc, nil] called with result hash when batch is flushed
|
|
20
|
-
# @param priority [Symbol] :normal or :low (informational only)
|
|
21
|
-
# @param opts [Hash] additional options forwarded to provider
|
|
22
|
-
# @return [String] batch_request_id
|
|
23
|
-
def enqueue(messages:, model:, callback: nil, provider: nil, priority: :normal, **opts)
|
|
16
|
+
def enqueue(messages: nil, model: nil, message: nil, callback: nil, provider: nil, priority: :normal, **opts)
|
|
24
17
|
request_id = SecureRandom.uuid
|
|
18
|
+
msgs = messages || (message ? [{ role: 'user', content: message }] : [])
|
|
25
19
|
|
|
26
20
|
entry = {
|
|
27
21
|
id: request_id,
|
|
28
|
-
messages: messages,
|
|
22
|
+
messages: msgs,
|
|
29
23
|
model: model,
|
|
30
24
|
provider: provider,
|
|
31
25
|
callback: callback,
|
|
@@ -34,30 +28,28 @@ module Legion
|
|
|
34
28
|
queued_at: Time.now.utc
|
|
35
29
|
}
|
|
36
30
|
|
|
37
|
-
queue << entry
|
|
38
|
-
|
|
31
|
+
@mutex.synchronize { queue << entry }
|
|
32
|
+
ensure_flush_timer
|
|
33
|
+
Legion::Logging.debug "Legion::LLM::Batch enqueued #{request_id} (queue size: #{queue_size})" if defined?(Legion::Logging)
|
|
39
34
|
request_id
|
|
40
35
|
end
|
|
41
36
|
|
|
42
|
-
# Flushes accumulated requests up to max_size.
|
|
43
|
-
# Groups entries by provider+model and invokes callbacks with a stub result.
|
|
44
|
-
# In production this would submit to provider batch APIs; here it logs and returns
|
|
45
|
-
# per-request result hashes for callback delivery.
|
|
46
|
-
#
|
|
47
|
-
# @param max_size [Integer] maximum number of requests to flush in one pass
|
|
48
|
-
# @param max_wait [Integer] only flush entries older than this many seconds (0 = all)
|
|
49
|
-
# @return [Array<Hash>] array of { id:, status:, result: } hashes
|
|
50
37
|
def flush(max_size: nil, max_wait: nil)
|
|
51
38
|
effective_max = max_size || settings.fetch(:max_batch_size, 100)
|
|
52
39
|
effective_wait = max_wait || settings.fetch(:window_seconds, 300)
|
|
53
|
-
|
|
54
40
|
cutoff = Time.now.utc - effective_wait
|
|
55
|
-
|
|
41
|
+
|
|
42
|
+
to_flush = @mutex.synchronize do
|
|
43
|
+
ready = queue.select { |e| e[:queued_at] <= cutoff }
|
|
44
|
+
.sort_by { |e| priority_rank(e[:priority]) }
|
|
45
|
+
.first(effective_max)
|
|
46
|
+
ready.each { |e| queue.delete(e) }
|
|
47
|
+
ready
|
|
48
|
+
end
|
|
56
49
|
|
|
57
50
|
return [] if to_flush.empty?
|
|
58
51
|
|
|
59
|
-
|
|
60
|
-
Legion::Logging.debug "Legion::LLM::Batch flushing #{to_flush.size} request(s)"
|
|
52
|
+
Legion::Logging.debug "Legion::LLM::Batch flushing #{to_flush.size} request(s)" if defined?(Legion::Logging)
|
|
61
53
|
|
|
62
54
|
groups = to_flush.group_by { |e| [e[:provider], e[:model]] }
|
|
63
55
|
results = []
|
|
@@ -73,14 +65,12 @@ module Legion
|
|
|
73
65
|
results
|
|
74
66
|
end
|
|
75
67
|
|
|
76
|
-
# Returns the current number of requests in the queue.
|
|
77
68
|
def queue_size
|
|
78
|
-
queue.size
|
|
69
|
+
@mutex.synchronize { queue.size }
|
|
79
70
|
end
|
|
80
71
|
|
|
81
|
-
# Returns a summary of current batch queue state.
|
|
82
72
|
def status
|
|
83
|
-
entries = queue.dup
|
|
73
|
+
entries = @mutex.synchronize { queue.dup }
|
|
84
74
|
oldest = entries.min_by { |e| e[:queued_at] }
|
|
85
75
|
{
|
|
86
76
|
enabled: enabled?,
|
|
@@ -92,9 +82,14 @@ module Legion
|
|
|
92
82
|
}
|
|
93
83
|
end
|
|
94
84
|
|
|
95
|
-
# Clears the queue (useful for testing).
|
|
96
85
|
def reset!
|
|
97
|
-
@queue = []
|
|
86
|
+
@mutex.synchronize { @queue = [] }
|
|
87
|
+
stop_flush_timer
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
def stop_flush_timer
|
|
91
|
+
@flush_timer&.shutdown if @flush_timer.respond_to?(:shutdown)
|
|
92
|
+
@flush_timer = nil
|
|
98
93
|
end
|
|
99
94
|
|
|
100
95
|
private
|
|
@@ -103,6 +98,30 @@ module Legion
|
|
|
103
98
|
@queue ||= []
|
|
104
99
|
end
|
|
105
100
|
|
|
101
|
+
def priority_rank(priority)
|
|
102
|
+
case priority.to_sym
|
|
103
|
+
when :urgent then 0
|
|
104
|
+
when :normal then 1
|
|
105
|
+
when :low then 2
|
|
106
|
+
else 3
|
|
107
|
+
end
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
def ensure_flush_timer
|
|
111
|
+
return if @flush_timer
|
|
112
|
+
return unless defined?(Concurrent::TimerTask)
|
|
113
|
+
|
|
114
|
+
interval = settings.fetch(:window_seconds, 300)
|
|
115
|
+
return if interval <= 0
|
|
116
|
+
|
|
117
|
+
@flush_timer = Concurrent::TimerTask.new(execution_interval: interval) do
|
|
118
|
+
flush(max_wait: 0)
|
|
119
|
+
rescue StandardError => e
|
|
120
|
+
Legion::Logging.warn("Batch auto-flush failed: #{e.message}") if defined?(Legion::Logging)
|
|
121
|
+
end
|
|
122
|
+
@flush_timer.execute
|
|
123
|
+
end
|
|
124
|
+
|
|
106
125
|
def settings
|
|
107
126
|
llm = Legion::Settings[:llm]
|
|
108
127
|
return {} unless llm.is_a?(Hash)
|
data/lib/legion/llm/off_peak.rb
CHANGED
data/lib/legion/llm/router.rb
CHANGED
|
@@ -38,7 +38,7 @@ module Legion
|
|
|
38
38
|
Legion::Logging.debug('Router: no rules matched, resolution is nil')
|
|
39
39
|
end
|
|
40
40
|
|
|
41
|
-
resolution
|
|
41
|
+
resolution || arbitrage_fallback(intent)
|
|
42
42
|
end
|
|
43
43
|
|
|
44
44
|
def resolve_chain(intent: nil, tier: nil, model: nil, provider: nil, max_escalations: nil)
|
|
@@ -80,6 +80,27 @@ module Legion
|
|
|
80
80
|
|
|
81
81
|
private
|
|
82
82
|
|
|
83
|
+
def arbitrage_fallback(intent)
|
|
84
|
+
return nil unless defined?(Arbitrage) && Arbitrage.enabled?
|
|
85
|
+
|
|
86
|
+
capability = intent&.dig(:capability) || :moderate
|
|
87
|
+
model = Arbitrage.cheapest_for(capability: capability)
|
|
88
|
+
return nil unless model
|
|
89
|
+
|
|
90
|
+
provider = Arbitrage.cost_table[model] ? infer_provider(model) : nil
|
|
91
|
+
Legion::Logging.debug("Router: arbitrage fallback selected model=#{model}") if defined?(Legion::Logging)
|
|
92
|
+
Resolution.new(tier: :cloud, provider: provider || :bedrock, model: model, rule: 'arbitrage_fallback')
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
def infer_provider(model)
|
|
96
|
+
return :ollama if model.include?('llama')
|
|
97
|
+
return :bedrock if model.start_with?('us.')
|
|
98
|
+
return :openai if model.start_with?('gpt')
|
|
99
|
+
return :google if model.start_with?('gemini')
|
|
100
|
+
|
|
101
|
+
:anthropic if model.start_with?('claude')
|
|
102
|
+
end
|
|
103
|
+
|
|
83
104
|
def explicit_resolution(tier, provider, model)
|
|
84
105
|
resolved_provider = provider ? provider.to_sym : default_provider_for_tier(tier)
|
|
85
106
|
resolved_model = model || default_model_for_tier(tier)
|
data/lib/legion/llm/version.rb
CHANGED
data/lib/legion/llm.rb
CHANGED
|
@@ -121,6 +121,10 @@ module Legion
|
|
|
121
121
|
end
|
|
122
122
|
end
|
|
123
123
|
|
|
124
|
+
urgency = kwargs.delete(:urgency) { :normal }
|
|
125
|
+
deferred = try_defer(intent: intent, urgency: urgency, model: model, provider: provider, message: message, **kwargs)
|
|
126
|
+
return deferred if deferred
|
|
127
|
+
|
|
124
128
|
if defined?(Legion::Logging)
|
|
125
129
|
Legion::Logging.debug "[LLM] chat_direct escalate=#{escalate} message_present=#{!message.nil?} model=#{model} provider=#{provider}"
|
|
126
130
|
end
|
|
@@ -321,9 +325,37 @@ module Legion
|
|
|
321
325
|
Legion::Logging.debug '[LLM] chat_single calling session.ask' if defined?(Legion::Logging)
|
|
322
326
|
response = block ? session.ask(message, &block) : session.ask(message)
|
|
323
327
|
Legion::Logging.debug "[LLM] chat_single response_class=#{response.class} response_nil=#{response.nil?}" if defined?(Legion::Logging)
|
|
328
|
+
|
|
329
|
+
if response && !block && ShadowEval.enabled?
|
|
330
|
+
msgs = session.respond_to?(:messages) ? session.messages : nil
|
|
331
|
+
maybe_shadow_evaluate(response, msgs, opts[:model])
|
|
332
|
+
end
|
|
333
|
+
|
|
324
334
|
response
|
|
325
335
|
end
|
|
326
336
|
|
|
337
|
+
def try_defer(intent:, urgency:, model:, provider:, message:, **)
|
|
338
|
+
return nil unless Scheduling.enabled? && Scheduling.should_defer?(intent: intent || :normal, urgency: urgency)
|
|
339
|
+
return nil unless defined?(Batch) && Batch.enabled?
|
|
340
|
+
|
|
341
|
+
entry_id = Batch.enqueue(model: model, provider: provider, message: message, priority: urgency, **)
|
|
342
|
+
{ deferred: true, batch_id: entry_id, next_off_peak: Scheduling.next_off_peak.iso8601 }
|
|
343
|
+
end
|
|
344
|
+
|
|
345
|
+
def maybe_shadow_evaluate(response, messages, primary_model)
|
|
346
|
+
return unless ShadowEval.enabled? && ShadowEval.should_sample?
|
|
347
|
+
|
|
348
|
+
Thread.new do
|
|
349
|
+
ShadowEval.evaluate(
|
|
350
|
+
primary_response: { content: response.respond_to?(:content) ? response.content : response.to_s,
|
|
351
|
+
model: primary_model, usage: {} },
|
|
352
|
+
messages: messages
|
|
353
|
+
)
|
|
354
|
+
rescue StandardError => e
|
|
355
|
+
Legion::Logging.debug("shadow evaluation failed: #{e.message}") if defined?(Legion::Logging)
|
|
356
|
+
end
|
|
357
|
+
end
|
|
358
|
+
|
|
327
359
|
def chat_with_escalation(model:, provider:, intent:, tier:, max_escalations:, quality_check:, message:, **kwargs)
|
|
328
360
|
chain = Router.resolve_chain(
|
|
329
361
|
intent: intent, tier: tier, model: model, provider: provider,
|
|
@@ -390,9 +422,20 @@ module Legion
|
|
|
390
422
|
end
|
|
391
423
|
|
|
392
424
|
def publish_escalation_event(history, final_outcome)
|
|
393
|
-
|
|
425
|
+
payload = {
|
|
426
|
+
outcome: final_outcome,
|
|
427
|
+
attempts: history.size,
|
|
428
|
+
history: history,
|
|
429
|
+
timestamp: Time.now.utc.iso8601
|
|
430
|
+
}
|
|
394
431
|
|
|
395
|
-
Legion::
|
|
432
|
+
Legion::Events.emit('llm.escalation', **payload) if defined?(Legion::Events) && Legion::Events.respond_to?(:emit)
|
|
433
|
+
|
|
434
|
+
Legion::Logging.info("Escalation event: #{final_outcome}, #{history.size} attempts") if defined?(Legion::Logging)
|
|
435
|
+
|
|
436
|
+
if defined?(Legion::Transport) && Legion::Transport.respond_to?(:connected?) && Legion::Transport.connected?
|
|
437
|
+
Transport::Messages::EscalationEvent.new(payload).publish
|
|
438
|
+
end
|
|
396
439
|
rescue StandardError => e
|
|
397
440
|
Legion::Logging.warn("publish_escalation_event failed: #{e.message}") if defined?(Legion::Logging)
|
|
398
441
|
nil
|