legion-llm 0.9.28 → 0.9.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/lib/legion/llm/api/native/tiers.rb +5 -1
- data/lib/legion/llm/discovery/rule_generator.rb +48 -3
- data/lib/legion/llm/inference/executor.rb +88 -13
- data/lib/legion/llm/inventory.rb +19 -1
- data/lib/legion/llm/router.rb +74 -9
- data/lib/legion/llm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 914f95fd880bbb73c043d16612bbf22fc22569455b721fe9052b5ee4c55e83b3
|
|
4
|
+
data.tar.gz: 0a2f12babcae95cab6c9f2e0ee8b5207c26d998aafcee464d457af7ebea4dc16
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 52a9853e3c2337d19a0e4517d0472143e9aa809dd94363655db76a94ed924041b95b20a1038442053fc8b30f4c035782335a6edc0ad9634cb17e7f6c59c81f4b
|
|
7
|
+
data.tar.gz: bd2f8b50e52e0653cedcb44c0c18105c212151b64baab5dd62248a456e917bdae8e8347d83d7c8c4dd535da354f00795d938643e94e95ec03657c03d2bfa33a2
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,19 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.9.29] - 2026-05-16
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- Routing: generated discovery rules now expose `:tools` capability rules for tool-capable models and normalize provider aliases such as `function_calling`/`functions`.
|
|
7
|
+
|
|
8
|
+
### Changed
|
|
9
|
+
- Routing: automatic routing honors top-level `llm.tier_order` before routing-specific tier priority settings.
|
|
10
|
+
- Routing: stream requests with injected native tools now require both `streaming` and `tools` model capabilities before selecting a target.
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
- Router: required model capabilities filter out non-tool-capable candidates instead of selecting a local model that later rejects tool payloads.
|
|
14
|
+
- Executor: streaming provider calls now use the escalation chain, so provider errors like "does not support tools" can move to the next routed model.
|
|
15
|
+
- Executor: synthetic routing requirements no longer make model-only or explicit-provider requests bypass provider inference or registry defaults.
|
|
16
|
+
|
|
3
17
|
## [0.9.28] - 2026-05-15
|
|
4
18
|
|
|
5
19
|
### Added
|
data/lib/legion/llm/api/native/tiers.rb
CHANGED
|
@@ -160,8 +160,12 @@ module Legion
|
|
|
160
160
|
end
|
|
161
161
|
|
|
162
162
|
def self.tier_priority
|
|
163
|
+
return Legion::LLM::Router.tier_priority if defined?(Legion::LLM::Router)
|
|
164
|
+
|
|
163
165
|
routing_config = Legion::LLM::Settings.value(:routing) || {}
|
|
164
|
-
|
|
166
|
+
top_level = Legion::LLM::Settings.value(:tier_order, default: nil)
|
|
167
|
+
Array(top_level || routing_config[:tier_order] || routing_config[:tier_priority] ||
|
|
168
|
+
%w[local direct fleet openai_compat cloud frontier])
|
|
165
169
|
end
|
|
166
170
|
|
|
167
171
|
def self.privacy_mode?
|
data/lib/legion/llm/discovery/rule_generator.rb
CHANGED
|
@@ -27,6 +27,14 @@ module Legion
|
|
|
27
27
|
}.freeze
|
|
28
28
|
|
|
29
29
|
DEFAULT_TIER_PRIORITY = %i[local direct fleet openai_compat cloud frontier].freeze
|
|
30
|
+
CAPABILITY_ALIASES = {
|
|
31
|
+
function_calling: :tools,
|
|
32
|
+
functions: :tools,
|
|
33
|
+
tool: :tools,
|
|
34
|
+
tool_use: :tools,
|
|
35
|
+
stream: :streaming,
|
|
36
|
+
stream_chat: :streaming
|
|
37
|
+
}.freeze
|
|
30
38
|
|
|
31
39
|
module_function
|
|
32
40
|
|
|
@@ -52,7 +60,10 @@ module Legion
|
|
|
52
60
|
capability = embedding_model?(model_data) ? :embed : :chat
|
|
53
61
|
priority = tier_weight(model_tier) - order
|
|
54
62
|
rules << build_rule(provider, instance_id, model_data, capability, model_tier, priority)
|
|
55
|
-
|
|
63
|
+
if capability == :chat
|
|
64
|
+
rules << build_rule(provider, instance_id, model_data, :stream, model_tier, priority) if supports_streaming?(model_data)
|
|
65
|
+
rules << build_rule(provider, instance_id, model_data, :tools, model_tier, priority) if supports_tools?(model_data)
|
|
66
|
+
end
|
|
56
67
|
order += 1
|
|
57
68
|
end
|
|
58
69
|
end
|
|
@@ -125,11 +136,39 @@ module Legion
|
|
|
125
136
|
return nil unless model_data.is_a?(Hash)
|
|
126
137
|
|
|
127
138
|
caps = model_data[:capabilities] || model_data['capabilities']
|
|
128
|
-
|
|
139
|
+
normalized = normalize_capabilities(caps)
|
|
140
|
+
return normalized if normalized.any?
|
|
129
141
|
|
|
130
142
|
nil
|
|
131
143
|
end
|
|
132
144
|
|
|
145
|
+
def supports_streaming?(model_data)
|
|
146
|
+
capabilities = extract_capabilities(model_data)
|
|
147
|
+
return true if capabilities.nil?
|
|
148
|
+
|
|
149
|
+
capabilities.include?(:streaming)
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
def supports_tools?(model_data)
|
|
153
|
+
capabilities = extract_capabilities(model_data)
|
|
154
|
+
return false if capabilities.nil?
|
|
155
|
+
|
|
156
|
+
capabilities.include?(:tools)
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
def normalize_capabilities(capabilities)
|
|
160
|
+
Array(capabilities).compact.each_with_object([]) do |capability, normalized|
|
|
161
|
+
next unless capability.respond_to?(:to_s)
|
|
162
|
+
|
|
163
|
+
capability_sym = capability.to_s.downcase.strip.to_sym
|
|
164
|
+
next if capability_sym.to_s.empty?
|
|
165
|
+
|
|
166
|
+
normalized << capability_sym
|
|
167
|
+
alias_sym = CAPABILITY_ALIASES[capability_sym]
|
|
168
|
+
normalized << alias_sym if alias_sym
|
|
169
|
+
end.uniq
|
|
170
|
+
end
|
|
171
|
+
|
|
133
172
|
def extract_field(model_data, field)
|
|
134
173
|
return nil unless model_data.is_a?(Hash)
|
|
135
174
|
|
|
@@ -145,7 +184,9 @@ module Legion
|
|
|
145
184
|
end
|
|
146
185
|
|
|
147
186
|
def tier_priority
|
|
148
|
-
configured = Legion::LLM::Settings.value(:
|
|
187
|
+
configured = Legion::LLM::Settings.value(:tier_order, default: nil)
|
|
188
|
+
configured = Legion::LLM::Settings.value(:routing, :tier_order, default: nil) if blank_array?(configured)
|
|
189
|
+
configured = Legion::LLM::Settings.value(:routing, :tier_priority, default: DEFAULT_TIER_PRIORITY) if blank_array?(configured)
|
|
149
190
|
normalized = Array(configured).filter_map do |tier|
|
|
150
191
|
tier.to_sym if tier.respond_to?(:to_sym)
|
|
151
192
|
end
|
|
@@ -156,6 +197,10 @@ module Legion
|
|
|
156
197
|
DEFAULT_TIER_PRIORITY
|
|
157
198
|
end
|
|
158
199
|
|
|
200
|
+
def blank_array?(value)
|
|
201
|
+
Array(value).empty?
|
|
202
|
+
end
|
|
203
|
+
|
|
159
204
|
def extension_providers
|
|
160
205
|
ext = Legion::Settings[:extensions]
|
|
161
206
|
return ext[:llm] if ext.is_a?(Hash) && ext[:llm].is_a?(Hash)
|
data/lib/legion/llm/inference/executor.rb
CHANGED
|
@@ -376,6 +376,7 @@ module Legion
|
|
|
376
376
|
|
|
377
377
|
def routing_request_state
|
|
378
378
|
routing_explicit = @request.extra[:routing_explicit]
|
|
379
|
+
request_intent = @request.extra[:intent]
|
|
379
380
|
instance = @request.routing[:instance] || @request.routing[:instance_id] || @request.routing[:provider_instance]
|
|
380
381
|
tier = @request.extra[:tier]
|
|
381
382
|
{
|
|
@@ -385,7 +386,8 @@ module Legion
|
|
|
385
386
|
offering_id: @request.routing[:offering_id] || @request.routing[:id],
|
|
386
387
|
offering_metadata: normalize_offering_metadata(@request.routing[:offering_metadata] ||
|
|
387
388
|
@request.routing[:offering]),
|
|
388
|
-
intent:
|
|
389
|
+
intent: routing_intent_for_request(request_intent),
|
|
390
|
+
intent_explicit: routing_intent_present?(request_intent),
|
|
389
391
|
tier: tier,
|
|
390
392
|
auto_route: @request.extra[:auto_route],
|
|
391
393
|
provider_explicit: routing_field_explicit?(routing_explicit, :provider, @request.routing[:provider]),
|
|
@@ -394,6 +396,65 @@ module Legion
|
|
|
394
396
|
}
|
|
395
397
|
end
|
|
396
398
|
|
|
399
|
+
def routing_intent_present?(intent)
|
|
400
|
+
intent.is_a?(Hash) && intent.any?
|
|
401
|
+
end
|
|
402
|
+
|
|
403
|
+
def routing_intent_for_request(intent)
|
|
404
|
+
normalized = if intent.is_a?(Hash)
|
|
405
|
+
intent.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
|
|
406
|
+
else
|
|
407
|
+
{}
|
|
408
|
+
end
|
|
409
|
+
required = normalize_required_capabilities(
|
|
410
|
+
normalized.delete(:required_capabilities) || normalized.delete(:requires)
|
|
411
|
+
)
|
|
412
|
+
|
|
413
|
+
if @request.stream == true
|
|
414
|
+
normalized[:capability] = :stream if stream_routable_capability?(normalized[:capability])
|
|
415
|
+
required << :streaming
|
|
416
|
+
end
|
|
417
|
+
|
|
418
|
+
required << :tools if native_tools_requested_for_routing?
|
|
419
|
+
normalized[:required_capabilities] = required.uniq if required.any?
|
|
420
|
+
normalized
|
|
421
|
+
end
|
|
422
|
+
|
|
423
|
+
def stream_routable_capability?(capability)
|
|
424
|
+
capability.nil? || %i[chat completion stream].include?(capability.to_s.downcase.to_sym)
|
|
425
|
+
end
|
|
426
|
+
|
|
427
|
+
def native_tools_requested_for_routing?
|
|
428
|
+
Array(@request.tools).any? ||
|
|
429
|
+
requested_deferred_tool_names.any? ||
|
|
430
|
+
@triggered_tools.any? ||
|
|
431
|
+
Tools::Special.pinned_definitions.any?
|
|
432
|
+
rescue StandardError => e
|
|
433
|
+
handle_exception(e, level: :warn, handled: true, operation: 'llm.pipeline.routing_tools_required')
|
|
434
|
+
false
|
|
435
|
+
end
|
|
436
|
+
|
|
437
|
+
def normalize_required_capabilities(capabilities)
|
|
438
|
+
aliases = {
|
|
439
|
+
function_calling: :tools,
|
|
440
|
+
functions: :tools,
|
|
441
|
+
tool: :tools,
|
|
442
|
+
tool_use: :tools,
|
|
443
|
+
stream: :streaming,
|
|
444
|
+
stream_chat: :streaming
|
|
445
|
+
}
|
|
446
|
+
Array(capabilities).compact.each_with_object([]) do |capability, normalized|
|
|
447
|
+
next unless capability.respond_to?(:to_s)
|
|
448
|
+
|
|
449
|
+
capability_sym = capability.to_s.downcase.strip.to_sym
|
|
450
|
+
next if capability_sym.to_s.empty?
|
|
451
|
+
|
|
452
|
+
normalized << capability_sym
|
|
453
|
+
alias_sym = aliases[capability_sym]
|
|
454
|
+
normalized << alias_sym if alias_sym
|
|
455
|
+
end.uniq
|
|
456
|
+
end
|
|
457
|
+
|
|
397
458
|
def apply_proactive_tier_assignment(state)
|
|
398
459
|
# Forced assignments carry security/privacy constraints and override
|
|
399
460
|
# caller-supplied tier/intent. Advisory assignments only fill blanks.
|
|
@@ -416,7 +477,7 @@ module Legion
|
|
|
416
477
|
|
|
417
478
|
explicit_route = state[:provider_explicit] || state[:instance_explicit] || state[:tier_explicit]
|
|
418
479
|
auto_route = state[:auto_route] == true
|
|
419
|
-
intent_route = state[:intent] && Router.routing_enabled?
|
|
480
|
+
intent_route = state[:intent_explicit] && state[:intent] && Router.routing_enabled?
|
|
420
481
|
return state unless explicit_route || auto_route || intent_route
|
|
421
482
|
|
|
422
483
|
resolution = routing_resolution_for(state)
|
|
@@ -426,7 +487,7 @@ module Legion
|
|
|
426
487
|
end
|
|
427
488
|
|
|
428
489
|
def routing_resolution_for(state)
|
|
429
|
-
if state[:auto_route] == true || (state[:intent] && pipeline_escalation_enabled?)
|
|
490
|
+
if state[:auto_route] == true || (state[:intent_explicit] && state[:intent] && pipeline_escalation_enabled?)
|
|
430
491
|
@escalation_chain = Router.resolve_chain(
|
|
431
492
|
intent: state[:intent],
|
|
432
493
|
tier: state[:tier],
|
|
@@ -527,13 +588,14 @@ module Legion
|
|
|
527
588
|
end
|
|
528
589
|
end
|
|
529
590
|
|
|
530
|
-
def run_provider_call_with_escalation
|
|
591
|
+
def run_provider_call_with_escalation(stream_block: nil)
|
|
531
592
|
@escalation_chain ||= build_default_escalation_chain
|
|
532
593
|
chain = @escalation_chain
|
|
533
594
|
threshold = pipeline_escalation_quality_threshold
|
|
534
595
|
quality_check = @request.extra[:quality_check]
|
|
535
596
|
succeeded = false
|
|
536
597
|
tried = []
|
|
598
|
+
@last_escalation_error = nil
|
|
537
599
|
log.debug "[llm][executor] action=escalation.enter chain_size=#{chain.size} threshold=#{threshold}"
|
|
538
600
|
|
|
539
601
|
primary_tier = @escalation_chain.primary&.tier
|
|
@@ -541,10 +603,12 @@ module Legion
|
|
|
541
603
|
chain.each do |resolution|
|
|
542
604
|
next if tried.any? { |t| t[:provider] == resolution.provider && t[:instance] == resolution.instance && t[:model] == resolution.model }
|
|
543
605
|
|
|
544
|
-
succeeded = run_escalation_resolution(resolution, threshold, quality_check, tried, primary_tier)
|
|
606
|
+
succeeded = run_escalation_resolution(resolution, threshold, quality_check, tried, primary_tier,
|
|
607
|
+
stream_block: stream_block)
|
|
545
608
|
break if succeeded
|
|
546
609
|
end
|
|
547
610
|
return if succeeded
|
|
611
|
+
raise @last_escalation_error if chain.size <= 1 && @last_escalation_error
|
|
548
612
|
|
|
549
613
|
emit_error_audit(
|
|
550
614
|
EscalationExhausted.new("All #{@escalation_history.size} attempts failed"),
|
|
@@ -553,7 +617,7 @@ module Legion
|
|
|
553
617
|
raise EscalationExhausted, "All #{@escalation_history.size} escalation attempts failed"
|
|
554
618
|
end
|
|
555
619
|
|
|
556
|
-
def run_escalation_resolution(resolution, threshold, quality_check, tried, primary_tier)
|
|
620
|
+
def run_escalation_resolution(resolution, threshold, quality_check, tried, primary_tier, stream_block: nil)
|
|
557
621
|
move_type = if tried.empty?
|
|
558
622
|
:primary
|
|
559
623
|
elsif resolution.tier == primary_tier
|
|
@@ -570,7 +634,7 @@ module Legion
|
|
|
570
634
|
@resolved_tier = resolution.tier
|
|
571
635
|
@resolved_offering_id = resolution.offering_id
|
|
572
636
|
@resolved_offering_metadata = resolution.offering_metadata
|
|
573
|
-
succeeded = attempt_escalation(resolution, threshold, quality_check, start_time)
|
|
637
|
+
succeeded = attempt_escalation(resolution, threshold, quality_check, start_time, stream_block: stream_block)
|
|
574
638
|
tried << { provider: resolution.provider, instance: resolution.instance, model: resolution.model } unless succeeded
|
|
575
639
|
succeeded
|
|
576
640
|
rescue Legion::LLM::AuthError, Legion::LLM::PrivacyModeError => e
|
|
@@ -605,14 +669,19 @@ module Legion
|
|
|
605
669
|
false
|
|
606
670
|
end
|
|
607
671
|
|
|
608
|
-
def attempt_escalation(resolution, threshold, quality_check, start_time)
|
|
672
|
+
def attempt_escalation(resolution, threshold, quality_check, start_time, stream_block: nil)
|
|
609
673
|
@current_escalation_context = {
|
|
610
674
|
attempt: @escalation_history.size + 1,
|
|
611
675
|
max_attempts: @escalation_chain&.max_attempts
|
|
612
676
|
}.compact
|
|
613
|
-
|
|
677
|
+
if stream_block
|
|
678
|
+
execute_provider_request_stream(&stream_block)
|
|
679
|
+
result = Quality::Checker::QualityResult.new(passed: true, failures: [])
|
|
680
|
+
else
|
|
681
|
+
execute_provider_request
|
|
682
|
+
result = Quality::Checker.check(@raw_response, quality_threshold: threshold, quality_check: quality_check)
|
|
683
|
+
end
|
|
614
684
|
duration_ms = ((Time.now - start_time) * 1000).round
|
|
615
|
-
result = Quality::Checker.check(@raw_response, quality_threshold: threshold, quality_check: quality_check)
|
|
616
685
|
outcome = result.passed ? :success : :quality_failure
|
|
617
686
|
@timeline.record(
|
|
618
687
|
category: :provider, key: 'escalation:attempt', direction: :internal,
|
|
@@ -649,6 +718,7 @@ module Legion
|
|
|
649
718
|
end
|
|
650
719
|
|
|
651
720
|
def record_escalation_failure(err, resolution, start_time, outcome:, operation:, handled: false)
|
|
721
|
+
@last_escalation_error = err
|
|
652
722
|
duration_ms = ((Time.now - start_time) * 1000).round
|
|
653
723
|
handle_exception(err, level: :warn, handled: handled, operation: operation,
|
|
654
724
|
provider: resolution.provider, model: resolution.model, duration_ms: duration_ms)
|
|
@@ -689,7 +759,7 @@ module Legion
|
|
|
689
759
|
end
|
|
690
760
|
|
|
691
761
|
def build_fallback_resolutions(exclude_provider: nil, exclude_instance: nil, primary_tier: nil)
|
|
692
|
-
tier_rank = Router
|
|
762
|
+
tier_rank = Router.tier_rank
|
|
693
763
|
primary_rank = primary_tier ? (tier_rank[primary_tier.to_sym] || 99) : 99
|
|
694
764
|
|
|
695
765
|
candidates = Call::Registry.all_instances.filter_map do |entry|
|
|
@@ -1168,10 +1238,15 @@ module Legion
|
|
|
1168
1238
|
|
|
1169
1239
|
private :async_post_enabled?
|
|
1170
1240
|
|
|
1171
|
-
def step_provider_call_stream(&)
|
|
1241
|
+
def step_provider_call_stream(&block)
|
|
1242
|
+
if pipeline_escalation_enabled?
|
|
1243
|
+
run_provider_call_with_escalation(stream_block: block)
|
|
1244
|
+
return
|
|
1245
|
+
end
|
|
1246
|
+
|
|
1172
1247
|
providers_tried = []
|
|
1173
1248
|
begin
|
|
1174
|
-
execute_provider_request_stream(&)
|
|
1249
|
+
execute_provider_request_stream(&block)
|
|
1175
1250
|
rescue Legion::LLM::AuthError, Faraday::UnauthorizedError, Faraday::ForbiddenError => e
|
|
1176
1251
|
try_fallback_or_raise(e, providers_tried, operation: 'provider_call_stream.auth',
|
|
1177
1252
|
reason: 'auth_failed', error_class: Legion::LLM::AuthError)
|
data/lib/legion/llm/inventory.rb
CHANGED
|
@@ -35,6 +35,15 @@ module Legion
|
|
|
35
35
|
chat: %i[chat completion tools json_schema]
|
|
36
36
|
}.freeze
|
|
37
37
|
|
|
38
|
+
CAPABILITY_ALIASES = {
|
|
39
|
+
function_calling: :tools,
|
|
40
|
+
functions: :tools,
|
|
41
|
+
tool: :tools,
|
|
42
|
+
tool_use: :tools,
|
|
43
|
+
stream: :streaming,
|
|
44
|
+
stream_chat: :streaming
|
|
45
|
+
}.freeze
|
|
46
|
+
|
|
38
47
|
class << self
|
|
39
48
|
def offerings(filters = {})
|
|
40
49
|
log.debug "[llm][inventory] action=offerings.enter filters=#{filters.keys}"
|
|
@@ -216,7 +225,16 @@ module Legion
|
|
|
216
225
|
|
|
217
226
|
def normalize_capabilities(capabilities, type)
|
|
218
227
|
raw = capabilities || DEFAULT_CAPABILITIES.fetch(type, DEFAULT_CAPABILITIES[:inference])
|
|
219
|
-
Array(raw).
|
|
228
|
+
Array(raw).compact.each_with_object([]) do |capability, normalized|
|
|
229
|
+
next unless capability.respond_to?(:to_s)
|
|
230
|
+
|
|
231
|
+
capability_sym = capability.to_s.downcase.strip.to_sym
|
|
232
|
+
next if capability_sym.to_s.empty?
|
|
233
|
+
|
|
234
|
+
normalized << capability_sym
|
|
235
|
+
alias_sym = CAPABILITY_ALIASES[capability_sym]
|
|
236
|
+
normalized << alias_sym if alias_sym
|
|
237
|
+
end.uniq.map(&:to_s).sort
|
|
220
238
|
end
|
|
221
239
|
|
|
222
240
|
def normalize_type(value)
|
data/lib/legion/llm/router.rb
CHANGED
|
@@ -19,6 +19,14 @@ module Legion
|
|
|
19
19
|
PROVIDER_ORDER = %i[ollama vllm bedrock azure gemini anthropic openai].freeze
|
|
20
20
|
TIER_EXTERNAL = Set[:cloud, :frontier, :openai_compat].freeze
|
|
21
21
|
TIER_RANK = { local: 0, direct: 1, fleet: 2, openai_compat: 3, cloud: 4, frontier: 5 }.freeze
|
|
22
|
+
CAPABILITY_ALIASES = {
|
|
23
|
+
function_calling: :tools,
|
|
24
|
+
functions: :tools,
|
|
25
|
+
tool: :tools,
|
|
26
|
+
tool_use: :tools,
|
|
27
|
+
stream: :streaming,
|
|
28
|
+
stream_chat: :streaming
|
|
29
|
+
}.freeze
|
|
22
30
|
|
|
23
31
|
OLLAMA_MODEL_PATTERN = %r{[:/]}
|
|
24
32
|
|
|
@@ -120,6 +128,22 @@ module Legion
|
|
|
120
128
|
@auto_rules_populated = false
|
|
121
129
|
end
|
|
122
130
|
|
|
131
|
+
def tier_priority
|
|
132
|
+
configured = Legion::LLM::Settings.value(:tier_order, default: nil)
|
|
133
|
+
configured = routing_settings[:tier_order] if configured.nil? || Array(configured).empty?
|
|
134
|
+
configured = routing_settings[:tier_priority] if configured.nil? || Array(configured).empty?
|
|
135
|
+
normalized = Array(configured).filter_map { |tier| tier.to_sym if tier.respond_to?(:to_sym) }
|
|
136
|
+
normalized = TIER_RANK.keys if normalized.empty?
|
|
137
|
+
(normalized + TIER_RANK.keys).uniq
|
|
138
|
+
rescue StandardError => e
|
|
139
|
+
handle_exception(e, level: :warn, handled: true, operation: 'router.tier_priority')
|
|
140
|
+
TIER_RANK.keys
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
def tier_rank
|
|
144
|
+
tier_priority.each_with_index.to_h
|
|
145
|
+
end
|
|
146
|
+
|
|
123
147
|
# Check whether a tier can be used right now.
|
|
124
148
|
# :local — always available
|
|
125
149
|
# :direct — always available (remote self-hosted instances)
|
|
@@ -227,16 +251,19 @@ module Legion
|
|
|
227
251
|
# 3. Filter by schedule
|
|
228
252
|
scheduled = matched.select(&:within_schedule?)
|
|
229
253
|
|
|
230
|
-
# 4. Reject rules
|
|
231
|
-
|
|
254
|
+
# 4. Reject rules that cannot satisfy required model capabilities
|
|
255
|
+
capable = scheduled.select { |r| satisfies_required_capabilities?(r, intent) }
|
|
232
256
|
|
|
233
|
-
#
|
|
257
|
+
# 5. Reject rules excluded by active constraints
|
|
258
|
+
unconstrained = capable.reject { |r| excluded_by_constraint?(r, constraints) }
|
|
259
|
+
|
|
260
|
+
# 5.5 Reject Ollama rules where model is not pulled or doesn't fit
|
|
234
261
|
discovered = unconstrained.reject { |r| excluded_by_discovery?(r) }
|
|
235
262
|
|
|
236
|
-
#
|
|
263
|
+
# 5.55 Reject local-tier rules where model exceeds available memory
|
|
237
264
|
memory_checked = discovered.reject { |r| excluded_by_memory?(r) }
|
|
238
265
|
|
|
239
|
-
#
|
|
266
|
+
# 5.6 Reject rules matching caller-provided exclude list
|
|
240
267
|
normalized_exclude = exclude.is_a?(Hash) ? exclude : {}
|
|
241
268
|
not_excluded = if normalized_exclude.empty?
|
|
242
269
|
memory_checked
|
|
@@ -244,10 +271,10 @@ module Legion
|
|
|
244
271
|
memory_checked.reject { |r| excluded_by_caller?(r, normalized_exclude) }
|
|
245
272
|
end
|
|
246
273
|
|
|
247
|
-
#
|
|
274
|
+
# 5.7 Reject rules for models denied by health tracker
|
|
248
275
|
not_denied = not_excluded.reject { |r| excluded_by_denial?(r) }
|
|
249
276
|
|
|
250
|
-
#
|
|
277
|
+
# 6. Filter by tier availability
|
|
251
278
|
final = not_denied.select { |r| tier_available?(r.target[:tier] || r.target['tier']) }
|
|
252
279
|
|
|
253
280
|
log.debug("Router: #{final.size} candidates after filtering (started with #{rules.size})")
|
|
@@ -255,6 +282,37 @@ module Legion
|
|
|
255
282
|
final
|
|
256
283
|
end
|
|
257
284
|
|
|
285
|
+
def satisfies_required_capabilities?(rule, intent)
|
|
286
|
+
required = required_capabilities(intent)
|
|
287
|
+
return true if required.empty?
|
|
288
|
+
|
|
289
|
+
rule_capabilities = normalize_capabilities(rule.target[:model_capabilities] || rule.target['model_capabilities'] ||
|
|
290
|
+
rule.target[:capabilities] || rule.target['capabilities'])
|
|
291
|
+
return false if rule_capabilities.empty?
|
|
292
|
+
|
|
293
|
+
required.all? { |capability| rule_capabilities.include?(capability) }
|
|
294
|
+
end
|
|
295
|
+
|
|
296
|
+
def required_capabilities(intent)
|
|
297
|
+
return [] unless intent.is_a?(Hash)
|
|
298
|
+
|
|
299
|
+
normalized = intent.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
|
|
300
|
+
normalize_capabilities(normalized[:required_capabilities] || normalized[:requires])
|
|
301
|
+
end
|
|
302
|
+
|
|
303
|
+
def normalize_capabilities(capabilities)
|
|
304
|
+
Array(capabilities).compact.each_with_object([]) do |capability, normalized|
|
|
305
|
+
next unless capability.respond_to?(:to_s)
|
|
306
|
+
|
|
307
|
+
capability_sym = capability.to_s.downcase.strip.to_sym
|
|
308
|
+
next if capability_sym.to_s.empty?
|
|
309
|
+
|
|
310
|
+
normalized << capability_sym
|
|
311
|
+
alias_sym = CAPABILITY_ALIASES[capability_sym]
|
|
312
|
+
normalized << alias_sym if alias_sym
|
|
313
|
+
end.uniq
|
|
314
|
+
end
|
|
315
|
+
|
|
258
316
|
def excluded_by_constraint?(rule, constraints)
|
|
259
317
|
return false if constraints.empty?
|
|
260
318
|
|
|
@@ -474,8 +532,15 @@ module Legion
|
|
|
474
532
|
seen[pname] ||= entry
|
|
475
533
|
end
|
|
476
534
|
|
|
477
|
-
# Build resolutions
|
|
478
|
-
PROVIDER_ORDER.
|
|
535
|
+
# Build resolutions ordered by configured tier priority, preserving provider preference inside a tier.
|
|
536
|
+
provider_index = PROVIDER_ORDER.each_with_index.to_h
|
|
537
|
+
sorted_entries = seen.values.sort_by do |entry|
|
|
538
|
+
tier = registry_tier(entry[:provider], entry[:metadata])
|
|
539
|
+
[tier_rank.fetch(tier, 99), provider_index.fetch(entry[:provider], PROVIDER_ORDER.size)]
|
|
540
|
+
end
|
|
541
|
+
|
|
542
|
+
sorted_entries.filter_map do |entry|
|
|
543
|
+
pname = entry[:provider]
|
|
479
544
|
entry = seen[pname]
|
|
480
545
|
next unless entry
|
|
481
546
|
|
data/lib/legion/llm/version.rb
CHANGED