legion-llm 0.9.28 → 0.9.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +19 -0
- data/lib/legion/llm/api/anthropic/messages.rb +1 -1
- data/lib/legion/llm/api/native/inference.rb +5 -1
- data/lib/legion/llm/api/native/tiers.rb +5 -1
- data/lib/legion/llm/api/openai/chat_completions.rb +1 -1
- data/lib/legion/llm/discovery/rule_generator.rb +48 -3
- data/lib/legion/llm/inference/executor.rb +113 -13
- data/lib/legion/llm/inventory.rb +19 -1
- data/lib/legion/llm/router.rb +74 -9
- data/lib/legion/llm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 221d8482faa90a3fa8e4bbefe90999838a56b96bc34d6506956f6b8ac90ab290
|
|
4
|
+
data.tar.gz: a3c3ce4cde046e8cd1af18874e04da1698d4527549fc70df274d14c0830f3803
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: fd2fdd7bce06cd6efa9a8b583072b947f14ffdaa5a1898b65f2027da8693c79bdd56b3553d04f6f6329f2f3a9d71c0699947f371aa5715708fd538a144819363
|
|
7
|
+
data.tar.gz: 7c3c36ca8b7db0075b18ea656e3c13c9235d873362d8febbe663c96eb07a1fa562a16d754f1bfed42d909698b466c303d7395e79caa0093ad1d1008c64389f69
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,24 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.9.30] - 2026-05-16
|
|
4
|
+
|
|
5
|
+
### Fixed
|
|
6
|
+
- Tools: API-submitted client tools are no longer advertised to providers unless callers explicitly opt into client-tool passthrough, preventing incomplete tool calls from ending streaming responses.
|
|
7
|
+
|
|
8
|
+
## [0.9.29] - 2026-05-16
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- Routing: generated discovery rules now expose `:tools` capability rules for tool-capable models and normalize provider aliases such as `function_calling`/`functions`.
|
|
12
|
+
|
|
13
|
+
### Changed
|
|
14
|
+
- Routing: automatic routing honors top-level `llm.tier_order` before routing-specific tier priority settings.
|
|
15
|
+
- Routing: stream requests with injected native tools now require both `streaming` and `tools` model capabilities before selecting a target.
|
|
16
|
+
|
|
17
|
+
### Fixed
|
|
18
|
+
- Router: required model capabilities filter out non-tool-capable candidates instead of selecting a local model that later rejects tool payloads.
|
|
19
|
+
- Executor: streaming provider calls now use the escalation chain, so provider errors like "does not support tools" can move to the next routed model.
|
|
20
|
+
- Executor: synthetic routing requirements no longer make model-only or explicit-provider requests bypass provider inference or registry defaults.
|
|
21
|
+
|
|
3
22
|
## [0.9.28] - 2026-05-15
|
|
4
23
|
|
|
5
24
|
### Added
|
|
@@ -149,7 +149,7 @@ module Legion
|
|
|
149
149
|
name: tname,
|
|
150
150
|
description: tdesc,
|
|
151
151
|
parameters: tschema,
|
|
152
|
-
source: { type: :client, executable:
|
|
152
|
+
source: { type: :client, executable: true }
|
|
153
153
|
)
|
|
154
154
|
rescue StandardError => e
|
|
155
155
|
log.warn("[llm][api][anthropic][messages] build_tool_classes failed name=#{tname} error=#{e.message}")
|
|
@@ -28,6 +28,7 @@ module Legion
|
|
|
28
28
|
conversation_id = body[:conversation_id]
|
|
29
29
|
request_id = body[:request_id] || SecureRandom.uuid
|
|
30
30
|
include_thinking = body[:include_thinking] == true
|
|
31
|
+
client_tool_passthrough = body[:client_tool_passthrough] == true
|
|
31
32
|
|
|
32
33
|
unless messages.is_a?(Array)
|
|
33
34
|
halt 400, { 'Content-Type' => 'application/json' },
|
|
@@ -103,6 +104,9 @@ module Legion
|
|
|
103
104
|
|
|
104
105
|
extra = {}
|
|
105
106
|
extra[:tier] = tier.to_sym if tier
|
|
107
|
+
metadata = { requested_tools: requested_tools }
|
|
108
|
+
metadata[:client_tool_passthrough] = true if client_tool_passthrough
|
|
109
|
+
metadata[:client_tool_request_count] = tools.size if tools.any?
|
|
106
110
|
|
|
107
111
|
pipeline_request = Legion::LLM::Inference::Request.build(
|
|
108
112
|
id: request_id,
|
|
@@ -112,7 +116,7 @@ module Legion
|
|
|
112
116
|
tools: tool_declarations,
|
|
113
117
|
caller: effective_caller,
|
|
114
118
|
conversation_id: conversation_id,
|
|
115
|
-
metadata:
|
|
119
|
+
metadata: metadata,
|
|
116
120
|
stream: streaming,
|
|
117
121
|
cache: { strategy: :default, cacheable: true },
|
|
118
122
|
extra: extra
|
|
@@ -160,8 +160,12 @@ module Legion
|
|
|
160
160
|
end
|
|
161
161
|
|
|
162
162
|
def self.tier_priority
|
|
163
|
+
return Legion::LLM::Router.tier_priority if defined?(Legion::LLM::Router)
|
|
164
|
+
|
|
163
165
|
routing_config = Legion::LLM::Settings.value(:routing) || {}
|
|
164
|
-
|
|
166
|
+
top_level = Legion::LLM::Settings.value(:tier_order, default: nil)
|
|
167
|
+
Array(top_level || routing_config[:tier_order] || routing_config[:tier_priority] ||
|
|
168
|
+
%w[local direct fleet openai_compat cloud frontier])
|
|
165
169
|
end
|
|
166
170
|
|
|
167
171
|
def self.privacy_mode?
|
|
@@ -123,7 +123,7 @@ module Legion
|
|
|
123
123
|
name: t[:name].to_s,
|
|
124
124
|
description: t[:description].to_s,
|
|
125
125
|
parameters: t[:parameters] || {},
|
|
126
|
-
source: { type: :client, executable:
|
|
126
|
+
source: { type: :client, executable: true }
|
|
127
127
|
)
|
|
128
128
|
rescue StandardError => e
|
|
129
129
|
tool_name = t.is_a?(Hash) ? t[:name] : nil
|
|
@@ -27,6 +27,14 @@ module Legion
|
|
|
27
27
|
}.freeze
|
|
28
28
|
|
|
29
29
|
DEFAULT_TIER_PRIORITY = %i[local direct fleet openai_compat cloud frontier].freeze
|
|
30
|
+
CAPABILITY_ALIASES = {
|
|
31
|
+
function_calling: :tools,
|
|
32
|
+
functions: :tools,
|
|
33
|
+
tool: :tools,
|
|
34
|
+
tool_use: :tools,
|
|
35
|
+
stream: :streaming,
|
|
36
|
+
stream_chat: :streaming
|
|
37
|
+
}.freeze
|
|
30
38
|
|
|
31
39
|
module_function
|
|
32
40
|
|
|
@@ -52,7 +60,10 @@ module Legion
|
|
|
52
60
|
capability = embedding_model?(model_data) ? :embed : :chat
|
|
53
61
|
priority = tier_weight(model_tier) - order
|
|
54
62
|
rules << build_rule(provider, instance_id, model_data, capability, model_tier, priority)
|
|
55
|
-
|
|
63
|
+
if capability == :chat
|
|
64
|
+
rules << build_rule(provider, instance_id, model_data, :stream, model_tier, priority) if supports_streaming?(model_data)
|
|
65
|
+
rules << build_rule(provider, instance_id, model_data, :tools, model_tier, priority) if supports_tools?(model_data)
|
|
66
|
+
end
|
|
56
67
|
order += 1
|
|
57
68
|
end
|
|
58
69
|
end
|
|
@@ -125,11 +136,39 @@ module Legion
|
|
|
125
136
|
return nil unless model_data.is_a?(Hash)
|
|
126
137
|
|
|
127
138
|
caps = model_data[:capabilities] || model_data['capabilities']
|
|
128
|
-
|
|
139
|
+
normalized = normalize_capabilities(caps)
|
|
140
|
+
return normalized if normalized.any?
|
|
129
141
|
|
|
130
142
|
nil
|
|
131
143
|
end
|
|
132
144
|
|
|
145
|
+
def supports_streaming?(model_data)
|
|
146
|
+
capabilities = extract_capabilities(model_data)
|
|
147
|
+
return true if capabilities.nil?
|
|
148
|
+
|
|
149
|
+
capabilities.include?(:streaming)
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
def supports_tools?(model_data)
|
|
153
|
+
capabilities = extract_capabilities(model_data)
|
|
154
|
+
return false if capabilities.nil?
|
|
155
|
+
|
|
156
|
+
capabilities.include?(:tools)
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
def normalize_capabilities(capabilities)
|
|
160
|
+
Array(capabilities).compact.each_with_object([]) do |capability, normalized|
|
|
161
|
+
next unless capability.respond_to?(:to_s)
|
|
162
|
+
|
|
163
|
+
capability_sym = capability.to_s.downcase.strip.to_sym
|
|
164
|
+
next if capability_sym.to_s.empty?
|
|
165
|
+
|
|
166
|
+
normalized << capability_sym
|
|
167
|
+
alias_sym = CAPABILITY_ALIASES[capability_sym]
|
|
168
|
+
normalized << alias_sym if alias_sym
|
|
169
|
+
end.uniq
|
|
170
|
+
end
|
|
171
|
+
|
|
133
172
|
def extract_field(model_data, field)
|
|
134
173
|
return nil unless model_data.is_a?(Hash)
|
|
135
174
|
|
|
@@ -145,7 +184,9 @@ module Legion
|
|
|
145
184
|
end
|
|
146
185
|
|
|
147
186
|
def tier_priority
|
|
148
|
-
configured = Legion::LLM::Settings.value(:
|
|
187
|
+
configured = Legion::LLM::Settings.value(:tier_order, default: nil)
|
|
188
|
+
configured = Legion::LLM::Settings.value(:routing, :tier_order, default: nil) if blank_array?(configured)
|
|
189
|
+
configured = Legion::LLM::Settings.value(:routing, :tier_priority, default: DEFAULT_TIER_PRIORITY) if blank_array?(configured)
|
|
149
190
|
normalized = Array(configured).filter_map do |tier|
|
|
150
191
|
tier.to_sym if tier.respond_to?(:to_sym)
|
|
151
192
|
end
|
|
@@ -156,6 +197,10 @@ module Legion
|
|
|
156
197
|
DEFAULT_TIER_PRIORITY
|
|
157
198
|
end
|
|
158
199
|
|
|
200
|
+
def blank_array?(value)
|
|
201
|
+
Array(value).empty?
|
|
202
|
+
end
|
|
203
|
+
|
|
159
204
|
def extension_providers
|
|
160
205
|
ext = Legion::Settings[:extensions]
|
|
161
206
|
return ext[:llm] if ext.is_a?(Hash) && ext[:llm].is_a?(Hash)
|
|
@@ -376,6 +376,7 @@ module Legion
|
|
|
376
376
|
|
|
377
377
|
def routing_request_state
|
|
378
378
|
routing_explicit = @request.extra[:routing_explicit]
|
|
379
|
+
request_intent = @request.extra[:intent]
|
|
379
380
|
instance = @request.routing[:instance] || @request.routing[:instance_id] || @request.routing[:provider_instance]
|
|
380
381
|
tier = @request.extra[:tier]
|
|
381
382
|
{
|
|
@@ -385,7 +386,8 @@ module Legion
|
|
|
385
386
|
offering_id: @request.routing[:offering_id] || @request.routing[:id],
|
|
386
387
|
offering_metadata: normalize_offering_metadata(@request.routing[:offering_metadata] ||
|
|
387
388
|
@request.routing[:offering]),
|
|
388
|
-
intent:
|
|
389
|
+
intent: routing_intent_for_request(request_intent),
|
|
390
|
+
intent_explicit: routing_intent_present?(request_intent),
|
|
389
391
|
tier: tier,
|
|
390
392
|
auto_route: @request.extra[:auto_route],
|
|
391
393
|
provider_explicit: routing_field_explicit?(routing_explicit, :provider, @request.routing[:provider]),
|
|
@@ -394,6 +396,65 @@ module Legion
|
|
|
394
396
|
}
|
|
395
397
|
end
|
|
396
398
|
|
|
399
|
+
def routing_intent_present?(intent)
|
|
400
|
+
intent.is_a?(Hash) && intent.any?
|
|
401
|
+
end
|
|
402
|
+
|
|
403
|
+
def routing_intent_for_request(intent)
|
|
404
|
+
normalized = if intent.is_a?(Hash)
|
|
405
|
+
intent.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
|
|
406
|
+
else
|
|
407
|
+
{}
|
|
408
|
+
end
|
|
409
|
+
required = normalize_required_capabilities(
|
|
410
|
+
normalized.delete(:required_capabilities) || normalized.delete(:requires)
|
|
411
|
+
)
|
|
412
|
+
|
|
413
|
+
if @request.stream == true
|
|
414
|
+
normalized[:capability] = :stream if stream_routable_capability?(normalized[:capability])
|
|
415
|
+
required << :streaming
|
|
416
|
+
end
|
|
417
|
+
|
|
418
|
+
required << :tools if native_tools_requested_for_routing?
|
|
419
|
+
normalized[:required_capabilities] = required.uniq if required.any?
|
|
420
|
+
normalized
|
|
421
|
+
end
|
|
422
|
+
|
|
423
|
+
def stream_routable_capability?(capability)
|
|
424
|
+
capability.nil? || %i[chat completion stream].include?(capability.to_s.downcase.to_sym)
|
|
425
|
+
end
|
|
426
|
+
|
|
427
|
+
def native_tools_requested_for_routing?
|
|
428
|
+
Array(@request.tools).any? ||
|
|
429
|
+
requested_deferred_tool_names.any? ||
|
|
430
|
+
@triggered_tools.any? ||
|
|
431
|
+
Tools::Special.pinned_definitions.any?
|
|
432
|
+
rescue StandardError => e
|
|
433
|
+
handle_exception(e, level: :warn, handled: true, operation: 'llm.pipeline.routing_tools_required')
|
|
434
|
+
false
|
|
435
|
+
end
|
|
436
|
+
|
|
437
|
+
def normalize_required_capabilities(capabilities)
|
|
438
|
+
aliases = {
|
|
439
|
+
function_calling: :tools,
|
|
440
|
+
functions: :tools,
|
|
441
|
+
tool: :tools,
|
|
442
|
+
tool_use: :tools,
|
|
443
|
+
stream: :streaming,
|
|
444
|
+
stream_chat: :streaming
|
|
445
|
+
}
|
|
446
|
+
Array(capabilities).compact.each_with_object([]) do |capability, normalized|
|
|
447
|
+
next unless capability.respond_to?(:to_s)
|
|
448
|
+
|
|
449
|
+
capability_sym = capability.to_s.downcase.strip.to_sym
|
|
450
|
+
next if capability_sym.to_s.empty?
|
|
451
|
+
|
|
452
|
+
normalized << capability_sym
|
|
453
|
+
alias_sym = aliases[capability_sym]
|
|
454
|
+
normalized << alias_sym if alias_sym
|
|
455
|
+
end.uniq
|
|
456
|
+
end
|
|
457
|
+
|
|
397
458
|
def apply_proactive_tier_assignment(state)
|
|
398
459
|
# Forced assignments carry security/privacy constraints and override
|
|
399
460
|
# caller-supplied tier/intent. Advisory assignments only fill blanks.
|
|
@@ -416,7 +477,7 @@ module Legion
|
|
|
416
477
|
|
|
417
478
|
explicit_route = state[:provider_explicit] || state[:instance_explicit] || state[:tier_explicit]
|
|
418
479
|
auto_route = state[:auto_route] == true
|
|
419
|
-
intent_route = state[:intent] && Router.routing_enabled?
|
|
480
|
+
intent_route = state[:intent_explicit] && state[:intent] && Router.routing_enabled?
|
|
420
481
|
return state unless explicit_route || auto_route || intent_route
|
|
421
482
|
|
|
422
483
|
resolution = routing_resolution_for(state)
|
|
@@ -426,7 +487,7 @@ module Legion
|
|
|
426
487
|
end
|
|
427
488
|
|
|
428
489
|
def routing_resolution_for(state)
|
|
429
|
-
if state[:auto_route] == true || (state[:intent] && pipeline_escalation_enabled?)
|
|
490
|
+
if state[:auto_route] == true || (state[:intent_explicit] && state[:intent] && pipeline_escalation_enabled?)
|
|
430
491
|
@escalation_chain = Router.resolve_chain(
|
|
431
492
|
intent: state[:intent],
|
|
432
493
|
tier: state[:tier],
|
|
@@ -527,13 +588,14 @@ module Legion
|
|
|
527
588
|
end
|
|
528
589
|
end
|
|
529
590
|
|
|
530
|
-
def run_provider_call_with_escalation
|
|
591
|
+
def run_provider_call_with_escalation(stream_block: nil)
|
|
531
592
|
@escalation_chain ||= build_default_escalation_chain
|
|
532
593
|
chain = @escalation_chain
|
|
533
594
|
threshold = pipeline_escalation_quality_threshold
|
|
534
595
|
quality_check = @request.extra[:quality_check]
|
|
535
596
|
succeeded = false
|
|
536
597
|
tried = []
|
|
598
|
+
@last_escalation_error = nil
|
|
537
599
|
log.debug "[llm][executor] action=escalation.enter chain_size=#{chain.size} threshold=#{threshold}"
|
|
538
600
|
|
|
539
601
|
primary_tier = @escalation_chain.primary&.tier
|
|
@@ -541,10 +603,12 @@ module Legion
|
|
|
541
603
|
chain.each do |resolution|
|
|
542
604
|
next if tried.any? { |t| t[:provider] == resolution.provider && t[:instance] == resolution.instance && t[:model] == resolution.model }
|
|
543
605
|
|
|
544
|
-
succeeded = run_escalation_resolution(resolution, threshold, quality_check, tried, primary_tier
|
|
606
|
+
succeeded = run_escalation_resolution(resolution, threshold, quality_check, tried, primary_tier,
|
|
607
|
+
stream_block: stream_block)
|
|
545
608
|
break if succeeded
|
|
546
609
|
end
|
|
547
610
|
return if succeeded
|
|
611
|
+
raise @last_escalation_error if chain.size <= 1 && @last_escalation_error
|
|
548
612
|
|
|
549
613
|
emit_error_audit(
|
|
550
614
|
EscalationExhausted.new("All #{@escalation_history.size} attempts failed"),
|
|
@@ -553,7 +617,7 @@ module Legion
|
|
|
553
617
|
raise EscalationExhausted, "All #{@escalation_history.size} escalation attempts failed"
|
|
554
618
|
end
|
|
555
619
|
|
|
556
|
-
def run_escalation_resolution(resolution, threshold, quality_check, tried, primary_tier)
|
|
620
|
+
def run_escalation_resolution(resolution, threshold, quality_check, tried, primary_tier, stream_block: nil)
|
|
557
621
|
move_type = if tried.empty?
|
|
558
622
|
:primary
|
|
559
623
|
elsif resolution.tier == primary_tier
|
|
@@ -570,7 +634,7 @@ module Legion
|
|
|
570
634
|
@resolved_tier = resolution.tier
|
|
571
635
|
@resolved_offering_id = resolution.offering_id
|
|
572
636
|
@resolved_offering_metadata = resolution.offering_metadata
|
|
573
|
-
succeeded = attempt_escalation(resolution, threshold, quality_check, start_time)
|
|
637
|
+
succeeded = attempt_escalation(resolution, threshold, quality_check, start_time, stream_block: stream_block)
|
|
574
638
|
tried << { provider: resolution.provider, instance: resolution.instance, model: resolution.model } unless succeeded
|
|
575
639
|
succeeded
|
|
576
640
|
rescue Legion::LLM::AuthError, Legion::LLM::PrivacyModeError => e
|
|
@@ -605,14 +669,19 @@ module Legion
|
|
|
605
669
|
false
|
|
606
670
|
end
|
|
607
671
|
|
|
608
|
-
def attempt_escalation(resolution, threshold, quality_check, start_time)
|
|
672
|
+
def attempt_escalation(resolution, threshold, quality_check, start_time, stream_block: nil)
|
|
609
673
|
@current_escalation_context = {
|
|
610
674
|
attempt: @escalation_history.size + 1,
|
|
611
675
|
max_attempts: @escalation_chain&.max_attempts
|
|
612
676
|
}.compact
|
|
613
|
-
|
|
677
|
+
if stream_block
|
|
678
|
+
execute_provider_request_stream(&stream_block)
|
|
679
|
+
result = Quality::Checker::QualityResult.new(passed: true, failures: [])
|
|
680
|
+
else
|
|
681
|
+
execute_provider_request
|
|
682
|
+
result = Quality::Checker.check(@raw_response, quality_threshold: threshold, quality_check: quality_check)
|
|
683
|
+
end
|
|
614
684
|
duration_ms = ((Time.now - start_time) * 1000).round
|
|
615
|
-
result = Quality::Checker.check(@raw_response, quality_threshold: threshold, quality_check: quality_check)
|
|
616
685
|
outcome = result.passed ? :success : :quality_failure
|
|
617
686
|
@timeline.record(
|
|
618
687
|
category: :provider, key: 'escalation:attempt', direction: :internal,
|
|
@@ -649,6 +718,7 @@ module Legion
|
|
|
649
718
|
end
|
|
650
719
|
|
|
651
720
|
def record_escalation_failure(err, resolution, start_time, outcome:, operation:, handled: false)
|
|
721
|
+
@last_escalation_error = err
|
|
652
722
|
duration_ms = ((Time.now - start_time) * 1000).round
|
|
653
723
|
handle_exception(err, level: :warn, handled: handled, operation: operation,
|
|
654
724
|
provider: resolution.provider, model: resolution.model, duration_ms: duration_ms)
|
|
@@ -689,7 +759,7 @@ module Legion
|
|
|
689
759
|
end
|
|
690
760
|
|
|
691
761
|
def build_fallback_resolutions(exclude_provider: nil, exclude_instance: nil, primary_tier: nil)
|
|
692
|
-
tier_rank = Router
|
|
762
|
+
tier_rank = Router.tier_rank
|
|
693
763
|
primary_rank = primary_tier ? (tier_rank[primary_tier.to_sym] || 99) : 99
|
|
694
764
|
|
|
695
765
|
candidates = Call::Registry.all_instances.filter_map do |entry|
|
|
@@ -869,6 +939,23 @@ module Legion
|
|
|
869
939
|
!(@request.respond_to?(:suppress_tools) && @request.suppress_tools)
|
|
870
940
|
end
|
|
871
941
|
|
|
942
|
+
def client_tool_passthrough_enabled?
|
|
943
|
+
return false unless @request.respond_to?(:metadata)
|
|
944
|
+
|
|
945
|
+
metadata = @request.metadata || {}
|
|
946
|
+
value = metadata[:client_tool_passthrough] || metadata['client_tool_passthrough']
|
|
947
|
+
value == true
|
|
948
|
+
end
|
|
949
|
+
|
|
950
|
+
def non_executable_client_tool?(definition)
|
|
951
|
+
source = definition.respond_to?(:source) ? definition.source : {}
|
|
952
|
+
return false unless source.is_a?(Hash)
|
|
953
|
+
|
|
954
|
+
source_type = source[:type] || source['type']
|
|
955
|
+
executable = source.key?(:executable) ? source[:executable] : source['executable']
|
|
956
|
+
source_type.respond_to?(:to_sym) && source_type.to_sym == :client && executable != true
|
|
957
|
+
end
|
|
958
|
+
|
|
872
959
|
def add_pinned_special_tool_definitions(definitions)
|
|
873
960
|
Tools::Special.pinned_definitions.each do |definition|
|
|
874
961
|
next if definitions.any? { |existing| existing.name == definition.name }
|
|
@@ -887,6 +974,14 @@ module Legion
|
|
|
887
974
|
else
|
|
888
975
|
Types::ToolDefinition.from_tool_class(tool)
|
|
889
976
|
end
|
|
977
|
+
if non_executable_client_tool?(definition) && !client_tool_passthrough_enabled?
|
|
978
|
+
log.info(
|
|
979
|
+
"[llm][tools][inject] action=client_tool_skipped request_id=#{request_log_value(:id, 'unknown')} " \
|
|
980
|
+
"conversation_id=#{request_log_value(:conversation_id, 'none') || 'none'} name=#{definition.name} " \
|
|
981
|
+
'reason=client_passthrough_not_enabled'
|
|
982
|
+
)
|
|
983
|
+
return
|
|
984
|
+
end
|
|
890
985
|
return if gaia_tool_suppressed?(definition.name)
|
|
891
986
|
return if definitions.any? { |existing| existing.name == definition.name }
|
|
892
987
|
|
|
@@ -1168,10 +1263,15 @@ module Legion
|
|
|
1168
1263
|
|
|
1169
1264
|
private :async_post_enabled?
|
|
1170
1265
|
|
|
1171
|
-
def step_provider_call_stream(&)
|
|
1266
|
+
def step_provider_call_stream(&block)
|
|
1267
|
+
if pipeline_escalation_enabled?
|
|
1268
|
+
run_provider_call_with_escalation(stream_block: block)
|
|
1269
|
+
return
|
|
1270
|
+
end
|
|
1271
|
+
|
|
1172
1272
|
providers_tried = []
|
|
1173
1273
|
begin
|
|
1174
|
-
execute_provider_request_stream(&)
|
|
1274
|
+
execute_provider_request_stream(&block)
|
|
1175
1275
|
rescue Legion::LLM::AuthError, Faraday::UnauthorizedError, Faraday::ForbiddenError => e
|
|
1176
1276
|
try_fallback_or_raise(e, providers_tried, operation: 'provider_call_stream.auth',
|
|
1177
1277
|
reason: 'auth_failed', error_class: Legion::LLM::AuthError)
|
data/lib/legion/llm/inventory.rb
CHANGED
|
@@ -35,6 +35,15 @@ module Legion
|
|
|
35
35
|
chat: %i[chat completion tools json_schema]
|
|
36
36
|
}.freeze
|
|
37
37
|
|
|
38
|
+
CAPABILITY_ALIASES = {
|
|
39
|
+
function_calling: :tools,
|
|
40
|
+
functions: :tools,
|
|
41
|
+
tool: :tools,
|
|
42
|
+
tool_use: :tools,
|
|
43
|
+
stream: :streaming,
|
|
44
|
+
stream_chat: :streaming
|
|
45
|
+
}.freeze
|
|
46
|
+
|
|
38
47
|
class << self
|
|
39
48
|
def offerings(filters = {})
|
|
40
49
|
log.debug "[llm][inventory] action=offerings.enter filters=#{filters.keys}"
|
|
@@ -216,7 +225,16 @@ module Legion
|
|
|
216
225
|
|
|
217
226
|
def normalize_capabilities(capabilities, type)
|
|
218
227
|
raw = capabilities || DEFAULT_CAPABILITIES.fetch(type, DEFAULT_CAPABILITIES[:inference])
|
|
219
|
-
Array(raw).
|
|
228
|
+
Array(raw).compact.each_with_object([]) do |capability, normalized|
|
|
229
|
+
next unless capability.respond_to?(:to_s)
|
|
230
|
+
|
|
231
|
+
capability_sym = capability.to_s.downcase.strip.to_sym
|
|
232
|
+
next if capability_sym.to_s.empty?
|
|
233
|
+
|
|
234
|
+
normalized << capability_sym
|
|
235
|
+
alias_sym = CAPABILITY_ALIASES[capability_sym]
|
|
236
|
+
normalized << alias_sym if alias_sym
|
|
237
|
+
end.uniq.map(&:to_s).sort
|
|
220
238
|
end
|
|
221
239
|
|
|
222
240
|
def normalize_type(value)
|
data/lib/legion/llm/router.rb
CHANGED
|
@@ -19,6 +19,14 @@ module Legion
|
|
|
19
19
|
PROVIDER_ORDER = %i[ollama vllm bedrock azure gemini anthropic openai].freeze
|
|
20
20
|
TIER_EXTERNAL = Set[:cloud, :frontier, :openai_compat].freeze
|
|
21
21
|
TIER_RANK = { local: 0, direct: 1, fleet: 2, openai_compat: 3, cloud: 4, frontier: 5 }.freeze
|
|
22
|
+
CAPABILITY_ALIASES = {
|
|
23
|
+
function_calling: :tools,
|
|
24
|
+
functions: :tools,
|
|
25
|
+
tool: :tools,
|
|
26
|
+
tool_use: :tools,
|
|
27
|
+
stream: :streaming,
|
|
28
|
+
stream_chat: :streaming
|
|
29
|
+
}.freeze
|
|
22
30
|
|
|
23
31
|
OLLAMA_MODEL_PATTERN = %r{[:/]}
|
|
24
32
|
|
|
@@ -120,6 +128,22 @@ module Legion
|
|
|
120
128
|
@auto_rules_populated = false
|
|
121
129
|
end
|
|
122
130
|
|
|
131
|
+
def tier_priority
|
|
132
|
+
configured = Legion::LLM::Settings.value(:tier_order, default: nil)
|
|
133
|
+
configured = routing_settings[:tier_order] if configured.nil? || Array(configured).empty?
|
|
134
|
+
configured = routing_settings[:tier_priority] if configured.nil? || Array(configured).empty?
|
|
135
|
+
normalized = Array(configured).filter_map { |tier| tier.to_sym if tier.respond_to?(:to_sym) }
|
|
136
|
+
normalized = TIER_RANK.keys if normalized.empty?
|
|
137
|
+
(normalized + TIER_RANK.keys).uniq
|
|
138
|
+
rescue StandardError => e
|
|
139
|
+
handle_exception(e, level: :warn, handled: true, operation: 'router.tier_priority')
|
|
140
|
+
TIER_RANK.keys
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
def tier_rank
|
|
144
|
+
tier_priority.each_with_index.to_h
|
|
145
|
+
end
|
|
146
|
+
|
|
123
147
|
# Check whether a tier can be used right now.
|
|
124
148
|
# :local — always available
|
|
125
149
|
# :direct — always available (remote self-hosted instances)
|
|
@@ -227,16 +251,19 @@ module Legion
|
|
|
227
251
|
# 3. Filter by schedule
|
|
228
252
|
scheduled = matched.select(&:within_schedule?)
|
|
229
253
|
|
|
230
|
-
# 4. Reject rules
|
|
231
|
-
|
|
254
|
+
# 4. Reject rules that cannot satisfy required model capabilities
|
|
255
|
+
capable = scheduled.select { |r| satisfies_required_capabilities?(r, intent) }
|
|
232
256
|
|
|
233
|
-
#
|
|
257
|
+
# 5. Reject rules excluded by active constraints
|
|
258
|
+
unconstrained = capable.reject { |r| excluded_by_constraint?(r, constraints) }
|
|
259
|
+
|
|
260
|
+
# 5.5 Reject Ollama rules where model is not pulled or doesn't fit
|
|
234
261
|
discovered = unconstrained.reject { |r| excluded_by_discovery?(r) }
|
|
235
262
|
|
|
236
|
-
#
|
|
263
|
+
# 5.55 Reject local-tier rules where model exceeds available memory
|
|
237
264
|
memory_checked = discovered.reject { |r| excluded_by_memory?(r) }
|
|
238
265
|
|
|
239
|
-
#
|
|
266
|
+
# 5.6 Reject rules matching caller-provided exclude list
|
|
240
267
|
normalized_exclude = exclude.is_a?(Hash) ? exclude : {}
|
|
241
268
|
not_excluded = if normalized_exclude.empty?
|
|
242
269
|
memory_checked
|
|
@@ -244,10 +271,10 @@ module Legion
|
|
|
244
271
|
memory_checked.reject { |r| excluded_by_caller?(r, normalized_exclude) }
|
|
245
272
|
end
|
|
246
273
|
|
|
247
|
-
#
|
|
274
|
+
# 5.7 Reject rules for models denied by health tracker
|
|
248
275
|
not_denied = not_excluded.reject { |r| excluded_by_denial?(r) }
|
|
249
276
|
|
|
250
|
-
#
|
|
277
|
+
# 6. Filter by tier availability
|
|
251
278
|
final = not_denied.select { |r| tier_available?(r.target[:tier] || r.target['tier']) }
|
|
252
279
|
|
|
253
280
|
log.debug("Router: #{final.size} candidates after filtering (started with #{rules.size})")
|
|
@@ -255,6 +282,37 @@ module Legion
|
|
|
255
282
|
final
|
|
256
283
|
end
|
|
257
284
|
|
|
285
|
+
def satisfies_required_capabilities?(rule, intent)
|
|
286
|
+
required = required_capabilities(intent)
|
|
287
|
+
return true if required.empty?
|
|
288
|
+
|
|
289
|
+
rule_capabilities = normalize_capabilities(rule.target[:model_capabilities] || rule.target['model_capabilities'] ||
|
|
290
|
+
rule.target[:capabilities] || rule.target['capabilities'])
|
|
291
|
+
return false if rule_capabilities.empty?
|
|
292
|
+
|
|
293
|
+
required.all? { |capability| rule_capabilities.include?(capability) }
|
|
294
|
+
end
|
|
295
|
+
|
|
296
|
+
def required_capabilities(intent)
|
|
297
|
+
return [] unless intent.is_a?(Hash)
|
|
298
|
+
|
|
299
|
+
normalized = intent.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
|
|
300
|
+
normalize_capabilities(normalized[:required_capabilities] || normalized[:requires])
|
|
301
|
+
end
|
|
302
|
+
|
|
303
|
+
def normalize_capabilities(capabilities)
|
|
304
|
+
Array(capabilities).compact.each_with_object([]) do |capability, normalized|
|
|
305
|
+
next unless capability.respond_to?(:to_s)
|
|
306
|
+
|
|
307
|
+
capability_sym = capability.to_s.downcase.strip.to_sym
|
|
308
|
+
next if capability_sym.to_s.empty?
|
|
309
|
+
|
|
310
|
+
normalized << capability_sym
|
|
311
|
+
alias_sym = CAPABILITY_ALIASES[capability_sym]
|
|
312
|
+
normalized << alias_sym if alias_sym
|
|
313
|
+
end.uniq
|
|
314
|
+
end
|
|
315
|
+
|
|
258
316
|
def excluded_by_constraint?(rule, constraints)
|
|
259
317
|
return false if constraints.empty?
|
|
260
318
|
|
|
@@ -474,8 +532,15 @@ module Legion
|
|
|
474
532
|
seen[pname] ||= entry
|
|
475
533
|
end
|
|
476
534
|
|
|
477
|
-
# Build resolutions
|
|
478
|
-
PROVIDER_ORDER.
|
|
535
|
+
# Build resolutions ordered by configured tier priority, preserving provider preference inside a tier.
|
|
536
|
+
provider_index = PROVIDER_ORDER.each_with_index.to_h
|
|
537
|
+
sorted_entries = seen.values.sort_by do |entry|
|
|
538
|
+
tier = registry_tier(entry[:provider], entry[:metadata])
|
|
539
|
+
[tier_rank.fetch(tier, 99), provider_index.fetch(entry[:provider], PROVIDER_ORDER.size)]
|
|
540
|
+
end
|
|
541
|
+
|
|
542
|
+
sorted_entries.filter_map do |entry|
|
|
543
|
+
pname = entry[:provider]
|
|
479
544
|
entry = seen[pname]
|
|
480
545
|
next unless entry
|
|
481
546
|
|
data/lib/legion/llm/version.rb
CHANGED