legion-llm 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/ci.yml +16 -0
- data/.gitignore +18 -0
- data/.rubocop.yml +56 -0
- data/CHANGELOG.md +71 -0
- data/CLAUDE.md +388 -0
- data/Gemfile +14 -0
- data/LICENSE +20 -0
- data/README.md +615 -0
- data/docs/plans/2026-03-15-ollama-discovery-design.md +164 -0
- data/docs/plans/2026-03-15-ollama-discovery-implementation.md +1147 -0
- data/legion-llm.gemspec +32 -0
- data/lib/legion/llm/bedrock_bearer_auth.rb +53 -0
- data/lib/legion/llm/compressor.rb +75 -0
- data/lib/legion/llm/discovery/ollama.rb +88 -0
- data/lib/legion/llm/discovery/system.rb +139 -0
- data/lib/legion/llm/escalation_history.rb +28 -0
- data/lib/legion/llm/helpers/llm.rb +59 -0
- data/lib/legion/llm/providers.rb +88 -0
- data/lib/legion/llm/quality_checker.rb +56 -0
- data/lib/legion/llm/router/escalation_chain.rb +49 -0
- data/lib/legion/llm/router/health_tracker.rb +160 -0
- data/lib/legion/llm/router/resolution.rb +43 -0
- data/lib/legion/llm/router/rule.rb +103 -0
- data/lib/legion/llm/router.rb +279 -0
- data/lib/legion/llm/settings.rb +97 -0
- data/lib/legion/llm/transport/exchanges/escalation.rb +14 -0
- data/lib/legion/llm/transport/messages/escalation_event.rb +13 -0
- data/lib/legion/llm/version.rb +7 -0
- data/lib/legion/llm.rb +264 -0
- metadata +136 -0
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
  module LLM
    module Router
      # Collects per-provider health signals and turns them into a routing
      # priority adjustment.
      #
      # Two independent mechanisms contribute to the adjustment:
      # * a circuit breaker per provider (:closed -> :open -> :half_open), fed by
      #   the :error, :quality_failure and :success signals;
      # * a rolling latency window, fed by the :latency signal.
      #
      # All reads and writes of the internal state go through one mutex.
      # Handlers run while the mutex is held; they may still call the public
      # readers because the lock helper skips re-locking for the owning thread.
      class HealthTracker
        OPEN_PENALTY = -50
        LATENCY_THRESHOLD_MS = 5000
        LATENCY_PENALTY_STEP = -10

        # @param window_seconds [Numeric] how long latency samples stay relevant
        # @param failure_threshold [Numeric] accumulated failure weight that opens a circuit
        # @param cooldown_seconds [Numeric] time after which an open circuit reads as :half_open
        def initialize(window_seconds: 300, failure_threshold: 3, cooldown_seconds: 60)
          @window_seconds = window_seconds
          @failure_threshold = failure_threshold
          @cooldown_seconds = cooldown_seconds

          @circuits = {}
          @latency_window = {}
          @handlers = {}
          @mutex = Mutex.new

          register_default_handlers
        end

        # Register a custom handler for a signal type. Replaces any handler
        # previously registered for the same signal.
        #
        # @param signal [#to_sym] signal name
        # @yieldparam payload [Hash] :provider, :signal, :value, :metadata, :at
        # @return [Proc] the registered block
        def register_handler(signal, &block)
          synchronize_state { @handlers[signal.to_sym] = block }
        end

        # Thread-safe signal intake. Dispatches to the registered handler if one exists.
        #
        # @return [Object, nil] the handler's return value, or nil when no
        #   handler is registered for the signal
        def report(provider:, signal:, value:, metadata: {})
          sym = signal.to_sym
          synchronize_state do
            handler = @handlers[sym]
            next nil unless handler

            handler.call({ provider: provider, signal: sym, value: value, metadata: metadata, at: Time.now })
          end
        end

        # Returns total priority adjustment for a provider.
        # Combines circuit-breaker penalty and latency penalty.
        #
        # @return [Integer] zero or a negative number
        def adjustment(provider)
          synchronize_state { circuit_adjustment(provider) + latency_adjustment(provider) }
        end

        # Returns :closed, :open, or :half_open.
        # An open circuit reads as :half_open once the cooldown has elapsed; the
        # stored state itself only changes when a new signal is reported.
        def circuit_state(provider)
          synchronize_state do
            circuit = @circuits[provider_key(provider)]
            next :closed if circuit.nil?

            cooled_down?(circuit) ? :half_open : circuit[:state]
          end
        end

        # Clears circuit and latency data for a single provider.
        def reset(provider)
          key = provider_key(provider)
          synchronize_state do
            @circuits.delete(key)
            @latency_window.delete(key)
          end
        end

        # Clears all state.
        def reset_all
          synchronize_state do
            @circuits.clear
            @latency_window.clear
          end
        end

        private

        # Runs the block under the state mutex. Mutex is not reentrant, so a
        # thread that already holds it (a handler calling circuit_state, for
        # example) simply yields instead of locking again.
        def synchronize_state(&block)
          return yield if @mutex.owned?

          @mutex.synchronize(&block)
        end

        # String and Symbol provider names address the same record.
        def provider_key(provider)
          provider.is_a?(String) ? provider.to_sym : provider
        end

        def register_default_handlers
          # A hard error counts as a full failure, a quality failure as half of one.
          register_handler(:error) { |payload| record_failure(payload[:provider], 1.0) }
          register_handler(:quality_failure) { |payload| record_failure(payload[:provider], 0.5) }

          register_handler(:success) do |payload|
            circuit = ensure_circuit(provider_key(payload[:provider]))
            circuit[:failures] = 0
            circuit[:state] = :closed
            circuit[:opened_at] = nil
          end

          register_handler(:latency) do |payload|
            key = provider_key(payload[:provider])
            # Prune on write as well, so a provider that is reported but never
            # queried cannot accumulate samples without bound.
            samples = prune_latency(key, payload[:at])
            samples << { value: payload[:value], at: payload[:at] }
            @latency_window[key] = samples
          end
        end

        # Applies one failure of the given weight. A failure while half-open
        # re-opens the circuit immediately; otherwise the weight accumulates
        # and the circuit opens once the threshold is reached.
        def record_failure(provider, weight)
          key = provider_key(provider)
          circuit = ensure_circuit(key)

          if circuit_state(key) == :half_open
            trip(circuit)
          else
            circuit[:failures] += weight
            trip(circuit) if circuit[:failures] >= @failure_threshold
          end
        end

        # Marks the circuit open and restarts its cooldown.
        def trip(circuit)
          circuit[:state] = :open
          circuit[:opened_at] = Time.now
        end

        def cooled_down?(circuit)
          circuit[:state] == :open && (Time.now - circuit[:opened_at]) >= @cooldown_seconds
        end

        def ensure_circuit(key)
          @circuits[key] ||= { state: :closed, failures: 0.0, opened_at: nil }
        end

        def circuit_adjustment(provider)
          case circuit_state(provider)
          when :open then OPEN_PENALTY
          when :half_open then OPEN_PENALTY / 2
          else 0
          end
        end

        # One LATENCY_PENALTY_STEP per whole multiple of the threshold that the
        # windowed average reaches, never worse than OPEN_PENALTY.
        def latency_adjustment(provider)
          key = provider_key(provider)
          return 0 unless @latency_window.key?(key)

          recent = prune_latency(key, Time.now)
          return 0 if recent.empty?

          avg = recent.sum { |e| e[:value] } / recent.size.to_f
          return 0 if avg <= LATENCY_THRESHOLD_MS

          multiplier = (avg / LATENCY_THRESHOLD_MS).floor
          [LATENCY_PENALTY_STEP * multiplier, OPEN_PENALTY].max
        end

        # Drops samples older than the window and returns the survivors.
        # Providers left without samples are removed from the window entirely.
        def prune_latency(key, now)
          cutoff = now - @window_seconds
          kept = (@latency_window[key] || []).select { |e| e[:at] >= cutoff }

          if kept.empty?
            @latency_window.delete(key)
          else
            @latency_window[key] = kept
          end

          kept
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
  module LLM
    module Router
      # Value object describing one routing decision: which tier, provider and
      # model to use, plus the rule that produced it and any extra metadata.
      class Resolution
        ATTRIBUTES = %i[tier provider model rule metadata compress_level].freeze
        private_constant :ATTRIBUTES

        attr_reader(*ATTRIBUTES)

        # @param tier [#to_sym] stored as a Symbol
        # @param provider [#to_sym] stored as a Symbol
        # @param model [Object] stored as given
        # @param rule [Object, nil] name of the rule behind this decision
        # @param metadata [Hash] stored as given
        # @param compress_level [#to_i] stored as an Integer
        def initialize(tier:, provider:, model:, rule: nil, metadata: {}, compress_level: 0)
          @tier = tier.to_sym
          @provider = provider.to_sym
          @compress_level = compress_level.to_i
          @model = model
          @rule = rule
          @metadata = metadata
        end

        # @return [Boolean] true when the tier is :local
        def local?
          tier == :local
        end

        # @return [Boolean] true when the tier is :fleet
        def fleet?
          tier == :fleet
        end

        # @return [Boolean] true when the tier is :cloud
        def cloud?
          tier == :cloud
        end

        # @return [Hash] every attribute keyed by name, in declaration order
        def to_h
          ATTRIBUTES.to_h { |attribute| [attribute, public_send(attribute)] }
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'time'
|
|
4
|
+
require_relative 'resolution'
|
|
5
|
+
|
|
6
|
+
module Legion
  module LLM
    module Router
      # One routing rule: a set of intent conditions ("when"), the target they
      # route to ("then"), and optional priority, constraint, fallback, cost
      # and schedule attributes.
      class Rule
        attr_reader :name, :conditions, :target, :priority, :constraint, :fallback, :cost_multiplier, :schedule, :note

        # Builds a rule from a settings hash with String or Symbol keys.
        # `when` maps to conditions and `then` maps to target. A Hash fallback
        # gets symbolized keys; any other fallback value is turned into a Symbol.
        #
        # @param hash [Hash]
        # @return [Rule]
        def self.from_hash(hash)
          h = hash.transform_keys(&:to_sym)
          new(
            name: h[:name],
            conditions: h[:when] || {},
            target: h[:then] || {},
            priority: h.fetch(:priority, 0),
            constraint: h[:constraint],
            fallback: h[:fallback].is_a?(Hash) ? h[:fallback].transform_keys(&:to_sym) : h[:fallback]&.to_sym,
            cost_multiplier: h.fetch(:cost_multiplier, 1.0),
            schedule: h[:schedule],
            note: h[:note]
          )
        end

        def initialize(name:, conditions:, target:, priority: 0, constraint: nil, fallback: nil,
                       cost_multiplier: 1.0, schedule: nil, note: nil)
          @name = name
          @conditions = conditions.transform_keys(&:to_sym)
          @target = target.transform_keys(&:to_sym)
          @priority = priority
          @constraint = constraint
          @fallback = fallback
          @cost_multiplier = cost_multiplier
          @schedule = schedule
          @note = note
        end

        # True when every condition key is present in the intent and the values
        # are equal after string conversion. A rule without conditions matches
        # any intent.
        #
        # @param intent [Hash] symbol-keyed intent
        # @return [Boolean]
        def matches_intent?(intent)
          @conditions.all? do |key, expected|
            intent.key?(key) && intent[key].to_s == expected.to_s
          end
        end

        # Converts the rule's target into a Resolution. The target supplies the
        # tier/provider/model keywords; compress_level is passed separately so
        # it defaults to 0 when absent.
        #
        # @return [Resolution]
        def to_resolution
          target_without_compress = @target.except(:compress_level)
          Resolution.new(
            **target_without_compress,
            rule: @name,
            metadata: { cost_multiplier: @cost_multiplier, fallback: @fallback }.compact,
            compress_level: @target.fetch(:compress_level, 0)
          )
        end

        # True when the rule has no schedule, or when `now` satisfies every
        # schedule entry that is present: valid_from, valid_until, hours, days.
        # When the schedule names a timezone, `now` is converted to it first.
        #
        # @param now [Time]
        # @return [Boolean]
        def within_schedule?(now = Time.now)
          return true if @schedule.nil? || (@schedule.respond_to?(:empty?) && @schedule.empty?)

          sched = @schedule.transform_keys(&:to_s)
          now = localize(now, sched['timezone'])

          return false if sched['valid_from'] && now < boundary_time(sched['valid_from'])
          return false if sched['valid_until'] && now > boundary_time(sched['valid_until'])
          return false if sched['hours'] && !within_hours?(sched['hours'], now)
          return false if sched['days'] && !on_allowed_day?(sched['days'], now)

          true
        end

        private

        # tzinfo is loaded lazily so it is only needed by schedules that name a timezone.
        def localize(time, timezone_name)
          return time unless timezone_name

          require 'tzinfo'
          TZInfo::Timezone.get(timezone_name).to_local(time)
        end

        # Settings loaded from YAML may already hold Time/Date objects rather
        # than strings, and Time.parse only accepts strings.
        def boundary_time(value)
          return Time.parse(value) if value.is_a?(String)

          value.respond_to?(:to_time) ? value.to_time : Time.parse(value.to_s)
        end

        # `ranges` is one "HH:MM-HH:MM" string or a list of them; both ends are
        # inclusive. A range whose start is later than its end (such as
        # "22:00-06:00") is treated as crossing midnight.
        def within_hours?(ranges, now)
          current = (now.hour * 60) + now.min
          Array(ranges).any? do |range|
            start_str, end_str = range.to_s.split('-', 2)
            next false unless start_str && end_str

            start_min = time_str_to_minutes(start_str)
            end_min = time_str_to_minutes(end_str)

            if start_min <= end_min
              current.between?(start_min, end_min)
            else
              current >= start_min || current <= end_min
            end
          end
        end

        # Compares full English weekday names, case-insensitively.
        def on_allowed_day?(days, now)
          today = now.strftime('%A').downcase
          Array(days).map { |d| d.to_s.downcase }.include?(today)
        end

        def time_str_to_minutes(str)
          parts = str.split(':')
          (parts[0].to_i * 60) + parts[1].to_i
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'router/resolution'
|
|
4
|
+
require_relative 'router/rule'
|
|
5
|
+
require_relative 'router/health_tracker'
|
|
6
|
+
require_relative 'router/escalation_chain'
|
|
7
|
+
require_relative 'discovery/ollama'
|
|
8
|
+
require_relative 'discovery/system'
|
|
9
|
+
|
|
10
|
+
module Legion
  module LLM
    # Resolves routing intents to tier/provider/model decisions using the
    # rules, defaults and health data found under Legion::Settings[:llm].
    module Router
      class << self
        # Resolve an LLM routing intent to a tier/provider/model decision.
        #
        # @param intent [Hash, nil] routing intent (capability, privacy, etc.)
        # @param tier [Symbol, nil] explicit tier override — skips rule matching
        # @param model [String, nil] explicit model override
        # @param provider [Symbol, nil] explicit provider override
        # @return [Resolution, nil]
        def resolve(intent: nil, tier: nil, model: nil, provider: nil)
          return explicit_resolution(tier, provider, model) if tier

          return nil unless routing_enabled? && intent

          merged = merge_defaults(intent)
          rules = load_rules
          candidates = select_candidates(rules, merged)
          best = pick_best(candidates)
          best&.to_resolution
        end

        # Like #resolve, but returns an ordered chain of resolutions to try.
        # When routing is disabled, or neither intent nor tier is given, the
        # chain holds a single cloud resolution built from the settings defaults.
        #
        # @param max_escalations [Integer, nil] overrides the configured max_attempts
        # @return [EscalationChain]
        def resolve_chain(intent: nil, tier: nil, model: nil, provider: nil, max_escalations: nil)
          max = max_escalations || escalation_max_attempts
          return chain_from_defaults(model, provider, max) unless routing_enabled? && (intent || tier)
          return EscalationChain.new(resolutions: [explicit_resolution(tier, provider, model)], max_attempts: max) if tier

          chain_from_intent(intent, max)
        end

        # @return [HealthTracker] lazily built and memoized; cleared by #reset!
        def health_tracker
          @health_tracker ||= build_health_tracker
        end

        # Routing is enabled when the routing settings have a truthy :enabled
        # flag and a non-empty Array of rules.
        def routing_enabled?
          settings = routing_settings
          return false if settings.nil? || settings.empty?
          return false unless settings[:enabled]

          rules = settings[:rules]
          rules.is_a?(Array) && !rules.empty?
        end

        # Drops the memoized health tracker so the next access rebuilds it.
        def reset!
          @health_tracker = nil
        end

        # Check whether a tier can be used right now.
        #   :local — always available
        #   :fleet — available when Legion::Transport is loaded
        #   :cloud — always available
        # A nil tier is never available: it cannot be turned into a Resolution.
        def tier_available?(tier)
          return false if tier.nil?
          return Legion.const_defined?('Transport') if tier.to_sym == :fleet

          true
        end

        private

        # Builds a Resolution for an explicit tier, filling in the tier's
        # default provider and model where the caller gave none.
        def explicit_resolution(tier, provider, model)
          resolved_provider = provider ? provider.to_sym : default_provider_for_tier(tier)
          resolved_model = model || default_model_for_tier(tier)

          Resolution.new(
            tier: tier,
            provider: resolved_provider,
            model: resolved_model,
            rule: 'explicit'
          )
        end

        # Configured default intent overlaid with the caller's intent.
        def merge_defaults(intent)
          normalize_intent(routing_settings[:default_intent] || {}).merge(normalize_intent(intent))
        end

        # Symbol keys, and Symbol values wherever the value supports #to_sym.
        def normalize_intent(hash)
          hash
            .transform_keys(&:to_sym)
            .transform_values { |v| v.respond_to?(:to_sym) ? v.to_sym : v }
        end

        def load_rules
          raw = routing_settings[:rules] || []
          raw.map { |h| Rule.from_hash(h) }
        end

        def select_candidates(rules, intent)
          # 1. Filter by intent match
          matched = rules.select { |r| r.matches_intent?(intent) }

          # 2. Collect constraints declared by the matching rules
          constraints = matched.filter_map(&:constraint)

          # 3. Filter by schedule
          scheduled = matched.select(&:within_schedule?)

          # 4. Reject rules excluded by active constraints
          unconstrained = scheduled.reject { |r| excluded_by_constraint?(r, constraints) }

          # 4.5 Reject Ollama rules where model is not pulled or doesn't fit
          discovered = unconstrained.reject { |r| excluded_by_discovery?(r) }

          # 5. Filter by tier availability
          discovered.select { |r| tier_available?(target_value(r, :tier)) }
        end

        # Reads a target entry by Symbol key, falling back to the String key.
        def target_value(rule, key)
          rule.target[key] || rule.target[key.to_s]
        end

        # The only constraint acted on here is 'never_cloud', which excludes
        # cloud-tier rules.
        def excluded_by_constraint?(rule, constraints)
          return false if constraints.empty?

          tier = target_value(rule, :tier)&.to_sym

          constraints.any? do |c|
            c.to_s == 'never_cloud' && tier == :cloud
          end
        end

        # Applies only to local Ollama rules that name a model. Such a rule is
        # excluded when the model is not available, or when its size in MB
        # exceeds the available memory minus the configured floor. Unknown
        # size or memory leaves the rule in place.
        def excluded_by_discovery?(rule)
          return false unless discovery_enabled?

          tier = target_value(rule, :tier)&.to_sym
          provider = target_value(rule, :provider)&.to_sym
          model = target_value(rule, :model)

          return false unless tier == :local && provider == :ollama && model

          return true unless Discovery::Ollama.model_available?(model)

          model_bytes = Discovery::Ollama.model_size(model)
          available = Discovery::System.available_memory_mb
          return false if model_bytes.nil? || available.nil?

          floor = discovery_settings[:memory_floor_mb] || 2048
          model_mb = model_bytes / 1024 / 1024
          model_mb > (available - floor)
        end

        def discovery_enabled?
          discovery_settings.fetch(:enabled, true)
        end

        # Any error while reading the settings is treated as "no discovery settings".
        def discovery_settings
          llm = llm_settings
          (llm[:discovery] || llm['discovery'] || {}).transform_keys(&:to_sym)
        rescue StandardError
          {}
        end

        def pick_best(candidates)
          return nil if candidates.empty?

          candidates.max_by { |r| effective_priority(r) }
        end

        # Rule priority, plus the provider's health adjustment, plus a bonus of
        # ten points per unit that cost_multiplier falls below 1.0.
        def effective_priority(rule)
          provider = target_value(rule, :provider)&.to_sym
          cost_bonus = (1.0 - rule.cost_multiplier) * 10
          rule.priority + health_tracker.adjustment(provider) + cost_bonus
        end

        # Legion::Settings[:llm] when it is a Hash, otherwise an empty Hash.
        def llm_settings
          llm = Legion::Settings[:llm]
          llm.is_a?(Hash) ? llm : {}
        end

        def routing_settings
          llm = llm_settings
          routing = llm[:routing] || llm['routing'] || {}
          routing.transform_keys(&:to_sym)
        end

        def symbolize_keys(hash)
          hash.is_a?(Hash) ? hash.transform_keys(&:to_sym) : {}
        end

        # The shipped defaults keep these values under routing[:health].
        # Values placed directly under routing (the location this method used
        # to read exclusively) still win when present, so existing
        # configurations keep their behavior.
        def build_health_tracker
          settings = routing_settings
          health = symbolize_keys(settings[:health])
          cb = symbolize_keys(settings[:circuit_breaker] || health[:circuit_breaker])

          HealthTracker.new(
            window_seconds: settings.fetch(:window_seconds) { health.fetch(:window_seconds, 300) },
            failure_threshold: cb.fetch(:failure_threshold, 3),
            cooldown_seconds: cb.fetch(:cooldown_seconds, 60)
          )
        end

        def default_provider_for_tier(tier)
          if tier.to_sym == :cloud
            default = routing_settings[:default_provider]
            default ? default.to_sym : :bedrock
          else
            :ollama
          end
        end

        def default_model_for_tier(tier)
          case tier.to_sym
          when :local
            ollama = llm_settings.dig(:providers, :ollama) || {}
            ollama[:default_model] || 'llama3'
          when :fleet then 'llama4:70b'
          when :cloud
            llm_settings[:default_model] || 'claude-sonnet-4-6'
          else 'llama3'
          end
        end

        def chain_from_defaults(model, provider, max)
          fallback_model = model || default_settings_model
          fallback_provider = (provider || default_settings_provider)&.to_sym
          res = Resolution.new(tier: :cloud, provider: fallback_provider || :bedrock, model: fallback_model || 'claude-sonnet-4-6')
          EscalationChain.new(resolutions: [res], max_attempts: max)
        end

        # Candidates ordered by descending effective priority. When the best
        # rule declares a fallback, its fallback chain goes first. Resolutions
        # are de-duplicated by provider and model.
        def chain_from_intent(intent, max)
          merged = intent ? merge_defaults(intent) : {}
          rules = load_rules
          candidates = select_candidates(rules, merged)
          sorted = candidates.sort_by { |r| -effective_priority(r) }
          resolutions = sorted.map(&:to_resolution)
          resolutions = build_fallback_chain(sorted.first, sorted, resolutions) if sorted.first&.fallback
          resolutions = resolutions.uniq { |r| [r.provider, r.model] }
          EscalationChain.new(resolutions: resolutions, max_attempts: max)
        end

        # Follows the primary rule's fallback links. A Hash fallback names a
        # tier/provider/model directly and ends the walk; any other value names
        # another candidate rule. Rule names already visited are not followed
        # again, so a cycle (A -> B -> A) terminates. Resolutions from
        # default_chain not already present are appended afterwards.
        def build_fallback_chain(primary_rule, candidates, default_chain)
          chain = [primary_rule.to_resolution]
          visited = [primary_rule.name.to_s]
          current = primary_rule

          while (fallback_target = current.fallback)
            if fallback_target.is_a?(Hash)
              chain << fallback_resolution(fallback_target)
              break
            end

            next_rule = candidates.find { |r| r.name.to_s == fallback_target.to_s }
            break if next_rule.nil? || visited.include?(next_rule.name.to_s)

            visited << next_rule.name.to_s
            chain << next_rule.to_resolution
            current = next_rule
          end

          remaining = default_chain.reject { |r| chain.any? { |c| c.provider == r.provider && c.model == r.model } }
          chain + remaining
        end

        # Resolution for an inline fallback; the tier defaults to :cloud, and
        # provider/model default to that tier's defaults.
        def fallback_resolution(fallback)
          fb = fallback.transform_keys(&:to_sym)
          fb_tier = fb[:tier]&.to_sym || :cloud
          fb_provider = fb[:provider]&.to_sym || default_provider_for_tier(fb_tier)
          fb_model = fb[:model] || default_model_for_tier(fb_tier)
          Resolution.new(tier: fb_tier, provider: fb_provider, model: fb_model)
        end

        def escalation_max_attempts
          symbolize_keys(routing_settings[:escalation]).fetch(:max_attempts, 3)
        end

        def default_settings_model
          llm_settings[:default_model]
        end

        def default_settings_provider
          llm_settings[:default_provider]
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
  module LLM
    # Built-in default settings for the llm section. Every method returns a
    # newly built Hash on each call.
    module Settings
      class << self
        # Complete default settings tree.
        def default
          {
            enabled: true,
            connected: false,
            default_model: nil,
            default_provider: nil,
            providers: providers,
            routing: routing_defaults,
            discovery: discovery_defaults
          }
        end

        # Defaults for the discovery section.
        def discovery_defaults
          { enabled: true, refresh_seconds: 60, memory_floor_mb: 2048 }
        end

        # Defaults for the routing section. Routing and escalation are both
        # off, and the rule list is empty.
        def routing_defaults
          tiers = {
            local: { provider: 'ollama' },
            fleet: { queue: 'llm.inference', timeout_seconds: 30 },
            cloud: { providers: %w[bedrock anthropic] }
          }
          health = {
            window_seconds: 300,
            circuit_breaker: { failure_threshold: 3, cooldown_seconds: 60 },
            latency_penalty_threshold_ms: 5000,
            budget: { daily_limit_usd: nil, monthly_limit_usd: nil }
          }
          escalation = { enabled: false, max_attempts: 3, quality_threshold: 50 }

          {
            enabled: false,
            default_intent: { privacy: 'normal', capability: 'moderate', cost: 'normal' },
            tiers: tiers,
            health: health,
            escalation: escalation,
            rules: []
          }
        end

        # Per-provider defaults. Every provider starts disabled and without
        # credentials.
        def providers
          bedrock = {
            enabled: false,
            default_model: 'us.anthropic.claude-sonnet-4-6-v1',
            api_key: nil,
            secret_key: nil,
            session_token: nil,
            bearer_token: nil,
            region: 'us-east-2'
          }
          ollama = { enabled: false, default_model: 'llama3', base_url: 'http://localhost:11434' }

          {
            bedrock: bedrock,
            anthropic: { enabled: false, default_model: 'claude-sonnet-4-6', api_key: nil },
            openai: { enabled: false, default_model: 'gpt-4o', api_key: nil },
            gemini: { enabled: false, default_model: 'gemini-2.0-flash', api_key: nil },
            ollama: ollama
          }
        end
      end
    end
  end
end
|
|
86
|
+
|
|
87
|
+
# At load time, register the built-in llm defaults with Legion::Settings when
# that constant is defined. A failure is reported rather than raised: through
# Legion::Logging.fatal when that is usable, otherwise on standard output.
begin
  if Legion.const_defined?('Settings')
    Legion::Settings.merge_settings('llm', Legion::LLM::Settings.default)
  end
rescue StandardError => e
  use_logger = Legion.const_defined?('Logging') && Legion::Logging.respond_to?(:fatal)

  [e.message, e.backtrace].each do |detail|
    if use_logger
      Legion::Logging.fatal(detail)
    else
      puts detail
    end
  end
end
|