legion-llm 0.9.22 → 0.9.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 428a14e141f5cbbb278e05f49fd198ef13f6e789727037c90154a855b76a8b34
4
- data.tar.gz: 8dc2aea0cd776675aad1c8ff198b35f0eba573e4a37c6e2bcdc0b6dfbbb7210b
3
+ metadata.gz: 8c8c98a439d2e96bba437e5e8b4bf8c47c01277a4079bd459c7257e2990278c6
4
+ data.tar.gz: f9344c761ebf18b4c5ab271ac8cb5858ce46f791588ace438726002d2907c70e
5
5
  SHA512:
6
- metadata.gz: 3b9f1b9fae5371eefcbfbc89262bfa422e23df0e2e52d56735c5f3af9912b7245883ae4864568b6d8828e2dfdc3ab8c3d9fd4f125f662d6f8ae51602976d9952
7
- data.tar.gz: dcdbf11006d26b929779bdb0e2ae8a541225b3a62c820dd027ef6801198a5056eb1d9a93e7cd504846b11b2196c187d7a415e263592864c3eae5ace4153b31ee
6
+ metadata.gz: 9574535d0eeca84d522858dd323e8d028994b46b3d3f78a37a8094a4f1a692fbdd68bf24e8a061160b5238a2c3e4f73141e29bf70c6423f18e4b4441937f5417
7
+ data.tar.gz: 69aa8eccf10beb687b637b7442d9eb8a7bae0d42405fc9cfd47f8c8d5c036b7724df6cb55c10d8264f0701f46f8282f0a44d8622d607a6129a09ab4c39ad2e99
data/CHANGELOG.md CHANGED
@@ -1,5 +1,29 @@
1
1
  # Legion LLM Changelog
2
2
 
3
+ ## [0.9.23] - 2026-05-13
4
+
5
+ ### Added
6
+ - Router: `registry_entry_for_provider` for explicit provider model resolution
7
+ - Router: model denylist (`deny_model`, `model_denied?`, `excluded_by_denial?`) — config errors auto-deny models
8
+ - Executor: config error detection (`CONFIG_ERROR_PATTERNS`) — prevents circuit breaker trips on auth/validation errors
9
+ - Executor: step timing hash on response (`metrics.timing`, `metrics.latency_legionio_ms`)
10
+ - API: `/api/llm/inference` response includes `provider`, `instance`, `tier`, `metrics`
11
+ - API: `/api/llm/providers` surfaces `source` and `credential_fingerprint`
12
+ - Inventory: provider-scoped queries skip unrelated providers
13
+ - Metering: disk-based JSONL spool when transport unavailable (was dropping events)
14
+ - Discovery: `report_discovery_failure` reports connection failures to health tracker
15
+ - Providers: `enabled: false` instances not registered; `default_model` in metadata
16
+
17
+ ### Changed
18
+ - Router: tier-aware model fallback — global default no longer bleeds across providers
19
+ - Inventory: single-source offerings (native_provider preferred over discovery to eliminate duplicates)
20
+ - Inventory: dedup normalizes `"default"` instance name
21
+ - Discovery: concise connection error log (no stacktrace for unreachable providers)
22
+ - Settings: removed `claude` from `native_providers` list
23
+
24
+ ### Fixed
25
+ - Cache spec rewritten to use real `Legion::Cache` instead of fragile stubs
26
+
3
27
  ## [0.9.22] - 2026-05-12
4
28
 
5
29
  ### Added
@@ -498,6 +498,26 @@ module Legion
498
498
 
499
499
  nil
500
500
  end
501
+
502
+ define_method(:build_response_metrics) do |pipeline_response|
503
+ routing = pipeline_response.routing || {}
504
+ timestamps = pipeline_response.timestamps || {}
505
+ metrics = {}
506
+
507
+ if (latency = routing[:latency_ms])
508
+ metrics[:latency_ms] = latency
509
+ end
510
+
511
+ step_timings = timestamps[:step_timings]
512
+ if step_timings.is_a?(Hash) && step_timings.any?
513
+ metrics[:timing] = step_timings
514
+ total = step_timings[:total].to_i
515
+ external = step_timings[:provider_call].to_i + step_timings[:tool_calls].to_i
516
+ metrics[:latency_legionio_ms] = total - external if total.positive?
517
+ end
518
+
519
+ metrics.empty? ? nil : metrics
520
+ end
501
521
  end
502
522
 
503
523
  log.debug('[llm][api][helpers] shared helpers registered')
@@ -184,11 +184,15 @@ module Legion
184
184
  request_id: request_id,
185
185
  content: full_text,
186
186
  model: (routing[:model] || routing['model']).to_s,
187
+ provider: (routing[:provider] || routing['provider'])&.to_s,
188
+ instance: (routing[:instance] || routing['instance'])&.to_s,
189
+ tier: (routing[:tier] || routing['tier'])&.to_s,
187
190
  input_tokens: token_value(tokens, :input),
188
191
  output_tokens: token_value(tokens, :output),
189
192
  tool_calls: extract_tool_calls(pipeline_response),
190
- conversation_id: pipeline_response.conversation_id
191
- }
193
+ conversation_id: pipeline_response.conversation_id,
194
+ metrics: build_response_metrics(pipeline_response)
195
+ }.compact
192
196
  done_payload[:thinking] = pipeline_response.thinking if include_thinking && pipeline_response.thinking
193
197
  emit_sse_event(out, 'done', {
194
198
  **done_payload
@@ -237,11 +241,16 @@ module Legion
237
241
  tool_calls: tool_calls,
238
242
  stop_reason: pipeline_response.stop&.dig(:reason)&.to_s,
239
243
  model: (routing[:model] || routing['model']).to_s,
244
+ provider: (routing[:provider] || routing['provider'])&.to_s,
245
+ instance: (routing[:instance] || routing['instance'])&.to_s,
246
+ tier: (routing[:tier] || routing['tier'])&.to_s,
240
247
  input_tokens: token_value(tokens, :input),
241
248
  output_tokens: token_value(tokens, :output),
242
- conversation_id: pipeline_response.conversation_id
249
+ conversation_id: pipeline_response.conversation_id,
250
+ metrics: build_response_metrics(pipeline_response)
243
251
  }
244
252
  payload[:thinking] = pipeline_response.thinking if include_thinking && pipeline_response.thinking
253
+ payload.compact!
245
254
  json_response(payload, status_code: 200)
246
255
  end
247
256
  rescue Legion::LLM::AuthError => e
@@ -87,7 +87,7 @@ module Legion
87
87
  provider_key = entry[:provider].to_sym
88
88
  instance_key = entry[:instance].to_sym
89
89
 
90
- {
90
+ result = {
91
91
  provider: entry[:provider].to_s,
92
92
  instance: entry[:instance].to_s,
93
93
  tier: entry.dig(:metadata, :tier)&.to_s,
@@ -102,6 +102,9 @@ module Legion
102
102
  end,
103
103
  native: true
104
104
  }
105
+ result[:source] = entry.dig(:metadata, :source) if entry.dig(:metadata, :source)
106
+ result[:credential_fingerprint] = entry.dig(:metadata, :credential_fingerprint) if entry.dig(:metadata, :credential_fingerprint)
107
+ result
105
108
  end
106
109
  end
107
110
  end
@@ -80,6 +80,8 @@ module Legion
80
80
 
81
81
  def register_provider_instance(provider_module, family, aliases, instance_id, config)
82
82
  normalized_config = normalize_instance_config(config)
83
+ return if normalized_config[:enabled] == false
84
+
83
85
  registry_config = adapter_instance_config(normalized_config, instance_id)
84
86
  metadata = instance_metadata(normalized_config)
85
87
  adapter = Call::LexLLMAdapter.new(family, provider_module.provider_class, instance_config: registry_config)
@@ -107,7 +109,11 @@ module Legion
107
109
  end
108
110
 
109
111
  def instance_metadata(config)
110
- { tier: config[:tier], capabilities: config[:capabilities] || [] }
112
+ meta = { tier: config[:tier], capabilities: config[:capabilities] || [] }
113
+ meta[:default_model] = config[:default_model] if config[:default_model]
114
+ meta[:source] = config[:source] if config[:source]
115
+ meta[:credential_fingerprint] = config[:credential_fingerprint] if config[:credential_fingerprint]
116
+ meta
111
117
  end
112
118
 
113
119
  def safe_provider_family(provider_module)
@@ -141,8 +141,7 @@ module Legion
141
141
  }
142
142
  end
143
143
  rescue StandardError => e
144
- handle_exception(e, level: :debug,
145
- operation: "discovery.offerings.#{entry[:provider]}/#{entry[:instance]}")
144
+ report_discovery_failure(entry, e)
146
145
  []
147
146
  end
148
147
  end
@@ -165,6 +164,28 @@ module Legion
165
164
 
166
165
  private
167
166
 
167
+ def report_discovery_failure(entry, error)
168
+ provider = entry[:provider]
169
+ instance = entry[:instance]
170
+ connection_error = error.is_a?(Faraday::ConnectionFailed) ||
171
+ error.message.match?(/connection refused|connect.*timeout|no route to host/i)
172
+
173
+ if connection_error
174
+ log.warn("[llm][discovery] provider=#{provider} instance=#{instance} unreachable: #{error.message}")
175
+ else
176
+ handle_exception(error, level: :warn, handled: true,
177
+ operation: "discovery.offerings.#{provider}/#{instance}")
178
+ end
179
+
180
+ return unless defined?(Router) && Router.respond_to?(:health_tracker)
181
+
182
+ Router.health_tracker.report(
183
+ provider: provider, instance: instance,
184
+ signal: :error, value: 1,
185
+ metadata: { reason: error.class.name, source: :discovery }
186
+ )
187
+ end
188
+
168
189
  def normalize_offering(offering)
169
190
  data = if offering.is_a?(Hash)
170
191
  offering
@@ -11,7 +11,7 @@ require_relative 'route_attempts'
11
11
  module Legion
12
12
  module LLM
13
13
  module Inference
14
- class Executor
14
+ class Executor # rubocop:disable Metrics/ClassLength
15
15
  include Legion::Logging::Helper
16
16
  include NativeToolLoop
17
17
  include RouteAttempts
@@ -59,6 +59,15 @@ module Legion
59
59
 
60
60
  ASYNC_SAFE_STEPS = %i[post_response knowledge_capture response_return].freeze
61
61
 
62
+ CONFIG_ERROR_PATTERNS = [
63
+ /ValidationException/,
64
+ /AccessDeniedException/,
65
+ /InvalidModel/i,
66
+ /model.*not found/i,
67
+ /not authorized/i,
68
+ /AWS Marketplace/i
69
+ ].freeze
70
+
62
71
  MAX_NATIVE_TOOL_ROUNDS = 200
63
72
  ToolResultEvent = Struct.new(:result, :tool_call_id, :tool_name, :started_at, keyword_init: true)
64
73
 
@@ -160,6 +169,7 @@ module Legion
160
169
  skipped = 0
161
170
  pipeline_start = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
162
171
  step_timings = []
172
+ @step_timing_hash = {}
163
173
  STEPS.each do |step|
164
174
  if Profile.skip?(@profile, step)
165
175
  skipped += 1
@@ -170,9 +180,12 @@ module Legion
170
180
  execute_step(step) { send(:"step_#{step}") }
171
181
  elapsed_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - t0) * 1000).round
172
182
  step_timings << "#{step}=#{elapsed_ms}ms"
183
+ @step_timing_hash[step] = elapsed_ms
173
184
  executed += 1
174
185
  end
175
186
  total_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - pipeline_start) * 1000).round
187
+ @step_timing_hash[:total] = total_ms
188
+ @timestamps[:step_timings] = @step_timing_hash
176
189
  log.warn("[pipeline][timing] profile=#{@profile} total=#{total_ms}ms executed=#{executed} skipped=#{skipped} #{step_timings.join(' ')}")
177
190
  annotate_top_level_span(steps_executed: executed, steps_skipped: skipped)
178
191
  end
@@ -547,9 +560,18 @@ module Legion
547
560
  duration_ms = ((Time.now - start_time) * 1000).round
548
561
  handle_exception(err, level: :warn, handled: handled, operation: operation,
549
562
  provider: resolution.provider, model: resolution.model, duration_ms: duration_ms)
550
- Router.health_tracker.report(provider: resolution.provider, offering_id: resolution.offering_id,
551
- signal: :error, value: 1,
552
- metadata: { reason: err.class.name, message: err.message })
563
+ if config_error?(err)
564
+ Router.health_tracker.deny_model(
565
+ provider: resolution.provider,
566
+ model: resolution.model,
567
+ instance: resolution.instance,
568
+ reason: err.message
569
+ )
570
+ else
571
+ Router.health_tracker.report(provider: resolution.provider, offering_id: resolution.offering_id,
572
+ signal: :error, value: 1,
573
+ metadata: { reason: err.class.name, message: err.message })
574
+ end
553
575
  @escalation_history << escalation_attempt_hash(
554
576
  resolution,
555
577
  outcome: outcome,
@@ -928,6 +950,12 @@ module Legion
928
950
  handle_exception(e, level: :warn, operation: 'llm.pipeline.emit_error_audit')
929
951
  end
930
952
 
953
+ def config_error?(err)
954
+ name = err.class.name.to_s
955
+ msg = err.message.to_s
956
+ CONFIG_ERROR_PATTERNS.any? { |pat| pat.match?(name) || pat.match?(msg) }
957
+ end
958
+
931
959
  def execute_pre_provider_steps
932
960
  log.debug "[llm][executor] action=pre_provider_steps.enter step_count=#{PRE_PROVIDER_STEPS.size}"
933
961
  PRE_PROVIDER_STEPS.each do |step|
@@ -1549,7 +1577,12 @@ module Legion
1549
1577
  end
1550
1578
 
1551
1579
  def build_response_routing
1552
- routing = { provider: @resolved_provider, model: @resolved_model }
1580
+ routing = {
1581
+ provider: @resolved_provider,
1582
+ instance: @resolved_instance,
1583
+ model: @resolved_model,
1584
+ tier: @resolved_tier
1585
+ }.compact
1553
1586
  routing[:offering_id] = @resolved_offering_id if @resolved_offering_id
1554
1587
  routing[:offering_metadata] = @resolved_offering_metadata if @resolved_offering_metadata&.any?
1555
1588
 
@@ -39,15 +39,19 @@ module Legion
39
39
  def offerings(filters = {})
40
40
  log.debug "[llm][inventory] action=offerings.enter filters=#{filters.keys}"
41
41
  normalized_filters = normalize_filter_hash(filters)
42
+ provider_scope = normalized_filters[:provider]&.to_sym
42
43
  list = []
43
44
  providers_config.each do |provider_family, config|
44
45
  next unless enabled_config?(config)
46
+ next if provider_scope && provider_family.to_sym != provider_scope
45
47
 
46
48
  list.concat(provider_offerings(provider_family.to_sym, config))
47
49
  end
48
50
 
49
- list.concat(discovery_offerings)
50
- list.concat(native_provider_offerings)
51
+ native = native_provider_offerings(provider: provider_scope)
52
+ native_providers = native.map { |o| o[:provider_family]&.to_sym }.uniq
53
+ list.concat(native)
54
+ list.concat(discovery_offerings(provider: provider_scope, exclude_providers: native_providers))
51
55
  list = dedupe_offerings(list)
52
56
  result = filter_offerings(list, normalized_filters)
53
57
  log.debug "[llm][inventory] action=offerings.complete total=#{result.size}"
@@ -265,7 +269,7 @@ module Legion
265
269
  ))
266
270
  end
267
271
 
268
- def discovery_offerings
272
+ def discovery_offerings(provider: nil, exclude_providers: [])
269
273
  return [] unless defined?(Legion::LLM::Discovery)
270
274
 
271
275
  cached_models = if Legion::LLM::Discovery.respond_to?(:cached_discovered_models)
@@ -276,6 +280,9 @@ module Legion
276
280
 
277
281
  cached_models.filter_map do |model_entry|
278
282
  provider_family = model_entry[:provider]
283
+ next if provider && provider_family.to_sym != provider
284
+ next if exclude_providers.include?(provider_family.to_sym)
285
+
279
286
  config = option(providers_config, provider_family, {})
280
287
  next unless enabled_config?(config)
281
288
 
@@ -295,11 +302,13 @@ module Legion
295
302
  []
296
303
  end
297
304
 
298
- def native_provider_offerings
305
+ def native_provider_offerings(provider: nil)
299
306
  return [] unless defined?(Legion::LLM::Call::Registry)
300
307
 
301
308
  Legion::LLM::Call::Registry.all_instances.flat_map do |entry|
302
309
  provider_name = entry[:provider]
310
+ next [] if provider && provider_name.to_sym != provider
311
+
303
312
  adapter = entry[:adapter]
304
313
  next [] unless adapter.respond_to?(:offerings)
305
314
 
@@ -347,7 +356,9 @@ module Legion
347
356
 
348
357
  def dedupe_offerings(list)
349
358
  list.each_with_object({}) do |offering, seen|
350
- key = [offering[:provider_family], offering[:provider_instance], offering[:model], offering[:type]]
359
+ instance = offering[:provider_instance]
360
+ instance = nil if instance.to_s == 'default'
361
+ key = [offering[:provider_family], instance, offering[:model], offering[:type]]
351
362
  current = seen[key]
352
363
  seen[key] = offering if current.nil? || source_priority(offering) > source_priority(current)
353
364
  end.values
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'legion/logging/helper'
4
+ require 'fileutils'
4
5
  require_relative 'metering/estimator'
5
6
  require_relative 'metering/tracker'
6
7
  require_relative 'metering/tokens'
@@ -12,6 +13,10 @@ module Legion
12
13
  module Metering
13
14
  extend Legion::Logging::Helper
14
15
 
16
+ SPOOL_DIR = File.expand_path('~/.legionio/data/spool/metering')
17
+ SPOOL_FILE = File.join(SPOOL_DIR, 'events.jsonl').freeze
18
+ SPOOL_MUTEX = Mutex.new
19
+
15
20
  def self.load_transport
16
21
  return unless defined?(Legion::Transport::Message)
17
22
 
@@ -30,8 +35,9 @@ module Legion
30
35
  log.info("[llm][metering] published provider=#{event[:provider]} model=#{event[:model_id]}")
31
36
  :published
32
37
  else
33
- log.warn("[llm][metering] dropped provider=#{event[:provider]} model=#{event[:model_id]} reason=transport_unavailable")
34
- :dropped
38
+ spool_event(event)
39
+ log.info("[llm][metering] spooled provider=#{event[:provider]} model=#{event[:model_id]} reason=transport_unavailable")
40
+ :spooled
35
41
  end
36
42
  rescue StandardError => e
37
43
  handle_exception(e, level: :warn, operation: 'llm.metering.emit')
@@ -46,8 +52,43 @@ module Legion
46
52
  end
47
53
 
48
54
  def flush_spool
49
- log.debug('[llm][metering] spool disabled; metering events are transport-only')
50
- 0
55
+ return 0 unless File.exist?(spool_file_path)
56
+
57
+ event_class = metering_event_class
58
+ unless event_class && transport_connected?
59
+ log.debug('[llm][metering] flush_spool skipped reason=transport_unavailable')
60
+ return 0
61
+ end
62
+
63
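+ # Read and truncate inside SPOOL_MUTEX so a concurrent spool_event (which takes the same mutex)
64
+ # cannot append between the read and the truncate. NOTE(review): the file is emptied before any event is published, so if publish raises part-way the remaining events are not written back to the spool.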
65
+ events = SPOOL_MUTEX.synchronize do
66
+ path = spool_file_path
67
+ return 0 unless File.exist?(path)
68
+
69
+ lines = File.readlines(path, chomp: true)
70
+ parsed = lines.filter_map do |line|
71
+ next if line.strip.empty?
72
+
73
+ Legion::JSON.load(line)
74
+ end
75
+ File.write(path, '')
76
+ parsed
77
+ end
78
+
79
+ return 0 if events.empty?
80
+
81
+ batch_sleep = spool_settings[:flush_batch_sleep] || 0.0
82
+ flushed = 0
83
+
84
+ events.each_with_index do |event_data, index|
85
+ event_class.new(**event_data).publish
86
+ flushed += 1
87
+ sleep(batch_sleep) if batch_sleep.positive? && index < events.size - 1
88
+ end
89
+
90
+ log.info("[llm][metering] flush_spool flushed=#{flushed}")
91
+ flushed
51
92
  rescue StandardError => e
52
93
  handle_exception(e, level: :warn, operation: 'llm.metering.flush_spool')
53
94
  0
@@ -128,6 +169,81 @@ module Legion
128
169
  hash[key] if hash.key?(key)
129
170
  end
130
171
 
172
+ # --- Spool internals (private) ---
173
+
174
+ def spool_event(event)
175
+ SPOOL_MUTEX.synchronize do
176
+ ensure_spool_dir
177
+ enforce_max_events
178
+ line = Legion::JSON.dump(event)
179
+ File.open(spool_file_path, 'a') { |f| f.puts(line) }
180
+ end
181
+ log.debug("[llm][metering] spool_event written provider=#{event[:provider]} model=#{event[:model_id]}")
182
+ rescue StandardError => e
183
+ handle_exception(e, level: :warn, operation: 'llm.metering.spool_event')
184
+ end
185
+
186
+ def read_spool
187
+ SPOOL_MUTEX.synchronize do
188
+ path = spool_file_path
189
+ return [] unless File.exist?(path)
190
+
191
+ lines = File.readlines(path, chomp: true)
192
+ lines.filter_map do |line|
193
+ next if line.strip.empty?
194
+
195
+ Legion::JSON.load(line)
196
+ end
197
+ end
198
+ rescue StandardError => e
199
+ handle_exception(e, level: :warn, operation: 'llm.metering.read_spool')
200
+ []
201
+ end
202
+
203
+ def truncate_spool
204
+ SPOOL_MUTEX.synchronize do
205
+ path = spool_file_path
206
+ File.write(path, '') if File.exist?(path)
207
+ end
208
+ rescue StandardError => e
209
+ handle_exception(e, level: :warn, operation: 'llm.metering.truncate_spool')
210
+ end
211
+
212
+ def enforce_max_events
213
+ path = spool_file_path
214
+ return unless File.exist?(path)
215
+
216
+ max = spool_settings[:max_events] || 10_000
217
+ lines = File.readlines(path, chomp: true)
218
+ return if lines.size < max
219
+
220
+ # Drop oldest events to make room
221
+ trimmed = lines.last(max - 1)
222
+ File.write(path, trimmed.map { |l| "#{l}\n" }.join)
223
+ log.debug("[llm][metering] enforce_max_events trimmed=#{lines.size - trimmed.size} max=#{max}")
224
+ end
225
+
226
+ def ensure_spool_dir
227
+ FileUtils.mkdir_p(spool_dir_path)
228
+ end
229
+
230
+ def spool_settings
231
+ settings = Legion::LLM::Settings.value(:metering, :spool, default: {})
232
+ settings.is_a?(Hash) ? settings : {}
233
+ end
234
+
235
+ # Resolve spool file path at call time, honouring operator-configured
236
+ # paths (e.g. for containerised deployments where $HOME is not writable).
237
+ # Falls back to the SPOOL_FILE constant, whose ~ is expanded once when this module is loaded.
238
+ def spool_file_path
239
+ configured = spool_settings[:path]
240
+ configured && !configured.to_s.strip.empty? ? configured.to_s : SPOOL_FILE
241
+ end
242
+
243
+ def spool_dir_path
244
+ File.dirname(spool_file_path)
245
+ end
246
+
131
247
  # Backward-compat: resolve old Legion::LLM::Metering::Exchange, ::Event
132
248
  def self.const_missing(name)
133
249
  case name
@@ -19,6 +19,7 @@ module Legion
19
19
  @circuits = {}
20
20
  @latency_window = {}
21
21
  @handlers = {}
22
+ @denied_models = {}
22
23
  @mutex = Mutex.new
23
24
 
24
25
  register_default_handlers
@@ -111,6 +112,42 @@ module Legion
111
112
  worst_circuit_state(instances)
112
113
  end
113
114
 
115
+ # Record that a model is denied for a provider+instance (e.g. AccessDenied).
116
+ # Excluded from routing until restart or explicit clear.
117
+ def deny_model(provider:, model:, instance: nil, reason: nil)
118
+ key = instance ? instance_key(provider, instance) : provider.to_s
119
+ @mutex.synchronize do
120
+ @denied_models[key] ||= {}
121
+ @denied_models[key][model.to_s] = { reason: reason, at: Time.now }
122
+ end
123
+ log.warn("Model denied provider=#{key} model=#{model} reason=#{reason}")
124
+ end
125
+
126
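+ # Check if a model is denied. With an instance, only the instance_key(provider, instance) entry is consulted; without one, only the provider-level (provider.to_s) entry.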
127
+ def model_denied?(provider:, model:, instance: nil)
128
+ key = instance ? instance_key(provider, instance) : provider.to_s
129
+ @mutex.synchronize do
130
+ !@denied_models.dig(key, model.to_s).nil?
131
+ end
132
+ end
133
+
134
+ # List all denied models (for diagnostics).
135
+ def denied_models
136
+ @mutex.synchronize { @denied_models.dup }
137
+ end
138
+
139
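+ # Clear denied models for one key: provider+instance when instance is given, otherwise the provider-level key only. With no provider, clears everything (an instance passed alone is ignored).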
140
+ def clear_denied(provider: nil, instance: nil)
141
+ @mutex.synchronize do
142
+ if provider
143
+ key = instance ? instance_key(provider, instance) : provider.to_s
144
+ @denied_models.delete(key)
145
+ else
146
+ @denied_models.clear
147
+ end
148
+ end
149
+ end
150
+
114
151
  # Clears circuit and latency data for a single provider.
115
152
  def reset(provider, instance: nil, offering_id: nil)
116
153
  key = instance ? instance_key(provider, instance) : health_key(provider, offering_id)
@@ -125,6 +162,7 @@ module Legion
125
162
  @mutex.synchronize do
126
163
  @circuits.clear
127
164
  @latency_window.clear
165
+ @denied_models.clear
128
166
  end
129
167
  end
130
168
 
@@ -163,7 +163,11 @@ module Legion
163
163
  end
164
164
 
165
165
  def explicit_resolution(tier, provider, model)
166
- registry_entry = provider ? nil : registry_entry_for_tier(tier)
166
+ registry_entry = if provider
167
+ registry_entry_for_provider(provider.to_sym)
168
+ else
169
+ registry_entry_for_tier(tier)
170
+ end
167
171
  resolved_provider = provider ? provider.to_sym : (registry_entry&.[](:provider) || default_provider_for_tier(tier))
168
172
  resolved_model = model || registry_default_model(registry_entry) || default_model_for_tier(tier)
169
173
 
@@ -229,8 +233,11 @@ module Legion
229
233
  memory_checked.reject { |r| excluded_by_caller?(r, normalized_exclude) }
230
234
  end
231
235
 
236
+ # 4.7 Reject rules for models denied by health tracker
237
+ not_denied = not_excluded.reject { |r| excluded_by_denial?(r) }
238
+
232
239
  # 5. Filter by tier availability
233
- final = not_excluded.select { |r| tier_available?(r.target[:tier] || r.target['tier']) }
240
+ final = not_denied.select { |r| tier_available?(r.target[:tier] || r.target['tier']) }
234
241
 
235
242
  log.debug("Router: #{final.size} candidates after filtering (started with #{rules.size})")
236
243
 
@@ -303,6 +310,15 @@ module Legion
303
310
  {}
304
311
  end
305
312
 
313
+ def excluded_by_denial?(rule)
314
+ provider = (rule.target[:provider] || rule.target['provider'])&.to_sym
315
+ model = rule.target[:model] || rule.target['model']
316
+ instance = rule.target[:instance] || rule.target['instance']
317
+ return false unless provider && model
318
+
319
+ health_tracker.model_denied?(provider: provider, model: model, instance: instance)
320
+ end
321
+
306
322
  def excluded_by_caller?(rule, exclude)
307
323
  return false if exclude.nil? || exclude.empty?
308
324
 
@@ -397,22 +413,24 @@ module Legion
397
413
  # Fallback to static defaults
398
414
  case sym
399
415
  when :local, :direct, :fleet
400
- 'llama3'
416
+ default_settings_model_for_tier(sym) || 'llama3'
401
417
  when :openai_compat
402
418
  'gpt-4o'
403
419
  when :cloud
404
- default_settings_model || 'us.anthropic.claude-sonnet-4-6'
420
+ default_settings_model_for_tier(sym) || 'us.anthropic.claude-sonnet-4-6'
405
421
  when :frontier
406
- default_settings_model || 'claude-sonnet-4-6'
422
+ default_settings_model_for_tier(sym) || 'claude-sonnet-4-6'
407
423
  end
408
424
  end
409
425
 
410
426
  def chain_from_defaults(model, provider, max)
411
427
  if provider || model || default_settings_provider || default_settings_model
412
428
  p = (provider || default_settings_provider)&.to_sym
429
+ resolved_model = model || registry_default_model(registry_entry_for_provider(p)) ||
430
+ default_settings_model || 'claude-sonnet-4-6'
413
431
  res = Resolution.new(tier: PROVIDER_TIER.fetch(p, :frontier),
414
432
  provider: p || :anthropic,
415
- model: model || default_settings_model || 'claude-sonnet-4-6')
433
+ model: resolved_model)
416
434
  return EscalationChain.new(resolutions: [res], max_attempts: max)
417
435
  end
418
436
 
@@ -512,6 +530,31 @@ module Legion
512
530
  Legion::LLM::Settings.value(:default_model)
513
531
  end
514
532
 
533
+ def default_settings_model_for_tier(tier)
534
+ model = default_settings_model
535
+ return nil if model.nil? || model.to_s.empty?
536
+
537
+ provider = default_settings_provider&.to_sym
538
+ return nil unless provider
539
+
540
+ provider_tier = registry_tier_for_default_provider(provider)
541
+ return model if provider_tier == tier
542
+
543
+ nil
544
+ end
545
+
546
+ def registry_tier_for_default_provider(provider)
547
+ instances = begin
548
+ Call::Registry.all_instances
549
+ rescue StandardError
550
+ []
551
+ end
552
+ entry = instances.find { |i| i[:provider] == provider }
553
+ return registry_tier(provider, entry[:metadata]) if entry
554
+
555
+ PROVIDER_TIER.fetch(provider, :cloud)
556
+ end
557
+
515
558
  def default_settings_provider
516
559
  Legion::LLM::Settings.value(:default_provider)
517
560
  end
@@ -529,6 +572,17 @@ module Legion
529
572
  registry_entry_for_tier(tier)&.[](:provider)
530
573
  end
531
574
 
575
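+ # Find the first registered instance for a specific provider. NOTE(review): matches entry[:provider] with ==, so `provider` must have the same type as the registry value (callers here pass a Symbol or nil) — confirm the registry stores Symbols.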
576
+ def registry_entry_for_provider(provider)
577
+ instances = begin
578
+ Call::Registry.all_instances
579
+ rescue StandardError => e
580
+ handle_exception(e, level: :warn, handled: true, operation: 'router.registry_entry_for_provider')
581
+ []
582
+ end
583
+ instances.find { |entry| entry[:provider] == provider }
584
+ end
585
+
532
586
  # Find a default model from registry for a given tier.
533
587
  # Tries adapter.offerings first, then metadata[:default_model].
534
588
  def registry_model_for_tier(tier)
@@ -474,7 +474,7 @@ module Legion
474
474
  mode: 'auto',
475
475
  native_providers: %w[
476
476
  ollama vllm anthropic openai gemini mlx
477
- bedrock azure_foundry vertex claude
477
+ bedrock azure_foundry vertex
478
478
  ]
479
479
  }
480
480
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module LLM
5
- VERSION = '0.9.22'
5
+ VERSION = '0.9.23'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.22
4
+ version: 0.9.23
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity