legion-llm 0.9.22 → 0.9.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/lib/legion/llm/api/native/helpers.rb +20 -0
- data/lib/legion/llm/api/native/inference.rb +12 -3
- data/lib/legion/llm/api/native/providers.rb +4 -1
- data/lib/legion/llm/call/providers.rb +7 -1
- data/lib/legion/llm/discovery.rb +23 -2
- data/lib/legion/llm/inference/executor.rb +38 -5
- data/lib/legion/llm/inventory.rb +16 -5
- data/lib/legion/llm/metering.rb +120 -4
- data/lib/legion/llm/router/health_tracker.rb +38 -0
- data/lib/legion/llm/router.rb +60 -6
- data/lib/legion/llm/settings.rb +1 -1
- data/lib/legion/llm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 8c8c98a439d2e96bba437e5e8b4bf8c47c01277a4079bd459c7257e2990278c6
|
|
4
|
+
data.tar.gz: f9344c761ebf18b4c5ab271ac8cb5858ce46f791588ace438726002d2907c70e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9574535d0eeca84d522858dd323e8d028994b46b3d3f78a37a8094a4f1a692fbdd68bf24e8a061160b5238a2c3e4f73141e29bf70c6423f18e4b4441937f5417
|
|
7
|
+
data.tar.gz: 69aa8eccf10beb687b637b7442d9eb8a7bae0d42405fc9cfd47f8c8d5c036b7724df6cb55c10d8264f0701f46f8282f0a44d8622d607a6129a09ab4c39ad2e99
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,29 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.9.23] - 2026-05-13
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- Router: `registry_entry_for_provider` for explicit provider model resolution
|
|
7
|
+
- Router: model denylist (`deny_model`, `model_denied?`, `excluded_by_denial?`) — config errors auto-deny models
|
|
8
|
+
- Executor: config error detection (`CONFIG_ERROR_PATTERNS`) — prevents circuit breaker trips on auth/validation errors
|
|
9
|
+
- Executor: step timing hash on response (`metrics.timing`, `metrics.latency_legionio_ms`)
|
|
10
|
+
- API: `/api/llm/inference` response includes `provider`, `instance`, `tier`, `metrics`
|
|
11
|
+
- API: `/api/llm/providers` surfaces `source` and `credential_fingerprint`
|
|
12
|
+
- Inventory: provider-scoped queries skip unrelated providers
|
|
13
|
+
- Metering: disk-based JSONL spool when transport unavailable (was dropping events)
|
|
14
|
+
- Discovery: `report_discovery_failure` reports connection failures to health tracker
|
|
15
|
+
- Providers: `enabled: false` instances not registered; `default_model` in metadata
|
|
16
|
+
|
|
17
|
+
### Changed
|
|
18
|
+
- Router: tier-aware model fallback — global default no longer bleeds across providers
|
|
19
|
+
- Inventory: single-source offerings (native_provider preferred over discovery to eliminate duplicates)
|
|
20
|
+
- Inventory: dedup normalizes `"default"` instance name
|
|
21
|
+
- Discovery: concise connection error log (no stacktrace for unreachable providers)
|
|
22
|
+
- Settings: removed `claude` from `native_providers` list
|
|
23
|
+
|
|
24
|
+
### Fixed
|
|
25
|
+
- Cache spec rewritten to use real `Legion::Cache` instead of fragile stubs
|
|
26
|
+
|
|
3
27
|
## [0.9.22] - 2026-05-12
|
|
4
28
|
|
|
5
29
|
### Added
|
|
@@ -498,6 +498,26 @@ module Legion
|
|
|
498
498
|
|
|
499
499
|
nil
|
|
500
500
|
end
|
|
501
|
+
|
|
502
|
+
# Builds the optional `metrics` section of an inference API response.
#
# Reads `routing[:latency_ms]` and `timestamps[:step_timings]` from the
# pipeline response. When step timings are present they are exposed as
# `:timing`, and `:latency_legionio_ms` is derived as the total minus the
# `:provider_call` and `:tool_calls` entries.
#
# @param pipeline_response [#routing, #timestamps] either reader may return nil
# @return [Hash, nil] the metrics hash, or nil when there is nothing to report
define_method(:build_response_metrics) do |pipeline_response|
  routing = pipeline_response.routing || {}
  timestamps = pipeline_response.timestamps || {}
  metrics = {}

  latency = routing[:latency_ms]
  metrics[:latency_ms] = latency if latency

  step_timings = timestamps[:step_timings]
  if step_timings.is_a?(Hash) && step_timings.any?
    metrics[:timing] = step_timings
    total = step_timings[:total].to_i
    if total.positive?
      external = step_timings[:provider_call].to_i + step_timings[:tool_calls].to_i
      # Component timings and the total are rounded separately, so their
      # difference can dip below zero by a millisecond; never report a
      # negative overhead.
      metrics[:latency_legionio_ms] = [total - external, 0].max
    end
  end

  metrics.empty? ? nil : metrics
end
|
|
501
521
|
end
|
|
502
522
|
|
|
503
523
|
log.debug('[llm][api][helpers] shared helpers registered')
|
|
@@ -184,11 +184,15 @@ module Legion
|
|
|
184
184
|
request_id: request_id,
|
|
185
185
|
content: full_text,
|
|
186
186
|
model: (routing[:model] || routing['model']).to_s,
|
|
187
|
+
provider: (routing[:provider] || routing['provider'])&.to_s,
|
|
188
|
+
instance: (routing[:instance] || routing['instance'])&.to_s,
|
|
189
|
+
tier: (routing[:tier] || routing['tier'])&.to_s,
|
|
187
190
|
input_tokens: token_value(tokens, :input),
|
|
188
191
|
output_tokens: token_value(tokens, :output),
|
|
189
192
|
tool_calls: extract_tool_calls(pipeline_response),
|
|
190
|
-
conversation_id: pipeline_response.conversation_id
|
|
191
|
-
|
|
193
|
+
conversation_id: pipeline_response.conversation_id,
|
|
194
|
+
metrics: build_response_metrics(pipeline_response)
|
|
195
|
+
}.compact
|
|
192
196
|
done_payload[:thinking] = pipeline_response.thinking if include_thinking && pipeline_response.thinking
|
|
193
197
|
emit_sse_event(out, 'done', {
|
|
194
198
|
**done_payload
|
|
@@ -237,11 +241,16 @@ module Legion
|
|
|
237
241
|
tool_calls: tool_calls,
|
|
238
242
|
stop_reason: pipeline_response.stop&.dig(:reason)&.to_s,
|
|
239
243
|
model: (routing[:model] || routing['model']).to_s,
|
|
244
|
+
provider: (routing[:provider] || routing['provider'])&.to_s,
|
|
245
|
+
instance: (routing[:instance] || routing['instance'])&.to_s,
|
|
246
|
+
tier: (routing[:tier] || routing['tier'])&.to_s,
|
|
240
247
|
input_tokens: token_value(tokens, :input),
|
|
241
248
|
output_tokens: token_value(tokens, :output),
|
|
242
|
-
conversation_id: pipeline_response.conversation_id
|
|
249
|
+
conversation_id: pipeline_response.conversation_id,
|
|
250
|
+
metrics: build_response_metrics(pipeline_response)
|
|
243
251
|
}
|
|
244
252
|
payload[:thinking] = pipeline_response.thinking if include_thinking && pipeline_response.thinking
|
|
253
|
+
payload.compact!
|
|
245
254
|
json_response(payload, status_code: 200)
|
|
246
255
|
end
|
|
247
256
|
rescue Legion::LLM::AuthError => e
|
|
@@ -87,7 +87,7 @@ module Legion
|
|
|
87
87
|
provider_key = entry[:provider].to_sym
|
|
88
88
|
instance_key = entry[:instance].to_sym
|
|
89
89
|
|
|
90
|
-
{
|
|
90
|
+
result = {
|
|
91
91
|
provider: entry[:provider].to_s,
|
|
92
92
|
instance: entry[:instance].to_s,
|
|
93
93
|
tier: entry.dig(:metadata, :tier)&.to_s,
|
|
@@ -102,6 +102,9 @@ module Legion
|
|
|
102
102
|
end,
|
|
103
103
|
native: true
|
|
104
104
|
}
|
|
105
|
+
result[:source] = entry.dig(:metadata, :source) if entry.dig(:metadata, :source)
|
|
106
|
+
result[:credential_fingerprint] = entry.dig(:metadata, :credential_fingerprint) if entry.dig(:metadata, :credential_fingerprint)
|
|
107
|
+
result
|
|
105
108
|
end
|
|
106
109
|
end
|
|
107
110
|
end
|
|
@@ -80,6 +80,8 @@ module Legion
|
|
|
80
80
|
|
|
81
81
|
def register_provider_instance(provider_module, family, aliases, instance_id, config)
|
|
82
82
|
normalized_config = normalize_instance_config(config)
|
|
83
|
+
return if normalized_config[:enabled] == false
|
|
84
|
+
|
|
83
85
|
registry_config = adapter_instance_config(normalized_config, instance_id)
|
|
84
86
|
metadata = instance_metadata(normalized_config)
|
|
85
87
|
adapter = Call::LexLLMAdapter.new(family, provider_module.provider_class, instance_config: registry_config)
|
|
@@ -107,7 +109,11 @@ module Legion
|
|
|
107
109
|
end
|
|
108
110
|
|
|
109
111
|
def instance_metadata(config)
|
|
110
|
-
{ tier: config[:tier], capabilities: config[:capabilities] || [] }
|
|
112
|
+
meta = { tier: config[:tier], capabilities: config[:capabilities] || [] }
|
|
113
|
+
meta[:default_model] = config[:default_model] if config[:default_model]
|
|
114
|
+
meta[:source] = config[:source] if config[:source]
|
|
115
|
+
meta[:credential_fingerprint] = config[:credential_fingerprint] if config[:credential_fingerprint]
|
|
116
|
+
meta
|
|
111
117
|
end
|
|
112
118
|
|
|
113
119
|
def safe_provider_family(provider_module)
|
data/lib/legion/llm/discovery.rb
CHANGED
|
@@ -141,8 +141,7 @@ module Legion
|
|
|
141
141
|
}
|
|
142
142
|
end
|
|
143
143
|
rescue StandardError => e
|
|
144
|
-
|
|
145
|
-
operation: "discovery.offerings.#{entry[:provider]}/#{entry[:instance]}")
|
|
144
|
+
report_discovery_failure(entry, e)
|
|
146
145
|
[]
|
|
147
146
|
end
|
|
148
147
|
end
|
|
@@ -165,6 +164,28 @@ module Legion
|
|
|
165
164
|
|
|
166
165
|
private
|
|
167
166
|
|
|
167
|
+
# Logs a discovery failure for one registry entry and reports it to the
# router health tracker when one is available.
#
# Connection-level failures (Faraday::ConnectionFailed, or a message that
# mentions a refused/timed-out/unroutable connection) are logged as a single
# warning line; every other error goes through `handle_exception`.
#
# @param entry [Hash] registry entry; `:provider` and `:instance` are read
# @param error [StandardError] the error raised while discovering offerings
def report_discovery_failure(entry, error)
  provider = entry[:provider]
  instance = entry[:instance]

  # This runs inside the caller's rescue path: an unguarded reference to
  # Faraday would raise NameError there if the library is not loaded.
  faraday_failure = defined?(Faraday::ConnectionFailed) && error.is_a?(Faraday::ConnectionFailed)
  connection_error = faraday_failure ||
                     error.message.to_s.match?(/connection refused|connect.*timeout|no route to host/i)

  if connection_error
    log.warn("[llm][discovery] provider=#{provider} instance=#{instance} unreachable: #{error.message}")
  else
    handle_exception(error, level: :warn, handled: true,
                            operation: "discovery.offerings.#{provider}/#{instance}")
  end

  return unless defined?(Router) && Router.respond_to?(:health_tracker)

  Router.health_tracker.report(
    provider: provider, instance: instance,
    signal: :error, value: 1,
    metadata: { reason: error.class.name, source: :discovery }
  )
end
|
|
188
|
+
|
|
168
189
|
def normalize_offering(offering)
|
|
169
190
|
data = if offering.is_a?(Hash)
|
|
170
191
|
offering
|
|
@@ -11,7 +11,7 @@ require_relative 'route_attempts'
|
|
|
11
11
|
module Legion
|
|
12
12
|
module LLM
|
|
13
13
|
module Inference
|
|
14
|
-
class Executor
|
|
14
|
+
class Executor # rubocop:disable Metrics/ClassLength
|
|
15
15
|
include Legion::Logging::Helper
|
|
16
16
|
include NativeToolLoop
|
|
17
17
|
include RouteAttempts
|
|
@@ -59,6 +59,15 @@ module Legion
|
|
|
59
59
|
|
|
60
60
|
ASYNC_SAFE_STEPS = %i[post_response knowledge_capture response_return].freeze
|
|
61
61
|
|
|
62
|
+
CONFIG_ERROR_PATTERNS = [
|
|
63
|
+
/ValidationException/,
|
|
64
|
+
/AccessDeniedException/,
|
|
65
|
+
/InvalidModel/i,
|
|
66
|
+
/model.*not found/i,
|
|
67
|
+
/not authorized/i,
|
|
68
|
+
/AWS Marketplace/i
|
|
69
|
+
].freeze
|
|
70
|
+
|
|
62
71
|
MAX_NATIVE_TOOL_ROUNDS = 200
|
|
63
72
|
ToolResultEvent = Struct.new(:result, :tool_call_id, :tool_name, :started_at, keyword_init: true)
|
|
64
73
|
|
|
@@ -160,6 +169,7 @@ module Legion
|
|
|
160
169
|
skipped = 0
|
|
161
170
|
pipeline_start = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
|
|
162
171
|
step_timings = []
|
|
172
|
+
@step_timing_hash = {}
|
|
163
173
|
STEPS.each do |step|
|
|
164
174
|
if Profile.skip?(@profile, step)
|
|
165
175
|
skipped += 1
|
|
@@ -170,9 +180,12 @@ module Legion
|
|
|
170
180
|
execute_step(step) { send(:"step_#{step}") }
|
|
171
181
|
elapsed_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - t0) * 1000).round
|
|
172
182
|
step_timings << "#{step}=#{elapsed_ms}ms"
|
|
183
|
+
@step_timing_hash[step] = elapsed_ms
|
|
173
184
|
executed += 1
|
|
174
185
|
end
|
|
175
186
|
total_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - pipeline_start) * 1000).round
|
|
187
|
+
@step_timing_hash[:total] = total_ms
|
|
188
|
+
@timestamps[:step_timings] = @step_timing_hash
|
|
176
189
|
log.warn("[pipeline][timing] profile=#{@profile} total=#{total_ms}ms executed=#{executed} skipped=#{skipped} #{step_timings.join(' ')}")
|
|
177
190
|
annotate_top_level_span(steps_executed: executed, steps_skipped: skipped)
|
|
178
191
|
end
|
|
@@ -547,9 +560,18 @@ module Legion
|
|
|
547
560
|
duration_ms = ((Time.now - start_time) * 1000).round
|
|
548
561
|
handle_exception(err, level: :warn, handled: handled, operation: operation,
|
|
549
562
|
provider: resolution.provider, model: resolution.model, duration_ms: duration_ms)
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
563
|
+
if config_error?(err)
|
|
564
|
+
Router.health_tracker.deny_model(
|
|
565
|
+
provider: resolution.provider,
|
|
566
|
+
model: resolution.model,
|
|
567
|
+
instance: resolution.instance,
|
|
568
|
+
reason: err.message
|
|
569
|
+
)
|
|
570
|
+
else
|
|
571
|
+
Router.health_tracker.report(provider: resolution.provider, offering_id: resolution.offering_id,
|
|
572
|
+
signal: :error, value: 1,
|
|
573
|
+
metadata: { reason: err.class.name, message: err.message })
|
|
574
|
+
end
|
|
553
575
|
@escalation_history << escalation_attempt_hash(
|
|
554
576
|
resolution,
|
|
555
577
|
outcome: outcome,
|
|
@@ -928,6 +950,12 @@ module Legion
|
|
|
928
950
|
handle_exception(e, level: :warn, operation: 'llm.pipeline.emit_error_audit')
|
|
929
951
|
end
|
|
930
952
|
|
|
953
|
+
# Returns true when the error's class name or its message matches any
# pattern in CONFIG_ERROR_PATTERNS.
#
# @param err [Exception]
# @return [Boolean]
def config_error?(err)
  haystacks = [err.class.name.to_s, err.message.to_s]
  CONFIG_ERROR_PATTERNS.any? do |pattern|
    haystacks.any? { |text| pattern.match?(text) }
  end
end
|
|
958
|
+
|
|
931
959
|
def execute_pre_provider_steps
|
|
932
960
|
log.debug "[llm][executor] action=pre_provider_steps.enter step_count=#{PRE_PROVIDER_STEPS.size}"
|
|
933
961
|
PRE_PROVIDER_STEPS.each do |step|
|
|
@@ -1549,7 +1577,12 @@ module Legion
|
|
|
1549
1577
|
end
|
|
1550
1578
|
|
|
1551
1579
|
def build_response_routing
|
|
1552
|
-
routing = {
|
|
1580
|
+
routing = {
|
|
1581
|
+
provider: @resolved_provider,
|
|
1582
|
+
instance: @resolved_instance,
|
|
1583
|
+
model: @resolved_model,
|
|
1584
|
+
tier: @resolved_tier
|
|
1585
|
+
}.compact
|
|
1553
1586
|
routing[:offering_id] = @resolved_offering_id if @resolved_offering_id
|
|
1554
1587
|
routing[:offering_metadata] = @resolved_offering_metadata if @resolved_offering_metadata&.any?
|
|
1555
1588
|
|
data/lib/legion/llm/inventory.rb
CHANGED
|
@@ -39,15 +39,19 @@ module Legion
|
|
|
39
39
|
def offerings(filters = {})
|
|
40
40
|
log.debug "[llm][inventory] action=offerings.enter filters=#{filters.keys}"
|
|
41
41
|
normalized_filters = normalize_filter_hash(filters)
|
|
42
|
+
provider_scope = normalized_filters[:provider]&.to_sym
|
|
42
43
|
list = []
|
|
43
44
|
providers_config.each do |provider_family, config|
|
|
44
45
|
next unless enabled_config?(config)
|
|
46
|
+
next if provider_scope && provider_family.to_sym != provider_scope
|
|
45
47
|
|
|
46
48
|
list.concat(provider_offerings(provider_family.to_sym, config))
|
|
47
49
|
end
|
|
48
50
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
+
native = native_provider_offerings(provider: provider_scope)
|
|
52
|
+
native_providers = native.map { |o| o[:provider_family]&.to_sym }.uniq
|
|
53
|
+
list.concat(native)
|
|
54
|
+
list.concat(discovery_offerings(provider: provider_scope, exclude_providers: native_providers))
|
|
51
55
|
list = dedupe_offerings(list)
|
|
52
56
|
result = filter_offerings(list, normalized_filters)
|
|
53
57
|
log.debug "[llm][inventory] action=offerings.complete total=#{result.size}"
|
|
@@ -265,7 +269,7 @@ module Legion
|
|
|
265
269
|
))
|
|
266
270
|
end
|
|
267
271
|
|
|
268
|
-
def discovery_offerings
|
|
272
|
+
def discovery_offerings(provider: nil, exclude_providers: [])
|
|
269
273
|
return [] unless defined?(Legion::LLM::Discovery)
|
|
270
274
|
|
|
271
275
|
cached_models = if Legion::LLM::Discovery.respond_to?(:cached_discovered_models)
|
|
@@ -276,6 +280,9 @@ module Legion
|
|
|
276
280
|
|
|
277
281
|
cached_models.filter_map do |model_entry|
|
|
278
282
|
provider_family = model_entry[:provider]
|
|
283
|
+
next if provider && provider_family.to_sym != provider
|
|
284
|
+
next if exclude_providers.include?(provider_family.to_sym)
|
|
285
|
+
|
|
279
286
|
config = option(providers_config, provider_family, {})
|
|
280
287
|
next unless enabled_config?(config)
|
|
281
288
|
|
|
@@ -295,11 +302,13 @@ module Legion
|
|
|
295
302
|
[]
|
|
296
303
|
end
|
|
297
304
|
|
|
298
|
-
def native_provider_offerings
|
|
305
|
+
def native_provider_offerings(provider: nil)
|
|
299
306
|
return [] unless defined?(Legion::LLM::Call::Registry)
|
|
300
307
|
|
|
301
308
|
Legion::LLM::Call::Registry.all_instances.flat_map do |entry|
|
|
302
309
|
provider_name = entry[:provider]
|
|
310
|
+
next [] if provider && provider_name.to_sym != provider
|
|
311
|
+
|
|
303
312
|
adapter = entry[:adapter]
|
|
304
313
|
next [] unless adapter.respond_to?(:offerings)
|
|
305
314
|
|
|
@@ -347,7 +356,9 @@ module Legion
|
|
|
347
356
|
|
|
348
357
|
def dedupe_offerings(list)
|
|
349
358
|
list.each_with_object({}) do |offering, seen|
|
|
350
|
-
|
|
359
|
+
instance = offering[:provider_instance]
|
|
360
|
+
instance = nil if instance.to_s == 'default'
|
|
361
|
+
key = [offering[:provider_family], instance, offering[:model], offering[:type]]
|
|
351
362
|
current = seen[key]
|
|
352
363
|
seen[key] = offering if current.nil? || source_priority(offering) > source_priority(current)
|
|
353
364
|
end.values
|
data/lib/legion/llm/metering.rb
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'legion/logging/helper'
|
|
4
|
+
require 'fileutils'
|
|
4
5
|
require_relative 'metering/estimator'
|
|
5
6
|
require_relative 'metering/tracker'
|
|
6
7
|
require_relative 'metering/tokens'
|
|
@@ -12,6 +13,10 @@ module Legion
|
|
|
12
13
|
module Metering
|
|
13
14
|
extend Legion::Logging::Helper
|
|
14
15
|
|
|
16
|
+
SPOOL_DIR = File.expand_path('~/.legionio/data/spool/metering')
|
|
17
|
+
SPOOL_FILE = File.join(SPOOL_DIR, 'events.jsonl').freeze
|
|
18
|
+
SPOOL_MUTEX = Mutex.new
|
|
19
|
+
|
|
15
20
|
def self.load_transport
|
|
16
21
|
return unless defined?(Legion::Transport::Message)
|
|
17
22
|
|
|
@@ -30,8 +35,9 @@ module Legion
|
|
|
30
35
|
log.info("[llm][metering] published provider=#{event[:provider]} model=#{event[:model_id]}")
|
|
31
36
|
:published
|
|
32
37
|
else
|
|
33
|
-
|
|
34
|
-
:
|
|
38
|
+
spool_event(event)
|
|
39
|
+
log.info("[llm][metering] spooled provider=#{event[:provider]} model=#{event[:model_id]} reason=transport_unavailable")
|
|
40
|
+
:spooled
|
|
35
41
|
end
|
|
36
42
|
rescue StandardError => e
|
|
37
43
|
handle_exception(e, level: :warn, operation: 'llm.metering.emit')
|
|
@@ -46,8 +52,43 @@ module Legion
|
|
|
46
52
|
end
|
|
47
53
|
|
|
48
54
|
def flush_spool
|
|
49
|
-
|
|
50
|
-
|
|
55
|
+
return 0 unless File.exist?(spool_file_path)
|
|
56
|
+
|
|
57
|
+
event_class = metering_event_class
|
|
58
|
+
unless event_class && transport_connected?
|
|
59
|
+
log.debug('[llm][metering] flush_spool skipped reason=transport_unavailable')
|
|
60
|
+
return 0
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
# Read and truncate atomically under the mutex so no events written
|
|
64
|
+
# between read and truncate can be silently lost.
|
|
65
|
+
events = SPOOL_MUTEX.synchronize do
|
|
66
|
+
path = spool_file_path
|
|
67
|
+
return 0 unless File.exist?(path)
|
|
68
|
+
|
|
69
|
+
lines = File.readlines(path, chomp: true)
|
|
70
|
+
parsed = lines.filter_map do |line|
|
|
71
|
+
next if line.strip.empty?
|
|
72
|
+
|
|
73
|
+
Legion::JSON.load(line)
|
|
74
|
+
end
|
|
75
|
+
File.write(path, '')
|
|
76
|
+
parsed
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
return 0 if events.empty?
|
|
80
|
+
|
|
81
|
+
batch_sleep = spool_settings[:flush_batch_sleep] || 0.0
|
|
82
|
+
flushed = 0
|
|
83
|
+
|
|
84
|
+
events.each_with_index do |event_data, index|
|
|
85
|
+
event_class.new(**event_data).publish
|
|
86
|
+
flushed += 1
|
|
87
|
+
sleep(batch_sleep) if batch_sleep.positive? && index < events.size - 1
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
log.info("[llm][metering] flush_spool flushed=#{flushed}")
|
|
91
|
+
flushed
|
|
51
92
|
rescue StandardError => e
|
|
52
93
|
handle_exception(e, level: :warn, operation: 'llm.metering.flush_spool')
|
|
53
94
|
0
|
|
@@ -128,6 +169,81 @@ module Legion
|
|
|
128
169
|
hash[key] if hash.key?(key)
|
|
129
170
|
end
|
|
130
171
|
|
|
172
|
+
# --- Spool internals (private) ---
|
|
173
|
+
|
|
174
|
+
# Appends one event to the spool file as a single JSON line.
#
# The directory check, the size cap and the append all run while holding
# SPOOL_MUTEX. Any StandardError is handed to `handle_exception` and is not
# re-raised.
#
# @param event [Hash] event payload; `:provider` and `:model_id` are logged
def spool_event(event)
  SPOOL_MUTEX.synchronize do
    ensure_spool_dir
    enforce_max_events
    serialized = Legion::JSON.dump(event)
    File.open(spool_file_path, 'a') do |io|
      io.puts(serialized)
    end
  end
  log.debug("[llm][metering] spool_event written provider=#{event[:provider]} model=#{event[:model_id]}")
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'llm.metering.spool_event')
end
|
|
185
|
+
|
|
186
|
+
# Reads every spooled event from disk while holding SPOOL_MUTEX.
#
# Blank lines are skipped and each remaining line is parsed with
# Legion::JSON.load. Any StandardError is handed to `handle_exception`.
#
# @return [Array] parsed events; empty when the file is missing or on error
def read_spool
  SPOOL_MUTEX.synchronize do
    spool_path = spool_file_path
    next [] unless File.exist?(spool_path)

    File.readlines(spool_path, chomp: true)
        .reject { |raw| raw.strip.empty? }
        .filter_map { |raw| Legion::JSON.load(raw) }
  end
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'llm.metering.read_spool')
  []
end
|
|
202
|
+
|
|
203
|
+
# Empties the spool file, if it exists, while holding SPOOL_MUTEX.
# Any StandardError is handed to `handle_exception` and is not re-raised.
def truncate_spool
  SPOOL_MUTEX.synchronize do
    target = spool_file_path
    next unless File.exist?(target)

    File.write(target, '')
  end
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'llm.metering.truncate_spool')
end
|
|
211
|
+
|
|
212
|
+
# Trims the spool file so that appending one more event keeps it within the
# configured `max_events` cap (default 10_000). Oldest lines are dropped.
#
# Does no locking of its own; `spool_event` calls it while holding
# SPOOL_MUTEX.
def enforce_max_events
  path = spool_file_path
  return unless File.exist?(path)

  # A missing, non-numeric or non-positive setting falls back to the default.
  # Otherwise `lines.last(max - 1)` raises ArgumentError, which the caller
  # rescues, and the event is silently never spooled.
  max = Integer(spool_settings[:max_events], exception: false)
  max = 10_000 unless max&.positive?

  lines = File.readlines(path, chomp: true)
  return if lines.size < max

  # Drop oldest events, leaving room for the one about to be appended.
  trimmed = lines.last(max - 1)
  File.write(path, trimmed.map { |l| "#{l}\n" }.join)
  log.debug("[llm][metering] enforce_max_events trimmed=#{lines.size - trimmed.size} max=#{max}")
end
|
|
225
|
+
|
|
226
|
+
# Creates the spool directory, including any missing parents.
def ensure_spool_dir
  directory = spool_dir_path
  FileUtils.mkdir_p(directory)
end
|
|
229
|
+
|
|
230
|
+
# Returns the `metering.spool` settings hash, or an empty hash when the
# configured value is not a Hash.
#
# @return [Hash]
def spool_settings
  configured = Legion::LLM::Settings.value(:metering, :spool, default: {})
  return configured if configured.is_a?(Hash)

  {}
end
|
|
234
|
+
|
|
235
|
+
# Resolves the spool file path at call time so an operator-configured
# `path` setting is honoured (e.g. containerised deployments where $HOME
# is not writable). A missing or blank setting falls back to the
# SPOOL_FILE constant.
#
# @return [String]
def spool_file_path
  override = spool_settings[:path]
  return SPOOL_FILE unless override

  text = override.to_s
  text.strip.empty? ? SPOOL_FILE : text
end
|
|
242
|
+
|
|
243
|
+
# Directory that contains the currently resolved spool file.
#
# @return [String]
def spool_dir_path
  spool_file = spool_file_path
  File.dirname(spool_file)
end
|
|
246
|
+
|
|
131
247
|
# Backward-compat: resolve old Legion::LLM::Metering::Exchange, ::Event
|
|
132
248
|
def self.const_missing(name)
|
|
133
249
|
case name
|
|
@@ -19,6 +19,7 @@ module Legion
|
|
|
19
19
|
@circuits = {}
|
|
20
20
|
@latency_window = {}
|
|
21
21
|
@handlers = {}
|
|
22
|
+
@denied_models = {}
|
|
22
23
|
@mutex = Mutex.new
|
|
23
24
|
|
|
24
25
|
register_default_handlers
|
|
@@ -111,6 +112,42 @@ module Legion
|
|
|
111
112
|
worst_circuit_state(instances)
|
|
112
113
|
end
|
|
113
114
|
|
|
115
|
+
# Records a model as denied for a provider, or for one provider instance
# when `instance` is given (e.g. after an AccessDenied error).
# The entry stays until it is cleared explicitly or the tracker's state is
# discarded.
#
# @param provider [#to_s]
# @param model [#to_s]
# @param instance [Object, nil] narrows the denial to a single instance
# @param reason [String, nil] stored alongside the denial time
def deny_model(provider:, model:, instance: nil, reason: nil)
  scope = instance ? instance_key(provider, instance) : provider.to_s
  denial = { reason: reason, at: Time.now }
  @mutex.synchronize do
    (@denied_models[scope] ||= {})[model.to_s] = denial
  end
  log.warn("Model denied provider=#{scope} model=#{model} reason=#{reason}")
end
|
|
125
|
+
|
|
126
|
+
# Checks whether a model is denied for a provider or provider instance.
#
# When `instance` is given, both the instance-scoped denial and a
# provider-wide denial (one recorded without an instance) count. Without
# this, a denial recorded for the whole provider would be ignored by any
# lookup that names an instance.
#
# @param provider [#to_s]
# @param model [#to_s]
# @param instance [Object, nil]
# @return [Boolean]
def model_denied?(provider:, model:, instance: nil)
  provider_key = provider.to_s
  keys = instance ? [instance_key(provider, instance), provider_key] : [provider_key]
  model_name = model.to_s
  @mutex.synchronize do
    keys.any? { |key| !@denied_models.dig(key, model_name).nil? }
  end
end
|
|
133
|
+
|
|
134
|
+
# Lists all denied models (for diagnostics).
#
# Returns a snapshot copied at every nesting level: a plain `dup` would
# share the per-key and per-model hashes with the tracker, letting callers
# change its state without holding the mutex.
#
# @return [Hash{String => Hash{String => Hash}}]
def denied_models
  @mutex.synchronize do
    @denied_models.transform_values do |models|
      models.transform_values(&:dup)
    end
  end
end
|
|
138
|
+
|
|
139
|
+
# Clears denied models. With a provider, removes that provider's entry (or
# the provider+instance entry when `instance` is also given); without a
# provider, removes everything.
#
# @param provider [#to_s, nil]
# @param instance [Object, nil] only consulted when `provider` is given
def clear_denied(provider: nil, instance: nil)
  @mutex.synchronize do
    next @denied_models.clear unless provider

    scope = instance ? instance_key(provider, instance) : provider.to_s
    @denied_models.delete(scope)
  end
end
|
|
150
|
+
|
|
114
151
|
# Clears circuit and latency data for a single provider.
|
|
115
152
|
def reset(provider, instance: nil, offering_id: nil)
|
|
116
153
|
key = instance ? instance_key(provider, instance) : health_key(provider, offering_id)
|
|
@@ -125,6 +162,7 @@ module Legion
|
|
|
125
162
|
@mutex.synchronize do
|
|
126
163
|
@circuits.clear
|
|
127
164
|
@latency_window.clear
|
|
165
|
+
@denied_models.clear
|
|
128
166
|
end
|
|
129
167
|
end
|
|
130
168
|
|
data/lib/legion/llm/router.rb
CHANGED
|
@@ -163,7 +163,11 @@ module Legion
|
|
|
163
163
|
end
|
|
164
164
|
|
|
165
165
|
def explicit_resolution(tier, provider, model)
|
|
166
|
-
registry_entry = provider
|
|
166
|
+
registry_entry = if provider
|
|
167
|
+
registry_entry_for_provider(provider.to_sym)
|
|
168
|
+
else
|
|
169
|
+
registry_entry_for_tier(tier)
|
|
170
|
+
end
|
|
167
171
|
resolved_provider = provider ? provider.to_sym : (registry_entry&.[](:provider) || default_provider_for_tier(tier))
|
|
168
172
|
resolved_model = model || registry_default_model(registry_entry) || default_model_for_tier(tier)
|
|
169
173
|
|
|
@@ -229,8 +233,11 @@ module Legion
|
|
|
229
233
|
memory_checked.reject { |r| excluded_by_caller?(r, normalized_exclude) }
|
|
230
234
|
end
|
|
231
235
|
|
|
236
|
+
# 4.7 Reject rules for models denied by health tracker
|
|
237
|
+
not_denied = not_excluded.reject { |r| excluded_by_denial?(r) }
|
|
238
|
+
|
|
232
239
|
# 5. Filter by tier availability
|
|
233
|
-
final =
|
|
240
|
+
final = not_denied.select { |r| tier_available?(r.target[:tier] || r.target['tier']) }
|
|
234
241
|
|
|
235
242
|
log.debug("Router: #{final.size} candidates after filtering (started with #{rules.size})")
|
|
236
243
|
|
|
@@ -303,6 +310,15 @@ module Legion
|
|
|
303
310
|
{}
|
|
304
311
|
end
|
|
305
312
|
|
|
313
|
+
# Returns true when the rule's target provider/model (and instance, if the
# target names one) is currently denied by the health tracker. Targets
# lacking a provider or a model are never excluded.
#
# @param rule [#target] target is read with Symbol and String keys
# @return [Boolean]
def excluded_by_denial?(rule)
  lookup = ->(name) { rule.target[name] || rule.target[name.to_s] }
  provider = lookup.call(:provider)&.to_sym
  model = lookup.call(:model)
  instance = lookup.call(:instance)

  if provider && model
    health_tracker.model_denied?(provider: provider, model: model, instance: instance)
  else
    false
  end
end
|
|
321
|
+
|
|
306
322
|
def excluded_by_caller?(rule, exclude)
|
|
307
323
|
return false if exclude.nil? || exclude.empty?
|
|
308
324
|
|
|
@@ -397,22 +413,24 @@ module Legion
|
|
|
397
413
|
# Fallback to static defaults
|
|
398
414
|
case sym
|
|
399
415
|
when :local, :direct, :fleet
|
|
400
|
-
'llama3'
|
|
416
|
+
default_settings_model_for_tier(sym) || 'llama3'
|
|
401
417
|
when :openai_compat
|
|
402
418
|
'gpt-4o'
|
|
403
419
|
when :cloud
|
|
404
|
-
|
|
420
|
+
default_settings_model_for_tier(sym) || 'us.anthropic.claude-sonnet-4-6'
|
|
405
421
|
when :frontier
|
|
406
|
-
|
|
422
|
+
default_settings_model_for_tier(sym) || 'claude-sonnet-4-6'
|
|
407
423
|
end
|
|
408
424
|
end
|
|
409
425
|
|
|
410
426
|
def chain_from_defaults(model, provider, max)
|
|
411
427
|
if provider || model || default_settings_provider || default_settings_model
|
|
412
428
|
p = (provider || default_settings_provider)&.to_sym
|
|
429
|
+
resolved_model = model || registry_default_model(registry_entry_for_provider(p)) ||
|
|
430
|
+
default_settings_model || 'claude-sonnet-4-6'
|
|
413
431
|
res = Resolution.new(tier: PROVIDER_TIER.fetch(p, :frontier),
|
|
414
432
|
provider: p || :anthropic,
|
|
415
|
-
model:
|
|
433
|
+
model: resolved_model)
|
|
416
434
|
return EscalationChain.new(resolutions: [res], max_attempts: max)
|
|
417
435
|
end
|
|
418
436
|
|
|
@@ -512,6 +530,31 @@ module Legion
|
|
|
512
530
|
Legion::LLM::Settings.value(:default_model)
|
|
513
531
|
end
|
|
514
532
|
|
|
533
|
+
# Returns the globally configured default model only when the configured
# default provider resolves to the requested tier; otherwise nil.
#
# @param tier [Symbol]
# @return [Object, nil] the default model setting, or nil
def default_settings_model_for_tier(tier)
  candidate = default_settings_model
  return if candidate.nil? || candidate.to_s.empty?

  owner = default_settings_provider&.to_sym
  return unless owner

  registry_tier_for_default_provider(owner) == tier ? candidate : nil
end
|
|
545
|
+
|
|
546
|
+
# Resolves the tier of the given provider: from the first matching registry
# instance when one exists, otherwise from PROVIDER_TIER (default :cloud).
#
# Provider names are compared as strings so a registry entry stored under a
# String still matches a Symbol argument. A registry failure is reported
# through `handle_exception` and treated as "no instances".
#
# @param provider [Symbol]
# @return [Object] the value of `registry_tier` or the PROVIDER_TIER entry
def registry_tier_for_default_provider(provider)
  instances = begin
    Call::Registry.all_instances
  rescue StandardError => e
    handle_exception(e, level: :warn, handled: true,
                        operation: 'router.registry_tier_for_default_provider')
    []
  end

  wanted = provider.to_s
  entry = instances.find { |candidate| candidate[:provider].to_s == wanted }
  return registry_tier(provider, entry[:metadata]) if entry

  PROVIDER_TIER.fetch(provider, :cloud)
end
|
|
557
|
+
|
|
515
558
|
def default_settings_provider
|
|
516
559
|
Legion::LLM::Settings.value(:default_provider)
|
|
517
560
|
end
|
|
@@ -529,6 +572,17 @@ module Legion
|
|
|
529
572
|
registry_entry_for_tier(tier)&.[](:provider)
|
|
530
573
|
end
|
|
531
574
|
|
|
575
|
+
# Finds the first registered instance for a specific provider.
#
# Provider names are compared as strings so a registry entry stored under a
# String still matches a Symbol argument. A nil provider returns nil without
# consulting the registry. A registry failure is reported through
# `handle_exception` and treated as "no instances".
#
# @param provider [Symbol, String, nil]
# @return [Hash, nil] the matching registry entry, or nil
def registry_entry_for_provider(provider)
  return nil if provider.nil?

  instances = begin
    Call::Registry.all_instances
  rescue StandardError => e
    handle_exception(e, level: :warn, handled: true, operation: 'router.registry_entry_for_provider')
    []
  end

  wanted = provider.to_s
  instances.find { |entry| entry[:provider].to_s == wanted }
end
|
|
585
|
+
|
|
532
586
|
# Find a default model from registry for a given tier.
|
|
533
587
|
# Tries adapter.offerings first, then metadata[:default_model].
|
|
534
588
|
def registry_model_for_tier(tier)
|
data/lib/legion/llm/settings.rb
CHANGED
data/lib/legion/llm/version.rb
CHANGED