legion-llm 0.8.15 → 0.8.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +34 -0
- data/lib/legion/llm/api/native/helpers.rb +52 -0
- data/lib/legion/llm/api/native/inference.rb +2 -2
- data/lib/legion/llm/inference/audit_publisher.rb +56 -5
- data/lib/legion/llm/inference/executor.rb +62 -62
- data/lib/legion/llm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 1106f652c69b801af983117b4fc97f6bf547dc4d63f1b1df12eb6f1adb6d51d2
|
|
4
|
+
data.tar.gz: '00593f91f0467fd63e5a8867017033da483bf0abc4fcfa26641fe97fd66c3674'
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 30c59046ad40659fa02f3bde8db8807b3a9c3b718365dc1be5d92ca01c9cb84e593c4bcab5027f12a591ee12c3f21671ae3695573f5e85b3c5e9d8b7b1ebf74b
|
|
7
|
+
data.tar.gz: 91368ea195a58f8321cca378febcca27a96685a6a1fe78ac432be88731ee03a4ca050b1782f8d1f24da6045b5f136ea070ee14fd4dbb2eb28e23c8db63f28174
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,39 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.8.18] - 2026-04-22
|
|
4
|
+
|
|
5
|
+
### Fixed
|
|
6
|
+
- API caller identity no longer hardcoded as `api:inference`. The inference route now resolves the actual user via `env['legion.principal']` (from Identity::Middleware), `Legion::Identity::Process` (LDAP/Kerberos), or OS username (with email domain stripped). Adds `username` and `hostname` to the `requested_by` hash in audit trails.
|
|
7
|
+
|
|
8
|
+
## [0.8.17] - 2026-04-22
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- Audit events now include `system_prompt` (full text sent to provider), `injected_tools` (list of tool names injected), and `identity` (extracted user identity from caller).
|
|
12
|
+
|
|
13
|
+
### Fixed
|
|
14
|
+
- `tokens` field in audit events was serialized as a `#<data ...>` inspect string instead of a proper hash. Now calls `.to_h` on Data.define objects.
|
|
15
|
+
- `enrichments` in audit events now compacted: array values (e.g. GAIA valence history) reduced to their last element.
|
|
16
|
+
- `timeline` in audit events filtered to only provider, escalation, and tool execution events — diagnostic trace entries (tracing:init, rbac, context:stored, etc.) are stripped.
|
|
17
|
+
|
|
18
|
+
## [0.8.16] - 2026-04-22
|
|
19
|
+
|
|
20
|
+
### Fixed
|
|
21
|
+
- `RubyLLM::BadRequestError` (HTTP 400) and `RubyLLM::ContextLengthExceededError` now trigger the provider fallback-retry chain instead of bubbling up as unhandled 500s. Both `run_provider_call_single` and `step_provider_call_stream` retry on the next available provider before giving up.
|
|
22
|
+
- Resolved provider/model is now logged (`log.info`) in `step_routing` so provider errors can be diagnosed from daemon logs without relying on SSE done events.
|
|
23
|
+
|
|
24
|
+
### Changed
|
|
25
|
+
- Extracted `try_fallback_or_raise` helper from duplicated retry logic in both rescue chains, reducing the auth/bad-request/context-overflow fallback pattern to a single call each.
|
|
26
|
+
|
|
27
|
+
## [0.8.15] - 2026-04-22
|
|
28
|
+
|
|
29
|
+
### Changed
|
|
30
|
+
- **5-tier routing model**: restructured from 3 tiers (local/fleet/cloud) to 5 tiers (local/fleet/openai_compat/cloud/frontier). Anthropic and OpenAI are now `:frontier` (direct API); Bedrock, Azure, Gemini are `:cloud` (managed providers). New `:openai_compat` tier for user-configured OpenAI-spec gateways.
|
|
31
|
+
- `Resolution`: added `frontier?`, `openai_compat?`, and `external?` predicates.
|
|
32
|
+
- `TierAssigner`: `user:*` and critical/high priority requests route to `:frontier` instead of `:cloud`.
|
|
33
|
+
- `GatewayInterceptor`: intercepts both `:cloud` and `:frontier` tiers, preserving original tier.
|
|
34
|
+
- Privacy enforcement (`assert_external_allowed!`) blocks all external tiers (cloud + frontier + openai_compat), not just cloud. `never_cloud` constraint now blocks both `:cloud` and `:frontier`. New `never_external` constraint blocks all three external tiers.
|
|
35
|
+
- `resolve_chain` fallback defaults changed from `:cloud`/`:bedrock` to `:frontier`/`:anthropic`.
|
|
36
|
+
|
|
3
37
|
## [0.8.13] - 2026-04-22
|
|
4
38
|
|
|
5
39
|
### Fixed
|
data/lib/legion/llm/api/native/helpers.rb
CHANGED
|
@@ -326,6 +326,58 @@ module Legion
|
|
|
326
326
|
end
|
|
327
327
|
end
|
|
328
328
|
|
|
329
|
+
define_method(:resolve_caller_identity) do |rack_env|
|
|
330
|
+
return rack_env['legion.tenant_id'] if rack_env['legion.tenant_id']
|
|
331
|
+
|
|
332
|
+
kerb = begin
|
|
333
|
+
Legion::Settings.dig(:kerberos, :username)
|
|
334
|
+
rescue StandardError
|
|
335
|
+
nil
|
|
336
|
+
end
|
|
337
|
+
return "user:#{kerb}" if kerb.is_a?(String) && !kerb.empty?
|
|
338
|
+
|
|
339
|
+
principal = rack_env['legion.principal']
|
|
340
|
+
return "user:#{principal.canonical_name}" if principal.respond_to?(:canonical_name) && principal.canonical_name != 'system'
|
|
341
|
+
|
|
342
|
+
if defined?(Legion::Identity::Process)
|
|
343
|
+
name = Legion::Identity::Process.canonical_name
|
|
344
|
+
return "user:#{name}" if name && name != 'anonymous'
|
|
345
|
+
end
|
|
346
|
+
|
|
347
|
+
raw = ENV.fetch('USER', nil) || ENV.fetch('LOGNAME', nil) || 'anonymous'
|
|
348
|
+
username = raw.include?('@') ? raw.split('@').first : raw
|
|
349
|
+
"user:#{username}"
|
|
350
|
+
end
|
|
351
|
+
|
|
352
|
+
define_method(:resolve_requested_by) do |rack_env, identity_string|
|
|
353
|
+
hostname = begin
|
|
354
|
+
Legion::Settings[:client][:hostname]
|
|
355
|
+
rescue StandardError
|
|
356
|
+
Socket.gethostname
|
|
357
|
+
end
|
|
358
|
+
username = identity_string.delete_prefix('user:')
|
|
359
|
+
|
|
360
|
+
kerb = begin
|
|
361
|
+
Legion::Settings.dig(:kerberos, :username)
|
|
362
|
+
rescue StandardError
|
|
363
|
+
nil
|
|
364
|
+
end
|
|
365
|
+
if kerb.is_a?(String) && !kerb.empty?
|
|
366
|
+
return { identity: identity_string, type: :user, credential: :kerberos,
|
|
367
|
+
username: kerb, hostname: hostname }
|
|
368
|
+
end
|
|
369
|
+
|
|
370
|
+
principal = rack_env['legion.principal']
|
|
371
|
+
if principal.respond_to?(:canonical_name) && principal.canonical_name != 'system'
|
|
372
|
+
return { identity: identity_string, type: principal.kind || :user,
|
|
373
|
+
credential: principal.source || :local,
|
|
374
|
+
username: principal.canonical_name, hostname: hostname }
|
|
375
|
+
end
|
|
376
|
+
|
|
377
|
+
{ identity: identity_string, type: :user, credential: :local,
|
|
378
|
+
username: username, hostname: hostname }
|
|
379
|
+
end
|
|
380
|
+
|
|
329
381
|
define_method(:token_value) do |tokens, key|
|
|
330
382
|
return nil if tokens.nil?
|
|
331
383
|
return tokens[key] || tokens[key.to_s] if tokens.is_a?(Hash)
|
data/lib/legion/llm/api/native/inference.rb
CHANGED
|
@@ -42,7 +42,7 @@ module Legion
|
|
|
42
42
|
tools = raw_tools || []
|
|
43
43
|
validate_tools!(tools) unless tools.empty?
|
|
44
44
|
|
|
45
|
-
caller_identity = env
|
|
45
|
+
caller_identity = resolve_caller_identity(env)
|
|
46
46
|
last_user = messages.select { |m| (m[:role] || m['role']).to_s == 'user' }.last
|
|
47
47
|
prompt = (last_user || {})[:content] || (last_user || {})['content'] || ''
|
|
48
48
|
|
|
@@ -79,7 +79,7 @@ module Legion
|
|
|
79
79
|
server_caller_fields = {
|
|
80
80
|
source: 'api',
|
|
81
81
|
path: request.path,
|
|
82
|
-
requested_by:
|
|
82
|
+
requested_by: resolve_requested_by(env, caller_identity)
|
|
83
83
|
}
|
|
84
84
|
effective_caller = server_caller_fields.merge(safe_caller_fields)
|
|
85
85
|
caller_summary = [effective_caller[:source], effective_caller[:path]].compact.join(':')
|
data/lib/legion/llm/inference/audit_publisher.rb
CHANGED
|
@@ -22,17 +22,22 @@ module Legion
|
|
|
22
22
|
tc.is_a?(Types::ToolCall) ? tc.to_audit_hash : tc
|
|
23
23
|
end
|
|
24
24
|
|
|
25
|
+
audit_data = response.audit || {}
|
|
26
|
+
provider_payload = audit_data[:provider_payload] || {}
|
|
27
|
+
|
|
25
28
|
event = {
|
|
26
29
|
request_id: response.request_id,
|
|
27
30
|
conversation_id: response.conversation_id,
|
|
28
31
|
caller: response.caller,
|
|
32
|
+
identity: extract_identity(response.caller),
|
|
29
33
|
routing: response.routing,
|
|
30
|
-
tokens: response.tokens,
|
|
34
|
+
tokens: serialize_tokens(response.tokens),
|
|
31
35
|
cost: response.cost,
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
+
system_prompt: provider_payload[:system_prompt],
|
|
37
|
+
injected_tools: provider_payload[:injected_tools],
|
|
38
|
+
enrichments: compact_enrichments(response.enrichments),
|
|
39
|
+
audit: audit_data.except(:provider_payload),
|
|
40
|
+
timeline: compact_timeline(response.timeline),
|
|
36
41
|
classification: response.classification,
|
|
37
42
|
tracing: response.tracing,
|
|
38
43
|
messages: request.messages,
|
|
@@ -58,6 +63,52 @@ module Legion
|
|
|
58
63
|
nil
|
|
59
64
|
end
|
|
60
65
|
|
|
66
|
+
def extract_identity(caller)
|
|
67
|
+
return nil unless caller.is_a?(Hash)
|
|
68
|
+
|
|
69
|
+
rb = caller[:requested_by] || caller['requested_by']
|
|
70
|
+
return nil unless rb.is_a?(Hash)
|
|
71
|
+
|
|
72
|
+
{
|
|
73
|
+
identity: rb[:identity] || rb['identity'],
|
|
74
|
+
type: rb[:type] || rb['type'],
|
|
75
|
+
credential: rb[:credential] || rb['credential']
|
|
76
|
+
}.compact
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
def serialize_tokens(tokens)
|
|
80
|
+
return tokens.to_h if tokens.respond_to?(:to_h) && !tokens.is_a?(Hash)
|
|
81
|
+
return tokens if tokens.is_a?(Hash)
|
|
82
|
+
|
|
83
|
+
{}
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
def compact_enrichments(enrichments)
|
|
87
|
+
return {} unless enrichments.is_a?(Hash)
|
|
88
|
+
|
|
89
|
+
enrichments.transform_values do |v|
|
|
90
|
+
next v unless v.is_a?(Hash)
|
|
91
|
+
|
|
92
|
+
summary = { content: v[:content], timestamp: v[:timestamp] }
|
|
93
|
+
data = v[:data]
|
|
94
|
+
next summary unless data.is_a?(Hash)
|
|
95
|
+
|
|
96
|
+
compacted = data.transform_values do |dv|
|
|
97
|
+
dv.is_a?(Array) && dv.size > 1 ? dv.last : dv
|
|
98
|
+
end
|
|
99
|
+
summary.merge(data: compacted)
|
|
100
|
+
end
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
def compact_timeline(timeline)
|
|
104
|
+
return [] unless timeline.is_a?(Array)
|
|
105
|
+
|
|
106
|
+
timeline.select do |event|
|
|
107
|
+
key = (event[:key] || event['key']).to_s
|
|
108
|
+
key.start_with?('provider:') || key.start_with?('escalation:') || key.start_with?('tool:execute:')
|
|
109
|
+
end
|
|
110
|
+
end
|
|
111
|
+
|
|
61
112
|
def build_message_context(response:, **)
|
|
62
113
|
{
|
|
63
114
|
request_id: response.request_id,
|
data/lib/legion/llm/inference/executor.rb
CHANGED
|
@@ -331,6 +331,7 @@ module Legion
|
|
|
331
331
|
@resolved_provider = provider || Legion::LLM.settings[:default_provider]
|
|
332
332
|
@resolved_model = model || Legion::LLM.settings[:default_model]
|
|
333
333
|
|
|
334
|
+
log.info "[llm][inference] resolved provider=#{@resolved_provider} model=#{@resolved_model}"
|
|
334
335
|
@timeline.record(
|
|
335
336
|
category: :audit, key: 'routing:provider_selection',
|
|
336
337
|
direction: :internal, detail: "routed to #{@resolved_provider}:#{@resolved_model}",
|
|
@@ -356,38 +357,17 @@ module Legion
|
|
|
356
357
|
execute_provider_request
|
|
357
358
|
rescue RubyLLM::UnauthorizedError, RubyLLM::ForbiddenError,
|
|
358
359
|
Faraday::UnauthorizedError, Faraday::ForbiddenError => e
|
|
359
|
-
providers_tried
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
log.warn "[pipeline] #{@resolved_provider} auth failed (#{e.class}), falling back to #{fallback[:provider]}:#{fallback[:model]}"
|
|
371
|
-
from_provider = @resolved_provider
|
|
372
|
-
from_model = @resolved_model
|
|
373
|
-
@resolved_provider = fallback[:provider]
|
|
374
|
-
@resolved_model = fallback[:model]
|
|
375
|
-
@warnings << { type: :provider_fallback, original_error: e.message, fallback: "#{@resolved_provider}:#{@resolved_model}" }
|
|
376
|
-
@tool_event_handler&.call(
|
|
377
|
-
type: :model_fallback,
|
|
378
|
-
from_provider: from_provider, to_provider: @resolved_provider,
|
|
379
|
-
from_model: from_model, to_model: @resolved_model,
|
|
380
|
-
error: e.message, reason: 'auth_failed'
|
|
381
|
-
)
|
|
382
|
-
@timeline.record(
|
|
383
|
-
category: :provider, key: 'provider:fallback',
|
|
384
|
-
direction: :internal,
|
|
385
|
-
detail: "auth failed on #{providers_tried.last}, trying #{@resolved_provider}",
|
|
386
|
-
from: 'pipeline', to: "provider:#{@resolved_provider}"
|
|
387
|
-
)
|
|
388
|
-
retry
|
|
389
|
-
end
|
|
390
|
-
raise Legion::LLM::AuthError, e.message
|
|
360
|
+
try_fallback_or_raise(e, providers_tried, operation: 'provider_call.auth',
|
|
361
|
+
reason: 'auth_failed', error_class: Legion::LLM::AuthError)
|
|
362
|
+
retry
|
|
363
|
+
rescue RubyLLM::ContextLengthExceededError => e
|
|
364
|
+
try_fallback_or_raise(e, providers_tried, operation: 'provider_call.context_overflow',
|
|
365
|
+
reason: 'context_overflow', error_class: Legion::LLM::ContextOverflow)
|
|
366
|
+
retry
|
|
367
|
+
rescue RubyLLM::BadRequestError => e
|
|
368
|
+
try_fallback_or_raise(e, providers_tried, operation: 'provider_call.bad_request',
|
|
369
|
+
reason: 'bad_request', error_class: Legion::LLM::ProviderError)
|
|
370
|
+
retry
|
|
391
371
|
rescue RubyLLM::RateLimitError => e
|
|
392
372
|
handle_exception(e, level: :warn, operation: 'llm.pipeline.provider_call.rate_limit',
|
|
393
373
|
provider: @resolved_provider, model: @resolved_model)
|
|
@@ -651,33 +631,17 @@ module Legion
|
|
|
651
631
|
execute_provider_request_stream(&)
|
|
652
632
|
rescue RubyLLM::UnauthorizedError, RubyLLM::ForbiddenError,
|
|
653
633
|
Faraday::UnauthorizedError, Faraday::ForbiddenError => e
|
|
654
|
-
providers_tried
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
log.warn "[pipeline] #{@resolved_provider} stream auth failed (#{e.class}), " \
|
|
666
|
-
"falling back to #{fallback[:provider]}:#{fallback[:model]}"
|
|
667
|
-
from_provider = @resolved_provider
|
|
668
|
-
from_model = @resolved_model
|
|
669
|
-
@resolved_provider = fallback[:provider]
|
|
670
|
-
@resolved_model = fallback[:model]
|
|
671
|
-
@warnings << { type: :provider_fallback, original_error: e.message, fallback: "#{@resolved_provider}:#{@resolved_model}" }
|
|
672
|
-
@tool_event_handler&.call(
|
|
673
|
-
type: :model_fallback,
|
|
674
|
-
from_provider: from_provider, to_provider: @resolved_provider,
|
|
675
|
-
from_model: from_model, to_model: @resolved_model,
|
|
676
|
-
error: e.message, reason: 'auth_failed'
|
|
677
|
-
)
|
|
678
|
-
retry
|
|
679
|
-
end
|
|
680
|
-
raise Legion::LLM::AuthError, e.message
|
|
634
|
+
try_fallback_or_raise(e, providers_tried, operation: 'provider_call_stream.auth',
|
|
635
|
+
reason: 'auth_failed', error_class: Legion::LLM::AuthError)
|
|
636
|
+
retry
|
|
637
|
+
rescue RubyLLM::ContextLengthExceededError => e
|
|
638
|
+
try_fallback_or_raise(e, providers_tried, operation: 'provider_call_stream.context_overflow',
|
|
639
|
+
reason: 'context_overflow', error_class: Legion::LLM::ContextOverflow)
|
|
640
|
+
retry
|
|
641
|
+
rescue RubyLLM::BadRequestError => e
|
|
642
|
+
try_fallback_or_raise(e, providers_tried, operation: 'provider_call_stream.bad_request',
|
|
643
|
+
reason: 'bad_request', error_class: Legion::LLM::ProviderError)
|
|
644
|
+
retry
|
|
681
645
|
rescue RubyLLM::RateLimitError => e
|
|
682
646
|
handle_exception(e, level: :warn, operation: 'llm.pipeline.provider_call_stream.rate_limit',
|
|
683
647
|
provider: @resolved_provider, model: @resolved_model)
|
|
@@ -728,7 +692,14 @@ module Legion
|
|
|
728
692
|
session = RubyLLM.chat(**ruby_llm_chat_options)
|
|
729
693
|
|
|
730
694
|
inject_ruby_llm_tools(session)
|
|
731
|
-
apply_ruby_llm_instructions(session)
|
|
695
|
+
system_prompt = apply_ruby_llm_instructions(session)
|
|
696
|
+
|
|
697
|
+
@audit[:provider_payload] = {
|
|
698
|
+
system_prompt: system_prompt,
|
|
699
|
+
injected_tools: @injected_tool_map.keys,
|
|
700
|
+
tool_count: @injected_tool_map.size,
|
|
701
|
+
timestamp: Time.now
|
|
702
|
+
}
|
|
732
703
|
|
|
733
704
|
messages = apply_conversation_breakpoint(@request.messages)
|
|
734
705
|
add_ruby_llm_prior_messages(session, messages)
|
|
@@ -887,10 +858,12 @@ module Legion
|
|
|
887
858
|
system: @request.system,
|
|
888
859
|
enrichments: @enrichments
|
|
889
860
|
)
|
|
890
|
-
return unless injected_system
|
|
861
|
+
return nil unless injected_system
|
|
891
862
|
|
|
892
863
|
system_blocks = apply_cache_control([{ type: :text, content: injected_system }])
|
|
893
|
-
|
|
864
|
+
final = system_blocks.last[:content]
|
|
865
|
+
session.with_instructions(final)
|
|
866
|
+
final
|
|
894
867
|
end
|
|
895
868
|
|
|
896
869
|
def add_ruby_llm_prior_messages(session, messages)
|
|
@@ -967,6 +940,33 @@ module Legion
|
|
|
967
940
|
nil
|
|
968
941
|
end
|
|
969
942
|
|
|
943
|
+
def try_fallback_or_raise(error, providers_tried, operation:, reason:, error_class:)
|
|
944
|
+
providers_tried << @resolved_provider
|
|
945
|
+
fallback = find_fallback_provider(exclude: providers_tried)
|
|
946
|
+
handle_exception(
|
|
947
|
+
error,
|
|
948
|
+
level: :warn, operation: "llm.pipeline.#{operation}",
|
|
949
|
+
provider: @resolved_provider, model: @resolved_model,
|
|
950
|
+
fallback_provider: fallback&.dig(:provider)
|
|
951
|
+
)
|
|
952
|
+
raise error_class, "#{@resolved_provider}:#{@resolved_model} #{reason} — #{error.message}" unless fallback
|
|
953
|
+
|
|
954
|
+
log.warn "[pipeline] #{@resolved_provider}:#{@resolved_model} #{reason} (#{error.message}), " \
|
|
955
|
+
"falling back to #{fallback[:provider]}:#{fallback[:model]}"
|
|
956
|
+
from_provider = @resolved_provider
|
|
957
|
+
from_model = @resolved_model
|
|
958
|
+
@resolved_provider = fallback[:provider]
|
|
959
|
+
@resolved_model = fallback[:model]
|
|
960
|
+
@warnings << { type: :provider_fallback, original_error: error.message,
|
|
961
|
+
fallback: "#{@resolved_provider}:#{@resolved_model}" }
|
|
962
|
+
@tool_event_handler&.call(
|
|
963
|
+
type: :model_fallback,
|
|
964
|
+
from_provider: from_provider, to_provider: @resolved_provider,
|
|
965
|
+
from_model: from_model, to_model: @resolved_model,
|
|
966
|
+
error: error.message, reason: reason
|
|
967
|
+
)
|
|
968
|
+
end
|
|
969
|
+
|
|
970
970
|
def find_fallback_provider(exclude: [])
|
|
971
971
|
providers = Legion::LLM.settings[:providers] || {}
|
|
972
972
|
providers.each do |name, config|
|
data/lib/legion/llm/version.rb
CHANGED