legion-llm 0.8.21 → 0.8.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/lib/legion/llm/call/embeddings.rb +12 -2
- data/lib/legion/llm/call/structured_output.rb +15 -4
- data/lib/legion/llm/inference/executor.rb +35 -2
- data/lib/legion/llm/inference/steps/metering.rb +2 -0
- data/lib/legion/llm/inference.rb +26 -0
- data/lib/legion/llm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: bea0deb0330e257b0a513675970bd988c5b157170e1bf46569482e9203578681
|
|
4
|
+
data.tar.gz: 9af8c0c5e9d6911f95f738bfd840c3dd1989e2503da7042357332e1c394fe930
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ff80717d479fb79c9c2ea60123828b50c218ce549d90c7d6a9605885c8791c1a078c48a232d4b5437213c904206c326d7b832348eabf694caef8e7cb30abdfcd
|
|
7
|
+
data.tar.gz: d81969d08b0dd13e6447a662aaeb4c4a0c43fe07cfe3e4a2af328bd30ab9d09df0e817a6ffaa8b296e44ebdb00d4a4f2b70f55a3f86c59e1f8b18c0207fb4da2
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,22 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.8.23] - 2026-04-23
|
|
4
|
+
|
|
5
|
+
### Fixed
|
|
6
|
+
- `Call::StructuredOutput` prompt-fallback path passed `messages:` (plural) to `chat_single` which only accepts `message:` (singular), leaking the unknown kwarg into `RubyLLM::Chat.new`. Visible as repeated "unknown keyword: :messages" warnings during dream cycle contradiction detection. Flattened instruction + messages into a single string via `extract_user_content`.
|
|
7
|
+
|
|
8
|
+
## [0.8.22] - 2026-04-22
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
- Error paths in `Executor#run_provider_call_single` and `#step_provider_call_stream` now emit audit events (`Audit.emit_prompt`) before re-raising `RateLimitError`, `ProviderError`, and `ProviderDown`. Previously these errors produced no audit trail.
|
|
12
|
+
- Escalation exhaustion (`EscalationExhausted`) in the pipeline executor now emits an audit event with `status: 'escalation_exhausted'` before raising.
|
|
13
|
+
- `assert_external_allowed!` in the Inference module now emits an audit event with `status: 'privacy_blocked'` before raising `PrivacyModeError`, so enterprise privacy blocks are observable in the audit trail.
|
|
14
|
+
- `step_metering` in `Executor` now passes `request_id:` and `caller:` to `Steps::Metering.build_event` so every metering event carries caller identity and request correlation.
|
|
15
|
+
- `Steps::Metering.identity_fields` updated to include `request_id` and `caller` fields in the emitted metering event payload.
|
|
16
|
+
- `Call::Embeddings.generate` now emits a metering event via `Metering.emit` after each successful `RubyLLM.embed` call, covering the previously unmetered embedding path.
|
|
17
|
+
- `chat_single` in Inference now calls `emit_non_pipeline_metering` after a direct (non-pipeline) `session.ask` so token usage is recorded when the pipeline is disabled.
|
|
18
|
+
- `Call::StructuredOutput.generate` now logs `info` on successful parse and `warn` on `JSON::ParserError` for observability.
|
|
19
|
+
|
|
3
20
|
## [0.8.21] - 2026-04-22
|
|
4
21
|
|
|
5
22
|
### Fixed
|
|
@@ -25,8 +25,9 @@ module Legion
|
|
|
25
25
|
return { vector: nil, model: model, provider: provider, error: "provider #{provider} does not support embeddings" } \
|
|
26
26
|
if provider && !provider_supports_embeddings?(provider)
|
|
27
27
|
|
|
28
|
-
response
|
|
29
|
-
|
|
28
|
+
response = RubyLLM.embed(text, **build_opts(model, provider, dimensions))
|
|
29
|
+
emit_embedding_metering(provider: provider, model: model, tokens: response.input_tokens)
|
|
30
|
+
vector = apply_dimension_enforcement(response.vectors.first, provider)
|
|
30
31
|
return dimension_error(model, provider, vector) if vector.is_a?(String)
|
|
31
32
|
|
|
32
33
|
{ vector: vector, model: model, provider: provider, dimensions: vector&.size || 0, tokens: response.input_tokens }
|
|
@@ -459,6 +460,15 @@ module Legion
|
|
|
459
460
|
|
|
460
461
|
[]
|
|
461
462
|
end
|
|
463
|
+
|
|
464
|
+
# Reports token usage for one embedding request through
# Legion::LLM::Metering.emit.
#
# The event is always tagged request_type 'embed' and tier 'cloud';
# embeddings have no output tokens, so total_tokens equals input_tokens.
# Emission is best-effort: any StandardError raised here is handed to
# handle_exception at :warn level instead of reaching the caller.
#
# @param provider provider identifier forwarded as-is
# @param model model identifier, emitted under the :model_id key
# @param tokens input token count; coerced with #to_i
def emit_embedding_metering(provider:, model:, tokens:)
  token_count = tokens.to_i
  usage_event = {
    provider: provider,
    model_id: model,
    request_type: 'embed',
    tier: 'cloud',
    input_tokens: token_count,
    output_tokens: 0,
    total_tokens: token_count
  }
  Legion::LLM::Metering.emit(**usage_event)
rescue StandardError => metering_error
  handle_exception(metering_error, level: :warn, operation: 'llm.embeddings.metering')
end
|
|
462
472
|
end
|
|
463
473
|
end
|
|
464
474
|
end
|
|
@@ -13,10 +13,12 @@ module Legion
|
|
|
13
13
|
# Requests a schema-constrained completion and parses the reply as JSON.
#
# @param messages conversation handed to call_with_schema unchanged
# @param schema schema the reply is expected to satisfy
# @param model model identifier; falls back to
#   Legion::LLM.settings[:default_model] when nil
# @param provider optional provider identifier
# @return [Hash] on success { data:, raw:, model:, valid: true }; when the
#   reply is not parseable JSON, whatever handle_parse_error returns
def generate(messages:, schema:, model: nil, provider: nil, **)
  model ||= Legion::LLM.settings[:default_model]
  result = call_with_schema(messages, schema, model, provider: provider, **)

  parsed = Legion::JSON.load(result[:content])
  # Log success only after the parse has succeeded, so an unparseable reply
  # never produces a misleading valid=true line ahead of the parse_error warning.
  log.info "[llm][structured_output] model=#{model} provider=#{provider} valid=true"
  { data: parsed, raw: result[:content], model: result[:model], valid: true }
rescue ::JSON::ParserError => e
  log.warn "[llm][structured_output] model=#{model} provider=#{provider} parse_error=#{e.message}"
  handle_parse_error(e, messages, schema, model, provider, result, **)
end
|
|
22
24
|
|
|
@@ -34,10 +36,10 @@ module Legion
|
|
|
34
36
|
instruction = "You MUST respond with valid JSON matching this schema:\n" \
|
|
35
37
|
"```json\n#{Legion::JSON.dump(schema)}\n```\n" \
|
|
36
38
|
'Respond with ONLY the JSON object, no other text.'
|
|
37
|
-
|
|
39
|
+
user_content = extract_user_content(messages, instruction)
|
|
38
40
|
Legion::LLM::Inference.send(:chat_single,
|
|
39
41
|
model: model, provider: provider, intent: nil, tier: nil,
|
|
40
|
-
|
|
42
|
+
message: user_content, **opts.except(:attempt))
|
|
41
43
|
end
|
|
42
44
|
end
|
|
43
45
|
|
|
@@ -53,10 +55,10 @@ module Legion
|
|
|
53
55
|
|
|
54
56
|
def retry_with_instruction(messages, schema, model, provider: nil, **opts)
|
|
55
57
|
instruction = "Your previous response was not valid JSON. Respond with ONLY a valid JSON object matching this schema:\n#{Legion::JSON.dump(schema)}"
|
|
56
|
-
|
|
58
|
+
user_content = extract_user_content(messages, instruction)
|
|
57
59
|
result = Legion::LLM::Inference.send(:chat_single,
|
|
58
60
|
model: model, provider: provider, intent: nil, tier: nil,
|
|
59
|
-
|
|
61
|
+
message: user_content, **opts.except(:attempt))
|
|
60
62
|
|
|
61
63
|
parsed = Legion::JSON.load(result[:content])
|
|
62
64
|
{ data: parsed, raw: result[:content], model: result[:model], valid: true, retried: true }
|
|
@@ -65,6 +67,15 @@ module Legion
|
|
|
65
67
|
{ data: nil, error: e.message, valid: false }
|
|
66
68
|
end
|
|
67
69
|
|
|
70
|
+
# Flattens an instruction and a message list into one prompt string.
#
# The instruction comes first, followed by the non-empty content of each
# message, all joined with blank lines. Message roles are not preserved.
#
# Accepted message shapes:
# * Hash with a :content or 'content' key
# * plain String (used as the content itself)
# * any object responding to #content
# Anything else, and any message whose content is nil or empty, is skipped.
#
# @param messages [Array, Hash, String, nil] a message list or a single message
# @param instruction [String] text placed at the top of the prompt
# @return [String] the combined prompt
def extract_user_content(messages, instruction)
  # Kernel#Array would explode a lone message Hash into [key, value] pairs,
  # so a single Hash is wrapped explicitly.
  list = messages.is_a?(Hash) ? [messages] : Array(messages)

  bodies = list.filter_map do |msg|
    raw = case msg
          when Hash then msg[:content] || msg['content']
          when String then msg
          else msg.respond_to?(:content) ? msg.content : nil
          end
    text = raw.to_s
    text unless text.empty?
  end

  [instruction, *bodies].join("\n\n")
end
|
|
78
|
+
|
|
68
79
|
def supports_response_format?(model)
|
|
69
80
|
SCHEMA_CAPABLE_MODELS.any? { |m| model.to_s.include?(m) }
|
|
70
81
|
end
|
|
@@ -371,19 +371,23 @@ module Legion
|
|
|
371
371
|
rescue RubyLLM::RateLimitError => e
|
|
372
372
|
handle_exception(e, level: :warn, operation: 'llm.pipeline.provider_call.rate_limit',
|
|
373
373
|
provider: @resolved_provider, model: @resolved_model)
|
|
374
|
+
emit_error_audit(e, status: 'rate_limited')
|
|
374
375
|
raise Legion::LLM::RateLimitError, e.message
|
|
375
376
|
rescue RubyLLM::ServerError, RubyLLM::ServiceUnavailableError, RubyLLM::OverloadedError,
|
|
376
377
|
Faraday::ServerError => e
|
|
377
378
|
handle_exception(e, level: :warn, operation: 'llm.pipeline.provider_call.provider_error',
|
|
378
379
|
provider: @resolved_provider, model: @resolved_model)
|
|
380
|
+
emit_error_audit(e, status: 'provider_error')
|
|
379
381
|
raise Legion::LLM::ProviderError, e.message
|
|
380
382
|
rescue Faraday::TooManyRequestsError => e
|
|
381
383
|
handle_exception(e, level: :warn, operation: 'llm.pipeline.provider_call.http_rate_limit',
|
|
382
384
|
provider: @resolved_provider, model: @resolved_model)
|
|
385
|
+
emit_error_audit(e, status: 'rate_limited')
|
|
383
386
|
raise Legion::LLM::RateLimitError.new(e.message, retry_after: extract_retry_after(e))
|
|
384
387
|
rescue Faraday::ConnectionFailed, Faraday::TimeoutError => e
|
|
385
388
|
handle_exception(e, level: :warn, operation: 'llm.pipeline.provider_call.provider_down',
|
|
386
389
|
provider: @resolved_provider, model: @resolved_model)
|
|
390
|
+
emit_error_audit(e, status: 'provider_down')
|
|
387
391
|
raise Legion::LLM::ProviderDown, e.message
|
|
388
392
|
end
|
|
389
393
|
end
|
|
@@ -412,7 +416,13 @@ module Legion
|
|
|
412
416
|
record_escalation_failure(e, resolution, start_time, outcome: :error,
|
|
413
417
|
operation: 'llm.pipeline.escalation_attempt')
|
|
414
418
|
end
|
|
415
|
-
|
|
419
|
+
return if succeeded
|
|
420
|
+
|
|
421
|
+
emit_error_audit(
|
|
422
|
+
EscalationExhausted.new("All #{@escalation_history.size} attempts failed"),
|
|
423
|
+
status: 'escalation_exhausted'
|
|
424
|
+
)
|
|
425
|
+
raise EscalationExhausted, "All #{@escalation_history.size} escalation attempts failed"
|
|
416
426
|
end
|
|
417
427
|
|
|
418
428
|
def attempt_escalation(resolution, threshold, quality_check, start_time)
|
|
@@ -567,6 +577,23 @@ module Legion
|
|
|
567
577
|
error.response[:headers]&.fetch('retry-after', nil)&.to_i
|
|
568
578
|
end
|
|
569
579
|
|
|
580
|
+
# Publishes an audit record describing a failed request through
# Legion::LLM::Audit.emit_prompt.
#
# The record carries the request's id, conversation id and caller, the
# routing target, an empty token hash, the given status, the error's class
# name and message, the current tracing context and a timestamp. The
# request_type is always 'chat'.
# Emission is best-effort: a StandardError raised while building or sending
# the record is passed to handle_exception at :warn level and not re-raised.
#
# @param error [Exception] the failure being recorded
# @param status [String] status label stored on the audit record
# @param provider routing provider; defaults to @resolved_provider
# @param model routing model; defaults to @resolved_model
def emit_error_audit(error, status:, provider: @resolved_provider, model: @resolved_model)
  audit_record = {
    request_id: @request.id,
    conversation_id: @request.conversation_id,
    caller: @request.caller,
    routing: { provider: provider, model: model },
    tokens: {},
    status: status,
    error: { class: error.class.name, message: error.message },
    tracing: @tracing,
    timestamp: Time.now,
    request_type: 'chat'
  }
  Legion::LLM::Audit.emit_prompt(**audit_record)
rescue StandardError => audit_failure
  handle_exception(audit_failure, level: :warn, operation: 'llm.pipeline.emit_error_audit')
end
|
|
596
|
+
|
|
570
597
|
def execute_pre_provider_steps
|
|
571
598
|
PRE_PROVIDER_STEPS.each do |step|
|
|
572
599
|
next if Profile.skip?(@profile, step)
|
|
@@ -645,19 +672,23 @@ module Legion
|
|
|
645
672
|
rescue RubyLLM::RateLimitError => e
|
|
646
673
|
handle_exception(e, level: :warn, operation: 'llm.pipeline.provider_call_stream.rate_limit',
|
|
647
674
|
provider: @resolved_provider, model: @resolved_model)
|
|
675
|
+
emit_error_audit(e, status: 'rate_limited')
|
|
648
676
|
raise Legion::LLM::RateLimitError, e.message
|
|
649
677
|
rescue RubyLLM::ServerError, RubyLLM::ServiceUnavailableError, RubyLLM::OverloadedError,
|
|
650
678
|
Faraday::ServerError => e
|
|
651
679
|
handle_exception(e, level: :warn, operation: 'llm.pipeline.provider_call_stream.provider_error',
|
|
652
680
|
provider: @resolved_provider, model: @resolved_model)
|
|
681
|
+
emit_error_audit(e, status: 'provider_error')
|
|
653
682
|
raise Legion::LLM::ProviderError, e.message
|
|
654
683
|
rescue Faraday::TooManyRequestsError => e
|
|
655
684
|
handle_exception(e, level: :warn, operation: 'llm.pipeline.provider_call_stream.http_rate_limit',
|
|
656
685
|
provider: @resolved_provider, model: @resolved_model)
|
|
686
|
+
emit_error_audit(e, status: 'rate_limited')
|
|
657
687
|
raise Legion::LLM::RateLimitError.new(e.message, retry_after: extract_retry_after(e))
|
|
658
688
|
rescue Faraday::ConnectionFailed, Faraday::TimeoutError => e
|
|
659
689
|
handle_exception(e, level: :warn, operation: 'llm.pipeline.provider_call_stream.provider_down',
|
|
660
690
|
provider: @resolved_provider, model: @resolved_model)
|
|
691
|
+
emit_error_audit(e, status: 'provider_down')
|
|
661
692
|
raise Legion::LLM::ProviderDown, e.message
|
|
662
693
|
end
|
|
663
694
|
end
|
|
@@ -1034,7 +1065,9 @@ module Legion
|
|
|
1034
1065
|
request_type: 'chat',
|
|
1035
1066
|
input_tokens: input_tokens,
|
|
1036
1067
|
output_tokens: output_tokens,
|
|
1037
|
-
latency_ms: latency_ms
|
|
1068
|
+
latency_ms: latency_ms,
|
|
1069
|
+
request_id: @request.id,
|
|
1070
|
+
caller: @request.caller
|
|
1038
1071
|
)
|
|
1039
1072
|
Steps::Metering.publish_or_spool(event)
|
|
1040
1073
|
rescue StandardError => e
|
data/lib/legion/llm/inference.rb
CHANGED
|
@@ -514,6 +514,7 @@ module Legion
|
|
|
514
514
|
log.debug '[llm][inference] chat_single asking session'
|
|
515
515
|
response = block ? session.ask(message, &block) : session.ask(message)
|
|
516
516
|
log.debug "[llm][inference] chat_single response_class=#{response.class} response_nil=#{response.nil?}"
|
|
517
|
+
emit_non_pipeline_metering(response, model: opts[:model], provider: opts[:provider])
|
|
517
518
|
|
|
518
519
|
if response && !block && defined?(Quality::ShadowEval) && Quality::ShadowEval.enabled?
|
|
519
520
|
msgs = session.respond_to?(:messages) ? session.messages : nil
|
|
@@ -711,6 +712,19 @@ module Legion
|
|
|
711
712
|
esc.fetch(:quality_threshold, 50)
|
|
712
713
|
end
|
|
713
714
|
|
|
715
|
+
# Emits a metering event for a response obtained without the pipeline.
#
# Does nothing when response is nil or false. Token counts are read from
# response.input_tokens / response.output_tokens when the response exposes
# those readers (coerced with #to_i), otherwise they count as 0. The event
# is tagged request_type 'chat' and tier 'direct'.
# Emission is best-effort: a StandardError is passed to handle_exception at
# :warn level and not re-raised.
#
# @param response provider response object, possibly nil
# @param model model identifier, emitted under the :model_id key
# @param provider provider identifier
def emit_non_pipeline_metering(response, model:, provider:)
  return unless response

  prompt_tokens, completion_tokens = %i[input_tokens output_tokens].map do |reader|
    response.respond_to?(reader) ? response.public_send(reader).to_i : 0
  end

  usage_event = {
    provider: provider,
    model_id: model,
    request_type: 'chat',
    tier: 'direct',
    input_tokens: prompt_tokens,
    output_tokens: completion_tokens,
    total_tokens: prompt_tokens + completion_tokens
  }
  Legion::LLM::Metering.emit(**usage_event)
rescue StandardError => metering_error
  handle_exception(metering_error, level: :warn, operation: 'llm.inference.non_pipeline_metering')
end
|
|
727
|
+
|
|
714
728
|
def enterprise_privacy?
|
|
715
729
|
if Legion.const_defined?('Settings', false) && Legion::Settings.respond_to?(:enterprise_privacy?)
|
|
716
730
|
Legion::Settings.enterprise_privacy?
|
|
@@ -719,9 +733,21 @@ module Legion
|
|
|
719
733
|
end
|
|
720
734
|
end
|
|
721
735
|
|
|
736
|
+
# Publishes a 'privacy_blocked' audit record through
# Legion::LLM::Audit.emit_prompt.
#
# No request context is available here, so request_id, conversation_id and
# caller are nil and routing/tokens are empty hashes. The request_type is
# always 'chat'.
# Emission is best-effort: a StandardError is passed to handle_exception at
# :warn level and not re-raised.
def emit_privacy_blocked_audit
  Legion::LLM::Audit.emit_prompt(
    request_id: nil, conversation_id: nil, caller: nil,
    routing: {}, tokens: {}, status: 'privacy_blocked',
    # Fully qualified, matching the `error.class.name` format that the
    # executor's emit_error_audit writes, so audit consumers can group on :class.
    error: { class: 'Legion::LLM::PrivacyModeError', message: 'External tiers blocked by enterprise privacy' },
    timestamp: Time.now, request_type: 'chat'
  )
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'llm.inference.emit_privacy_blocked_audit')
end
|
|
746
|
+
|
|
722
747
|
def assert_external_allowed!
|
|
723
748
|
return unless enterprise_privacy?
|
|
724
749
|
|
|
750
|
+
emit_privacy_blocked_audit
|
|
725
751
|
raise Legion::LLM::PrivacyModeError,
|
|
726
752
|
'External LLM tiers are disabled: enterprise_data_privacy is enabled. ' \
|
|
727
753
|
'Only local and fleet tiers are permitted.'
|
data/lib/legion/llm/version.rb
CHANGED