legion-llm 0.8.21 → 0.8.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5fdd5beb0c0e3a464cd06848369ec4cf9c07beab7a9c1560cfc0d13e1a6dc721
4
- data.tar.gz: 2539e7e1a1cebcd9ba363ac66f79c08f9314d7477d482f1d17a7bf6185e1fd0b
3
+ metadata.gz: 06c8e0373f627e588f41b1c2d8fda18fd75bbc0e673ab270ec0d08bfe27695ec
4
+ data.tar.gz: 15c2e48761d2e797e144178db61363344663c9860918ddd001088532eaad84ee
5
5
  SHA512:
6
- metadata.gz: 377c8498427bbe07d0dd171ce8fba340683ac8fb0286b781092caa24eb19c038fc9e621115bfcbfe12aee25c1f84aa267b4c289b03cd9424a2beee0d31ccf128
7
- data.tar.gz: 611ab93b2732f10aaac6015b2fc042af6de8963b23012c73a00a6c6a36477639a61763ace1be0e65eb2f38c815cd0418c9d118a644090324a6de53b98cc2f5a6
6
+ metadata.gz: e4420346b02d8ec03fb5b80d930f678256245b4b80bd75076e9be15e9abf82d3f569af16f0278cbc78ec0bf8deb6058484045beb27fee8db265f8abf37a67d87
7
+ data.tar.gz: e1477099d25547be1bd2f3556a19d1d38a6ce7c77eb3b856c03c0605d1d2510eaa173828e63ed7e1d2763dc2d4f036e754bc93c274b5a98b3437de72bc72aeaf
data/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # Legion LLM Changelog
2
2
 
3
+ ## [0.8.22] - 2026-04-22
4
+
5
+ ### Fixed
6
+ - Error paths in `Executor#run_provider_call_single` and `#step_provider_call_stream` now emit audit events (`Audit.emit_prompt`) before re-raising `RateLimitError`, `ProviderError`, and `ProviderDown`. Previously these errors produced no audit trail.
7
+ - Escalation exhaustion (`EscalationExhausted`) in the pipeline executor now emits an audit event with `status: 'escalation_exhausted'` before raising.
8
+ - `assert_external_allowed!` in the Inference module now emits an audit event with `status: 'privacy_blocked'` before raising `PrivacyModeError`, so enterprise privacy blocks are observable in the audit trail.
9
+ - `step_metering` in `Executor` now passes `request_id:` and `caller:` to `Steps::Metering.build_event` so every metering event carries caller identity and request correlation.
10
+ - `Steps::Metering.identity_fields` updated to include `request_id` and `caller` fields in the emitted metering event payload.
11
+ - `Call::Embeddings.generate` now emits a metering event via `Metering.emit` after each successful `RubyLLM.embed` call, covering the previously unmetered embedding path.
12
+ - `chat_single` in Inference now calls `emit_non_pipeline_metering` after a direct (non-pipeline) `session.ask` so token usage is recorded when the pipeline is disabled.
13
+ - `Call::StructuredOutput.generate` now logs `info` on successful parse and `warn` on `JSON::ParserError` for observability.
14
+
3
15
  ## [0.8.21] - 2026-04-22
4
16
 
5
17
  ### Fixed
@@ -25,8 +25,9 @@ module Legion
25
25
  return { vector: nil, model: model, provider: provider, error: "provider #{provider} does not support embeddings" } \
26
26
  if provider && !provider_supports_embeddings?(provider)
27
27
 
28
- response = RubyLLM.embed(text, **build_opts(model, provider, dimensions))
29
- vector = apply_dimension_enforcement(response.vectors.first, provider)
28
+ response = RubyLLM.embed(text, **build_opts(model, provider, dimensions))
29
+ emit_embedding_metering(provider: provider, model: model, tokens: response.input_tokens)
30
+ vector = apply_dimension_enforcement(response.vectors.first, provider)
30
31
  return dimension_error(model, provider, vector) if vector.is_a?(String)
31
32
 
32
33
  { vector: vector, model: model, provider: provider, dimensions: vector&.size || 0, tokens: response.input_tokens }
@@ -459,6 +460,15 @@ module Legion
459
460
 
460
461
  []
461
462
  end
463
+
464
# Reports token usage for one embedding request via Legion::LLM::Metering.emit.
#
# Best-effort: a StandardError raised while building or emitting the event is
# handed to handle_exception at :warn level and is not re-raised.
#
# @param provider [Object] forwarded as the event's :provider
# @param model [Object] forwarded as the event's :model_id
# @param tokens [#to_i] input token count; coerced with to_i (nil becomes 0)
def emit_embedding_metering(provider:, model:, tokens:)
  token_count = tokens.to_i
  # Embeddings produce no output tokens, so the total equals the input count.
  event = {
    provider: provider, model_id: model, request_type: 'embed', tier: 'cloud',
    input_tokens: token_count, output_tokens: 0, total_tokens: token_count
  }
  Legion::LLM::Metering.emit(**event)
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'llm.embeddings.metering')
end
462
472
  end
463
473
  end
464
474
  end
@@ -13,10 +13,12 @@ module Legion
13
13
  def generate(messages:, schema:, model: nil, provider: nil, **)
14
14
  model ||= Legion::LLM.settings[:default_model]
15
15
  result = call_with_schema(messages, schema, model, provider: provider, **)
16
+ log.info "[llm][structured_output] model=#{model} provider=#{provider} valid=true"
16
17
 
17
18
  parsed = Legion::JSON.load(result[:content])
18
19
  { data: parsed, raw: result[:content], model: result[:model], valid: true }
19
20
  rescue ::JSON::ParserError => e
21
+ log.warn "[llm][structured_output] model=#{model} provider=#{provider} parse_error=#{e.message}"
20
22
  handle_parse_error(e, messages, schema, model, provider, result, **)
21
23
  end
22
24
 
@@ -371,19 +371,23 @@ module Legion
371
371
  rescue RubyLLM::RateLimitError => e
372
372
  handle_exception(e, level: :warn, operation: 'llm.pipeline.provider_call.rate_limit',
373
373
  provider: @resolved_provider, model: @resolved_model)
374
+ emit_error_audit(e, status: 'rate_limited')
374
375
  raise Legion::LLM::RateLimitError, e.message
375
376
  rescue RubyLLM::ServerError, RubyLLM::ServiceUnavailableError, RubyLLM::OverloadedError,
376
377
  Faraday::ServerError => e
377
378
  handle_exception(e, level: :warn, operation: 'llm.pipeline.provider_call.provider_error',
378
379
  provider: @resolved_provider, model: @resolved_model)
380
+ emit_error_audit(e, status: 'provider_error')
379
381
  raise Legion::LLM::ProviderError, e.message
380
382
  rescue Faraday::TooManyRequestsError => e
381
383
  handle_exception(e, level: :warn, operation: 'llm.pipeline.provider_call.http_rate_limit',
382
384
  provider: @resolved_provider, model: @resolved_model)
385
+ emit_error_audit(e, status: 'rate_limited')
383
386
  raise Legion::LLM::RateLimitError.new(e.message, retry_after: extract_retry_after(e))
384
387
  rescue Faraday::ConnectionFailed, Faraday::TimeoutError => e
385
388
  handle_exception(e, level: :warn, operation: 'llm.pipeline.provider_call.provider_down',
386
389
  provider: @resolved_provider, model: @resolved_model)
390
+ emit_error_audit(e, status: 'provider_down')
387
391
  raise Legion::LLM::ProviderDown, e.message
388
392
  end
389
393
  end
@@ -412,7 +416,13 @@ module Legion
412
416
  record_escalation_failure(e, resolution, start_time, outcome: :error,
413
417
  operation: 'llm.pipeline.escalation_attempt')
414
418
  end
415
- raise EscalationExhausted, "All #{@escalation_history.size} escalation attempts failed" unless succeeded
419
+ return if succeeded
420
+
421
+ emit_error_audit(
422
+ EscalationExhausted.new("All #{@escalation_history.size} attempts failed"),
423
+ status: 'escalation_exhausted'
424
+ )
425
+ raise EscalationExhausted, "All #{@escalation_history.size} escalation attempts failed"
416
426
  end
417
427
 
418
428
  def attempt_escalation(resolution, threshold, quality_check, start_time)
@@ -567,6 +577,23 @@ module Legion
567
577
  error.response[:headers]&.fetch('retry-after', nil)&.to_i
568
578
  end
569
579
 
580
# Emits an audit record for a failed request through Legion::LLM::Audit.emit_prompt.
#
# Request identity is read from @request (id, conversation_id, caller) and
# tracing data from @tracing. Best-effort: a StandardError raised while
# building or emitting the record is handed to handle_exception at :warn level
# and is not re-raised, so the caller can still raise its own error afterwards.
#
# @param error [Exception] failure being recorded; its class name and message
#   are stored under :error
# @param status [String] status label stored on the audit record
# @param provider [Object] routing provider; defaults to @resolved_provider
# @param model [Object] routing model; defaults to @resolved_model
def emit_error_audit(error, status:, provider: @resolved_provider, model: @resolved_model)
  audit_payload = {
    request_id: @request.id,
    conversation_id: @request.conversation_id,
    caller: @request.caller,
    routing: { provider: provider, model: model },
    tokens: {}, # no usage figures are recorded on this error path
    status: status,
    error: { class: error.class.name, message: error.message },
    tracing: @tracing,
    timestamp: Time.now,
    request_type: 'chat'
  }
  Legion::LLM::Audit.emit_prompt(**audit_payload)
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'llm.pipeline.emit_error_audit')
end
596
+
570
597
  def execute_pre_provider_steps
571
598
  PRE_PROVIDER_STEPS.each do |step|
572
599
  next if Profile.skip?(@profile, step)
@@ -645,19 +672,23 @@ module Legion
645
672
  rescue RubyLLM::RateLimitError => e
646
673
  handle_exception(e, level: :warn, operation: 'llm.pipeline.provider_call_stream.rate_limit',
647
674
  provider: @resolved_provider, model: @resolved_model)
675
+ emit_error_audit(e, status: 'rate_limited')
648
676
  raise Legion::LLM::RateLimitError, e.message
649
677
  rescue RubyLLM::ServerError, RubyLLM::ServiceUnavailableError, RubyLLM::OverloadedError,
650
678
  Faraday::ServerError => e
651
679
  handle_exception(e, level: :warn, operation: 'llm.pipeline.provider_call_stream.provider_error',
652
680
  provider: @resolved_provider, model: @resolved_model)
681
+ emit_error_audit(e, status: 'provider_error')
653
682
  raise Legion::LLM::ProviderError, e.message
654
683
  rescue Faraday::TooManyRequestsError => e
655
684
  handle_exception(e, level: :warn, operation: 'llm.pipeline.provider_call_stream.http_rate_limit',
656
685
  provider: @resolved_provider, model: @resolved_model)
686
+ emit_error_audit(e, status: 'rate_limited')
657
687
  raise Legion::LLM::RateLimitError.new(e.message, retry_after: extract_retry_after(e))
658
688
  rescue Faraday::ConnectionFailed, Faraday::TimeoutError => e
659
689
  handle_exception(e, level: :warn, operation: 'llm.pipeline.provider_call_stream.provider_down',
660
690
  provider: @resolved_provider, model: @resolved_model)
691
+ emit_error_audit(e, status: 'provider_down')
661
692
  raise Legion::LLM::ProviderDown, e.message
662
693
  end
663
694
  end
@@ -1034,7 +1065,9 @@ module Legion
1034
1065
  request_type: 'chat',
1035
1066
  input_tokens: input_tokens,
1036
1067
  output_tokens: output_tokens,
1037
- latency_ms: latency_ms
1068
+ latency_ms: latency_ms,
1069
+ request_id: @request.id,
1070
+ caller: @request.caller
1038
1071
  )
1039
1072
  Steps::Metering.publish_or_spool(event)
1040
1073
  rescue StandardError => e
@@ -29,6 +29,8 @@ module Legion
29
29
  node_id: opts[:node_id],
30
30
  worker_id: opts[:worker_id],
31
31
  agent_id: opts[:agent_id],
32
+ request_id: opts[:request_id],
33
+ caller: opts[:caller],
32
34
  request_type: opts[:request_type],
33
35
  tier: opts[:tier],
34
36
  provider: opts[:provider],
@@ -514,6 +514,7 @@ module Legion
514
514
  log.debug '[llm][inference] chat_single asking session'
515
515
  response = block ? session.ask(message, &block) : session.ask(message)
516
516
  log.debug "[llm][inference] chat_single response_class=#{response.class} response_nil=#{response.nil?}"
517
+ emit_non_pipeline_metering(response, model: opts[:model], provider: opts[:provider])
517
518
 
518
519
  if response && !block && defined?(Quality::ShadowEval) && Quality::ShadowEval.enabled?
519
520
  msgs = session.respond_to?(:messages) ? session.messages : nil
@@ -711,6 +712,19 @@ module Legion
711
712
  esc.fetch(:quality_threshold, 50)
712
713
  end
713
714
 
715
# Reports token usage for a chat response obtained without the pipeline, via
# Legion::LLM::Metering.emit.
#
# Does nothing when response is nil/false. A token count the response does not
# expose (no input_tokens / output_tokens method) is reported as 0.
# Best-effort: a StandardError raised here is handed to handle_exception at
# :warn level and is not re-raised.
#
# @param response [Object, nil] response object, optionally exposing
#   input_tokens and output_tokens
# @param model [Object] forwarded as the event's :model_id
# @param provider [Object] forwarded as the event's :provider
def emit_non_pipeline_metering(response, model:, provider:)
  return unless response

  prompt_tokens = 0
  prompt_tokens = response.input_tokens.to_i if response.respond_to?(:input_tokens)
  completion_tokens = 0
  completion_tokens = response.output_tokens.to_i if response.respond_to?(:output_tokens)

  usage_event = {
    provider: provider, model_id: model, request_type: 'chat', tier: 'direct',
    input_tokens: prompt_tokens, output_tokens: completion_tokens,
    total_tokens: prompt_tokens + completion_tokens
  }
  Legion::LLM::Metering.emit(**usage_event)
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'llm.inference.non_pipeline_metering')
end
727
+
714
728
  def enterprise_privacy?
715
729
  if Legion.const_defined?('Settings', false) && Legion::Settings.respond_to?(:enterprise_privacy?)
716
730
  Legion::Settings.enterprise_privacy?
@@ -719,9 +733,21 @@ module Legion
719
733
  end
720
734
  end
721
735
 
736
# Emits a 'privacy_blocked' audit record through Legion::LLM::Audit.emit_prompt.
#
# No request context is available here, so request_id, conversation_id and
# caller are nil and routing/tokens are empty. Best-effort: a StandardError
# raised while emitting is handed to handle_exception at :warn level and is
# not re-raised.
#
# NOTE(review): no tracing: argument is passed — confirm Audit.emit_prompt
# treats it as optional; otherwise the resulting ArgumentError would be
# swallowed by the rescue below and no record would be written.
#
# @return [Object] whatever emit_prompt (or handle_exception) returns
def emit_privacy_blocked_audit
  Legion::LLM::Audit.emit_prompt(
    request_id: nil, conversation_id: nil, caller: nil,
    routing: {}, tokens: {}, status: 'privacy_blocked',
    # Use the fully-qualified class name, taken from the constant itself, so
    # the record names the same class that is raised to the caller.
    error: { class: Legion::LLM::PrivacyModeError.name,
             message: 'External tiers blocked by enterprise privacy' },
    timestamp: Time.now, request_type: 'chat'
  )
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'llm.inference.emit_privacy_blocked_audit')
end
746
+
722
747
  def assert_external_allowed!
723
748
  return unless enterprise_privacy?
724
749
 
750
+ emit_privacy_blocked_audit
725
751
  raise Legion::LLM::PrivacyModeError,
726
752
  'External LLM tiers are disabled: enterprise_data_privacy is enabled. ' \
727
753
  'Only local and fleet tiers are permitted.'
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module LLM
5
- VERSION = '0.8.21'
5
+ VERSION = '0.8.22'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.21
4
+ version: 0.8.22
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity