legion-llm 0.8.22 → 0.8.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 06c8e0373f627e588f41b1c2d8fda18fd75bbc0e673ab270ec0d08bfe27695ec
4
- data.tar.gz: 15c2e48761d2e797e144178db61363344663c9860918ddd001088532eaad84ee
3
+ metadata.gz: 1c7a3d39cf4e2e31494ee7e354680e57ae1c6c1feabe2f39e8707a06701c6a15
4
+ data.tar.gz: c8733d7f96801aa19c35458f4e527574815909dd87e89ed80d589bf565a8467c
5
5
  SHA512:
6
- metadata.gz: e4420346b02d8ec03fb5b80d930f678256245b4b80bd75076e9be15e9abf82d3f569af16f0278cbc78ec0bf8deb6058484045beb27fee8db265f8abf37a67d87
7
- data.tar.gz: e1477099d25547be1bd2f3556a19d1d38a6ce7c77eb3b856c03c0605d1d2510eaa173828e63ed7e1d2763dc2d4f036e754bc93c274b5a98b3437de72bc72aeaf
6
+ metadata.gz: 37049fdb4a5dc838fecc0d3b6c57e48bbcb72d490b8e8460e04cdd7a19728d82e8c2c3f48ab0bfc059bc2245d56f40f279d79b082e31c7f4c85fa57d86270e42
7
+ data.tar.gz: 06ae35daf7458e38b4990c2a230a5f72bffd65bc99bc3dd2bbb16be4ff60aa3665c3b807c3b19637f407fc1e55b6f88148c433d39474411bfb222fe9e07d412c
data/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # Legion LLM Changelog
2
2
 
3
+ ## [0.8.24] - 2026-04-23
4
+
5
+ ### Fixed
6
+ - All AMQP transport messages (audit, metering, tool, escalation) now include identity headers (`x-legion-identity`, `x-legion-credential`, `x-legion-hostname`) extracted from the `requested_by` entry of the `caller` field; each header is set only when its value is present. Previously only prompt audit events carried identity in the body — tool audit and metering messages had no identity at all.
7
+ - Embedding metering events now include `caller` context, read from `Legion::LLM.settings[:caller]`.
8
+ - Non-pipeline `chat_single` metering events now include `caller` context from kwargs.
9
+
10
+ ## [0.8.23] - 2026-04-23
11
+
12
+ ### Fixed
13
+ - `Call::StructuredOutput` prompt-fallback path passed `messages:` (plural) to `chat_single`, which only accepts `message:` (singular), leaking the unknown kwarg into `RubyLLM::Chat.new`. This was visible as repeated "unknown keyword: :messages" warnings during dream cycle contradiction detection. The fallback and retry paths now flatten the instruction and the message contents into a single string via `extract_user_content` (message roles are not preserved).
14
+
3
15
  ## [0.8.22] - 2026-04-22
4
16
 
5
17
  ### Fixed
@@ -462,9 +462,16 @@ module Legion
462
462
  end
463
463
 
464
464
  def emit_embedding_metering(provider:, model:, tokens:)
465
+ caller = begin
466
+ Legion::LLM.settings[:caller]
467
+ rescue StandardError => e
468
+ handle_exception(e, level: :debug, operation: 'llm.embeddings.metering.caller')
469
+ nil
470
+ end
465
471
  Legion::LLM::Metering.emit(
466
472
  provider: provider, model_id: model, request_type: 'embed',
467
- tier: 'cloud', input_tokens: tokens.to_i, output_tokens: 0, total_tokens: tokens.to_i
473
+ tier: 'cloud', input_tokens: tokens.to_i, output_tokens: 0, total_tokens: tokens.to_i,
474
+ caller: caller
468
475
  )
469
476
  rescue StandardError => e
470
477
  handle_exception(e, level: :warn, operation: 'llm.embeddings.metering')
@@ -36,10 +36,10 @@ module Legion
36
36
  instruction = "You MUST respond with valid JSON matching this schema:\n" \
37
37
  "```json\n#{Legion::JSON.dump(schema)}\n```\n" \
38
38
  'Respond with ONLY the JSON object, no other text.'
39
- augmented = [{ role: 'system', content: instruction }] + Array(messages)
39
+ user_content = extract_user_content(messages, instruction)
40
40
  Legion::LLM::Inference.send(:chat_single,
41
41
  model: model, provider: provider, intent: nil, tier: nil,
42
- messages: augmented, **opts.except(:attempt))
42
+ message: user_content, **opts.except(:attempt))
43
43
  end
44
44
  end
45
45
 
@@ -55,10 +55,10 @@ module Legion
55
55
 
56
56
  def retry_with_instruction(messages, schema, model, provider: nil, **opts)
57
57
  instruction = "Your previous response was not valid JSON. Respond with ONLY a valid JSON object matching this schema:\n#{Legion::JSON.dump(schema)}"
58
- augmented = Array(messages) + [{ role: 'user', content: instruction }]
58
+ user_content = extract_user_content(messages, instruction)
59
59
  result = Legion::LLM::Inference.send(:chat_single,
60
60
  model: model, provider: provider, intent: nil, tier: nil,
61
- messages: augmented, **opts.except(:attempt))
61
+ message: user_content, **opts.except(:attempt))
62
62
 
63
63
  parsed = Legion::JSON.load(result[:content])
64
64
  { data: parsed, raw: result[:content], model: result[:model], valid: true, retried: true }
@@ -67,6 +67,15 @@ module Legion
67
67
  { data: nil, error: e.message, valid: false }
68
68
  end
69
69
 
70
+ def extract_user_content(messages, instruction)
71
+ parts = [instruction]
72
+ Array(messages).each do |msg|
73
+ content = msg[:content] || msg['content']
74
+ parts << content.to_s unless content.to_s.empty?
75
+ end
76
+ parts.join("\n\n")
77
+ end
78
+
70
79
  def supports_response_format?(model)
71
80
  SCHEMA_CAPABLE_MODELS.any? { |m| model.to_s.include?(m) }
72
81
  end
@@ -514,7 +514,7 @@ module Legion
514
514
  log.debug '[llm][inference] chat_single asking session'
515
515
  response = block ? session.ask(message, &block) : session.ask(message)
516
516
  log.debug "[llm][inference] chat_single response_class=#{response.class} response_nil=#{response.nil?}"
517
- emit_non_pipeline_metering(response, model: opts[:model], provider: opts[:provider])
517
+ emit_non_pipeline_metering(response, model: opts[:model], provider: opts[:provider], caller: kwargs[:caller])
518
518
 
519
519
  if response && !block && defined?(Quality::ShadowEval) && Quality::ShadowEval.enabled?
520
520
  msgs = session.respond_to?(:messages) ? session.messages : nil
@@ -712,14 +712,15 @@ module Legion
712
712
  esc.fetch(:quality_threshold, 50)
713
713
  end
714
714
 
715
- def emit_non_pipeline_metering(response, model:, provider:)
715
+ def emit_non_pipeline_metering(response, model:, provider:, caller: nil)
716
716
  return unless response
717
717
 
718
718
  input = response.respond_to?(:input_tokens) ? response.input_tokens.to_i : 0
719
719
  output = response.respond_to?(:output_tokens) ? response.output_tokens.to_i : 0
720
720
  Legion::LLM::Metering.emit(
721
721
  provider: provider, model_id: model, request_type: 'chat',
722
- tier: 'direct', input_tokens: input, output_tokens: output, total_tokens: input + output
722
+ tier: 'direct', input_tokens: input, output_tokens: output, total_tokens: input + output,
723
+ caller: caller
723
724
  )
724
725
  rescue StandardError => e
725
726
  handle_exception(e, level: :warn, operation: 'llm.inference.non_pipeline_metering')
@@ -65,6 +65,19 @@ module Legion
65
65
  h['x-legion-llm-model'] = model_val.to_s if model_val
66
66
  h['x-legion-llm-request-type'] = @options[:request_type].to_s if @options[:request_type]
67
67
  h['x-legion-llm-schema-version'] = '1.0.0'
68
+ h.merge(identity_headers)
69
+ end
70
+
71
+ def identity_headers
72
+ caller = @options[:caller]
73
+ return {} unless caller.is_a?(Hash)
74
+
75
+ rb = caller[:requested_by] || caller['requested_by'] || {}
76
+ h = {}
77
+ identity = rb[:identity] || rb['identity'] || rb[:username] || rb['username']
78
+ h['x-legion-identity'] = identity.to_s if identity
79
+ h['x-legion-credential'] = (rb[:credential] || rb['credential']).to_s if rb[:credential] || rb['credential']
80
+ h['x-legion-hostname'] = (rb[:hostname] || rb['hostname']).to_s if rb[:hostname] || rb['hostname']
68
81
  h
69
82
  end
70
83
 
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module LLM
5
- VERSION = '0.8.22'
5
+ VERSION = '0.8.24'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.22
4
+ version: 0.8.24
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity