legion-llm 0.8.23 → 0.8.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bea0deb0330e257b0a513675970bd988c5b157170e1bf46569482e9203578681
4
- data.tar.gz: 9af8c0c5e9d6911f95f738bfd840c3dd1989e2503da7042357332e1c394fe930
3
+ metadata.gz: 6b37f926f357a862c036b2e3d4676b579ae505ba6f76b7b7da332ba3aeeed8ac
4
+ data.tar.gz: 59ab5db376ac8be2a45cda642dec1d22c8eda3e76782b9f2cd5689233bcc2063
5
5
  SHA512:
6
- metadata.gz: ff80717d479fb79c9c2ea60123828b50c218ce549d90c7d6a9605885c8791c1a078c48a232d4b5437213c904206c326d7b832348eabf694caef8e7cb30abdfcd
7
- data.tar.gz: d81969d08b0dd13e6447a662aaeb4c4a0c43fe07cfe3e4a2af328bd30ab9d09df0e817a6ffaa8b296e44ebdb00d4a4f2b70f55a3f86c59e1f8b18c0207fb4da2
6
+ metadata.gz: 6994e774e6a0551c720ef0a2c7a5aa4681f324c9e8c09a624ca2e227565cd87ca26f1457ed108967ae3e17593e6a5ee8df7fdf20515aa0ccdd277f2eb16db827
7
+ data.tar.gz: 815d14a99f379c2d655341776507e96be1625582b4fd4989597c4c299a1086be394dbad785b4b33e2d1423419a37ea37392ee617fa75fbb88208dac6d6864bce
data/CHANGELOG.md CHANGED
@@ -1,5 +1,18 @@
1
1
  # Legion LLM Changelog
2
2
 
3
+ ## [0.8.25] - 2026-04-24
4
+
5
+ ### Fixed
6
+ - `StructuredOutput.generate`, `handle_parse_error`, and `retry_with_instruction` used hash-style access (`result[:content]`, `result[:model]`) on the return value of `chat_single`, but `chat_single` returns a `RubyLLM::Message` object which only supports method access (`.content`, `.model_id`). All four access sites now use `respond_to?` duck-typing so both hash and Message objects work. Visible as `undefined method '[]' for an instance of RubyLLM::Message` in Apollo's `llm_detects_conflict?` and any structured output caller using non-schema-capable models (e.g. ollama/qwen).
7
+ - `Call::Embeddings.generate` crashed with `NoMethodError` on `.size` when `response.vectors` was a flat array (`[0.007, ...]`) instead of nested (`[[0.007, ...]]`). RubyLLM's OpenAI provider unwraps single-input embedding responses. Added `normalize_vectors_first` to detect and handle both flat and nested vector formats before dimension enforcement.
8
+
9
+ ## [0.8.24] - 2026-04-23
10
+
11
+ ### Fixed
12
+ - All AMQP transport messages (audit, metering, tool, escalation) now include identity headers (`x-legion-identity`, `x-legion-credential`, `x-legion-hostname`) extracted from the `caller` field. Previously only prompt audit events carried identity in the body — tool audit and metering messages had no identity at all.
13
+ - Embedding metering events now include `caller` context.
14
+ - Non-pipeline `chat_single` metering events now include `caller` context from kwargs.
15
+
3
16
  ## [0.8.23] - 2026-04-23
4
17
 
5
18
  ### Fixed
@@ -27,7 +27,8 @@ module Legion
27
27
 
28
28
  response = RubyLLM.embed(text, **build_opts(model, provider, dimensions))
29
29
  emit_embedding_metering(provider: provider, model: model, tokens: response.input_tokens)
30
- vector = apply_dimension_enforcement(response.vectors.first, provider)
30
+ vector = normalize_vectors_first(response.vectors)
31
+ vector = apply_dimension_enforcement(vector, provider)
31
32
  return dimension_error(model, provider, vector) if vector.is_a?(String)
32
33
 
33
34
  { vector: vector, model: model, provider: provider, dimensions: vector&.size || 0, tokens: response.input_tokens }
@@ -101,6 +102,16 @@ module Legion
101
102
  opts
102
103
  end
103
104
 
105
+ def normalize_vectors_first(vectors)
106
+ return nil if vectors.nil? || (vectors.is_a?(Array) && vectors.empty?)
107
+
108
+ first = vectors.first
109
+ return first if first.is_a?(Array)
110
+ return vectors if vectors.is_a?(Array) && vectors.first.is_a?(Numeric)
111
+
112
+ first
113
+ end
114
+
104
115
  def apply_dimension_enforcement(vector, provider)
105
116
  return vector unless enforce_dimension? && vector.is_a?(Array)
106
117
 
@@ -462,9 +473,16 @@ module Legion
462
473
  end
463
474
 
464
475
  def emit_embedding_metering(provider:, model:, tokens:)
476
+ caller = begin
477
+ Legion::LLM.settings[:caller]
478
+ rescue StandardError => e
479
+ handle_exception(e, level: :debug, operation: 'llm.embeddings.metering.caller')
480
+ nil
481
+ end
465
482
  Legion::LLM::Metering.emit(
466
483
  provider: provider, model_id: model, request_type: 'embed',
467
- tier: 'cloud', input_tokens: tokens.to_i, output_tokens: 0, total_tokens: tokens.to_i
484
+ tier: 'cloud', input_tokens: tokens.to_i, output_tokens: 0, total_tokens: tokens.to_i,
485
+ caller: caller
468
486
  )
469
487
  rescue StandardError => e
470
488
  handle_exception(e, level: :warn, operation: 'llm.embeddings.metering')
@@ -15,8 +15,11 @@ module Legion
15
15
  result = call_with_schema(messages, schema, model, provider: provider, **)
16
16
  log.info "[llm][structured_output] model=#{model} provider=#{provider} valid=true"
17
17
 
18
- parsed = Legion::JSON.load(result[:content])
19
- { data: parsed, raw: result[:content], model: result[:model], valid: true }
18
+ content = result.respond_to?(:content) ? result.content : result[:content]
19
+ raw_model = result.respond_to?(:model_id) ? result.model_id : result[:model]
20
+
21
+ parsed = Legion::JSON.load(content)
22
+ { data: parsed, raw: content, model: raw_model, valid: true }
20
23
  rescue ::JSON::ParserError => e
21
24
  log.warn "[llm][structured_output] model=#{model} provider=#{provider} parse_error=#{e.message}"
22
25
  handle_parse_error(e, messages, schema, model, provider, result, **)
@@ -49,7 +52,8 @@ module Legion
49
52
  if retry_enabled? && attempt < max_retries
50
53
  retry_with_instruction(messages, schema, model, provider: provider, attempt: attempt + 1, **opts)
51
54
  else
52
- { data: nil, error: "JSON parse failed: #{error.message}", raw: result&.dig(:content), valid: false }
55
+ raw = result.respond_to?(:content) ? result&.content : result&.dig(:content)
56
+ { data: nil, error: "JSON parse failed: #{error.message}", raw: raw, valid: false }
53
57
  end
54
58
  end
55
59
 
@@ -60,8 +64,11 @@ module Legion
60
64
  model: model, provider: provider, intent: nil, tier: nil,
61
65
  message: user_content, **opts.except(:attempt))
62
66
 
63
- parsed = Legion::JSON.load(result[:content])
64
- { data: parsed, raw: result[:content], model: result[:model], valid: true, retried: true }
67
+ retry_content = result.respond_to?(:content) ? result.content : result[:content]
68
+ retry_model = result.respond_to?(:model_id) ? result.model_id : result[:model]
69
+
70
+ parsed = Legion::JSON.load(retry_content)
71
+ { data: parsed, raw: retry_content, model: retry_model, valid: true, retried: true }
65
72
  rescue StandardError => e
66
73
  handle_exception(e, level: :warn)
67
74
  { data: nil, error: e.message, valid: false }
@@ -514,7 +514,7 @@ module Legion
514
514
  log.debug '[llm][inference] chat_single asking session'
515
515
  response = block ? session.ask(message, &block) : session.ask(message)
516
516
  log.debug "[llm][inference] chat_single response_class=#{response.class} response_nil=#{response.nil?}"
517
- emit_non_pipeline_metering(response, model: opts[:model], provider: opts[:provider])
517
+ emit_non_pipeline_metering(response, model: opts[:model], provider: opts[:provider], caller: kwargs[:caller])
518
518
 
519
519
  if response && !block && defined?(Quality::ShadowEval) && Quality::ShadowEval.enabled?
520
520
  msgs = session.respond_to?(:messages) ? session.messages : nil
@@ -712,14 +712,15 @@ module Legion
712
712
  esc.fetch(:quality_threshold, 50)
713
713
  end
714
714
 
715
- def emit_non_pipeline_metering(response, model:, provider:)
715
+ def emit_non_pipeline_metering(response, model:, provider:, caller: nil)
716
716
  return unless response
717
717
 
718
718
  input = response.respond_to?(:input_tokens) ? response.input_tokens.to_i : 0
719
719
  output = response.respond_to?(:output_tokens) ? response.output_tokens.to_i : 0
720
720
  Legion::LLM::Metering.emit(
721
721
  provider: provider, model_id: model, request_type: 'chat',
722
- tier: 'direct', input_tokens: input, output_tokens: output, total_tokens: input + output
722
+ tier: 'direct', input_tokens: input, output_tokens: output, total_tokens: input + output,
723
+ caller: caller
723
724
  )
724
725
  rescue StandardError => e
725
726
  handle_exception(e, level: :warn, operation: 'llm.inference.non_pipeline_metering')
@@ -65,6 +65,19 @@ module Legion
65
65
  h['x-legion-llm-model'] = model_val.to_s if model_val
66
66
  h['x-legion-llm-request-type'] = @options[:request_type].to_s if @options[:request_type]
67
67
  h['x-legion-llm-schema-version'] = '1.0.0'
68
+ h.merge(identity_headers)
69
+ end
70
+
71
+ def identity_headers
72
+ caller = @options[:caller]
73
+ return {} unless caller.is_a?(Hash)
74
+
75
+ rb = caller[:requested_by] || caller['requested_by'] || {}
76
+ h = {}
77
+ identity = rb[:identity] || rb['identity'] || rb[:username] || rb['username']
78
+ h['x-legion-identity'] = identity.to_s if identity
79
+ h['x-legion-credential'] = (rb[:credential] || rb['credential']).to_s if rb[:credential] || rb['credential']
80
+ h['x-legion-hostname'] = (rb[:hostname] || rb['hostname']).to_s if rb[:hostname] || rb['hostname']
68
81
  h
69
82
  end
70
83
 
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module LLM
5
- VERSION = '0.8.23'
5
+ VERSION = '0.8.25'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.23
4
+ version: 0.8.25
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity