legion-llm 0.8.23 → 0.8.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/lib/legion/llm/call/embeddings.rb +20 -2
- data/lib/legion/llm/call/structured_output.rb +12 -5
- data/lib/legion/llm/inference.rb +4 -3
- data/lib/legion/llm/transport/message.rb +13 -0
- data/lib/legion/llm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 6b37f926f357a862c036b2e3d4676b579ae505ba6f76b7b7da332ba3aeeed8ac
|
|
4
|
+
data.tar.gz: 59ab5db376ac8be2a45cda642dec1d22c8eda3e76782b9f2cd5689233bcc2063
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6994e774e6a0551c720ef0a2c7a5aa4681f324c9e8c09a624ca2e227565cd87ca26f1457ed108967ae3e17593e6a5ee8df7fdf20515aa0ccdd277f2eb16db827
|
|
7
|
+
data.tar.gz: 815d14a99f379c2d655341776507e96be1625582b4fd4989597c4c299a1086be394dbad785b4b33e2d1423419a37ea37392ee617fa75fbb88208dac6d6864bce
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,18 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.8.25] - 2026-04-24
|
|
4
|
+
|
|
5
|
+
### Fixed
|
|
6
|
+
- `StructuredOutput.generate`, `handle_parse_error`, and `retry_with_instruction` used hash-style access (`result[:content]`, `result[:model]`) on the return value of `chat_single`, but `chat_single` returns a `RubyLLM::Message` object which only supports method access (`.content`, `.model_id`). All four access sites now use `respond_to?` duck-typing so both hash and Message objects work. Visible as `undefined method '[]' for an instance of RubyLLM::Message` in Apollo's `llm_detects_conflict?` and any structured output caller using non-schema-capable models (e.g. ollama/qwen).
|
|
7
|
+
- `Call::Embeddings.generate` crashed with `NoMethodError` on `.size` when `response.vectors` was a flat array (`[0.007, ...]`) instead of nested (`[[0.007, ...]]`). RubyLLM's OpenAI provider unwraps single-input embedding responses. Added `normalize_vectors_first` to detect and handle both flat and nested vector formats before dimension enforcement.
|
|
8
|
+
|
|
9
|
+
## [0.8.24] - 2026-04-23
|
|
10
|
+
|
|
11
|
+
### Fixed
|
|
12
|
+
- All AMQP transport messages (audit, metering, tool, escalation) now include identity headers (`x-legion-identity`, `x-legion-credential`, `x-legion-hostname`) extracted from the `caller` field. Previously only prompt audit events carried identity in the body — tool audit and metering messages had no identity at all.
|
|
13
|
+
- Embedding metering events now include `caller` context.
|
|
14
|
+
- Non-pipeline `chat_single` metering events now include `caller` context from kwargs.
|
|
15
|
+
|
|
3
16
|
## [0.8.23] - 2026-04-23
|
|
4
17
|
|
|
5
18
|
### Fixed
|
data/lib/legion/llm/call/embeddings.rb
CHANGED
|
@@ -27,7 +27,8 @@ module Legion
|
|
|
27
27
|
|
|
28
28
|
response = RubyLLM.embed(text, **build_opts(model, provider, dimensions))
|
|
29
29
|
emit_embedding_metering(provider: provider, model: model, tokens: response.input_tokens)
|
|
30
|
-
vector =
|
|
30
|
+
vector = normalize_vectors_first(response.vectors)
|
|
31
|
+
vector = apply_dimension_enforcement(vector, provider)
|
|
31
32
|
return dimension_error(model, provider, vector) if vector.is_a?(String)
|
|
32
33
|
|
|
33
34
|
{ vector: vector, model: model, provider: provider, dimensions: vector&.size || 0, tokens: response.input_tokens }
|
|
@@ -101,6 +102,16 @@ module Legion
|
|
|
101
102
|
opts
|
|
102
103
|
end
|
|
103
104
|
|
|
105
|
+
# Reduces the vectors payload of an embedding response to a single vector.
#
# The payload may be nested (`[[0.007, ...]]`) or flat (`[0.007, ...]`); both
# shapes are accepted so the caller can run dimension enforcement on one vector.
#
# @param vectors [Array, nil] the `response.vectors` value
# @return [Object, nil] the flat array itself when its first element is Numeric,
#   otherwise the first element of the payload; nil when there is nothing to
#   extract (nil, empty, or a value that cannot yield a first element)
def normalize_vectors_first(vectors)
  # nil or a bare scalar carries no vector; bail out instead of raising
  # NoMethodError on `.first`.
  return nil unless vectors.respond_to?(:first)

  # An empty payload yields nil here and falls through to the final return.
  first = vectors.first

  # A flat array of numbers is already the vector itself.
  return vectors if vectors.is_a?(Array) && first.is_a?(Numeric)

  first
end
|
|
114
|
+
|
|
104
115
|
def apply_dimension_enforcement(vector, provider)
|
|
105
116
|
return vector unless enforce_dimension? && vector.is_a?(Array)
|
|
106
117
|
|
|
@@ -462,9 +473,16 @@ module Legion
|
|
|
462
473
|
end
|
|
463
474
|
|
|
464
475
|
def emit_embedding_metering(provider:, model:, tokens:)
|
|
476
|
+
caller = begin
|
|
477
|
+
Legion::LLM.settings[:caller]
|
|
478
|
+
rescue StandardError => e
|
|
479
|
+
handle_exception(e, level: :debug, operation: 'llm.embeddings.metering.caller')
|
|
480
|
+
nil
|
|
481
|
+
end
|
|
465
482
|
Legion::LLM::Metering.emit(
|
|
466
483
|
provider: provider, model_id: model, request_type: 'embed',
|
|
467
|
-
tier: 'cloud', input_tokens: tokens.to_i, output_tokens: 0, total_tokens: tokens.to_i
|
|
484
|
+
tier: 'cloud', input_tokens: tokens.to_i, output_tokens: 0, total_tokens: tokens.to_i,
|
|
485
|
+
caller: caller
|
|
468
486
|
)
|
|
469
487
|
rescue StandardError => e
|
|
470
488
|
handle_exception(e, level: :warn, operation: 'llm.embeddings.metering')
|
data/lib/legion/llm/call/structured_output.rb
CHANGED
|
@@ -15,8 +15,11 @@ module Legion
|
|
|
15
15
|
result = call_with_schema(messages, schema, model, provider: provider, **)
|
|
16
16
|
log.info "[llm][structured_output] model=#{model} provider=#{provider} valid=true"
|
|
17
17
|
|
|
18
|
-
|
|
19
|
-
|
|
18
|
+
content = result.respond_to?(:content) ? result.content : result[:content]
|
|
19
|
+
raw_model = result.respond_to?(:model_id) ? result.model_id : result[:model]
|
|
20
|
+
|
|
21
|
+
parsed = Legion::JSON.load(content)
|
|
22
|
+
{ data: parsed, raw: content, model: raw_model, valid: true }
|
|
20
23
|
rescue ::JSON::ParserError => e
|
|
21
24
|
log.warn "[llm][structured_output] model=#{model} provider=#{provider} parse_error=#{e.message}"
|
|
22
25
|
handle_parse_error(e, messages, schema, model, provider, result, **)
|
|
@@ -49,7 +52,8 @@ module Legion
|
|
|
49
52
|
if retry_enabled? && attempt < max_retries
|
|
50
53
|
retry_with_instruction(messages, schema, model, provider: provider, attempt: attempt + 1, **opts)
|
|
51
54
|
else
|
|
52
|
-
|
|
55
|
+
raw = result.respond_to?(:content) ? result&.content : result&.dig(:content)
|
|
56
|
+
{ data: nil, error: "JSON parse failed: #{error.message}", raw: raw, valid: false }
|
|
53
57
|
end
|
|
54
58
|
end
|
|
55
59
|
|
|
@@ -60,8 +64,11 @@ module Legion
|
|
|
60
64
|
model: model, provider: provider, intent: nil, tier: nil,
|
|
61
65
|
message: user_content, **opts.except(:attempt))
|
|
62
66
|
|
|
63
|
-
|
|
64
|
-
|
|
67
|
+
retry_content = result.respond_to?(:content) ? result.content : result[:content]
|
|
68
|
+
retry_model = result.respond_to?(:model_id) ? result.model_id : result[:model]
|
|
69
|
+
|
|
70
|
+
parsed = Legion::JSON.load(retry_content)
|
|
71
|
+
{ data: parsed, raw: retry_content, model: retry_model, valid: true, retried: true }
|
|
65
72
|
rescue StandardError => e
|
|
66
73
|
handle_exception(e, level: :warn)
|
|
67
74
|
{ data: nil, error: e.message, valid: false }
|
data/lib/legion/llm/inference.rb
CHANGED
|
@@ -514,7 +514,7 @@ module Legion
|
|
|
514
514
|
log.debug '[llm][inference] chat_single asking session'
|
|
515
515
|
response = block ? session.ask(message, &block) : session.ask(message)
|
|
516
516
|
log.debug "[llm][inference] chat_single response_class=#{response.class} response_nil=#{response.nil?}"
|
|
517
|
-
emit_non_pipeline_metering(response, model: opts[:model], provider: opts[:provider])
|
|
517
|
+
emit_non_pipeline_metering(response, model: opts[:model], provider: opts[:provider], caller: kwargs[:caller])
|
|
518
518
|
|
|
519
519
|
if response && !block && defined?(Quality::ShadowEval) && Quality::ShadowEval.enabled?
|
|
520
520
|
msgs = session.respond_to?(:messages) ? session.messages : nil
|
|
@@ -712,14 +712,15 @@ module Legion
|
|
|
712
712
|
esc.fetch(:quality_threshold, 50)
|
|
713
713
|
end
|
|
714
714
|
|
|
715
|
-
def emit_non_pipeline_metering(response, model:, provider:)
|
|
715
|
+
def emit_non_pipeline_metering(response, model:, provider:, caller: nil)
|
|
716
716
|
return unless response
|
|
717
717
|
|
|
718
718
|
input = response.respond_to?(:input_tokens) ? response.input_tokens.to_i : 0
|
|
719
719
|
output = response.respond_to?(:output_tokens) ? response.output_tokens.to_i : 0
|
|
720
720
|
Legion::LLM::Metering.emit(
|
|
721
721
|
provider: provider, model_id: model, request_type: 'chat',
|
|
722
|
-
tier: 'direct', input_tokens: input, output_tokens: output, total_tokens: input + output
|
|
722
|
+
tier: 'direct', input_tokens: input, output_tokens: output, total_tokens: input + output,
|
|
723
|
+
caller: caller
|
|
723
724
|
)
|
|
724
725
|
rescue StandardError => e
|
|
725
726
|
handle_exception(e, level: :warn, operation: 'llm.inference.non_pipeline_metering')
|
data/lib/legion/llm/transport/message.rb
CHANGED
|
@@ -65,6 +65,19 @@ module Legion
|
|
|
65
65
|
h['x-legion-llm-model'] = model_val.to_s if model_val
|
|
66
66
|
h['x-legion-llm-request-type'] = @options[:request_type].to_s if @options[:request_type]
|
|
67
67
|
h['x-legion-llm-schema-version'] = '1.0.0'
|
|
68
|
+
h.merge(identity_headers)
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
# Builds the identity-related message headers from the `:caller` option.
#
# Reads `@options[:caller]` and, within it, the `requested_by` entry (symbol or
# string key). Each header is emitted only when its source value is truthy, and
# every emitted value is stringified.
#
# @return [Hash{String => String}] any of `x-legion-identity` (from `identity`,
#   falling back to `username`), `x-legion-credential` and `x-legion-hostname`;
#   empty when `caller` or its `requested_by` entry is not a Hash
def identity_headers
  # Named caller_ctx rather than `caller` to avoid shadowing Kernel#caller.
  caller_ctx = @options[:caller]
  return {} unless caller_ctx.is_a?(Hash)

  requested_by = caller_ctx[:requested_by] || caller_ctx['requested_by']
  # A non-Hash value (e.g. a bare username String) would raise on symbol
  # indexing; treat it as "no identity available", like a non-Hash caller.
  return {} unless requested_by.is_a?(Hash)

  # Accept both symbol and string keys.
  lookup = ->(key) { requested_by[key] || requested_by[key.to_s] }

  {
    'x-legion-identity' => lookup.call(:identity) || lookup.call(:username),
    'x-legion-credential' => lookup.call(:credential),
    'x-legion-hostname' => lookup.call(:hostname)
  }.select { |_name, value| value }.transform_values(&:to_s)
end
|
|
70
83
|
|
data/lib/legion/llm/version.rb
CHANGED