legion-llm 0.9.19 → 0.9.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +58 -0
- data/lib/legion/llm/api/native/helpers.rb +20 -0
- data/lib/legion/llm/api/native/inference.rb +17 -3
- data/lib/legion/llm/api/native/providers.rb +4 -1
- data/lib/legion/llm/call/dispatch.rb +8 -1
- data/lib/legion/llm/call/embeddings.rb +123 -10
- data/lib/legion/llm/call/lex_llm_adapter.rb +99 -24
- data/lib/legion/llm/call/providers.rb +7 -1
- data/lib/legion/llm/discovery.rb +23 -2
- data/lib/legion/llm/inference/conversation.rb +17 -291
- data/lib/legion/llm/inference/executor.rb +82 -48
- data/lib/legion/llm/inference/native_tool_loop.rb +149 -0
- data/lib/legion/llm/inference/steps/gaia_advisory.rb +4 -0
- data/lib/legion/llm/inference/steps/rag_context.rb +2 -0
- data/lib/legion/llm/inference/steps/sticky_runners.rb +11 -1
- data/lib/legion/llm/inference/steps/tool_discovery.rb +2 -1
- data/lib/legion/llm/inference/steps/trigger_match.rb +85 -15
- data/lib/legion/llm/inventory.rb +16 -5
- data/lib/legion/llm/metering.rb +116 -42
- data/lib/legion/llm/router/health_tracker.rb +38 -0
- data/lib/legion/llm/router.rb +60 -6
- data/lib/legion/llm/settings.rb +9 -2
- data/lib/legion/llm/tools/confidence.rb +1 -25
- data/lib/legion/llm/tools/dispatcher.rb +8 -1
- data/lib/legion/llm/tools/interceptors/python_venv.rb +13 -5
- data/lib/legion/llm/tools/special.rb +325 -0
- data/lib/legion/llm/tools.rb +1 -0
- data/lib/legion/llm/version.rb +1 -1
- data/lib/legion/llm.rb +1 -0
- metadata +3 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 8c8c98a439d2e96bba437e5e8b4bf8c47c01277a4079bd459c7257e2990278c6
|
|
4
|
+
data.tar.gz: f9344c761ebf18b4c5ab271ac8cb5858ce46f791588ace438726002d2907c70e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9574535d0eeca84d522858dd323e8d028994b46b3d3f78a37a8094a4f1a692fbdd68bf24e8a061160b5238a2c3e4f73141e29bf70c6423f18e4b4441937f5417
|
|
7
|
+
data.tar.gz: 69aa8eccf10beb687b637b7442d9eb8a7bae0d42405fc9cfd47f8c8d5c036b7724df6cb55c10d8264f0701f46f8282f0a44d8622d607a6129a09ab4c39ad2e99
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,63 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.9.23] - 2026-05-13
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- Router: `registry_entry_for_provider` for explicit provider model resolution
|
|
7
|
+
- Router: model denylist (`deny_model`, `model_denied?`, `excluded_by_denial?`) — config errors auto-deny models
|
|
8
|
+
- Executor: config error detection (`CONFIG_ERROR_PATTERNS`) — prevents circuit breaker trips on auth/validation errors
|
|
9
|
+
- Executor: step timing hash on response (`metrics.timing`, `metrics.latency_legionio_ms`)
|
|
10
|
+
- API: `/api/llm/inference` response includes `provider`, `instance`, `tier`, `metrics`
|
|
11
|
+
- API: `/api/llm/providers` surfaces `source` and `credential_fingerprint`
|
|
12
|
+
- Inventory: provider-scoped queries skip unrelated providers
|
|
13
|
+
- Metering: disk-based JSONL spool when transport unavailable (was dropping events)
|
|
14
|
+
- Discovery: `report_discovery_failure` reports connection failures to health tracker
|
|
15
|
+
- Providers: `enabled: false` instances not registered; `default_model` in metadata
|
|
16
|
+
|
|
17
|
+
### Changed
|
|
18
|
+
- Router: tier-aware model fallback — global default no longer bleeds across providers
|
|
19
|
+
- Inventory: single-source offerings (native_provider preferred over discovery to eliminate duplicates)
|
|
20
|
+
- Inventory: dedup normalizes `"default"` instance name
|
|
21
|
+
- Discovery: concise connection error log (no stacktrace for unreachable providers)
|
|
22
|
+
- Settings: removed `claude` from `native_providers` list
|
|
23
|
+
|
|
24
|
+
### Fixed
|
|
25
|
+
- Cache spec rewritten to use real `Legion::Cache` instead of fragile stubs
|
|
26
|
+
|
|
27
|
+
## [0.9.22] - 2026-05-12
|
|
28
|
+
|
|
29
|
+
### Added
|
|
30
|
+
- Pin `legion_list_special_tools` before client and registry tools so models can inspect Legion special tools and the current `Legion::Settings::Extensions` inventory.
|
|
31
|
+
- Surface special Ruby runtime execution with current process/PATH environment metadata, and add Legion-managed Python and pip tools when `legionio setup python` is available.
|
|
32
|
+
|
|
33
|
+
### Changed
|
|
34
|
+
- Route Python command interception through the same Legion Python runtime detection used by special tool injection.
|
|
35
|
+
- Replace ad hoc `/api/llm/inference` tool-payload debug prints with structured debug logging.
|
|
36
|
+
|
|
37
|
+
### Fixed
|
|
38
|
+
- Chunk Ollama embedding requests according to configured model context limits and aggregate chunk vectors so large Apollo knowledge-capture documents do not exceed provider context windows.
|
|
39
|
+
|
|
40
|
+
## [0.9.21] - 2026-05-12
|
|
41
|
+
|
|
42
|
+
### Fixed
|
|
43
|
+
- Route metering strictly through `legion-transport`, dropping events when transport is unavailable instead of writing metric events to `Legion::Data::Spool`.
|
|
44
|
+
- Keep override confidence database access read-only by removing `Legion::Data::Local` upserts from `legion-llm`.
|
|
45
|
+
- Stop conversation history and sticky state from writing directly to `Legion::Data` tables.
|
|
46
|
+
|
|
47
|
+
## [0.9.20] - 2026-05-12
|
|
48
|
+
|
|
49
|
+
### Added
|
|
50
|
+
- Added `llm.gaia.advisory_enabled`, defaulting to `true`, so GAIA pre-request advisory shaping can be disabled without code changes.
|
|
51
|
+
|
|
52
|
+
### Fixed
|
|
53
|
+
- Preserve accumulated streamed native tool-call arguments from lex-llm provider responses instead of rebuilding final responses from partial stream chunks.
|
|
54
|
+
- Symbolize extension tool arguments before invoking runner keyword methods so JSON string keys such as `chat_id` satisfy Ruby keyword parameters.
|
|
55
|
+
- Match tool triggers from `Legion::Settings::Extensions` registry entries and keep registry tools injectable alongside client tools with better diagnostics.
|
|
56
|
+
- Skip trigger matching cleanly when `Legion::Settings::Extensions` is not loaded instead of warning through a rescued `NameError`.
|
|
57
|
+
- Accumulate only stream fallback state in the lex-llm adapter instead of retaining every streamed chunk when providers return final messages.
|
|
58
|
+
- Apply explicit vLLM tool-name forcing only on the first native tool-loop round, allowing follow-up automatic tool calls after the requested tool returns.
|
|
59
|
+
- Ignore absent GAIA advisory context-window limits when sizing RAG retrieval instead of routing nil through debug exception handling.
|
|
60
|
+
|
|
3
61
|
## [0.9.19] - 2026-05-11
|
|
4
62
|
|
|
5
63
|
### Added
|
|
@@ -498,6 +498,26 @@ module Legion
|
|
|
498
498
|
|
|
499
499
|
nil
|
|
500
500
|
end
|
|
501
|
+
|
|
502
|
+
# Builds the optional `metrics` section of an inference response.
#
# Reads `routing[:latency_ms]` and `timestamps[:step_timings]` from the
# pipeline response. When step timings are a non-empty Hash they are exposed
# as `:timing`, and, if `:total` is positive, `:latency_legionio_ms` is set to
# the total minus the `:provider_call` and `:tool_calls` timings.
#
# @param pipeline_response [#routing, #timestamps] pipeline result object
# @return [Hash, nil] the metrics hash, or nil when nothing was collected
define_method(:build_response_metrics) do |pipeline_response|
  route_info = pipeline_response.routing || {}
  stamps = pipeline_response.timestamps || {}
  collected = {}

  provider_latency = route_info[:latency_ms]
  collected[:latency_ms] = provider_latency if provider_latency

  timings = stamps[:step_timings]
  if timings.is_a?(Hash) && !timings.empty?
    collected[:timing] = timings
    overall = timings[:total].to_i
    outside = timings[:provider_call].to_i + timings[:tool_calls].to_i
    # Only meaningful when an overall duration was actually recorded.
    collected[:latency_legionio_ms] = overall - outside if overall.positive?
  end

  collected unless collected.empty?
end
|
|
501
521
|
end
|
|
502
522
|
|
|
503
523
|
log.debug('[llm][api][helpers] shared helpers registered')
|
|
@@ -43,6 +43,11 @@ module Legion
|
|
|
43
43
|
|
|
44
44
|
tools = raw_tools || []
|
|
45
45
|
validate_tools!(tools) unless tools.empty?
|
|
46
|
+
raw_tool_count = raw_tools.is_a?(Array) ? raw_tools.size : 0
|
|
47
|
+
log.debug(
|
|
48
|
+
"[llm][api][tools] action=request_tools_received request_id=#{request_id} " \
|
|
49
|
+
"has_tools=#{body.key?(:tools)} raw_tools_class=#{raw_tools&.class} raw_tools_count=#{raw_tool_count}"
|
|
50
|
+
)
|
|
46
51
|
|
|
47
52
|
caller_identity = identity_canonical_name(env)
|
|
48
53
|
last_user = messages.select { |m| (m[:role] || m['role']).to_s == 'user' }.last
|
|
@@ -179,11 +184,15 @@ module Legion
|
|
|
179
184
|
request_id: request_id,
|
|
180
185
|
content: full_text,
|
|
181
186
|
model: (routing[:model] || routing['model']).to_s,
|
|
187
|
+
provider: (routing[:provider] || routing['provider'])&.to_s,
|
|
188
|
+
instance: (routing[:instance] || routing['instance'])&.to_s,
|
|
189
|
+
tier: (routing[:tier] || routing['tier'])&.to_s,
|
|
182
190
|
input_tokens: token_value(tokens, :input),
|
|
183
191
|
output_tokens: token_value(tokens, :output),
|
|
184
192
|
tool_calls: extract_tool_calls(pipeline_response),
|
|
185
|
-
conversation_id: pipeline_response.conversation_id
|
|
186
|
-
|
|
193
|
+
conversation_id: pipeline_response.conversation_id,
|
|
194
|
+
metrics: build_response_metrics(pipeline_response)
|
|
195
|
+
}.compact
|
|
187
196
|
done_payload[:thinking] = pipeline_response.thinking if include_thinking && pipeline_response.thinking
|
|
188
197
|
emit_sse_event(out, 'done', {
|
|
189
198
|
**done_payload
|
|
@@ -232,11 +241,16 @@ module Legion
|
|
|
232
241
|
tool_calls: tool_calls,
|
|
233
242
|
stop_reason: pipeline_response.stop&.dig(:reason)&.to_s,
|
|
234
243
|
model: (routing[:model] || routing['model']).to_s,
|
|
244
|
+
provider: (routing[:provider] || routing['provider'])&.to_s,
|
|
245
|
+
instance: (routing[:instance] || routing['instance'])&.to_s,
|
|
246
|
+
tier: (routing[:tier] || routing['tier'])&.to_s,
|
|
235
247
|
input_tokens: token_value(tokens, :input),
|
|
236
248
|
output_tokens: token_value(tokens, :output),
|
|
237
|
-
conversation_id: pipeline_response.conversation_id
|
|
249
|
+
conversation_id: pipeline_response.conversation_id,
|
|
250
|
+
metrics: build_response_metrics(pipeline_response)
|
|
238
251
|
}
|
|
239
252
|
payload[:thinking] = pipeline_response.thinking if include_thinking && pipeline_response.thinking
|
|
253
|
+
payload.compact!
|
|
240
254
|
json_response(payload, status_code: 200)
|
|
241
255
|
end
|
|
242
256
|
rescue Legion::LLM::AuthError => e
|
|
@@ -87,7 +87,7 @@ module Legion
|
|
|
87
87
|
provider_key = entry[:provider].to_sym
|
|
88
88
|
instance_key = entry[:instance].to_sym
|
|
89
89
|
|
|
90
|
-
{
|
|
90
|
+
result = {
|
|
91
91
|
provider: entry[:provider].to_s,
|
|
92
92
|
instance: entry[:instance].to_s,
|
|
93
93
|
tier: entry.dig(:metadata, :tier)&.to_s,
|
|
@@ -102,6 +102,9 @@ module Legion
|
|
|
102
102
|
end,
|
|
103
103
|
native: true
|
|
104
104
|
}
|
|
105
|
+
result[:source] = entry.dig(:metadata, :source) if entry.dig(:metadata, :source)
|
|
106
|
+
result[:credential_fingerprint] = entry.dig(:metadata, :credential_fingerprint) if entry.dig(:metadata, :credential_fingerprint)
|
|
107
|
+
result
|
|
105
108
|
end
|
|
106
109
|
end
|
|
107
110
|
end
|
|
@@ -250,6 +250,14 @@ module Legion
|
|
|
250
250
|
ext = Registry.for(provider, instance: instance)
|
|
251
251
|
return ext if ext
|
|
252
252
|
|
|
253
|
+
if instance && instance.to_s != 'default'
|
|
254
|
+
ext = Registry.for(provider, instance: :default)
|
|
255
|
+
if ext
|
|
256
|
+
log.warn("[llm][native] instance_fallback provider=#{provider} requested=#{instance} using=default")
|
|
257
|
+
return ext
|
|
258
|
+
end
|
|
259
|
+
end
|
|
260
|
+
|
|
253
261
|
instance_suffix = instance ? "/#{instance}" : ''
|
|
254
262
|
log.error("[llm][native] provider_not_registered provider=#{provider}#{instance_suffix}")
|
|
255
263
|
raise Legion::LLM::ProviderError,
|
|
@@ -296,7 +304,6 @@ module Legion
|
|
|
296
304
|
|
|
297
305
|
tool_calls = normalize_tool_calls(raw[:tool_calls] || raw['tool_calls'] || raw[:tools] || raw['tools'] || result)
|
|
298
306
|
stop_reason = raw[:stop_reason] || raw['stop_reason'] || (tool_calls.any? ? :tool_use : nil)
|
|
299
|
-
|
|
300
307
|
{
|
|
301
308
|
result: result,
|
|
302
309
|
model: raw[:model] || raw['model'],
|
|
@@ -24,11 +24,13 @@ module Legion
|
|
|
24
24
|
return unavailable_result(model, provider) unless provider
|
|
25
25
|
|
|
26
26
|
model ||= resolve_model
|
|
27
|
-
|
|
28
|
-
|
|
27
|
+
text = coerce_text(text)
|
|
28
|
+
text_length = text.length
|
|
29
|
+
prepared_texts = prepare_embedding_texts(text, provider: provider, model: model, task: task)
|
|
30
|
+
dispatch_text = prepared_texts.one? ? prepared_texts.first : prepared_texts
|
|
29
31
|
|
|
30
32
|
log.info("[llm][embed] action=generate provider=#{provider} instance=#{instance || 'default'} " \
|
|
31
|
-
"model=#{model} task=#{task} text_chars=#{text_length}")
|
|
33
|
+
"model=#{model} task=#{task} text_chars=#{text_length} chunks=#{prepared_texts.size}")
|
|
32
34
|
|
|
33
35
|
started_at = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
|
|
34
36
|
response = Dispatch.call(
|
|
@@ -36,24 +38,29 @@ module Legion
|
|
|
36
38
|
instance: instance,
|
|
37
39
|
capability: :embed,
|
|
38
40
|
model: model,
|
|
39
|
-
text:
|
|
41
|
+
text: dispatch_text,
|
|
40
42
|
dimensions: dimensions
|
|
41
43
|
)
|
|
42
44
|
elapsed = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - started_at) * 1000).round(1)
|
|
43
45
|
|
|
44
|
-
vector =
|
|
46
|
+
vector = if prepared_texts.size > 1
|
|
47
|
+
aggregate_vectors(response[:result], weights: prepared_texts.map(&:length), model: model, provider: provider)
|
|
48
|
+
else
|
|
49
|
+
normalize_vector(response[:result])
|
|
50
|
+
end
|
|
45
51
|
vector = enforce_dimensions(vector) if enforce_dimension?
|
|
46
52
|
tokens = extract_tokens(response)
|
|
47
53
|
|
|
48
54
|
log.info("[llm][embed] action=generate.complete provider=#{provider} instance=#{instance || 'default'} " \
|
|
49
|
-
"model=#{model} dimensions=#{vector&.size || 0} tokens=#{tokens} duration_ms=#{elapsed}")
|
|
55
|
+
"model=#{model} dimensions=#{vector&.size || 0} tokens=#{tokens} chunks=#{prepared_texts.size} duration_ms=#{elapsed}")
|
|
50
56
|
|
|
51
57
|
{
|
|
52
58
|
vector: vector,
|
|
53
59
|
model: model,
|
|
54
60
|
provider: provider,
|
|
55
61
|
dimensions: vector&.size || 0,
|
|
56
|
-
tokens: tokens
|
|
62
|
+
tokens: tokens,
|
|
63
|
+
chunks: prepared_texts.size
|
|
57
64
|
}
|
|
58
65
|
rescue StandardError => e
|
|
59
66
|
handle_exception(e, level: :warn, operation: 'llm.embeddings.generate')
|
|
@@ -70,7 +77,20 @@ module Legion
|
|
|
70
77
|
log.info("[llm][embed] action=generate_batch provider=#{provider} instance=#{instance || 'default'} " \
|
|
71
78
|
"model=#{model} count=#{texts.size} task=#{task}")
|
|
72
79
|
|
|
73
|
-
|
|
80
|
+
raw_texts = texts.map { |t| coerce_text(t) }
|
|
81
|
+
prepared_texts = raw_texts.map { |t| prepare_embedding_texts(t, provider: provider, model: model, task: task) }
|
|
82
|
+
if prepared_texts.any? { |chunks| chunks.size > 1 }
|
|
83
|
+
return generate_chunked_batch(
|
|
84
|
+
raw_texts,
|
|
85
|
+
model: model,
|
|
86
|
+
provider: provider,
|
|
87
|
+
instance: instance,
|
|
88
|
+
dimensions: dimensions,
|
|
89
|
+
task: task
|
|
90
|
+
)
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
texts = prepared_texts.map(&:first)
|
|
74
94
|
|
|
75
95
|
started_at = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
|
|
76
96
|
response = Dispatch.call(
|
|
@@ -122,11 +142,71 @@ module Legion
|
|
|
122
142
|
end
|
|
123
143
|
|
|
124
144
|
def apply_prefix(text, model:, task:)
|
|
125
|
-
|
|
126
|
-
prefix = PREFIX_REGISTRY.dig(base, task)
|
|
145
|
+
prefix = prefix_for(model, task)
|
|
127
146
|
prefix ? "#{prefix}#{text}" : text
|
|
128
147
|
end
|
|
129
148
|
|
|
149
|
+
# Splits +text+ into provider-sized chunks and prepends the task prefix
# (if any) to every chunk.
#
# The chunk budget comes from embedding_chunk_chars, which receives the
# prefix so its length can be accounted for.
#
# @return [Array<String>] one entry per chunk, each carrying the prefix
def prepare_embedding_texts(text, provider:, model:, task:)
  lead = prefix_for(model, task).to_s
  budget = embedding_chunk_chars(provider: provider, model: model, prefix: lead)
  pieces = chunk_text(text, budget)

  if lead.empty?
    pieces.map(&:itself)
  else
    pieces.map { |piece| "#{lead}#{piece}" }
  end
end
|
|
154
|
+
|
|
155
|
+
# Looks up the embedding prefix configured for a model family and task.
#
# The registry is read from the `embedding.prefix_registry` setting, with
# PREFIX_REGISTRY as the default; the model is reduced to its base name via
# model_base before the lookup.
#
# @return [Object, nil] whatever the registry holds for (model base, task)
def prefix_for(model, task)
  settings = Legion::LLM::Settings
  registry = settings.value(:embedding, :prefix_registry, default: PREFIX_REGISTRY)
  per_model = settings.config_value(registry, model_base(model), {})
  settings.config_value(per_model, task)
end
|
|
160
|
+
|
|
161
|
+
# Resolves the per-chunk character budget for an embedding request.
#
# Chunking is only configured for the 'ollama' provider. The limit is looked
# up, in order, under `ollama_context_chars` by full model name, then by
# model base name, then from `ollama_default_context_chars`.
#
# @return [Integer, nil] budget with the prefix length subtracted (never
#   below 1), or nil when the provider is not ollama or no positive limit
#   is configured
def embedding_chunk_chars(provider:, model:, prefix:)
  return nil if provider.to_s != 'ollama'

  settings = Legion::LLM::Settings
  embedding = settings.value(:embedding, default: {})
  per_model = settings.config_value(embedding, :ollama_context_chars, {})

  configured = settings.config_value(per_model, model.to_s)
  configured ||= settings.config_value(per_model, model_base(model))
  configured ||= settings.config_value(embedding, :ollama_default_context_chars)

  budget = configured.to_i
  return nil unless budget.positive?

  # Leave room for the prefix that is prepended to every chunk.
  usable = budget - prefix.length
  usable < 1 ? 1 : usable
end
|
|
174
|
+
|
|
175
|
+
# Splits +text+ into ordered pieces of at most +max_chars+ characters.
#
# @param text [String] text to split
# @param max_chars [Integer, nil] per-chunk character budget; nil or a
#   non-positive value disables chunking
# @return [Array<String>] +[text]+ unchanged when no split is needed,
#   otherwise the non-empty chunks in order
def chunk_text(text, max_chars)
  # Normalise once so the validated value is the one used for comparisons.
  limit = max_chars.to_i
  return [text] unless limit.positive?
  return [text] if text.length <= limit

  chunks = []
  remaining = text.dup
  until remaining.empty?
    chunk, remaining = next_text_chunk(remaining, limit)
    chunks << chunk unless chunk.empty?
  end
  chunks
end

# Cuts the next chunk off the front of +text+.
#
# @return [Array(String, String)] the chunk and the whitespace-stripped rest
def next_text_chunk(text, max_chars)
  return [text, ''] if text.length <= max_chars

  cut = chunk_boundary(text[0, max_chars], max_chars)
  # When everything before the preferred boundary is whitespace, fall back to
  # a hard cut at max_chars. The remainder must resume from the same position
  # so no characters appear in two consecutive chunks.
  cut = max_chars if text[0, cut].strip.empty?

  head = text[0, cut]
  chunk = head.strip
  [chunk.empty? ? head : chunk, text[cut..].to_s.strip]
end

# Picks where to cut inside +slice+ (the first +max_chars+ characters).
#
# Uses the right-most paragraph break, newline, ". " or space, provided it
# lies in the second half of the window; otherwise cuts at +max_chars+.
#
# @return [Integer] number of leading characters that form the chunk
def chunk_boundary(slice, max_chars)
  last_break = ["\n\n", "\n", '. ', ' '].filter_map { |separator| slice.rindex(separator) }.max
  return max_chars unless last_break && last_break >= (max_chars * 0.5)

  last_break + 1
end
|
|
205
|
+
|
|
206
|
+
# Returns the part of the model name before the first ':' (e.g. the name
# without a tag). Yields nil when the stringified model is empty.
def model_base(model)
  family, = model.to_s.split(':')
  family
end
|
|
209
|
+
|
|
130
210
|
def normalize_vector(result)
|
|
131
211
|
return nil if result.nil?
|
|
132
212
|
return result if result.is_a?(Array) && result.first.is_a?(Numeric)
|
|
@@ -145,6 +225,39 @@ module Legion
|
|
|
145
225
|
end
|
|
146
226
|
end
|
|
147
227
|
|
|
228
|
+
# Combines the per-chunk vectors of one document into a single vector using
# a weighted mean.
#
# Entries come from normalize_batch; only numeric arrays are used, each
# paired with the weight at the same index (minimum weight 1). Vectors whose
# length differs from the first usable vector are dropped.
#
# @param weights [Array<Integer>] one weight per chunk, by position
# @return [Array<Float>, nil] averaged vector, or nil when no vector is usable
def aggregate_vectors(result, weights:, model:, provider:)
  weighted = []
  normalize_batch(result, model, provider).map { |entry| entry[:vector] }.each_with_index do |candidate, position|
    next unless candidate.is_a?(Array) && candidate.first.is_a?(Numeric)

    weight = weights[position].to_i
    weighted << [candidate, weight < 1 ? 1 : weight]
  end
  return nil if weighted.empty?

  width = weighted.first.first.size
  weighted = weighted.select { |candidate, _weight| candidate.size == width }
  denominator = weighted.sum { |_candidate, weight| weight }.to_f

  (0...width).map do |axis|
    weighted.sum { |candidate, weight| candidate[axis].to_f * weight } / denominator
  end
end
|
|
244
|
+
|
|
245
|
+
# Batch path used when at least one text needs chunking: each text is sent
# through generate individually and tagged with its position in the batch.
#
# @return [Array<Hash>] one generate result per text, merged with `index:`
def generate_chunked_batch(texts, model:, provider:, instance:, dimensions:, task:)
  log.info("[llm][embed] action=generate_batch.chunked provider=#{provider} instance=#{instance || 'default'} " \
           "model=#{model} count=#{texts.size}")

  shared = { model: model, provider: provider, instance: instance, dimensions: dimensions, task: task }
  texts.each_with_index.map do |text, index|
    generate(text: text, **shared).merge(index: index)
  end
end
|
|
260
|
+
|
|
148
261
|
def enforce_dimension?
|
|
149
262
|
Legion::LLM::Settings.value(:embedding, :enforce_dimension) != false
|
|
150
263
|
end
|
|
@@ -35,8 +35,8 @@ module Legion
|
|
|
35
35
|
end
|
|
36
36
|
|
|
37
37
|
def stream(model:, messages:, **opts, &block)
|
|
38
|
-
|
|
39
|
-
provider.stream_chat(
|
|
38
|
+
accumulator = build_stream_accumulator
|
|
39
|
+
response = provider.stream_chat(
|
|
40
40
|
messages: normalize_messages(messages, system: opts[:system]),
|
|
41
41
|
tools: normalize_tools(opts[:tools]),
|
|
42
42
|
temperature: opts[:temperature],
|
|
@@ -47,11 +47,15 @@ module Legion
|
|
|
47
47
|
tool_prefs: opts[:tool_prefs],
|
|
48
48
|
model: model_info(model, offering_metadata: opts[:offering_metadata])
|
|
49
49
|
) do |chunk|
|
|
50
|
-
|
|
50
|
+
accumulate_stream_chunk(accumulator, chunk)
|
|
51
51
|
block&.call(chunk)
|
|
52
52
|
end
|
|
53
53
|
|
|
54
|
-
|
|
54
|
+
if response
|
|
55
|
+
message_response(response, offering_metadata: opts[:offering_metadata])
|
|
56
|
+
else
|
|
57
|
+
chunk_response(accumulator, offering_metadata: opts[:offering_metadata])
|
|
58
|
+
end
|
|
55
59
|
end
|
|
56
60
|
|
|
57
61
|
def embed(model:, text:, dimensions: nil, **opts)
|
|
@@ -158,8 +162,8 @@ module Legion
|
|
|
158
162
|
message_hash = normalize_hash(message)
|
|
159
163
|
message_class.new(
|
|
160
164
|
role: message_hash[:role] || :user,
|
|
161
|
-
content: message_hash[:content]
|
|
162
|
-
tool_calls: message_hash[:tool_calls],
|
|
165
|
+
content: normalize_message_content(message_hash[:content]),
|
|
166
|
+
tool_calls: normalize_message_tool_calls(message_hash[:tool_calls]),
|
|
163
167
|
tool_call_id: message_hash[:tool_call_id]
|
|
164
168
|
)
|
|
165
169
|
end
|
|
@@ -222,6 +226,47 @@ module Legion
|
|
|
222
226
|
{ role: :user, content: value }
|
|
223
227
|
end
|
|
224
228
|
|
|
229
|
+
# Reduces message content to something the provider message class accepts.
#
# nil, Strings and objects responding to `attachments` pass through
# untouched. An Array has its text parts joined with blank lines; anything
# else is treated as a single text part or, failing that, stringified.
def normalize_message_content(content)
  case content
  when nil, String then return content
  end
  return content if content.respond_to?(:attachments)

  if content.is_a?(Array)
    texts = content.map { |part| text_part_content(part) }.compact
    return texts.join("\n\n") if texts.any?
  end

  # Arrays without any text part also end up here and are stringified.
  text_part_content(content) || content.to_s
end
|
|
240
|
+
|
|
241
|
+
# Extracts the text of a `{ type: 'text', text: ... }` content part.
#
# Keys may be strings or symbols. Returns nil for anything that is not
# hash-like or whose type is not 'text'.
#
# @return [String, nil]
def text_part_content(part)
  return unless part.respond_to?(:transform_keys)

  fields = part.transform_keys { |k| k.respond_to?(:to_sym) ? k.to_sym : k }
  fields[:text].to_s if fields[:type].to_s == 'text'
end
|
|
249
|
+
|
|
250
|
+
# Converts an Array of tool-call hashes into a Hash of ToolCall objects
# keyed by the tool name as a Symbol. Non-Array input is returned as-is and
# entries without a name are skipped.
#
# NOTE(review): because the key is the tool name, two calls to the same tool
# in one message collapse into the later one — confirm this matches what the
# provider message class expects.
def normalize_message_tool_calls(tool_calls)
  return tool_calls unless tool_calls.is_a?(Array)

  tool_calls.each_with_object({}) do |raw_call, by_name|
    call = normalize_hash(raw_call)
    tool_name = call[:name]
    next if tool_name.to_s.empty?

    by_name[tool_name.to_sym] = lex_llm_namespace::ToolCall.new(
      id: call[:id],
      name: tool_name.to_s,
      arguments: call[:arguments] || {}
    )
  end
end
|
|
269
|
+
|
|
225
270
|
def message_response(response, offering_metadata: nil)
|
|
226
271
|
{
|
|
227
272
|
result: response.content,
|
|
@@ -234,19 +279,52 @@ module Legion
|
|
|
234
279
|
}.compact
|
|
235
280
|
end
|
|
236
281
|
|
|
237
|
-
def
|
|
238
|
-
last = chunks.reverse.find { |chunk| chunk.respond_to?(:input_tokens) }
|
|
239
|
-
tool_calls = chunks.filter_map { |chunk| chunk.tool_calls if chunk.respond_to?(:tool_calls) }.reduce({}) do |memo, calls|
|
|
240
|
-
memo.merge(calls || {})
|
|
241
|
-
end
|
|
282
|
+
def build_stream_accumulator
|
|
242
283
|
{
|
|
243
|
-
|
|
244
|
-
model:
|
|
284
|
+
content: +'',
|
|
285
|
+
model: nil,
|
|
286
|
+
usage: {},
|
|
287
|
+
raw: nil,
|
|
288
|
+
tool_calls: {},
|
|
289
|
+
thinking_text: +'',
|
|
290
|
+
thinking_signature: nil
|
|
291
|
+
}
|
|
292
|
+
end
|
|
293
|
+
|
|
294
|
+
# Folds one streamed chunk into the accumulator: appends its text content,
# records usage, merges its tool calls and appends thinking output.
def accumulate_stream_chunk(accumulator, chunk)
  carries_text = chunk.respond_to?(:content) && !chunk.content.nil?
  accumulator[:content] << chunk.content.to_s if carries_text

  accumulate_stream_usage(accumulator, chunk)

  if chunk.respond_to?(:tool_calls)
    accumulator[:tool_calls].merge!(chunk.tool_calls || {})
  end

  accumulate_stream_thinking(accumulator, chunk)
end
|
|
300
|
+
|
|
301
|
+
# Records model, usage and raw payload from a chunk that exposes token
# counts. Each qualifying chunk overwrites what was stored before, so the
# accumulator ends up holding the values of the last such chunk.
def accumulate_stream_usage(accumulator, chunk)
  if chunk.respond_to?(:input_tokens)
    accumulator.store(:model, chunk.model_id) if chunk.respond_to?(:model_id)
    accumulator.store(:usage, usage_hash(chunk))
    accumulator.store(:raw, chunk.raw) if chunk.respond_to?(:raw)
  end
end
|
|
308
|
+
|
|
309
|
+
# Appends a chunk's thinking text to the accumulator and remembers the first
# thinking signature seen.
#
# @param accumulator [Hash] must hold a mutable String under :thinking_text
# @param chunk [Object] streamed chunk; ignored unless it responds to `thinking`
def accumulate_stream_thinking(accumulator, chunk)
  return unless chunk.respond_to?(:thinking)

  thinking = normalize_thinking_value(chunk.thinking)
  # NOTE(review): normalize_thinking_value is defined outside this view. If it
  # yields nil for chunks without thinking output, indexing it would raise
  # mid-stream, so such chunks are skipped here.
  return unless thinking

  content = thinking[:content]
  accumulator[:thinking_text] << content.to_s unless content.nil?
  accumulator[:thinking_signature] ||= thinking[:signature]
end
|
|
317
|
+
|
|
318
|
+
def chunk_response(accumulator, offering_metadata: nil)
|
|
319
|
+
tool_calls = accumulator[:tool_calls]
|
|
320
|
+
{
|
|
321
|
+
result: accumulator[:content],
|
|
322
|
+
model: accumulator[:model],
|
|
245
323
|
tool_calls: tool_calls.empty? ? nil : tool_calls,
|
|
246
324
|
stop_reason: tool_calls.empty? ? nil : :tool_use,
|
|
247
|
-
thinking: stream_thinking_hash(
|
|
248
|
-
usage:
|
|
249
|
-
metadata: response_metadata(
|
|
325
|
+
thinking: stream_thinking_hash(accumulator),
|
|
326
|
+
usage: accumulator[:usage],
|
|
327
|
+
metadata: response_metadata(accumulator[:raw], offering_metadata: offering_metadata)
|
|
250
328
|
}.compact
|
|
251
329
|
end
|
|
252
330
|
|
|
@@ -284,15 +362,11 @@ module Legion
|
|
|
284
362
|
}
|
|
285
363
|
end
|
|
286
364
|
|
|
287
|
-
def stream_thinking_hash(
|
|
288
|
-
|
|
289
|
-
normalize_thinking_value(chunk.thinking) if chunk.respond_to?(:thinking)
|
|
290
|
-
end
|
|
291
|
-
thinking_text = thinking_parts.filter_map { |part| part[:content] }.join
|
|
292
|
-
signature = thinking_parts.find { |part| part[:signature] }&.dig(:signature)
|
|
365
|
+
def stream_thinking_hash(accumulator)
|
|
366
|
+
thinking_text = accumulator[:thinking_text]
|
|
293
367
|
return nil if thinking_text.empty?
|
|
294
368
|
|
|
295
|
-
{ content: thinking_text, signature:
|
|
369
|
+
{ content: thinking_text, signature: accumulator[:thinking_signature], enabled: true }.compact
|
|
296
370
|
end
|
|
297
371
|
|
|
298
372
|
def thinking_hash(response)
|
|
@@ -325,7 +399,8 @@ module Legion
|
|
|
325
399
|
|
|
326
400
|
def response_metadata(response = nil, offering_metadata: nil)
|
|
327
401
|
metadata = normalize_offering_metadata(offering_metadata)
|
|
328
|
-
raw = response.
|
|
402
|
+
raw = response.is_a?(Hash) ? response : nil
|
|
403
|
+
raw ||= response.raw if response.respond_to?(:raw)
|
|
329
404
|
metadata[:raw_model] = raw['model'] if raw.is_a?(Hash) && raw['model']
|
|
330
405
|
metadata.empty? ? {} : { offering: metadata }
|
|
331
406
|
end
|
|
@@ -80,6 +80,8 @@ module Legion
|
|
|
80
80
|
|
|
81
81
|
def register_provider_instance(provider_module, family, aliases, instance_id, config)
|
|
82
82
|
normalized_config = normalize_instance_config(config)
|
|
83
|
+
return if normalized_config[:enabled] == false
|
|
84
|
+
|
|
83
85
|
registry_config = adapter_instance_config(normalized_config, instance_id)
|
|
84
86
|
metadata = instance_metadata(normalized_config)
|
|
85
87
|
adapter = Call::LexLLMAdapter.new(family, provider_module.provider_class, instance_config: registry_config)
|
|
@@ -107,7 +109,11 @@ module Legion
|
|
|
107
109
|
end
|
|
108
110
|
|
|
109
111
|
def instance_metadata(config)
|
|
110
|
-
{ tier: config[:tier], capabilities: config[:capabilities] || [] }
|
|
112
|
+
meta = { tier: config[:tier], capabilities: config[:capabilities] || [] }
|
|
113
|
+
meta[:default_model] = config[:default_model] if config[:default_model]
|
|
114
|
+
meta[:source] = config[:source] if config[:source]
|
|
115
|
+
meta[:credential_fingerprint] = config[:credential_fingerprint] if config[:credential_fingerprint]
|
|
116
|
+
meta
|
|
111
117
|
end
|
|
112
118
|
|
|
113
119
|
def safe_provider_family(provider_module)
|
data/lib/legion/llm/discovery.rb
CHANGED
|
@@ -141,8 +141,7 @@ module Legion
|
|
|
141
141
|
}
|
|
142
142
|
end
|
|
143
143
|
rescue StandardError => e
|
|
144
|
-
|
|
145
|
-
operation: "discovery.offerings.#{entry[:provider]}/#{entry[:instance]}")
|
|
144
|
+
report_discovery_failure(entry, e)
|
|
146
145
|
[]
|
|
147
146
|
end
|
|
148
147
|
end
|
|
@@ -165,6 +164,28 @@ module Legion
|
|
|
165
164
|
|
|
166
165
|
private
|
|
167
166
|
|
|
167
|
+
# Handles a failure raised while discovering a provider instance's offerings.
#
# Connection-type failures get a single concise warning; every other error
# goes through handle_exception. Afterwards an :error signal is reported to
# Router.health_tracker when the Router is available.
#
# @param entry [Hash] registry entry; reads :provider and :instance
# @param error [StandardError] the exception rescued by the caller
def report_discovery_failure(entry, error)
  provider = entry[:provider]
  instance = entry[:instance]

  # This runs inside a rescue path, so it must not raise NameError itself
  # when Faraday has not been loaded.
  faraday_failure = defined?(Faraday::ConnectionFailed) && error.is_a?(Faraday::ConnectionFailed)
  connection_error = faraday_failure ||
                     error.message.to_s.match?(/connection refused|connect.*timeout|no route to host/i)

  if connection_error
    log.warn("[llm][discovery] provider=#{provider} instance=#{instance} unreachable: #{error.message}")
  else
    handle_exception(error, level: :warn, handled: true,
                            operation: "discovery.offerings.#{provider}/#{instance}")
  end

  return unless defined?(Router) && Router.respond_to?(:health_tracker)

  Router.health_tracker.report(
    provider: provider, instance: instance,
    signal: :error, value: 1,
    metadata: { reason: error.class.name, source: :discovery }
  )
end
|
|
188
|
+
|
|
168
189
|
def normalize_offering(offering)
|
|
169
190
|
data = if offering.is_a?(Hash)
|
|
170
191
|
offering
|