legion-llm 0.9.19 → 0.9.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4743dd41922fbca3818f72bb48d353314ed2895ce0981e779ac29315c8ffea3b
4
- data.tar.gz: a235df9596b11ddfd94ef5f075a9785f4bce0ae9c849db8b0b5845bde83af4ac
3
+ metadata.gz: 428a14e141f5cbbb278e05f49fd198ef13f6e789727037c90154a855b76a8b34
4
+ data.tar.gz: 8dc2aea0cd776675aad1c8ff198b35f0eba573e4a37c6e2bcdc0b6dfbbb7210b
5
5
  SHA512:
6
- metadata.gz: 1c02e4859ef4bd824e854275fcbb1eadfe243b13477c9af9a9f2f3c484579eefa10bc70d0b1735c85b433b476ca9a8dd69b5fa788cdeafc651dcc370f71cfc40
7
- data.tar.gz: 9f3ae0f1adba6bbe56653f0afce38c0eaa0dd4121b02279f5d9053be84682774f07401e346a855320c1bc006929d8ca184c88896098cd52697869c9b8d9f4630
6
+ metadata.gz: 3b9f1b9fae5371eefcbfbc89262bfa422e23df0e2e52d56735c5f3af9912b7245883ae4864568b6d8828e2dfdc3ab8c3d9fd4f125f662d6f8ae51602976d9952
7
+ data.tar.gz: dcdbf11006d26b929779bdb0e2ae8a541225b3a62c820dd027ef6801198a5056eb1d9a93e7cd504846b11b2196c187d7a415e263592864c3eae5ace4153b31ee
data/CHANGELOG.md CHANGED
@@ -1,5 +1,39 @@
1
1
  # Legion LLM Changelog
2
2
 
3
+ ## [0.9.22] - 2026-05-12
4
+
5
+ ### Added
6
+ - Pin `legion_list_special_tools` before client and registry tools so models can inspect Legion special tools and the current `Legion::Settings::Extensions` inventory.
7
+ - Surface special Ruby runtime execution with current process/PATH environment metadata, and add Legion-managed Python and pip tools when `legionio setup python` is available.
8
+
9
+ ### Changed
10
+ - Route Python command interception through the same Legion Python runtime detection used by special tool injection.
11
+ - Replace ad hoc `/api/llm/inference` tool-payload debug prints with structured debug logging.
12
+
13
+ ### Fixed
14
+ - Chunk Ollama embedding requests according to configured model context limits and aggregate chunk vectors so large Apollo knowledge-capture documents do not exceed provider context windows.
15
+
16
+ ## [0.9.21] - 2026-05-12
17
+
18
+ ### Fixed
19
+ - Route metering strictly through `legion-transport`, dropping events when transport is unavailable instead of writing metric events to `Legion::Data::Spool`.
20
+ - Keep override confidence database access read-only by removing `Legion::Data::Local` upserts from `legion-llm`.
21
+ - Stop conversation history and sticky state from writing directly to `Legion::Data` tables.
22
+
23
+ ## [0.9.20] - 2026-05-12
24
+
25
+ ### Added
26
+ - Added `llm.gaia.advisory_enabled`, defaulting to `true`, so GAIA pre-request advisory shaping can be disabled without code changes.
27
+
28
+ ### Fixed
29
+ - Preserve accumulated streamed native tool-call arguments from lex-llm provider responses instead of rebuilding final responses from partial stream chunks.
30
+ - Symbolize extension tool arguments before invoking runner keyword methods so JSON string keys such as `chat_id` satisfy Ruby keyword parameters.
31
+ - Match tool triggers from `Legion::Settings::Extensions` registry entries and keep registry tools injectable alongside client tools with better diagnostics.
32
+ - Skip trigger matching cleanly when `Legion::Settings::Extensions` is not loaded instead of warning through a rescued `NameError`.
33
+ - Accumulate only stream fallback state in the lex-llm adapter instead of retaining every streamed chunk when providers return final messages.
34
+ - Apply explicit vLLM tool-name forcing only on the first native tool-loop round, allowing follow-up automatic tool calls after the requested tool returns.
35
+ - Ignore absent GAIA advisory context-window limits when sizing RAG retrieval instead of routing nil through debug exception handling.
36
+
3
37
  ## [0.9.19] - 2026-05-11
4
38
 
5
39
  ### Added
@@ -43,6 +43,11 @@ module Legion
43
43
 
44
44
  tools = raw_tools || []
45
45
  validate_tools!(tools) unless tools.empty?
46
+ raw_tool_count = raw_tools.is_a?(Array) ? raw_tools.size : 0
47
+ log.debug(
48
+ "[llm][api][tools] action=request_tools_received request_id=#{request_id} " \
49
+ "has_tools=#{body.key?(:tools)} raw_tools_class=#{raw_tools&.class} raw_tools_count=#{raw_tool_count}"
50
+ )
46
51
 
47
52
  caller_identity = identity_canonical_name(env)
48
53
  last_user = messages.select { |m| (m[:role] || m['role']).to_s == 'user' }.last
@@ -250,6 +250,14 @@ module Legion
250
250
  ext = Registry.for(provider, instance: instance)
251
251
  return ext if ext
252
252
 
253
+ if instance && instance.to_s != 'default'
254
+ ext = Registry.for(provider, instance: :default)
255
+ if ext
256
+ log.warn("[llm][native] instance_fallback provider=#{provider} requested=#{instance} using=default")
257
+ return ext
258
+ end
259
+ end
260
+
253
261
  instance_suffix = instance ? "/#{instance}" : ''
254
262
  log.error("[llm][native] provider_not_registered provider=#{provider}#{instance_suffix}")
255
263
  raise Legion::LLM::ProviderError,
@@ -296,7 +304,6 @@ module Legion
296
304
 
297
305
  tool_calls = normalize_tool_calls(raw[:tool_calls] || raw['tool_calls'] || raw[:tools] || raw['tools'] || result)
298
306
  stop_reason = raw[:stop_reason] || raw['stop_reason'] || (tool_calls.any? ? :tool_use : nil)
299
-
300
307
  {
301
308
  result: result,
302
309
  model: raw[:model] || raw['model'],
@@ -24,11 +24,13 @@ module Legion
24
24
  return unavailable_result(model, provider) unless provider
25
25
 
26
26
  model ||= resolve_model
27
- text_length = text.to_s.length
28
- text = apply_prefix(coerce_text(text), model: model, task: task)
27
+ text = coerce_text(text)
28
+ text_length = text.length
29
+ prepared_texts = prepare_embedding_texts(text, provider: provider, model: model, task: task)
30
+ dispatch_text = prepared_texts.one? ? prepared_texts.first : prepared_texts
29
31
 
30
32
  log.info("[llm][embed] action=generate provider=#{provider} instance=#{instance || 'default'} " \
31
- "model=#{model} task=#{task} text_chars=#{text_length}")
33
+ "model=#{model} task=#{task} text_chars=#{text_length} chunks=#{prepared_texts.size}")
32
34
 
33
35
  started_at = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
34
36
  response = Dispatch.call(
@@ -36,24 +38,29 @@ module Legion
36
38
  instance: instance,
37
39
  capability: :embed,
38
40
  model: model,
39
- text: text,
41
+ text: dispatch_text,
40
42
  dimensions: dimensions
41
43
  )
42
44
  elapsed = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - started_at) * 1000).round(1)
43
45
 
44
- vector = normalize_vector(response[:result])
46
+ vector = if prepared_texts.size > 1
47
+ aggregate_vectors(response[:result], weights: prepared_texts.map(&:length), model: model, provider: provider)
48
+ else
49
+ normalize_vector(response[:result])
50
+ end
45
51
  vector = enforce_dimensions(vector) if enforce_dimension?
46
52
  tokens = extract_tokens(response)
47
53
 
48
54
  log.info("[llm][embed] action=generate.complete provider=#{provider} instance=#{instance || 'default'} " \
49
- "model=#{model} dimensions=#{vector&.size || 0} tokens=#{tokens} duration_ms=#{elapsed}")
55
+ "model=#{model} dimensions=#{vector&.size || 0} tokens=#{tokens} chunks=#{prepared_texts.size} duration_ms=#{elapsed}")
50
56
 
51
57
  {
52
58
  vector: vector,
53
59
  model: model,
54
60
  provider: provider,
55
61
  dimensions: vector&.size || 0,
56
- tokens: tokens
62
+ tokens: tokens,
63
+ chunks: prepared_texts.size
57
64
  }
58
65
  rescue StandardError => e
59
66
  handle_exception(e, level: :warn, operation: 'llm.embeddings.generate')
@@ -70,7 +77,20 @@ module Legion
70
77
  log.info("[llm][embed] action=generate_batch provider=#{provider} instance=#{instance || 'default'} " \
71
78
  "model=#{model} count=#{texts.size} task=#{task}")
72
79
 
73
- texts = texts.map { |t| apply_prefix(coerce_text(t), model: model, task: task) }
80
+ raw_texts = texts.map { |t| coerce_text(t) }
81
+ prepared_texts = raw_texts.map { |t| prepare_embedding_texts(t, provider: provider, model: model, task: task) }
82
+ if prepared_texts.any? { |chunks| chunks.size > 1 }
83
+ return generate_chunked_batch(
84
+ raw_texts,
85
+ model: model,
86
+ provider: provider,
87
+ instance: instance,
88
+ dimensions: dimensions,
89
+ task: task
90
+ )
91
+ end
92
+
93
+ texts = prepared_texts.map(&:first)
74
94
 
75
95
  started_at = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
76
96
  response = Dispatch.call(
@@ -122,11 +142,71 @@ module Legion
122
142
  end
123
143
 
124
144
  def apply_prefix(text, model:, task:)
125
- base = model.to_s.split(':').first
126
- prefix = PREFIX_REGISTRY.dig(base, task)
145
+ prefix = prefix_for(model, task)
127
146
  prefix ? "#{prefix}#{text}" : text
128
147
  end
129
148
 
149
+ def prepare_embedding_texts(text, provider:, model:, task:)
150
+ prefix = prefix_for(model, task).to_s
151
+ chunks = chunk_text(text, embedding_chunk_chars(provider: provider, model: model, prefix: prefix))
152
+ chunks.map { |chunk| prefix.empty? ? chunk : "#{prefix}#{chunk}" }
153
+ end
154
+
155
+ def prefix_for(model, task)
156
+ registry = Legion::LLM::Settings.value(:embedding, :prefix_registry, default: PREFIX_REGISTRY)
157
+ model_prefixes = Legion::LLM::Settings.config_value(registry, model_base(model), {})
158
+ Legion::LLM::Settings.config_value(model_prefixes, task)
159
+ end
160
+
161
+ def embedding_chunk_chars(provider:, model:, prefix:)
162
+ return nil unless provider.to_s == 'ollama'
163
+
164
+ embedding = Legion::LLM::Settings.value(:embedding, default: {})
165
+ context_chars = Legion::LLM::Settings.config_value(embedding, :ollama_context_chars, {})
166
+ limit = Legion::LLM::Settings.config_value(context_chars, model.to_s) ||
167
+ Legion::LLM::Settings.config_value(context_chars, model_base(model)) ||
168
+ Legion::LLM::Settings.config_value(embedding, :ollama_default_context_chars)
169
+ limit = limit.to_i
170
+ return nil unless limit.positive?
171
+
172
+ [limit - prefix.length, 1].max
173
+ end
174
+
175
+ def chunk_text(text, max_chars)
176
+ return [text] unless max_chars.to_i.positive?
177
+ return [text] if text.length <= max_chars
178
+
179
+ chunks = []
180
+ remaining = text.dup
181
+ until remaining.empty?
182
+ chunk, remaining = next_text_chunk(remaining, max_chars)
183
+ chunks << chunk unless chunk.empty?
184
+ end
185
+ chunks
186
+ end
187
+
188
+ def next_text_chunk(text, max_chars)
189
+ return [text, ''] if text.length <= max_chars
190
+
191
+ slice = text[0, max_chars]
192
+ boundary = chunk_boundary(slice, max_chars)
193
+ chunk = text[0, boundary].strip
194
+ remaining = text[boundary..].to_s.strip
195
+ [chunk.empty? ? text[0, max_chars] : chunk, remaining]
196
+ end
197
+
198
+ def chunk_boundary(slice, max_chars)
199
+ candidates = [slice.rindex("\n\n"), slice.rindex("\n"), slice.rindex('. '), slice.rindex(' ')]
200
+ boundary = candidates.compact.max
201
+ return max_chars unless boundary && boundary >= (max_chars * 0.5)
202
+
203
+ boundary + 1
204
+ end
205
+
206
+ def model_base(model)
207
+ model.to_s.split(':').first
208
+ end
209
+
130
210
  def normalize_vector(result)
131
211
  return nil if result.nil?
132
212
  return result if result.is_a?(Array) && result.first.is_a?(Numeric)
@@ -145,6 +225,39 @@ module Legion
145
225
  end
146
226
  end
147
227
 
228
+ def aggregate_vectors(result, weights:, model:, provider:)
229
+ vectors = normalize_batch(result, model, provider).map { |entry| entry[:vector] }
230
+ usable = vectors.each_with_index.filter_map do |vector, index|
231
+ next unless vector.is_a?(Array) && vector.first.is_a?(Numeric)
232
+
233
+ [vector, [weights[index].to_i, 1].max]
234
+ end
235
+ return nil if usable.empty?
236
+
237
+ dimensions = usable.first.first.size
238
+ usable.select! { |vector, _weight| vector.size == dimensions }
239
+ total_weight = usable.sum { |_vector, weight| weight }.to_f
240
+ Array.new(dimensions) do |index|
241
+ usable.sum { |vector, weight| vector[index].to_f * weight } / total_weight
242
+ end
243
+ end
244
+
245
+ def generate_chunked_batch(texts, model:, provider:, instance:, dimensions:, task:)
246
+ log.info("[llm][embed] action=generate_batch.chunked provider=#{provider} instance=#{instance || 'default'} " \
247
+ "model=#{model} count=#{texts.size}")
248
+
249
+ texts.each_with_index.map do |text, index|
250
+ generate(
251
+ text: text,
252
+ model: model,
253
+ provider: provider,
254
+ instance: instance,
255
+ dimensions: dimensions,
256
+ task: task
257
+ ).merge(index: index)
258
+ end
259
+ end
260
+
148
261
  def enforce_dimension?
149
262
  Legion::LLM::Settings.value(:embedding, :enforce_dimension) != false
150
263
  end
@@ -35,8 +35,8 @@ module Legion
35
35
  end
36
36
 
37
37
  def stream(model:, messages:, **opts, &block)
38
- chunks = []
39
- provider.stream_chat(
38
+ accumulator = build_stream_accumulator
39
+ response = provider.stream_chat(
40
40
  messages: normalize_messages(messages, system: opts[:system]),
41
41
  tools: normalize_tools(opts[:tools]),
42
42
  temperature: opts[:temperature],
@@ -47,11 +47,15 @@ module Legion
47
47
  tool_prefs: opts[:tool_prefs],
48
48
  model: model_info(model, offering_metadata: opts[:offering_metadata])
49
49
  ) do |chunk|
50
- chunks << chunk
50
+ accumulate_stream_chunk(accumulator, chunk)
51
51
  block&.call(chunk)
52
52
  end
53
53
 
54
- chunk_response(chunks, offering_metadata: opts[:offering_metadata])
54
+ if response
55
+ message_response(response, offering_metadata: opts[:offering_metadata])
56
+ else
57
+ chunk_response(accumulator, offering_metadata: opts[:offering_metadata])
58
+ end
55
59
  end
56
60
 
57
61
  def embed(model:, text:, dimensions: nil, **opts)
@@ -158,8 +162,8 @@ module Legion
158
162
  message_hash = normalize_hash(message)
159
163
  message_class.new(
160
164
  role: message_hash[:role] || :user,
161
- content: message_hash[:content].to_s,
162
- tool_calls: message_hash[:tool_calls],
165
+ content: normalize_message_content(message_hash[:content]),
166
+ tool_calls: normalize_message_tool_calls(message_hash[:tool_calls]),
163
167
  tool_call_id: message_hash[:tool_call_id]
164
168
  )
165
169
  end
@@ -222,6 +226,47 @@ module Legion
222
226
  { role: :user, content: value }
223
227
  end
224
228
 
229
+ def normalize_message_content(content)
230
+ return content if content.nil? || content.is_a?(String)
231
+ return content if content.respond_to?(:attachments)
232
+
233
+ if content.is_a?(Array)
234
+ text_parts = content.filter_map { |part| text_part_content(part) }
235
+ return text_parts.join("\n\n") unless text_parts.empty?
236
+ end
237
+
238
+ text_part_content(content) || content.to_s
239
+ end
240
+
241
+ def text_part_content(part)
242
+ return unless part.respond_to?(:transform_keys)
243
+
244
+ normalized = part.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
245
+ return unless normalized[:type].to_s == 'text'
246
+
247
+ normalized[:text].to_s
248
+ end
249
+
250
+ def normalize_message_tool_calls(tool_calls)
251
+ return tool_calls unless tool_calls.is_a?(Array)
252
+
253
+ tool_calls.filter_map do |tool_call|
254
+ normalized = normalize_hash(tool_call)
255
+ name = normalized[:name]
256
+ next if name.to_s.empty?
257
+
258
+ arguments = normalized[:arguments] || {}
259
+ [
260
+ name.to_sym,
261
+ lex_llm_namespace::ToolCall.new(
262
+ id: normalized[:id],
263
+ name: name.to_s,
264
+ arguments: arguments
265
+ )
266
+ ]
267
+ end.to_h
268
+ end
269
+
225
270
  def message_response(response, offering_metadata: nil)
226
271
  {
227
272
  result: response.content,
@@ -234,19 +279,52 @@ module Legion
234
279
  }.compact
235
280
  end
236
281
 
237
- def chunk_response(chunks, offering_metadata: nil)
238
- last = chunks.reverse.find { |chunk| chunk.respond_to?(:input_tokens) }
239
- tool_calls = chunks.filter_map { |chunk| chunk.tool_calls if chunk.respond_to?(:tool_calls) }.reduce({}) do |memo, calls|
240
- memo.merge(calls || {})
241
- end
282
+ def build_stream_accumulator
242
283
  {
243
- result: chunks.filter_map(&:content).join,
244
- model: last&.model_id,
284
+ content: +'',
285
+ model: nil,
286
+ usage: {},
287
+ raw: nil,
288
+ tool_calls: {},
289
+ thinking_text: +'',
290
+ thinking_signature: nil
291
+ }
292
+ end
293
+
294
+ def accumulate_stream_chunk(accumulator, chunk)
295
+ accumulator[:content] << chunk.content.to_s if chunk.respond_to?(:content) && !chunk.content.nil?
296
+ accumulate_stream_usage(accumulator, chunk)
297
+ accumulator[:tool_calls].merge!(chunk.tool_calls || {}) if chunk.respond_to?(:tool_calls)
298
+ accumulate_stream_thinking(accumulator, chunk)
299
+ end
300
+
301
+ def accumulate_stream_usage(accumulator, chunk)
302
+ return unless chunk.respond_to?(:input_tokens)
303
+
304
+ accumulator[:model] = chunk.model_id if chunk.respond_to?(:model_id)
305
+ accumulator[:usage] = usage_hash(chunk)
306
+ accumulator[:raw] = chunk.raw if chunk.respond_to?(:raw)
307
+ end
308
+
309
+ def accumulate_stream_thinking(accumulator, chunk)
310
+ return unless chunk.respond_to?(:thinking)
311
+
312
+ thinking = normalize_thinking_value(chunk.thinking)
313
+ content = thinking[:content]
314
+ accumulator[:thinking_text] << content.to_s unless content.nil?
315
+ accumulator[:thinking_signature] ||= thinking[:signature]
316
+ end
317
+
318
+ def chunk_response(accumulator, offering_metadata: nil)
319
+ tool_calls = accumulator[:tool_calls]
320
+ {
321
+ result: accumulator[:content],
322
+ model: accumulator[:model],
245
323
  tool_calls: tool_calls.empty? ? nil : tool_calls,
246
324
  stop_reason: tool_calls.empty? ? nil : :tool_use,
247
- thinking: stream_thinking_hash(chunks),
248
- usage: last ? usage_hash(last) : {},
249
- metadata: response_metadata(last, offering_metadata: offering_metadata)
325
+ thinking: stream_thinking_hash(accumulator),
326
+ usage: accumulator[:usage],
327
+ metadata: response_metadata(accumulator[:raw], offering_metadata: offering_metadata)
250
328
  }.compact
251
329
  end
252
330
 
@@ -284,15 +362,11 @@ module Legion
284
362
  }
285
363
  end
286
364
 
287
- def stream_thinking_hash(chunks)
288
- thinking_parts = chunks.filter_map do |chunk|
289
- normalize_thinking_value(chunk.thinking) if chunk.respond_to?(:thinking)
290
- end
291
- thinking_text = thinking_parts.filter_map { |part| part[:content] }.join
292
- signature = thinking_parts.find { |part| part[:signature] }&.dig(:signature)
365
+ def stream_thinking_hash(accumulator)
366
+ thinking_text = accumulator[:thinking_text]
293
367
  return nil if thinking_text.empty?
294
368
 
295
- { content: thinking_text, signature: signature, enabled: true }.compact
369
+ { content: thinking_text, signature: accumulator[:thinking_signature], enabled: true }.compact
296
370
  end
297
371
 
298
372
  def thinking_hash(response)
@@ -325,7 +399,8 @@ module Legion
325
399
 
326
400
  def response_metadata(response = nil, offering_metadata: nil)
327
401
  metadata = normalize_offering_metadata(offering_metadata)
328
- raw = response.respond_to?(:raw) ? response.raw : nil
402
+ raw = response.is_a?(Hash) ? response : nil
403
+ raw ||= response.raw if response.respond_to?(:raw)
329
404
  metadata[:raw_model] = raw['model'] if raw.is_a?(Hash) && raw['model']
330
405
  metadata.empty? ? {} : { offering: metadata }
331
406
  end