legion-llm 0.5.15 → 0.5.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/lib/legion/llm/pipeline/executor.rb +10 -2
- data/lib/legion/llm/routes.rb +84 -59
- data/lib/legion/llm/version.rb +1 -1
- data/lib/legion/llm.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2dea674b5405be2c2863f1c6dd568f21ec8baad8db42eeaa457cd6dcdc881bc8
|
|
4
|
+
data.tar.gz: ee16678e6be6bc612d906bdd754d7e3db79f803c52b465fe3fb2ed762812aa20
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0a743021a3a3540290cfc4ea3c119fdc42bbba38eb5115b2883fefc6a4da0bceda04c45136c72d233559d9de53c41af198bfa057eed5579fc345121fade8cd74
|
|
7
|
+
data.tar.gz: 58ba674f0aa898bd75895bfaeb93b5f31bf2aa7f6dc0dbbc8d3f0afcb96cbfcabb80b68798c2c8758f8df8726c9bfd86a567e1e531728f45f91af96b53e15e7b
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.5.16] - 2026-03-28
|
|
4
|
+
|
|
5
|
+
### Fixed
|
|
6
|
+
- `POST /api/llm/inference` endpoint now routes through the 18-step pipeline when `pipeline_enabled?` is true — previously it created a bare `RubyLLM` session and called `session.ask` directly, bypassing RAG (step 8), GAIA advisory (step 7), knowledge capture (step 19), billing, and classification
|
|
7
|
+
- `POST /api/llm/chat` sync fallback path now routes through the pipeline — previously it called `session.ask` on a bare session, bypassing the pipeline in the same way as the inference endpoint
|
|
8
|
+
- `_dispatch_chat` pipeline gate now fires when either a `message:` string or a `messages:` array is present — `Legion::LLM.chat(messages: [...])` was silently falling through to the legacy path even with `pipeline_enabled: true`
|
|
9
|
+
- `Pipeline::Executor#step_provider_call` and `#step_provider_call_stream` now inject prior messages via `session.add_message` before the final `ask` — multi-turn conversations passed as a `messages:` array now correctly preserve history at the provider level
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
- `spec/legion/llm/pipeline/executor_multi_turn_spec.rb`: specs verifying prior-message injection in single-turn, multi-turn, two-message, and streaming cases
|
|
13
|
+
- `spec/legion/llm/routes_inference_spec.rb`: specs verifying that `Legion::LLM.chat(messages: [...])` routes through the pipeline, carries tracing/timeline, handles multi-turn history, passes tool classes, and falls back gracefully when pipeline is disabled
|
|
14
|
+
|
|
3
15
|
## [0.5.15] - 2026-03-28
|
|
4
16
|
|
|
5
17
|
### Added
|
|
@@ -166,7 +166,11 @@ module Legion
|
|
|
166
166
|
)
|
|
167
167
|
session.with_instructions(injected_system) if injected_system
|
|
168
168
|
|
|
169
|
-
|
|
169
|
+
messages = @request.messages
|
|
170
|
+
prior = messages.size > 1 ? messages[0..-2] : []
|
|
171
|
+
prior.each { |m| session.add_message(m) }
|
|
172
|
+
|
|
173
|
+
message_content = messages.last&.dig(:content)
|
|
170
174
|
@raw_response = message_content ? session.ask(message_content) : session
|
|
171
175
|
|
|
172
176
|
@timestamps[:provider_end] = Time.now
|
|
@@ -228,7 +232,11 @@ module Legion
|
|
|
228
232
|
(@request.tools || []).each { |tool| session.with_tool(tool) if tool.is_a?(Class) }
|
|
229
233
|
ToolRegistry.tools.each { |t| session.with_tool(t) } if defined?(ToolRegistry)
|
|
230
234
|
|
|
231
|
-
|
|
235
|
+
messages = @request.messages
|
|
236
|
+
prior = messages.size > 1 ? messages[0..-2] : []
|
|
237
|
+
prior.each { |m| session.add_message(m) }
|
|
238
|
+
|
|
239
|
+
message_content = messages.last&.dig(:content)
|
|
232
240
|
@raw_response = session.ask(message_content, &)
|
|
233
241
|
|
|
234
242
|
@timestamps[:provider_end] = Time.now
|
data/lib/legion/llm/routes.rb
CHANGED
|
@@ -244,21 +244,41 @@ module Legion
|
|
|
244
244
|
json_response({ request_id: request_id, poll_key: "llm:#{request_id}:status" },
|
|
245
245
|
status_code: 202)
|
|
246
246
|
else
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
{
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
247
|
+
result = Legion::LLM.chat(message: message, model: model, provider: provider,
|
|
248
|
+
caller: { source: 'api', path: request.path })
|
|
249
|
+
if result.is_a?(Legion::LLM::Pipeline::Response)
|
|
250
|
+
raw_msg = result.message
|
|
251
|
+
content = raw_msg.is_a?(Hash) ? (raw_msg[:content] || raw_msg['content']) : raw_msg.to_s
|
|
252
|
+
routing = result.routing || {}
|
|
253
|
+
resolved_model = routing[:model] || routing['model']
|
|
254
|
+
tokens = result.tokens || {}
|
|
255
|
+
Legion::Logging.info "API: LLM chat request #{request_id} completed sync model=#{resolved_model}" if defined?(Legion::Logging)
|
|
256
|
+
json_response(
|
|
257
|
+
{
|
|
258
|
+
response: content,
|
|
259
|
+
meta: {
|
|
260
|
+
model: resolved_model.to_s,
|
|
261
|
+
tokens_in: tokens[:input],
|
|
262
|
+
tokens_out: tokens[:output]
|
|
263
|
+
}
|
|
264
|
+
},
|
|
265
|
+
status_code: 201
|
|
266
|
+
)
|
|
267
|
+
else
|
|
268
|
+
response = result
|
|
269
|
+
Legion::Logging.info "API: LLM chat request #{request_id} completed sync" if defined?(Legion::Logging)
|
|
270
|
+
json_response(
|
|
271
|
+
{
|
|
272
|
+
response: response.respond_to?(:content) ? response.content : response.to_s,
|
|
273
|
+
meta: {
|
|
274
|
+
model: response.respond_to?(:model_id) ? response.model_id.to_s : model.to_s,
|
|
275
|
+
tokens_in: response.respond_to?(:input_tokens) ? response.input_tokens : nil,
|
|
276
|
+
tokens_out: response.respond_to?(:output_tokens) ? response.output_tokens : nil
|
|
277
|
+
}
|
|
278
|
+
},
|
|
279
|
+
status_code: 201
|
|
280
|
+
)
|
|
281
|
+
end
|
|
262
282
|
end
|
|
263
283
|
end
|
|
264
284
|
end
|
|
@@ -288,19 +308,14 @@ module Legion
|
|
|
288
308
|
|
|
289
309
|
tools = raw_tools || []
|
|
290
310
|
|
|
291
|
-
|
|
292
|
-
model: model,
|
|
293
|
-
provider: provider,
|
|
294
|
-
caller: { source: 'api', path: request.path }
|
|
295
|
-
)
|
|
296
|
-
|
|
311
|
+
tool_declarations = []
|
|
297
312
|
unless tools.empty?
|
|
298
313
|
validate_tools!(tools)
|
|
299
314
|
|
|
300
315
|
tool_declarations = tools.map do |t|
|
|
301
316
|
ts = t.respond_to?(:transform_keys) ? t.transform_keys(&:to_sym) : t
|
|
302
|
-
tname
|
|
303
|
-
tdesc
|
|
317
|
+
tname = ts[:name].to_s
|
|
318
|
+
tdesc = ts[:description].to_s
|
|
304
319
|
tparams = ts[:parameters] || {}
|
|
305
320
|
Class.new do
|
|
306
321
|
define_singleton_method(:tool_name) { tname }
|
|
@@ -309,45 +324,55 @@ module Legion
|
|
|
309
324
|
define_method(:call) { |**_| raise NotImplementedError, "#{tname} executes client-side only" }
|
|
310
325
|
end
|
|
311
326
|
end
|
|
312
|
-
session.with_tools(*tool_declarations)
|
|
313
327
|
end
|
|
314
328
|
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
duped = messages.dup
|
|
320
|
-
duped.delete_at(idx)
|
|
321
|
-
duped
|
|
322
|
-
else
|
|
323
|
-
messages
|
|
324
|
-
end
|
|
325
|
-
else
|
|
326
|
-
messages
|
|
327
|
-
end
|
|
328
|
-
prior_messages.each { |m| session.add_message(m) }
|
|
329
|
-
|
|
330
|
-
prompt = (last_user || {})[:content] || (last_user || {})['content'] || ''
|
|
331
|
-
response = session.ask(prompt)
|
|
332
|
-
|
|
333
|
-
tc_list = if response.respond_to?(:tool_calls) && response.tool_calls
|
|
334
|
-
Array(response.tool_calls).map do |tc|
|
|
335
|
-
{
|
|
336
|
-
id: tc.respond_to?(:id) ? tc.id : nil,
|
|
337
|
-
name: tc.respond_to?(:name) ? tc.name : tc.to_s,
|
|
338
|
-
arguments: tc.respond_to?(:arguments) ? tc.arguments : {}
|
|
339
|
-
}
|
|
340
|
-
end
|
|
341
|
-
end
|
|
329
|
+
normalized_messages = messages.map do |m|
|
|
330
|
+
ms = m.respond_to?(:transform_keys) ? m.transform_keys(&:to_sym) : m
|
|
331
|
+
{ role: ms[:role].to_s, content: ms[:content].to_s }
|
|
332
|
+
end
|
|
342
333
|
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
334
|
+
result = Legion::LLM.chat(
|
|
335
|
+
messages: normalized_messages,
|
|
336
|
+
model: model,
|
|
337
|
+
provider: provider,
|
|
338
|
+
tools: tool_declarations,
|
|
339
|
+
caller: { source: 'api', path: request.path }
|
|
340
|
+
)
|
|
341
|
+
|
|
342
|
+
if result.is_a?(Legion::LLM::Pipeline::Response)
|
|
343
|
+
raw_msg = result.message
|
|
344
|
+
content = raw_msg.is_a?(Hash) ? (raw_msg[:content] || raw_msg['content']) : raw_msg.to_s
|
|
345
|
+
routing = result.routing || {}
|
|
346
|
+
resolved_model = routing[:model] || routing['model']
|
|
347
|
+
tokens = result.tokens || {}
|
|
348
|
+
json_response({
|
|
349
|
+
content: content,
|
|
350
|
+
tool_calls: nil,
|
|
351
|
+
stop_reason: result.stop&.dig(:reason)&.to_s,
|
|
352
|
+
model: resolved_model.to_s,
|
|
353
|
+
input_tokens: tokens[:input],
|
|
354
|
+
output_tokens: tokens[:output]
|
|
355
|
+
}, status_code: 200)
|
|
356
|
+
else
|
|
357
|
+
response = result
|
|
358
|
+
tc_list = if response.respond_to?(:tool_calls) && response.tool_calls
|
|
359
|
+
Array(response.tool_calls).map do |tc|
|
|
360
|
+
{
|
|
361
|
+
id: tc.respond_to?(:id) ? tc.id : nil,
|
|
362
|
+
name: tc.respond_to?(:name) ? tc.name : tc.to_s,
|
|
363
|
+
arguments: tc.respond_to?(:arguments) ? tc.arguments : {}
|
|
364
|
+
}
|
|
365
|
+
end
|
|
366
|
+
end
|
|
367
|
+
json_response({
|
|
368
|
+
content: response.respond_to?(:content) ? response.content : response.to_s,
|
|
369
|
+
tool_calls: tc_list,
|
|
370
|
+
stop_reason: response.respond_to?(:stop_reason) ? response.stop_reason : nil,
|
|
371
|
+
model: response.respond_to?(:model_id) ? response.model_id.to_s : model.to_s,
|
|
372
|
+
input_tokens: response.respond_to?(:input_tokens) ? response.input_tokens : nil,
|
|
373
|
+
output_tokens: response.respond_to?(:output_tokens) ? response.output_tokens : nil
|
|
374
|
+
}, status_code: 200)
|
|
375
|
+
end
|
|
351
376
|
rescue StandardError => e
|
|
352
377
|
Legion::Logging.error "[api/llm/inference] #{e.class}: #{e.message}" if defined?(Legion::Logging)
|
|
353
378
|
json_error('inference_error', e.message, status_code: 500)
|
data/lib/legion/llm/version.rb
CHANGED
data/lib/legion/llm.rb
CHANGED
|
@@ -230,7 +230,7 @@ module Legion
|
|
|
230
230
|
end
|
|
231
231
|
|
|
232
232
|
def _dispatch_chat(model:, provider:, intent:, tier:, escalate:, max_escalations:, quality_check:, message:, **kwargs, &)
|
|
233
|
-
if pipeline_enabled? && message
|
|
233
|
+
if pipeline_enabled? && (message || kwargs[:messages])
|
|
234
234
|
return chat_via_pipeline(model: model, provider: provider, intent: intent, tier: tier,
|
|
235
235
|
message: message, escalate: escalate, max_escalations: max_escalations,
|
|
236
236
|
quality_check: quality_check, **kwargs, &)
|