legion-llm 0.5.15 → 0.5.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 42d72ed366b2266f182b9a2e950d02c735956ff0b3ee51f712ed4686370b7274
4
- data.tar.gz: 9af0c1f6f15ecea5d029868fde0a51305307a5de14391cf31e7c85307cccdee0
3
+ metadata.gz: 2dea674b5405be2c2863f1c6dd568f21ec8baad8db42eeaa457cd6dcdc881bc8
4
+ data.tar.gz: ee16678e6be6bc612d906bdd754d7e3db79f803c52b465fe3fb2ed762812aa20
5
5
  SHA512:
6
- metadata.gz: 844583a7565f8bbc167f12330b51c32ba57b74802b23e624158f1afbaa3020dffd25e7324ad4ec19fe2ad0eaf2dccdfb6b6d8c673f06805f0b12f2613ac5f6f7
7
- data.tar.gz: 26ec627a507e4e8d14e8a9e6155bb852c44d36af9e44b60f02dc247e1d307801bffbdc486a54745894430453c1fde22902662b7253feb8d5c841b92971aa2326
6
+ metadata.gz: 0a743021a3a3540290cfc4ea3c119fdc42bbba38eb5115b2883fefc6a4da0bceda04c45136c72d233559d9de53c41af198bfa057eed5579fc345121fade8cd74
7
+ data.tar.gz: 58ba674f0aa898bd75895bfaeb93b5f31bf2aa7f6dc0dbbc8d3f0afcb96cbfcabb80b68798c2c8758f8df8726c9bfd86a567e1e531728f45f91af96b53e15e7b
data/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # Legion LLM Changelog
2
2
 
3
+ ## [0.5.16] - 2026-03-28
4
+
5
+ ### Fixed
6
+ - `POST /api/llm/inference` endpoint now routes through the 18-step pipeline when `pipeline_enabled?` is true — previously it created a bare `RubyLLM` session and called `session.ask` directly, bypassing RAG (step 8), GAIA advisory (step 7), knowledge capture (step 19), billing, and classification
7
+ - `POST /api/llm/chat` sync fallback path now routes through the pipeline — previously it also called `session.ask` directly on a bare session, bypassing the pipeline in the same way as the inference endpoint
8
+ - `_dispatch_chat` pipeline gate now fires when `messages:` array is present in addition to `message:` string — `Legion::LLM.chat(messages: [...])` was silently falling through to the legacy path even with `pipeline_enabled: true`
9
+ - `Pipeline::Executor#step_provider_call` and `#step_provider_call_stream` now inject prior messages via `session.add_message` before the final `ask` — multi-turn conversations passed as a `messages:` array now correctly preserve history at the provider level
10
+
11
+ ### Added
12
+ - `spec/legion/llm/pipeline/executor_multi_turn_spec.rb`: specs verifying prior-message injection in single-turn, multi-turn, two-message, and streaming cases
13
+ - `spec/legion/llm/routes_inference_spec.rb`: specs verifying that `Legion::LLM.chat(messages: [...])` routes through the pipeline, carries tracing/timeline, handles multi-turn history, passes tool classes, and falls back gracefully when pipeline is disabled
14
+
3
15
  ## [0.5.15] - 2026-03-28
4
16
 
5
17
  ### Added
@@ -166,7 +166,11 @@ module Legion
166
166
  )
167
167
  session.with_instructions(injected_system) if injected_system
168
168
 
169
- message_content = @request.messages.last&.dig(:content)
169
+ messages = @request.messages
170
+ prior = messages.size > 1 ? messages[0..-2] : []
171
+ prior.each { |m| session.add_message(m) }
172
+
173
+ message_content = messages.last&.dig(:content)
170
174
  @raw_response = message_content ? session.ask(message_content) : session
171
175
 
172
176
  @timestamps[:provider_end] = Time.now
@@ -228,7 +232,11 @@ module Legion
228
232
  (@request.tools || []).each { |tool| session.with_tool(tool) if tool.is_a?(Class) }
229
233
  ToolRegistry.tools.each { |t| session.with_tool(t) } if defined?(ToolRegistry)
230
234
 
231
- message_content = @request.messages.last&.dig(:content)
235
+ messages = @request.messages
236
+ prior = messages.size > 1 ? messages[0..-2] : []
237
+ prior.each { |m| session.add_message(m) }
238
+
239
+ message_content = messages.last&.dig(:content)
232
240
  @raw_response = session.ask(message_content, &)
233
241
 
234
242
  @timestamps[:provider_end] = Time.now
@@ -244,21 +244,41 @@ module Legion
244
244
  json_response({ request_id: request_id, poll_key: "llm:#{request_id}:status" },
245
245
  status_code: 202)
246
246
  else
247
- session = Legion::LLM.chat(model: model, provider: provider,
248
- caller: { source: 'api', path: request.path })
249
- response = session.ask(message)
250
- Legion::Logging.info "API: LLM chat request #{request_id} completed sync model=#{session.model}" if defined?(Legion::Logging)
251
- json_response(
252
- {
253
- response: response.content,
254
- meta: {
255
- model: session.model.to_s,
256
- tokens_in: response.respond_to?(:input_tokens) ? response.input_tokens : nil,
257
- tokens_out: response.respond_to?(:output_tokens) ? response.output_tokens : nil
258
- }
259
- },
260
- status_code: 201
261
- )
247
+ result = Legion::LLM.chat(message: message, model: model, provider: provider,
248
+ caller: { source: 'api', path: request.path })
249
+ if result.is_a?(Legion::LLM::Pipeline::Response)
250
+ raw_msg = result.message
251
+ content = raw_msg.is_a?(Hash) ? (raw_msg[:content] || raw_msg['content']) : raw_msg.to_s
252
+ routing = result.routing || {}
253
+ resolved_model = routing[:model] || routing['model']
254
+ tokens = result.tokens || {}
255
+ Legion::Logging.info "API: LLM chat request #{request_id} completed sync model=#{resolved_model}" if defined?(Legion::Logging)
256
+ json_response(
257
+ {
258
+ response: content,
259
+ meta: {
260
+ model: resolved_model.to_s,
261
+ tokens_in: tokens[:input],
262
+ tokens_out: tokens[:output]
263
+ }
264
+ },
265
+ status_code: 201
266
+ )
267
+ else
268
+ response = result
269
+ Legion::Logging.info "API: LLM chat request #{request_id} completed sync" if defined?(Legion::Logging)
270
+ json_response(
271
+ {
272
+ response: response.respond_to?(:content) ? response.content : response.to_s,
273
+ meta: {
274
+ model: response.respond_to?(:model_id) ? response.model_id.to_s : model.to_s,
275
+ tokens_in: response.respond_to?(:input_tokens) ? response.input_tokens : nil,
276
+ tokens_out: response.respond_to?(:output_tokens) ? response.output_tokens : nil
277
+ }
278
+ },
279
+ status_code: 201
280
+ )
281
+ end
262
282
  end
263
283
  end
264
284
  end
@@ -288,19 +308,14 @@ module Legion
288
308
 
289
309
  tools = raw_tools || []
290
310
 
291
- session = Legion::LLM.chat(
292
- model: model,
293
- provider: provider,
294
- caller: { source: 'api', path: request.path }
295
- )
296
-
311
+ tool_declarations = []
297
312
  unless tools.empty?
298
313
  validate_tools!(tools)
299
314
 
300
315
  tool_declarations = tools.map do |t|
301
316
  ts = t.respond_to?(:transform_keys) ? t.transform_keys(&:to_sym) : t
302
- tname = ts[:name].to_s
303
- tdesc = ts[:description].to_s
317
+ tname = ts[:name].to_s
318
+ tdesc = ts[:description].to_s
304
319
  tparams = ts[:parameters] || {}
305
320
  Class.new do
306
321
  define_singleton_method(:tool_name) { tname }
@@ -309,45 +324,55 @@ module Legion
309
324
  define_method(:call) { |**_| raise NotImplementedError, "#{tname} executes client-side only" }
310
325
  end
311
326
  end
312
- session.with_tools(*tool_declarations)
313
327
  end
314
328
 
315
- last_user = messages.select { |m| (m[:role] || m['role']).to_s == 'user' }.last
316
- prior_messages = if last_user
317
- idx = messages.rindex(last_user)
318
- if idx
319
- duped = messages.dup
320
- duped.delete_at(idx)
321
- duped
322
- else
323
- messages
324
- end
325
- else
326
- messages
327
- end
328
- prior_messages.each { |m| session.add_message(m) }
329
-
330
- prompt = (last_user || {})[:content] || (last_user || {})['content'] || ''
331
- response = session.ask(prompt)
332
-
333
- tc_list = if response.respond_to?(:tool_calls) && response.tool_calls
334
- Array(response.tool_calls).map do |tc|
335
- {
336
- id: tc.respond_to?(:id) ? tc.id : nil,
337
- name: tc.respond_to?(:name) ? tc.name : tc.to_s,
338
- arguments: tc.respond_to?(:arguments) ? tc.arguments : {}
339
- }
340
- end
341
- end
329
+ normalized_messages = messages.map do |m|
330
+ ms = m.respond_to?(:transform_keys) ? m.transform_keys(&:to_sym) : m
331
+ { role: ms[:role].to_s, content: ms[:content].to_s }
332
+ end
342
333
 
343
- json_response({
344
- content: response.content,
345
- tool_calls: tc_list,
346
- stop_reason: response.respond_to?(:stop_reason) ? response.stop_reason : nil,
347
- model: session.model.to_s,
348
- input_tokens: response.respond_to?(:input_tokens) ? response.input_tokens : nil,
349
- output_tokens: response.respond_to?(:output_tokens) ? response.output_tokens : nil
350
- }, status_code: 200)
334
+ result = Legion::LLM.chat(
335
+ messages: normalized_messages,
336
+ model: model,
337
+ provider: provider,
338
+ tools: tool_declarations,
339
+ caller: { source: 'api', path: request.path }
340
+ )
341
+
342
+ if result.is_a?(Legion::LLM::Pipeline::Response)
343
+ raw_msg = result.message
344
+ content = raw_msg.is_a?(Hash) ? (raw_msg[:content] || raw_msg['content']) : raw_msg.to_s
345
+ routing = result.routing || {}
346
+ resolved_model = routing[:model] || routing['model']
347
+ tokens = result.tokens || {}
348
+ json_response({
349
+ content: content,
350
+ tool_calls: nil,
351
+ stop_reason: result.stop&.dig(:reason)&.to_s,
352
+ model: resolved_model.to_s,
353
+ input_tokens: tokens[:input],
354
+ output_tokens: tokens[:output]
355
+ }, status_code: 200)
356
+ else
357
+ response = result
358
+ tc_list = if response.respond_to?(:tool_calls) && response.tool_calls
359
+ Array(response.tool_calls).map do |tc|
360
+ {
361
+ id: tc.respond_to?(:id) ? tc.id : nil,
362
+ name: tc.respond_to?(:name) ? tc.name : tc.to_s,
363
+ arguments: tc.respond_to?(:arguments) ? tc.arguments : {}
364
+ }
365
+ end
366
+ end
367
+ json_response({
368
+ content: response.respond_to?(:content) ? response.content : response.to_s,
369
+ tool_calls: tc_list,
370
+ stop_reason: response.respond_to?(:stop_reason) ? response.stop_reason : nil,
371
+ model: response.respond_to?(:model_id) ? response.model_id.to_s : model.to_s,
372
+ input_tokens: response.respond_to?(:input_tokens) ? response.input_tokens : nil,
373
+ output_tokens: response.respond_to?(:output_tokens) ? response.output_tokens : nil
374
+ }, status_code: 200)
375
+ end
351
376
  rescue StandardError => e
352
377
  Legion::Logging.error "[api/llm/inference] #{e.class}: #{e.message}" if defined?(Legion::Logging)
353
378
  json_error('inference_error', e.message, status_code: 500)
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module LLM
5
- VERSION = '0.5.15'
5
+ VERSION = '0.5.16'
6
6
  end
7
7
  end
data/lib/legion/llm.rb CHANGED
@@ -230,7 +230,7 @@ module Legion
230
230
  end
231
231
 
232
232
  def _dispatch_chat(model:, provider:, intent:, tier:, escalate:, max_escalations:, quality_check:, message:, **kwargs, &)
233
- if pipeline_enabled? && message
233
+ if pipeline_enabled? && (message || kwargs[:messages])
234
234
  return chat_via_pipeline(model: model, provider: provider, intent: intent, tier: tier,
235
235
  message: message, escalate: escalate, max_escalations: max_escalations,
236
236
  quality_check: quality_check, **kwargs, &)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.15
4
+ version: 0.5.16
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity