RubyGems - legion-llm - Versions diffs - 0.5.15 → 0.5.16 - Mend

legion-llm 0.5.15 → 0.5.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

checksums.yaml +4 -4
data/CHANGELOG.md +12 -0
data/lib/legion/llm/pipeline/executor.rb +10 -2
data/lib/legion/llm/routes.rb +84 -59
data/lib/legion/llm/version.rb +1 -1
data/lib/legion/llm.rb +1 -1
metadata +1 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 42d72ed366b2266f182b9a2e950d02c735956ff0b3ee51f712ed4686370b7274
-  data.tar.gz: 9af0c1f6f15ecea5d029868fde0a51305307a5de14391cf31e7c85307cccdee0
+  metadata.gz: 2dea674b5405be2c2863f1c6dd568f21ec8baad8db42eeaa457cd6dcdc881bc8
+  data.tar.gz: ee16678e6be6bc612d906bdd754d7e3db79f803c52b465fe3fb2ed762812aa20
 SHA512:
-  metadata.gz: 844583a7565f8bbc167f12330b51c32ba57b74802b23e624158f1afbaa3020dffd25e7324ad4ec19fe2ad0eaf2dccdfb6b6d8c673f06805f0b12f2613ac5f6f7
-  data.tar.gz: 26ec627a507e4e8d14e8a9e6155bb852c44d36af9e44b60f02dc247e1d307801bffbdc486a54745894430453c1fde22902662b7253feb8d5c841b92971aa2326
+  metadata.gz: 0a743021a3a3540290cfc4ea3c119fdc42bbba38eb5115b2883fefc6a4da0bceda04c45136c72d233559d9de53c41af198bfa057eed5579fc345121fade8cd74
+  data.tar.gz: 58ba674f0aa898bd75895bfaeb93b5f31bf2aa7f6dc0dbbc8d3f0afcb96cbfcabb80b68798c2c8758f8df8726c9bfd86a567e1e531728f45f91af96b53e15e7b

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,17 @@
 # Legion LLM Changelog
+## [0.5.16] - 2026-03-28
+### Fixed
+- `POST /api/llm/inference` endpoint now routes through the 18-step pipeline when `pipeline_enabled?` is true — previously it created a bare `RubyLLM` session and called `session.ask` directly, bypassing RAG (step 8), GAIA advisory (step 7), knowledge capture (step 19), billing, and classification
+- `POST /api/llm/chat` sync fallback path now routes through the pipeline (previously it called `session.ask` directly on a bare session, just as the inference endpoint did)
+- `_dispatch_chat` pipeline gate now fires when either a `message:` string or a `messages:` array is present — `Legion::LLM.chat(messages: [...])` was silently falling through to the legacy path even with `pipeline_enabled: true`
+- `Pipeline::Executor#step_provider_call` and `#step_provider_call_stream` now inject prior messages via `session.add_message` before the final `ask` — multi-turn conversations passed as a `messages:` array now correctly preserve history at the provider level
+### Added
+- `spec/legion/llm/pipeline/executor_multi_turn_spec.rb`: specs verifying prior-message injection in single-turn, multi-turn, two-message, and streaming cases
+- `spec/legion/llm/routes_inference_spec.rb`: specs verifying that `Legion::LLM.chat(messages: [...])` routes through the pipeline, carries tracing/timeline, handles multi-turn history, passes tool classes, and falls back gracefully when pipeline is disabled
 ## [0.5.15] - 2026-03-28
 ### Added

data/lib/legion/llm/pipeline/executor.rb CHANGED Viewed

@@ -166,7 +166,11 @@ module Legion
           )
           session.with_instructions(injected_system) if injected_system
-          message_content = @request.messages.last&.dig(:content)
+          messages = @request.messages
+          prior    = messages.size > 1 ? messages[0..-2] : []
+          prior.each { |m| session.add_message(m) }
+          message_content = messages.last&.dig(:content)
           @raw_response = message_content ? session.ask(message_content) : session
           @timestamps[:provider_end] = Time.now
@@ -228,7 +232,11 @@ module Legion
           (@request.tools || []).each { |tool| session.with_tool(tool) if tool.is_a?(Class) }
           ToolRegistry.tools.each { |t| session.with_tool(t) } if defined?(ToolRegistry)
-          message_content = @request.messages.last&.dig(:content)
+          messages = @request.messages
+          prior    = messages.size > 1 ? messages[0..-2] : []
+          prior.each { |m| session.add_message(m) }
+          message_content = messages.last&.dig(:content)
           @raw_response = session.ask(message_content, &)
           @timestamps[:provider_end] = Time.now

data/lib/legion/llm/routes.rb CHANGED Viewed

@@ -244,21 +244,41 @@ module Legion
             json_response({ request_id: request_id, poll_key: "llm:#{request_id}:status" },
                           status_code: 202)
           else
-            session  = Legion::LLM.chat(model: model, provider: provider,
-                                        caller: { source: 'api', path: request.path })
-            response = session.ask(message)
-            Legion::Logging.info "API: LLM chat request #{request_id} completed sync model=#{session.model}" if defined?(Legion::Logging)
-            json_response(
-              {
-                response: response.content,
-                meta:     {
-                  model:      session.model.to_s,
-                  tokens_in:  response.respond_to?(:input_tokens) ? response.input_tokens : nil,
-                  tokens_out: response.respond_to?(:output_tokens) ? response.output_tokens : nil
-                }
-              },
-              status_code: 201
-            )
+            result = Legion::LLM.chat(message: message, model: model, provider: provider,
+                                      caller: { source: 'api', path: request.path })
+            if result.is_a?(Legion::LLM::Pipeline::Response)
+              raw_msg  = result.message
+              content  = raw_msg.is_a?(Hash) ? (raw_msg[:content] || raw_msg['content']) : raw_msg.to_s
+              routing  = result.routing || {}
+              resolved_model = routing[:model] || routing['model']
+              tokens = result.tokens || {}
+              Legion::Logging.info "API: LLM chat request #{request_id} completed sync model=#{resolved_model}" if defined?(Legion::Logging)
+              json_response(
+                {
+                  response: content,
+                  meta:     {
+                    model:      resolved_model.to_s,
+                    tokens_in:  tokens[:input],
+                    tokens_out: tokens[:output]
+                  }
+                },
+                status_code: 201
+              )
+            else
+              response = result
+              Legion::Logging.info "API: LLM chat request #{request_id} completed sync" if defined?(Legion::Logging)
+              json_response(
+                {
+                  response: response.respond_to?(:content) ? response.content : response.to_s,
+                  meta:     {
+                    model:      response.respond_to?(:model_id) ? response.model_id.to_s : model.to_s,
+                    tokens_in:  response.respond_to?(:input_tokens) ? response.input_tokens : nil,
+                    tokens_out: response.respond_to?(:output_tokens) ? response.output_tokens : nil
+                  }
+                },
+                status_code: 201
+              )
+            end
           end
         end
       end
@@ -288,19 +308,14 @@ module Legion
           tools = raw_tools || []
-          session = Legion::LLM.chat(
-            model:    model,
-            provider: provider,
-            caller:   { source: 'api', path: request.path }
-          )
+          tool_declarations = []
           unless tools.empty?
             validate_tools!(tools)
             tool_declarations = tools.map do |t|
               ts = t.respond_to?(:transform_keys) ? t.transform_keys(&:to_sym) : t
-              tname  = ts[:name].to_s
-              tdesc  = ts[:description].to_s
+              tname   = ts[:name].to_s
+              tdesc   = ts[:description].to_s
               tparams = ts[:parameters] || {}
               Class.new do
                 define_singleton_method(:tool_name)   { tname }
@@ -309,45 +324,55 @@ module Legion
                 define_method(:call) { |**_| raise NotImplementedError, "#{tname} executes client-side only" }
               end
             end
-            session.with_tools(*tool_declarations)
           end
-          last_user = messages.select { |m| (m[:role] || m['role']).to_s == 'user' }.last
-          prior_messages = if last_user
-                             idx = messages.rindex(last_user)
-                             if idx
-                               duped = messages.dup
-                               duped.delete_at(idx)
-                               duped
-                             else
-                               messages
-                             end
-                           else
-                             messages
-                           end
-          prior_messages.each { |m| session.add_message(m) }
-          prompt   = (last_user || {})[:content] || (last_user || {})['content'] || ''
-          response = session.ask(prompt)
-          tc_list = if response.respond_to?(:tool_calls) && response.tool_calls
-                      Array(response.tool_calls).map do |tc|
-                        {
-                          id:        tc.respond_to?(:id) ? tc.id : nil,
-                          name:      tc.respond_to?(:name) ? tc.name : tc.to_s,
-                          arguments: tc.respond_to?(:arguments) ? tc.arguments : {}
-                        }
-                      end
-                    end
+          normalized_messages = messages.map do |m|
+            ms = m.respond_to?(:transform_keys) ? m.transform_keys(&:to_sym) : m
+            { role: ms[:role].to_s, content: ms[:content].to_s }
+          end
-          json_response({
-                          content:       response.content,
-                          tool_calls:    tc_list,
-                          stop_reason:   response.respond_to?(:stop_reason) ? response.stop_reason : nil,
-                          model:         session.model.to_s,
-                          input_tokens:  response.respond_to?(:input_tokens) ? response.input_tokens : nil,
-                          output_tokens: response.respond_to?(:output_tokens) ? response.output_tokens : nil
-                        }, status_code: 200)
+          result = Legion::LLM.chat(
+            messages: normalized_messages,
+            model:    model,
+            provider: provider,
+            tools:    tool_declarations,
+            caller:   { source: 'api', path: request.path }
+          )
+          if result.is_a?(Legion::LLM::Pipeline::Response)
+            raw_msg   = result.message
+            content   = raw_msg.is_a?(Hash) ? (raw_msg[:content] || raw_msg['content']) : raw_msg.to_s
+            routing   = result.routing || {}
+            resolved_model = routing[:model] || routing['model']
+            tokens = result.tokens || {}
+            json_response({
+                            content:       content,
+                            tool_calls:    nil,
+                            stop_reason:   result.stop&.dig(:reason)&.to_s,
+                            model:         resolved_model.to_s,
+                            input_tokens:  tokens[:input],
+                            output_tokens: tokens[:output]
+                          }, status_code: 200)
+          else
+            response = result
+            tc_list = if response.respond_to?(:tool_calls) && response.tool_calls
+                        Array(response.tool_calls).map do |tc|
+                          {
+                            id:        tc.respond_to?(:id) ? tc.id : nil,
+                            name:      tc.respond_to?(:name) ? tc.name : tc.to_s,
+                            arguments: tc.respond_to?(:arguments) ? tc.arguments : {}
+                          }
+                        end
+                      end
+            json_response({
+                            content:       response.respond_to?(:content) ? response.content : response.to_s,
+                            tool_calls:    tc_list,
+                            stop_reason:   response.respond_to?(:stop_reason) ? response.stop_reason : nil,
+                            model:         response.respond_to?(:model_id) ? response.model_id.to_s : model.to_s,
+                            input_tokens:  response.respond_to?(:input_tokens) ? response.input_tokens : nil,
+                            output_tokens: response.respond_to?(:output_tokens) ? response.output_tokens : nil
+                          }, status_code: 200)
+          end
         rescue StandardError => e
           Legion::Logging.error "[api/llm/inference] #{e.class}: #{e.message}" if defined?(Legion::Logging)
           json_error('inference_error', e.message, status_code: 500)

data/lib/legion/llm/version.rb CHANGED Viewed

@@ -2,6 +2,6 @@
 module Legion
   module LLM
-    VERSION = '0.5.15'
+    VERSION = '0.5.16'
   end
 end

data/lib/legion/llm.rb CHANGED Viewed

@@ -230,7 +230,7 @@ module Legion
       end
       def _dispatch_chat(model:, provider:, intent:, tier:, escalate:, max_escalations:, quality_check:, message:, **kwargs, &)
-        if pipeline_enabled? && message
+        if pipeline_enabled? && (message || kwargs[:messages])
           return chat_via_pipeline(model: model, provider: provider, intent: intent, tier: tier,
                                    message: message, escalate: escalate, max_escalations: max_escalations,
                                    quality_check: quality_check, **kwargs, &)

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: legion-llm
 version: !ruby/object:Gem::Version
-  version: 0.5.15
+  version: 0.5.16
 platform: ruby
 authors:
 - Esity