RubyGems - legion-llm - Versions diffs - 0.9.36 → 0.9.37 - Mend

legion-llm 0.9.36 → 0.9.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +5 -0
data/lib/legion/llm/api/openai/responses.rb +12 -4
data/lib/legion/llm/call/dispatch.rb +1 -0
data/lib/legion/llm/call/lex_llm_adapter.rb +220 -0
data/lib/legion/llm/inference/executor.rb +32 -0
data/lib/legion/llm/inference/route_attempts.rb +35 -0
data/lib/legion/llm/version.rb +1 -1
metadata +1 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 93611da95712602a9f99e00c4b34523c23838a99d34c3c441ea6bef642231e3f
-  data.tar.gz: 6ced6ad0b6091c5a3d53702b867eea5f04d35199892338023aebb6bb452ed867
+  metadata.gz: 05ce805ec96361b4a033e7d14a5e9e49e80de7415c75b22fd2af44b41ae447e0
+  data.tar.gz: b28f87cc01e43a8165c41d72b315373948e0465094bbb103402ea4e5f66d37bc
 SHA512:
-  metadata.gz: aa99ed858c6bef1fc214a45d4d59e51f1e9f0262f75dcdbd0f60645d59296edf6fa57e47dfa706dd0b06ec7c7f6dbf572f3832235d0d7125cd9992ec65aa6eee
-  data.tar.gz: dfe7e2db5cf883de39a5ac47438408a858372a52dd82230baa4a624e33e17b0558eb50359237345afa5b8a1df432b164149c3fce540304ac56ffbad888110c33
+  metadata.gz: 75a99d484b509a4f361b7fae7d3df534e17d6148a613e0e6c9c67b9a6315a73d3ec6830031cd840588c5feb5fd064433638b69191d89b3c0ebddcb9d333d0b62
+  data.tar.gz: 9bc998ea9c5e12ec2f3b545bc0b24dc44258aa42fc92882217aae59b5ca7c3c45594888584444c41479d93bc2015b435bfa371440b2b429277b93c05dff7a3dc

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,10 @@
 # Legion LLM Changelog
+## [0.9.37] - 2026-05-22
+### Changed
+- API: OpenAI Responses requests now dispatch to upstream `/v1/responses` through a native `:responses` provider capability instead of adapting Responses input through Chat Completions `stream_chat`, preserving upstream Responses streaming usage from `response.completed.response.usage`
 ## [0.9.36] - 2026-05-22
 ### Fixed

data/lib/legion/llm/api/openai/responses.rb CHANGED Viewed

@@ -76,13 +76,13 @@ module Legion
                         'X-Accel-Buffering' => 'no'
                 stream do |out|
-                  Responses.stream_response(out, executor, request_id: request_id, model: model)
+                  Responses.stream_response(out, executor, request_id: request_id, model: model, upstream_body: body)
                 rescue StandardError => e
                   handle_exception(e, level: :error, handled: false, operation: 'llm.api.openai.responses.stream', request_id: request_id)
                   out << "event: error\ndata: #{Legion::JSON.dump({ type: 'server_error', message: e.message })}\n\n"
                 end
               else
-                pipeline_response = executor.call
+                pipeline_response = executor.call_responses(body: body, stream: false)
                 response_body = Responses.format_response(pipeline_response, request_id: request_id, model: model)
                 log.info("[llm][api][openai][responses] action=complete request_id=#{request_id} model=#{response_body[:model]}")
@@ -179,7 +179,7 @@ module Legion
             }
           end
-          def self.stream_response(out, executor, request_id:, model:) # rubocop:disable Metrics/MethodLength
+          def self.stream_response(out, executor, request_id:, model:, upstream_body: nil) # rubocop:disable Metrics/MethodLength
             created_at = Time.now.to_i
             seq = 0
             in_progress_response = { id: request_id, object: 'response', created_at: created_at,
@@ -218,7 +218,7 @@ module Legion
             full_text = +''
-            pipeline_response = executor.call_stream do |chunk|
+            pipeline_response = call_streaming_executor(executor, upstream_body: upstream_body) do |chunk|
               text = chunk.respond_to?(:content) ? chunk.content.to_s : chunk.to_s
               next if text.empty?
@@ -282,6 +282,14 @@ module Legion
             log.info("[llm][api][openai][responses] action=stream_complete request_id=#{request_id} model=#{resolved_model}")
           end
+          def self.call_streaming_executor(executor, upstream_body: nil, &) # Runs the streaming call on executor, forwarding the caller's block.
+            if upstream_body && executor.respond_to?(:call_responses) # native path needs a body and an executor that responds to call_responses
+              executor.call_responses(body: upstream_body, stream: true, &)
+            else
+              executor.call_stream(&) # fallback when no upstream body was given or call_responses is unavailable
+            end
+          end
           def self.sse_event(name, payload) # Builds one SSE frame: an event line, a data line with the dumped payload, then a blank line.
             "event: #{name}\ndata: #{Legion::JSON.dump(payload)}\n\n"
           end

data/lib/legion/llm/call/dispatch.rb CHANGED Viewed

@@ -168,6 +168,7 @@ module Legion
         CAPABILITY_METHODS = {
           chat:         :chat,
           stream:       :stream,
+          responses:    :responses,
           embed:        :embed,
           image:        :image,
           count_tokens: :count_tokens

data/lib/legion/llm/call/lex_llm_adapter.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
+require 'event_stream_parser'
 require 'legion/logging/helper'
 module Legion
@@ -58,6 +59,24 @@ module Legion
           end
         end
+        def responses(model:, body:, messages:, stream: false, **opts, &) # Builds a Responses payload and POSTs it to responses_url, streaming when stream is truthy.
+          payload = build_responses_payload(
+            body:     body,
+            model:    model,
+            messages: messages,
+            stream:   stream,
+            system:   opts[:system], # only :system and :tools are taken from opts for the payload
+            tools:    opts[:tools]
+          )
+          if stream
+            stream_responses_payload(payload, offering_metadata: opts[:offering_metadata], &) # streaming path; forwards the caller's block
+          else
+            response = provider.connection.post(responses_url, payload)
+            responses_hash_response(response.body, offering_metadata: opts[:offering_metadata]) # converts the response body into the result hash
+          end
+        end
         def embed(model:, text:, dimensions: nil, **opts)
           model_info = model_info(model, offering_metadata: opts[:offering_metadata])
           response = provider.embed(
@@ -136,6 +155,207 @@ module Legion
           end
         end
+        def responses_url = '/v1/responses' # Path passed to provider.connection.post by the Responses calls.
+        def build_responses_payload(body:, model:, messages:, stream:, system: nil, tools: nil) # Copies the request body and overlays model, stream, input, and optional instructions/tools.
+          payload = normalize_hash(body).dup # work on a copy of the normalized body
+          payload[:model] = model
+          payload[:stream] = stream
+          payload[:input] = responses_input(messages) # input is always rebuilt from messages; overwrites an existing :input entry
+          system_content = normalize_response_system(system)
+          payload[:instructions] = system_content if present_system?(system_content)
+          formatted_tools = responses_tools(tools)
+          payload[:tools] = formatted_tools if formatted_tools.any? # :tools is only set when at least one tool was formatted
+          deep_compact(payload) # returns a rebuilt structure with nil values removed at every level
+        end
+        def responses_input(messages) # Maps each message to a Responses input item; Array() makes nil or a single value iterable.
+          Array(messages).map do |message|
+            normalized = normalize_hash(message)
+            if normalized[:role].to_s == 'tool' # tool-role messages become function_call_output items
+              next({
+                type:    'function_call_output',
+                call_id: normalized[:tool_call_id].to_s,
+                output:  normalize_message_content(normalized[:content]).to_s
+              })
+            end
+            {
+              role:         normalized[:role]&.to_s || 'user', # defaults to 'user' when the message has no role
+              content:      normalize_message_content(normalized[:content]).to_s,
+              tool_call_id: normalized[:tool_call_id]
+            }.compact # drops tool_call_id when it is nil
+          end
+        end
+        def normalize_response_system(system) # Reduces system to its content: nil stays nil, a Hash yields its :content or 'content' value, anything else is stringified.
+          return nil if system.nil?
+          return system[:content] || system['content'] if system.is_a?(Hash) # symbol key is tried before the string key
+          system.to_s
+        end
+        def responses_tools(tools) # Converts the values of normalize_tools(tools) into 'function' tool definition hashes.
+          normalize_tools(tools).values.map do |tool|
+            {
+              type:        'function',
+              name:        tool.name.to_s,
+              description: tool.description.to_s,
+              parameters:  tool.params_schema || { type: 'object', properties: {} } # empty object schema when the tool has no params_schema
+            }
+          end
+        end
+        def deep_compact(value) # Recursively rebuilds hashes and arrays without nil values; any other value is returned as-is.
+          case value
+          when Hash
+            value.each_with_object({}) do |(key, hash_value), compacted|
+              compact_value = deep_compact(hash_value)
+              compacted[key] = compact_value unless compact_value.nil? # only nil is dropped; empty hashes and arrays are kept
+            end
+          when Array
+            value.map { |entry| deep_compact(entry) }.compact
+          else
+            value
+          end
+        end
+        def stream_responses_payload(payload, offering_metadata: nil, &block) # POSTs payload with an SSE Accept header and folds streamed events into an accumulator.
+          accumulator = build_responses_stream_accumulator
+          parser = EventStreamParser::Parser.new
+          response = provider.connection.post(responses_url, payload) do |req|
+            req.headers['Accept'] = 'text/event-stream'
+            attach_responses_stream_handler(req, parser, accumulator, block) # chunks are parsed as they arrive; block may be nil
+          end
+          responses_stream_response(accumulator, response.body, offering_metadata: offering_metadata) # final result hash built from the accumulated state
+        end
+        def build_responses_stream_accumulator # Fresh mutable state hash for one streamed response.
+          {
+            content:   +'', # unary plus gives an unfrozen string so deltas can be appended with <<
+            model:     nil,
+            usage:     {},
+            completed: nil, # filled from the 'response.completed' event
+            raw:       nil # most recent parsed event
+          }
+        end
+        def attach_responses_stream_handler(req, parser, accumulator, block) # Installs an on_data callback on req that parses SSE chunks and handles each event's data.
+          handler = proc do |chunk, *_args| # extra callback arguments are ignored
+            parser.feed(chunk) do |_event, data|
+              handle_responses_stream_data(data, accumulator, block)
+            end
+          end
+          if req.options.respond_to?(:on_data=) # use the setter when the options object has one, otherwise hash-style assignment
+            req.options.on_data = handler
+          else
+            req.options[:on_data] = handler
+          end
+        end
+        def handle_responses_stream_data(data, accumulator, block) # Parses one SSE data payload and updates the accumulator according to its 'type'.
+          return if data == '[DONE]' # sentinel is skipped before JSON parsing
+          parsed = Legion::JSON.parse(data, symbolize_names: false)
+          return unless parsed.is_a?(Hash) # non-object JSON is ignored
+          accumulator[:raw] = parsed # keeps the most recent parsed event
+          case parsed['type']
+          when 'response.output_text.delta'
+            accumulate_responses_text_delta(parsed, accumulator, block)
+          when 'response.completed' # records the final response, its model when present, and its usage
+            response = parsed['response'] || {}
+            accumulator[:completed] = response
+            accumulator[:model] = response['model'] if response['model']
+            accumulator[:usage] = responses_usage(response['usage'])
+          end # any other event type only updates :raw
+        end
+        def accumulate_responses_text_delta(parsed, accumulator, block) # Appends a non-empty text delta to the accumulated content and passes it to block as a Chunk.
+          delta = parsed['delta'].to_s
+          return if delta.empty? # empty or missing deltas are neither stored nor yielded
+          accumulator[:content] << delta
+          block&.call( # block may be nil when the caller supplied none
+            lex_llm_namespace::Chunk.new(
+              role:     :assistant,
+              content:  delta,
+              model_id: parsed['model'],
+              raw:      parsed,
+              tokens:   nil
+            )
+          )
+        end
+        def responses_stream_response(accumulator, response_body, offering_metadata: nil) # Assembles the result hash for a finished stream.
+          completed = accumulator[:completed] || {}
+          content = accumulator[:content]
+          content = extract_responses_text(completed) if content.empty? # falls back to text in the completed response when no deltas were accumulated
+          {
+            result:   content,
+            model:    accumulator[:model] || completed['model'],
+            usage:    accumulator[:usage],
+            metadata: response_metadata(completed.empty? ? response_body : completed, offering_metadata: offering_metadata) # raw body is used when no completed event was seen
+          }.compact # omits entries whose value is nil
+        end
+        def responses_hash_response(body, offering_metadata: nil) # Builds the result hash from a non-streaming response body.
+          normalized = normalize_string_hash(body) # string-keyed copy so the lookups below use string keys
+          {
+            result:   extract_responses_text(normalized),
+            model:    normalized['model'],
+            usage:    responses_usage(normalized['usage']),
+            metadata: response_metadata(normalized, offering_metadata: offering_metadata)
+          }.compact # omits entries whose value is nil
+        end
+        def normalize_string_hash(value) # Returns value with hash keys stringified recursively; arrays are mapped element-wise.
+          return value.map { |entry| normalize_string_hash(entry) } if value.is_a?(Array) # note: elements without each_pair (e.g. scalars) become {} via the guard below
+          return {} unless value.respond_to?(:each_pair) # nil and other non-hash-like input yield an empty hash
+          value.each_with_object({}) do |(key, hash_value), normalized|
+            normalized[key.to_s] = normalize_string_hash_value(hash_value)
+          end
+        end
+        def normalize_string_hash_value(value) # Stringifies keys inside nested hashes and arrays; other values are returned unchanged.
+          return normalize_string_hash(value) if value.respond_to?(:each_pair)
+          return value.map { |entry| normalize_string_hash_value(entry) } if value.is_a?(Array)
+          value
+        end
+        def extract_responses_text(body) # Returns body['output_text'] when set, otherwise the joined text of matching content parts in body['output'].
+          return body['output_text'].to_s if body['output_text']
+          Array(body['output']).flat_map do |item|
+            Array(item['content']).filter_map do |content|
+              next unless %w[output_text text].include?(content['type'].to_s) # parts of any other type are skipped
+              content['text']
+            end
+          end.join # an empty string results when nothing matched
+        end
+        def responses_usage(usage) # Maps a usage payload to integer token counts; missing values become 0 through to_i.
+          usage = normalize_string_hash(usage)
+          input = usage['input_tokens'] || usage['prompt_tokens'] # either counter name is accepted
+          output = usage['output_tokens'] || usage['completion_tokens']
+          {
+            input_tokens:       input.to_i,
+            output_tokens:      output.to_i,
+            cache_read_tokens:  usage.dig('input_tokens_details', 'cached_tokens').to_i,
+            cache_write_tokens: usage.dig('input_tokens_details', 'cache_creation_tokens').to_i
+          }
+        end
         def model_info(model, offering_metadata: nil)
           offering = normalize_offering_metadata(offering_metadata)
           lex_llm_namespace::Model::Info.new(

data/lib/legion/llm/inference/executor.rb CHANGED Viewed

@@ -124,6 +124,14 @@ module Legion
           build_response
         end
+        def call_responses(body:, stream: false, &) # Runs pre-provider steps, the Responses provider request, post-provider steps, then builds the response.
+          log.debug "[llm][executor] action=call_responses request_id=#{@request.id} profile=#{@profile} stream=#{stream}"
+          execute_pre_provider_steps
+          execute_provider_request_responses(body: body, stream: stream, &) # the caller's block is forwarded to the provider request
+          execute_post_provider_steps
+          build_response
+        end
         private
         def llm_setting(key, default = nil)
@@ -1339,6 +1347,30 @@ module Legion
           @raw_response = Call::NativeResponseAdapter.new(result)
         end
+        def execute_provider_request_responses(body:, stream:, &block) # Records the outbound request, dispatches it natively, and stores the adapted result in @raw_response.
+          @timestamps[:provider_start] = Time.now
+          @timeline.record(
+            category: :provider, key: 'provider:request_sent',
+            exchange_id: @exchange_id, direction: :outbound,
+            detail: "responses from #{@resolved_provider}",
+            from: 'pipeline', to: "provider:#{@resolved_provider}"
+          )
+          raise Legion::LLM::ProviderError, "Native provider not registered: #{@resolved_provider}" unless use_native_dispatch?(@resolved_provider)
+          result = dispatch_responses_request(
+            body:         body,
+            messages:     native_dispatch_messages,
+            stream:       stream,
+            stream_block: block
+          )
+          merge_response_offering_metadata(result[:metadata])
+          @raw_response = Call::NativeResponseAdapter.new(result)
+          @timestamps[:provider_end] = Time.now # only reached on success; an exception raised above skips it
+          record_provider_response
+        end
         def normalize_message_content(content)
           return content if content.nil? || content.is_a?(String)
           return content unless content.is_a?(Array)

data/lib/legion/llm/inference/route_attempts.rb CHANGED Viewed

@@ -24,6 +24,41 @@ module Legion
           end
         end
+        def dispatch_responses_request(body:, messages:, stream:, stream_block: nil) # Dispatches a :responses call and records a success or failure route attempt.
+          raise Legion::LLM::ProviderError, 'Responses API upstream dispatch is not supported for fleet providers' if fleet_dispatch? # NOTE(review): raised before idempotency_key is assigned, so the rescue below records it as nil
+          idempotency_key = next_route_idempotency_key
+          result = Call::Dispatch.call(
+            provider:   @resolved_provider,
+            instance:   @resolved_instance,
+            capability: :responses,
+            model:      @resolved_model,
+            body:       body,
+            messages:   messages,
+            stream:     stream,
+            **native_dispatch_options,
+            &stream_block
+          )
+          record_route_attempt(
+            dispatch_path:   :direct,
+            operation:       :responses,
+            status:          :success,
+            idempotency_key: idempotency_key,
+            selected_lane:   nil
+          )
+          result
+        rescue StandardError => e
+          record_route_attempt(
+            dispatch_path:   :direct,
+            operation:       :responses,
+            status:          :failure,
+            idempotency_key: idempotency_key,
+            selected_lane:   nil,
+            failure_reason:  e.message
+          )
+          raise # re-raises the original error after the failure has been recorded
+        end
         def dispatch_direct_request(capability:, operation:, messages:, stream_block: nil)
           idempotency_key = next_route_idempotency_key
           result = Call::Dispatch.call(

data/lib/legion/llm/version.rb CHANGED Viewed

@@ -2,6 +2,6 @@
 module Legion
   module LLM
-    VERSION = '0.9.36'
+    VERSION = '0.9.37'
   end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: legion-llm
 version: !ruby/object:Gem::Version
-  version: 0.9.36
+  version: 0.9.37
 platform: ruby
 authors:
 - Esity