RubyGems - legion-llm - Versions diffs - 0.9.32 → 0.9.33 - Mend

legion-llm 0.9.32 → 0.9.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

checksums.yaml +4 -4
data/CHANGELOG.md +6 -0
data/lib/legion/llm/api/openai/responses.rb +271 -0
data/lib/legion/llm/api.rb +2 -0
data/lib/legion/llm/version.rb +1 -1
metadata +2 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 715bd8c0918939545eda0cf832d81aa23e69c807ed6055fdb4d95f4177c99449
-  data.tar.gz: 12e42d3d2fdc02c4ca7764a264af90631141952982d9726c02d0e9ea7de87a92
+  metadata.gz: 768503fd466d914e0cc3f4bb713ac0a710b392ebc0098eab5d6c2def05f5f9f5
+  data.tar.gz: 9b0823a06222164a83123d0b895c701da3415aee51bca8b8ab303e6f1b72a532
 SHA512:
-  metadata.gz: 6f2fd1a0ea8b18ed222f2713adb4f4a48ce57e90d5a3ac2242f7ae648ed297b6375d1143c0e29777e4c5ade60e479e09997c2af04cf0e1d3b81225ef3a14276f
-  data.tar.gz: 41d0daa21a98518c4192881bd3231ca1e308b6231d06e22b6102531feee4f6aef1ced50d9e43b1f65b7e8f344e95406ebb682bf445d356011aafd6d4ce241a37
+  metadata.gz: 8b85a0b079619ed107b79044b6cfaa29f58a11881cd67a883cc2d107efebbf19240241d892daf983abd89876391fb3152434da27ce7e3a043f9170bde75d747e
+  data.tar.gz: 3c64a0f8744c10882cf02510480dd9757c68053454dabfd639340b16561f8640011fba2d4cb120d32c4ee532ac497db10167c516656562f722aa415f2800e49a

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,11 @@
 # Legion LLM Changelog
+## [0.9.33] - 2026-05-22
+### Added
+- API: OpenAI Responses API endpoint (`POST /v1/responses` and `POST /api/llm/inference/v1/responses`) for Codex CLI compatibility
+- API: Supports streaming (SSE with `response.*` events), tool declarations, and `instructions` parameter
 ## [0.9.31] - 2026-05-18
 ### Added

data/lib/legion/llm/api/openai/responses.rb ADDED Viewed

@@ -0,0 +1,271 @@
+# frozen_string_literal: true
+require 'securerandom'
+require 'legion/logging/helper'
+require 'legion/llm/types'
+module Legion
+  module LLM
+    module API
+      module OpenAI
+        module Responses
+          extend Legion::Logging::Helper
+          def self.registered(app)
+            log.debug('[llm][api][openai][responses] registering POST /v1/responses + /api/llm/inference/v1/responses')
+            handler = build_handler
+            app.post('/v1/responses') { instance_exec(&handler) }
+            app.post('/api/llm/inference/v1/responses') { instance_exec(&handler) }
+            log.debug('[llm][api][openai][responses] routes registered')
+          end
+          # Builds the proc that serves both Responses routes.
+          #
+          # `registered` runs this proc with `instance_exec`, so bare calls such as
+          # `halt`, `request`, `env`, `content_type`, `headers`, `status`, `stream`,
+          # `require_llm!`, `parse_request_body`, `build_server_caller` and
+          # `handle_exception` resolve against the object handling the request
+          # (presumably a Sinatra-style app with project helpers mixed in — confirm),
+          # while `Responses.*` calls reach this module explicitly.
+          #
+          # @return [Proc] handler that produces either a JSON body or an SSE stream
+          def self.build_handler # rubocop:disable Metrics/MethodLength
+            proc do # rubocop:disable Metrics/BlockLength
+              # Both helpers are defined outside this file; `body` is read below with symbol keys.
+              require_llm!
+              body = parse_request_body
+              request_id = "resp_#{SecureRandom.hex(16)}"
+              input = body[:input]
+              # `input` may be an array of items or a single string (treated as one
+              # user message); any other value is rejected with a 400 error body.
+              messages = case input
+                         when Array
+                           Responses.normalize_input_array(input)
+                         when String
+                           [{ role: 'user', content: input }]
+                         else
+                           halt 400, { 'Content-Type' => 'application/json' },
+                                Legion::JSON.dump({ error: { message: 'input is required (string or array)',
+                                                             type: 'invalid_request_error', code: nil } })
+                         end
+              # `instructions`, when present, is prepended as a system message.
+              messages = [{ role: 'system', content: body[:instructions].to_s }] + messages if body[:instructions]
+              model = body[:model] || Legion::LLM::Settings.value(:default_model) || 'default'
+              # Only a literal `true` enables streaming; other truthy values do not.
+              streaming = body[:stream] == true
+              tool_declarations = Responses.build_tool_declarations(body[:tools])
+              log.info(
+                "[llm][api][openai][responses] action=accepted request_id=#{request_id} " \
+                "model=#{model} stream=#{streaming} tools=#{tool_declarations.size}"
+              )
+              effective_caller = build_server_caller(source: 'openai_responses', path: request.path, env: env)
+              # Loaded on demand, and only when the constants are not already defined.
+              require 'legion/llm/inference/request' unless defined?(Legion::LLM::Inference::Request)
+              require 'legion/llm/inference/executor' unless defined?(Legion::LLM::Inference::Executor)
+              inference_request = Legion::LLM::Inference::Request.build(
+                id:       request_id,
+                messages: messages,
+                routing:  { model: model },
+                tools:    tool_declarations,
+                caller:   effective_caller,
+                stream:   streaming,
+                cache:    { strategy: :default, cacheable: true }
+              )
+              executor = Legion::LLM::Inference::Executor.new(inference_request)
+              if streaming
+                content_type 'text/event-stream'
+                headers 'Cache-Control'     => 'no-cache',
+                        'Connection'        => 'keep-alive',
+                        'X-Accel-Buffering' => 'no'
+                stream do |out|
+                  Responses.stream_response(out, executor, request_id: request_id, model: model)
+                rescue StandardError => e
+                  # A failure inside the stream block is reported as an `error` SSE
+                  # event written to `out` rather than as an HTTP error status.
+                  handle_exception(e, level: :error, handled: false, operation: 'llm.api.openai.responses.stream', request_id: request_id)
+                  out << "event: error\ndata: #{Legion::JSON.dump({ type: 'server_error', message: e.message })}\n\n"
+                end
+              else
+                pipeline_response = executor.call
+                response_body = Responses.format_response(pipeline_response, request_id: request_id, model: model)
+                log.info("[llm][api][openai][responses] action=complete request_id=#{request_id} model=#{response_body[:model]}")
+                content_type :json
+                status 200
+                Legion::JSON.dump(response_body)
+              end
+            # Error mapping: auth -> 401, rate limit -> 429, provider failures -> 502,
+            # anything else -> 500. Each body carries the exception message.
+            rescue Legion::LLM::AuthError => e
+              handle_exception(e, level: :error, handled: true, operation: 'llm.api.openai.responses.auth')
+              halt 401, { 'Content-Type' => 'application/json' },
+                   Legion::JSON.dump({ error: { message: e.message, type: 'authentication_error' } })
+            rescue Legion::LLM::RateLimitError => e
+              handle_exception(e, level: :warn, handled: true, operation: 'llm.api.openai.responses.rate_limit')
+              halt 429, { 'Content-Type' => 'application/json' },
+                   Legion::JSON.dump({ error: { message: e.message, type: 'rate_limit_error' } })
+            rescue Legion::LLM::ProviderDown, Legion::LLM::ProviderError => e
+              handle_exception(e, level: :error, handled: true, operation: 'llm.api.openai.responses.provider')
+              halt 502, { 'Content-Type' => 'application/json' },
+                   Legion::JSON.dump({ error: { message: e.message, type: 'server_error' } })
+            rescue StandardError => e
+              handle_exception(e, level: :error, handled: false, operation: 'llm.api.openai.responses')
+              halt 500, { 'Content-Type' => 'application/json' },
+                   Legion::JSON.dump({ error: { message: e.message, type: 'server_error' } })
+            end
+          end
+          def self.normalize_input_array(input)
+            input.filter_map do |item|
+              item = item.transform_keys(&:to_sym) if item.respond_to?(:transform_keys)
+              case item[:type]&.to_s
+              when 'function_call_output'
+                { role: 'tool', tool_call_id: item[:call_id], content: item[:output].to_s }
+              else
+                role = item[:role]&.to_s
+                next unless role
+                content = item[:content]
+                content = content.to_s if content && !content.is_a?(Array)
+                { role: role, content: content }.compact
+              end
+            end
+          end
+          def self.build_tool_declarations(tools)
+            return [] if tools.nil? || !tools.is_a?(Array) || tools.empty?
+            tools.filter_map do |tool|
+              t = tool.respond_to?(:transform_keys) ? tool.transform_keys(&:to_sym) : tool
+              fn = t[:function] || t
+              fn = fn.transform_keys(&:to_sym) if fn.respond_to?(:transform_keys)
+              next unless fn[:name].to_s.length.positive?
+              Legion::LLM::Types::ToolDefinition.build(
+                name:        fn[:name].to_s,
+                description: fn[:description].to_s,
+                parameters:  fn[:parameters] || {},
+                source:      { type: :client, executable: true }
+              )
+            rescue StandardError => e
+              handle_exception(e, level: :warn, handled: true, operation: 'llm.api.openai.responses.build_tool')
+              nil
+            end
+          end
+          def self.format_response(pipeline_response, request_id:, model:)
+            routing = pipeline_response.routing || {}
+            tokens = pipeline_response.tokens || {}
+            raw_msg = pipeline_response.message
+            content = raw_msg.is_a?(Hash) ? (raw_msg[:content] || raw_msg['content']).to_s : raw_msg.to_s
+            resolved_model = (routing[:model] || routing['model'] || model).to_s
+            output = []
+            tool_calls = build_output_tool_calls(pipeline_response)
+            output.concat(tool_calls)
+            output << {
+              type:    'message',
+              id:      "msg_#{SecureRandom.hex(12)}",
+              role:    'assistant',
+              content: [{ type: 'output_text', text: content }],
+              status:  'completed'
+            }
+            input_tokens = extract_token(tokens, :input)
+            output_tokens = extract_token(tokens, :output)
+            {
+              id:         request_id,
+              object:     'response',
+              created_at: Time.now.to_i,
+              model:      resolved_model,
+              output:     output,
+              usage:      {
+                input_tokens:  input_tokens,
+                output_tokens: output_tokens,
+                total_tokens:  input_tokens.to_i + output_tokens.to_i
+              },
+              status:     'completed'
+            }
+          end
+          def self.stream_response(out, executor, request_id:, model:)
+            out << "event: response.created\ndata: #{Legion::JSON.dump({ id: request_id, object: 'response', status: 'in_progress' })}\n\n"
+            msg_id = "msg_#{SecureRandom.hex(12)}"
+            item_event = { type: 'message', id: msg_id, role: 'assistant', content: [], status: 'in_progress' }
+            out << "event: response.output_item.added\ndata: #{Legion::JSON.dump({ output_index: 0, item: item_event })}\n\n"
+            full_text = +''
+            pipeline_response = executor.call_stream do |chunk|
+              text = chunk.respond_to?(:content) ? chunk.content.to_s : chunk.to_s
+              next if text.empty?
+              full_text << text
+              delta_event = { content_index: 0, delta: text }
+              out << "event: response.output_text.delta\ndata: #{Legion::JSON.dump(delta_event)}\n\n"
+            end
+            routing = pipeline_response.routing || {}
+            tokens = pipeline_response.tokens || {}
+            resolved_model = (routing[:model] || routing['model'] || model).to_s
+            input_tokens = extract_token(tokens, :input)
+            output_tokens = extract_token(tokens, :output)
+            out << "event: response.output_text.done\ndata: #{Legion::JSON.dump({ content_index: 0, text: full_text })}\n\n"
+            done_item = {
+              output_index: 0,
+              item:         { type: 'message', id: msg_id, role: 'assistant',
+                              content: [{ type: 'output_text', text: full_text }], status: 'completed' }
+            }
+            out << "event: response.output_item.done\ndata: #{Legion::JSON.dump(done_item)}\n\n"
+            done_data = {
+              id:     request_id,
+              object: 'response',
+              model:  resolved_model,
+              status: 'completed',
+              usage:  {
+                input_tokens:  input_tokens,
+                output_tokens: output_tokens,
+                total_tokens:  input_tokens.to_i + output_tokens.to_i
+              }
+            }
+            out << "event: response.completed\ndata: #{Legion::JSON.dump(done_data)}\n\n"
+            log.info("[llm][api][openai][responses] action=stream_complete request_id=#{request_id} model=#{resolved_model}")
+          end
+          def self.build_output_tool_calls(pipeline_response)
+            tools_data = pipeline_response.respond_to?(:tools) ? pipeline_response.tools : nil
+            return [] unless tools_data.is_a?(Array) && !tools_data.empty?
+            tools_data.filter_map do |tc|
+              name = tc.respond_to?(:name) ? tc.name : (tc[:name] || tc['name'])
+              args = tc.respond_to?(:arguments) ? tc.arguments : (tc[:arguments] || tc['arguments'] || {})
+              tc_id = tc.respond_to?(:id) ? tc.id : (tc[:id] || tc['id'] || "call_#{SecureRandom.hex(8)}")
+              next unless name
+              {
+                type:      'function_call',
+                id:        "fc_#{SecureRandom.hex(12)}",
+                call_id:   tc_id,
+                name:      name.to_s,
+                arguments: args.is_a?(String) ? args : Legion::JSON.dump(args),
+                status:    'completed'
+              }
+            end
+          end
+          def self.extract_token(tokens, key)
+            return 0 if tokens.nil?
+            return (tokens[key] || tokens[key.to_s] || 0).to_i if tokens.is_a?(Hash)
+            method_name = { input: :input_tokens, output: :output_tokens }[key]
+            return tokens.public_send(method_name).to_i if method_name && tokens.respond_to?(method_name)
+            0
+          end
+        end
+      end
+    end
+  end
+end

data/lib/legion/llm/api.rb CHANGED Viewed

@@ -15,6 +15,7 @@ require_relative 'api/translators/openai_response'
 require_relative 'api/openai/chat_completions'
 require_relative 'api/openai/models'
 require_relative 'api/openai/embeddings'
+require_relative 'api/openai/responses'
 require_relative 'api/translators/anthropic_request'
 require_relative 'api/translators/anthropic_response'
 require_relative 'api/anthropic/messages'
@@ -41,6 +42,7 @@ module Legion
         OpenAI::ChatCompletions.registered(app)
         OpenAI::Models.registered(app)
         OpenAI::Embeddings.registered(app)
+        OpenAI::Responses.registered(app)
         Anthropic::Messages.registered(app)
         log.debug('[llm][api] all routes registered')
       end

data/lib/legion/llm/version.rb CHANGED Viewed

@@ -2,6 +2,6 @@
 module Legion
   module LLM
-    VERSION = '0.9.32'
+    VERSION = '0.9.33'
   end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: legion-llm
 version: !ruby/object:Gem::Version
-  version: 0.9.32
+  version: 0.9.33
 platform: ruby
 authors:
 - Esity
@@ -204,6 +204,7 @@ files:
 - lib/legion/llm/api/openai/chat_completions.rb
 - lib/legion/llm/api/openai/embeddings.rb
 - lib/legion/llm/api/openai/models.rb
+- lib/legion/llm/api/openai/responses.rb
 - lib/legion/llm/api/translators/anthropic_request.rb
 - lib/legion/llm/api/translators/anthropic_response.rb
 - lib/legion/llm/api/translators/openai_request.rb