RubyGems - llm_gateway - Versions diffs - 0.3.0 → 0.4.0 - Mend

llm_gateway 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +26 -0
data/README.md +544 -186
data/Rakefile +1 -2
data/docs/migration-guide.md +135 -0
data/lib/llm_gateway/adapters/adapter.rb +173 -0
data/lib/llm_gateway/adapters/anthropic/acts_like_messages.rb +23 -0
data/lib/llm_gateway/adapters/{claude → anthropic}/bidirectional_message_mapper.rb +31 -3
data/lib/llm_gateway/adapters/{claude → anthropic}/input_mapper.rb +4 -3
data/lib/llm_gateway/adapters/anthropic/messages_adapter.rb +19 -0
data/lib/llm_gateway/adapters/{claude → anthropic}/output_mapper.rb +1 -1
data/lib/llm_gateway/adapters/anthropic/stream_mapper.rb +110 -0
data/lib/llm_gateway/adapters/anthropic_option_mapper.rb +53 -0
data/lib/llm_gateway/adapters/groq/chat_completions_adapter.rb +47 -0
data/lib/llm_gateway/adapters/groq/option_mapper.rb +27 -0
data/lib/llm_gateway/adapters/input_message_sanitizer.rb +93 -0
data/lib/llm_gateway/adapters/openai/acts_like_chat_completions.rb +22 -0
data/lib/llm_gateway/adapters/openai/acts_like_responses.rb +31 -0
data/lib/llm_gateway/adapters/{open_ai → openai}/chat_completions/bidirectional_message_mapper.rb +9 -2
data/lib/llm_gateway/adapters/{open_ai → openai}/chat_completions/input_mapper.rb +1 -6
data/lib/llm_gateway/adapters/openai/chat_completions/input_message_sanitizer.rb +65 -0
data/lib/llm_gateway/adapters/openai/chat_completions/option_mapper.rb +39 -0
data/lib/llm_gateway/adapters/{open_ai → openai}/chat_completions/output_mapper.rb +1 -1
data/lib/llm_gateway/adapters/openai/chat_completions/stream_mapper.rb +242 -0
data/lib/llm_gateway/adapters/openai/chat_completions_adapter.rb +20 -0
data/lib/llm_gateway/adapters/{open_ai → openai}/file_output_mapper.rb +1 -1
data/lib/llm_gateway/adapters/openai/prompt_cache_option_mapper.rb +39 -0
data/lib/llm_gateway/adapters/{open_ai → openai}/responses/bidirectional_message_mapper.rb +52 -4
data/lib/llm_gateway/adapters/openai/responses/input_mapper.rb +106 -0
data/lib/llm_gateway/adapters/openai/responses/option_mapper.rb +41 -0
data/lib/llm_gateway/adapters/{open_ai → openai}/responses/output_mapper.rb +1 -1
data/lib/llm_gateway/adapters/openai/responses/stream_mapper.rb +340 -0
data/lib/llm_gateway/adapters/openai/responses_adapter.rb +20 -0
data/lib/llm_gateway/adapters/openai_codex/input_mapper.rb +206 -0
data/lib/llm_gateway/adapters/openai_codex/option_mapper.rb +28 -0
data/lib/llm_gateway/adapters/openai_codex/responses_adapter.rb +38 -0
data/lib/llm_gateway/adapters/option_mapper.rb +13 -0
data/lib/llm_gateway/adapters/stream_accumulator.rb +91 -0
data/lib/llm_gateway/adapters/structs.rb +145 -0
data/lib/llm_gateway/base_client.rb +62 -1
data/lib/llm_gateway/client.rb +45 -129
data/lib/llm_gateway/clients/anthropic.rb +167 -0
data/lib/llm_gateway/clients/claude_code/oauth_flow.rb +162 -0
data/lib/llm_gateway/clients/claude_code/token_manager.rb +112 -0
data/lib/llm_gateway/clients/groq.rb +54 -0
data/lib/llm_gateway/clients/openai.rb +208 -0
data/lib/llm_gateway/clients/openai_codex/oauth_flow.rb +258 -0
data/lib/llm_gateway/clients/openai_codex/token_manager.rb +71 -0
data/lib/llm_gateway/errors.rb +21 -0
data/lib/llm_gateway/prompt.rb +12 -1
data/lib/llm_gateway/provider_registry.rb +37 -0
data/lib/llm_gateway/version.rb +1 -1
data/lib/llm_gateway.rb +165 -14
data/scripts/create_anthropic_credentials.rb +106 -0
data/scripts/create_openai_codex_credentials.rb +116 -0
data/scripts/generate_handoff_live_fixture.rb +169 -0
data/scripts/generate_handoff_media_fixture.rb +167 -0
metadata +64 -28
data/lib/llm_gateway/adapters/claude/client.rb +0 -60
data/lib/llm_gateway/adapters/groq/bidirectional_message_mapper.rb +0 -18
data/lib/llm_gateway/adapters/groq/client.rb +0 -58
data/lib/llm_gateway/adapters/groq/input_mapper.rb +0 -18
data/lib/llm_gateway/adapters/groq/output_mapper.rb +0 -10
data/lib/llm_gateway/adapters/open_ai/client.rb +0 -80
data/lib/llm_gateway/adapters/open_ai/responses/input_mapper.rb +0 -62
data/sample/claude_code_clone/agent.rb +0 -65
data/sample/claude_code_clone/claude_code_clone.rb +0 -40
data/sample/claude_code_clone/prompt.rb +0 -79
data/sample/claude_code_clone/run.rb +0 -47
data/sample/claude_code_clone/tools/bash_tool.rb +0 -54
data/sample/claude_code_clone/tools/edit_tool.rb +0 -61
data/sample/claude_code_clone/tools/grep_tool.rb +0 -113
data/sample/claude_code_clone/tools/read_tool.rb +0 -61
data/sample/claude_code_clone/tools/todowrite_tool.rb +0 -98

data/lib/llm_gateway/adapters/openai_codex/input_mapper.rb ADDED Viewed

@@ -0,0 +1,206 @@
+# frozen_string_literal: true
+require "json"
+require_relative "../openai/responses/input_mapper"
+module LlmGateway
+  module Adapters
+    module OpenAICodex
+      # Custom input mapper for the Codex backend.
+      #
+      # The Codex Responses endpoint rejects several content block types that
+      # the standard OpenAI Responses InputMapper passes through:
+      #   - "reasoning" and "summary_text" blocks are never accepted as input.
+      #   - "thinking" blocks are only valid when they carry an encrypted
+      #     `signature`; unsigned thinking blocks must be dropped.
+      #
+      # Additional normalisation:
+      #   - Tool-result output is coerced to recognised Responses input types
+      #     (input_text / input_image).
+      #   - Assistant text content is always sent as "output_text" (not
+      #     "input_text") because Codex is strict about directionality.
+      #   - function_call / tool_use blocks inside an assistant turn are
+      #     promoted to top-level function_call items so that Codex can match
+      #     them against the subsequent function_call_output items.
+      class InputMapper < OpenAI::Responses::InputMapper
+        def self.map_messages(messages)
+          return messages unless messages.is_a?(Array)
+          mapper  = message_mapper
+          stripped = strip_reasoning_blocks(messages)
+          mapped = stripped.each_with_object([]) do |msg, acc|
+            next unless msg.is_a?(Hash)
+            role    = msg[:role]
+            content = msg[:content]
+            if %w[user developer].include?(role) && tool_result_message?(content)
+              # Responses API expects tool results as top-level input items.
+              # Also normalise nested tool_result output blocks to Responses
+              # input types (text → input_text, image → input_image).
+              content.each { |part| acc << map_tool_result_for_responses(part, mapper) }
+              next
+            end
+            if role == "assistant" && content.is_a?(Array)
+              acc.concat(map_assistant_content(content, mapper))
+              next
+            end
+            mapped_content =
+              if content.is_a?(Array)
+                content.map { |part| mapper.map_content(part) }
+              else
+                [ mapper.map_content(content) ]
+              end
+            acc << { role: role, content: mapped_content }
+          end
+          normalize_assistant_content_types(mapped)
+        end
+        # Recursively strip Codex-incompatible content blocks from a message tree.
+        #
+        #   "reasoning"    → always removed
+        #   "summary_text" → always removed
+        #   "thinking"     → removed unless :signature is present
+        def self.strip_reasoning_blocks(obj)
+          case obj
+          when Array
+            obj.map { |item| strip_reasoning_blocks(item) }.compact
+          when Hash
+            type = obj[:type]
+            return nil if %w[reasoning summary_text].include?(type)
+            return nil if type == "thinking" && obj[:signature].nil?
+            obj.each_with_object({}) do |(k, v), acc|
+              result = strip_reasoning_blocks(v)
+              acc[k] = result unless result.nil?
+            end
+          else
+            obj
+          end
+        end
+        # Ensure assistant messages carry "output_text" rather than "input_text".
+        # The BidirectionalMessageMapper maps plain text blocks to "input_text";
+        # Codex is strict about directionality and rejects "input_text" on the
+        # assistant side.
+        def self.normalize_assistant_content_types(messages)
+          return messages unless messages.is_a?(Array)
+          messages.map do |msg|
+            next msg unless msg.is_a?(Hash) && msg[:role] == "assistant" && msg[:content].is_a?(Array)
+            msg.merge(
+              content: msg[:content].map do |part|
+                part.is_a?(Hash) && part[:type] == "input_text" ? part.merge(type: "output_text") : part
+              end
+            )
+          end
+        end
+        def self.tool_result_message?(content)
+          content.is_a?(Array) &&
+            content.first.is_a?(Hash) &&
+            content.first[:type] == "tool_result"
+        end
+        # Map assistant content blocks into Codex-compatible top-level items.
+        #
+        # - thinking with signature  → parsed JSON reasoning item (the encrypted
+        #                               signature *is* the serialised item)
+        # - tool_use / function_call → top-level function_call item
+        # - text / *_text variants   → output_text inside an assistant content block
+        # - anything else            → delegated to the BidirectionalMessageMapper
+        def self.map_assistant_content(content, mapper)
+          text_parts = []
+          items      = []
+          content.each do |part|
+            next unless part.is_a?(Hash)
+            case part[:type]
+            when "tool_use", "function_call"
+              call_id   = part[:id] || part[:call_id]
+              arguments = part[:input] || part[:arguments] || {}
+              arguments = JSON.generate(arguments) unless arguments.is_a?(String)
+              items << {
+                type: "function_call",
+                call_id: call_id,
+                name: part[:name],
+                arguments: arguments
+              }.compact
+            when "thinking"
+              # Only signed thinking blocks survive strip_reasoning_blocks;
+              # the signature payload is the full reasoning item JSON.
+              signature = part[:signature]
+              if signature
+                begin
+                  items << JSON.parse(signature, symbolize_names: true)
+                rescue JSON::ParserError
+                  # Malformed signature — silently drop.
+                end
+              end
+            when "text", "input_text", "output_text"
+              text_parts << { type: "output_text", text: part[:text].to_s }
+            else
+              mapped = mapper.map_content(part)
+              text_parts << mapped if mapped
+            end
+          end
+          # Text parts form a single assistant message; tool/reasoning items follow.
+          items.unshift({ role: "assistant", content: text_parts }) if text_parts.any?
+          items
+        end
+        # Wrap a tool_result part in the Responses wire format, normalising the
+        # nested output content types along the way.
+        def self.map_tool_result_for_responses(part, mapper)
+          return mapper.map_content(part) unless part.is_a?(Hash) && part[:type] == "tool_result"
+          mapper.map_content(part.merge(content: normalize_tool_result_output(part[:content])))
+        end
+        # Coerce each element of a tool result's output array to a Responses
+        # input type (input_text or input_image).
+        def self.normalize_tool_result_output(output)
+          Array(output).map do |item|
+            case item
+            when String
+              { type: "input_text", text: item }
+            when Hash
+              type = item[:type] || item["type"]
+              case type
+              when "text", "input_text", "output_text"
+                { type: "input_text", text: (item[:text] || item["text"]).to_s }
+              when "image", "input_image"
+                data      = item[:data]      || item["data"]
+                mime      = item[:mimeType]  || item["mimeType"] ||
+                            item[:media_type] || item["media_type"] || "image/png"
+                image_url = item[:image_url] || item["image_url"] ||
+                            "data:#{mime};base64,#{data}"
+                { type: "input_image", image_url: image_url }
+              else
+                item
+              end
+            else
+              { type: "input_text", text: item.to_s }
+            end
+          end
+        end
+        private_class_method :strip_reasoning_blocks, :normalize_assistant_content_types,
+                             :tool_result_message?, :map_assistant_content,
+                             :map_tool_result_for_responses, :normalize_tool_result_output
+      end
+    end
+  end
+end

data/lib/llm_gateway/adapters/openai_codex/option_mapper.rb ADDED Viewed

@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+require_relative "../openai/responses/option_mapper"
+module LlmGateway
+  module Adapters
+    module OpenAICodex
+      module OptionMapper
+        module_function
+        def map(options)
+          mapped_options = OpenAI::Responses::OptionMapper.map(options)
+          # Codex endpoint currently rejects token limit parameters.
+          mapped_options.delete(:max_output_tokens)
+          mapped_options.delete(:max_completion_tokens)
+          # Codex transport does not use retention flags in the request body.
+          mapped_options.delete(:prompt_cache_retention)
+          mapped_options.delete(:cacheRetention)
+          mapped_options.delete(:cache_retention)
+          mapped_options
+        end
+      end
+    end
+  end
+end

data/lib/llm_gateway/adapters/openai_codex/responses_adapter.rb ADDED Viewed

@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+require_relative "../adapter"
+require_relative "../openai/acts_like_responses"
+require_relative "../openai/responses/output_mapper"
+require_relative "option_mapper"
+require_relative "../openai/responses/stream_mapper"
+require_relative "../openai/file_output_mapper"
+require_relative "input_mapper"
+require_relative "../input_message_sanitizer"
+module LlmGateway
+  module Adapters
+    module OpenAICodex
+      # Adapter for the Codex backend. It mixes in ActsLikeOpenAIResponses and
+      # supplies the Codex-specific mappers and client calls below.
+      class ResponsesAdapter < Adapter
+        include ActsLikeOpenAIResponses
+        private
+        # Input mapper class used by this adapter.
+        def input_mapper
+          OpenAICodex::InputMapper
+        end
+        # NOTE(review): the bare constant is resolved lexically, so inside
+        # OpenAICodex it should pick up OpenAICodex::OptionMapper (loaded via
+        # require_relative "option_mapper") rather than Adapters::OptionMapper
+        # — confirm.
+        def option_mapper
+          OptionMapper
+        end
+        # Non-streaming request: forwards everything to client.chat_codex.
+        def perform_chat(messages, tools:, system:, **options)
+          client.chat_codex(messages, tools: tools, system: system, **options)
+        end
+        # Streaming request: forwards everything, including the caller's
+        # block, to client.stream_codex.
+        def perform_stream(messages, tools:, system:, **options, &block)
+          client.stream_codex(messages, tools: tools, system: system, **options, &block)
+        end
+      end
+    end
+  end
+end

data/lib/llm_gateway/adapters/option_mapper.rb ADDED Viewed

@@ -0,0 +1,13 @@
+# frozen_string_literal: true
+module LlmGateway
+  module Adapters
+    # Pass-through option mapper: +map+ returns the options it was given,
+    # unchanged and uncopied.
+    module OptionMapper
+      module_function
+      # @param options [Object] caller-supplied options
+      # @return [Object] the same +options+ object
+      def map(options)
+        options
+      end
+    end
+  end
+end

data/lib/llm_gateway/adapters/stream_accumulator.rb ADDED Viewed

@@ -0,0 +1,91 @@
+require "json"
+class StreamAccumulator
+  attr_accessor :blocks, :message_hash, :usage_hash
+  def initialize
+    @message_hash = {}
+    @usage_hash = {
+      input_tokens: 0,
+      cache_creation_input_tokens: 0,
+      cache_read_input_tokens: 0,
+      output_tokens: 0,
+      reasoning_tokens: 0
+    }
+    @blocks = []
+  end
+  def result
+    message_hash.merge(
+      usage: usage_hash,
+      content: serialized_blocks
+    )
+  end
+  def push(event)
+    return unless event
+    case event.type
+    when :text_start
+      blocks[event.content_index] = {
+        type: "text",
+        text: ""
+      }
+      blocks[event.content_index][:text] += event.delta
+    when :text_delta, :text_end
+      blocks[event.content_index][:text] += event.delta
+    when :tool_start
+      blocks[event.content_index] = {
+        type: "tool_use",
+        id: event.id,
+        name: event.name,
+        input: ""
+      }
+    when :tool_delta, :tool_end
+      blocks[event.content_index][:input] += event.delta
+    when :message_start
+      message_hash.merge!(event.delta)
+      usage_hash.each_key do |key|
+        usage_hash[key] += event.usage_increment.fetch(key, 0)
+      end
+    when :reasoning_start
+      blocks[event.content_index] = {
+        type: "reasoning",
+        reasoning: "",
+        signature: ""
+      }
+      blocks[event.content_index][:reasoning] += event.delta
+      blocks[event.content_index][:signature] += event.respond_to?(:signature) ? event.signature : ""
+    when :reasoning_delta
+      blocks[event.content_index][:reasoning] += event.delta
+      blocks[event.content_index][:signature] += event.signature
+    when :reasoning_end
+      blocks[event.content_index][:reasoning] += event.delta
+      blocks[event.content_index][:signature] += event.respond_to?(:signature) ? event.signature : ""
+    when :message_delta
+      message_hash.merge!(event.delta)
+      usage_hash.each_key do |key|
+        usage_hash[key] += event.usage_increment.fetch(key, 0)
+      end
+    when :message_end
+    end
+  end
+  private
+  def serialized_blocks
+    blocks.map do |block|
+      next block unless block[:type] == "tool_use"
+      block.merge(input: LlmGateway::Utils.deep_symbolize_keys(parse_tool_input(block[:input])))
+    end
+  end
+  def parse_tool_input(input)
+    return {} if input.nil? || input.empty?
+    JSON.parse(input)
+  rescue JSON::ParserError
+    {}
+  end
+end

data/lib/llm_gateway/adapters/structs.rb ADDED Viewed

@@ -0,0 +1,145 @@
+require "dry-struct"
+require "dry-types"
+# Namespace for the dry-types builders (Types::String, Types::Integer,
+# Types::Coercible::..., Types.Instance) referenced by the structs in this file.
+module Types
+  include Dry.Types()
+end
+# Common parent of the structs in this file. Incoming attribute keys are
+# passed through to_sym, so string-keyed hashes are accepted as input.
+class BaseStruct < Dry::Struct
+  transform_keys(&:to_sym)
+end
+# A content-level stream event: the start, delta or end of a text, tool or
+# reasoning block.
+class AssistantStreamEvent < BaseStruct
+  # Allowed event kinds; values are coerced to symbols before the check.
+  EventType = Types::Coercible::Symbol.enum(:text_start, :text_delta, :text_end, :tool_start, :tool_delta, :tool_end, :reasoning_start, :reasoning_delta, :reasoning_end)
+  attribute :type, EventType
+  # Payload fragment carried by the event; defaults to "".
+  attribute :delta, Types::Coercible::String.default { "" }
+  # Position of the content block this event belongs to.
+  attribute :content_index, Types::Integer
+end
+# Stream event that additionally carries the id and name of a tool call.
+class AssistantToolStartEvent < AssistantStreamEvent
+  attribute :id, Types::String
+  attribute :name, Types::String
+  # Re-declared with the same type as in AssistantStreamEvent.
+  attribute :content_index, Types::Integer
+end
+# Stream event that additionally carries a signature string.
+class AssistantStreamReasoningEvent < AssistantStreamEvent
+  # Signature fragment; defaults to "".
+  attribute :signature, Types::Coercible::String.default { "" }
+  # Re-declared with the same type as in AssistantStreamEvent.
+  attribute :content_index, Types::Integer
+end
+# A message-level stream event (start, delta or end of the whole message).
+class AssistantStreamMessageEvent < BaseStruct
+  # Allowed event kinds; values are coerced to symbols before the check.
+  EventType = Types::Coercible::Symbol.enum(:message_start, :message_delta, :message_end)
+  attribute :type, EventType
+  # Message fields carried by this event; defaults to {}.
+  attribute :delta, Types::Coercible::Hash.default { {} }
+  # Per-counter usage amounts carried by this event; defaults to {}.
+  attribute :usage_increment, Types::Coercible::Hash.default { {} }
+end
+# A text content block; +type+ must be "text".
+class TextContent < BaseStruct
+  attribute :type, Types::String.enum("text")
+  attribute :text, Types::String
+  # @return [Hash] { type:, text: }
+  def to_h
+    {
+      type: type,
+      text: text
+    }
+  end
+end
+# A reasoning content block; +type+ must be "reasoning". The signature is
+# optional.
+class ReasoningContent < BaseStruct
+  attribute :type, Types::String.enum("reasoning")
+  attribute :reasoning, Types::String
+  attribute? :signature, Types::String.optional
+  # @return [Hash] { type:, reasoning: } plus :signature when it is not nil
+  def to_h
+    result = {
+      type: type,
+      reasoning: reasoning
+    }
+    result[:signature] = signature unless signature.nil?
+    result
+  end
+end
+# A tool-call content block; +type+ must be "tool_use" and +input+ a Hash.
+class ToolCall < BaseStruct
+  attribute :id, Types::String
+  attribute :type, Types::String.enum("tool_use")
+  attribute :name, Types::String
+  attribute :input, Types::Hash
+  # @return [Hash] { id:, type:, name:, input: }
+  def to_h
+    {
+      id: id,
+      type: type,
+      name: name,
+      input: input
+    }
+  end
+end
+# A tool-result block; +type+ must be "tool_result". +content+ is declared
+# as a String here.
+class ToolResult < BaseStruct
+  attribute :type, Types::String.enum("tool_result")
+  attribute :tool_use_id, Types::String
+  attribute :content, Types::String
+end
+# A complete assistant message: identity/metadata fields plus a list of
+# typed content blocks (TextContent, ReasoningContent or ToolCall).
+class AssistantMessage < BaseStruct
+  # Accepted content element types.
+  ContentBlock =
+    Types.Instance(TextContent) |
+    Types.Instance(ReasoningContent) |
+    Types.Instance(ToolCall)
+  attribute :id, Types::String
+  attribute :model, Types::String
+  attribute :usage, Types::Hash
+  attribute :role, Types::String.enum("assistant")
+  # Both "tool_use" and "toolUse" spellings are accepted.
+  attribute :stop_reason, Types::String.enum("stop", "length", "tool_use", "toolUse", "error", "aborted")
+  attribute :provider, Types::String
+  attribute :api, Types::String
+  attribute? :error_message, Types::String.optional
+  attribute :content, Types::Array.of(ContentBlock)
+  # Build a message from a hash-like object. Keys are symbolised and every
+  # element of :content (nil is treated as an empty list) is converted to a
+  # content struct via build_content_block before the struct is created.
+  # NOTE(review): this override takes exactly one argument; confirm nothing
+  # relies on extra arguments or a block accepted by the parent's .new.
+  def self.new(attributes)
+    attrs = attributes.to_h.transform_keys(&:to_sym)
+    attrs[:content] = Array(attrs[:content]).map { |block| build_content_block(block) }
+    super(attrs)
+  end
+  # @return [Hash] plain-hash form with each content block converted via
+  #   to_h; :error_message is included only when it is not nil
+  def to_h
+    result = {
+      id: id,
+      model: model,
+      usage: usage,
+      role: role,
+      stop_reason: stop_reason,
+      provider: provider,
+      api: api,
+      content: content.map(&:to_h)
+    }
+    result[:error_message] = error_message unless error_message.nil?
+    result
+  end
+  # Turn one raw content entry into its struct. Existing struct instances
+  # pass through; hashes are dispatched on :type / "type". A "thinking"
+  # block is converted to ReasoningContent, taking its text from :thinking
+  # (falling back to :reasoning).
+  #
+  # @raise [ArgumentError] for any other type
+  def self.build_content_block(block)
+    return block if block.is_a?(TextContent) || block.is_a?(ReasoningContent) || block.is_a?(ToolCall)
+    case block[:type] || block["type"]
+    when "text"
+      TextContent.new(block)
+    when "reasoning"
+      ReasoningContent.new(block)
+    when "thinking"
+      ReasoningContent.new(type: "reasoning", reasoning: block[:thinking] || block["thinking"] || block[:reasoning] || block["reasoning"], signature: block[:signature] || block["signature"])
+    when "tool_use"
+      ToolCall.new(block)
+    else
+      raise ArgumentError, "Unsupported content block type: #{block[:type] || block['type']}"
+    end
+  end
+  private_class_method :build_content_block
+end

data/lib/llm_gateway/base_client.rb CHANGED Viewed

@@ -60,8 +60,70 @@ module LlmGateway
       process_response(response)
     end
+    def post_stream(url_part, body = nil, extra_headers = {}, &block)
+      endpoint = "#{base_endpoint}/#{url_part.sub(%r{^/}, "")}"
+      uri = URI(endpoint)
+      http = Net::HTTP.new(uri.host, uri.port)
+      http.use_ssl = true
+      http.read_timeout = 480
+      http.open_timeout = 10
+      body.merge!(stream: true)
+      request = Net::HTTP::Post.new(uri)
+      headers = build_headers.merge(extra_headers)
+      headers.each { |key, value| request[key] = value }
+      request.body = body.to_json if body
+      http.request(request) do |response|
+        unless response.code.to_i == 200
+          # Collect full body for error handling
+          full_body = +""
+          response.read_body { |chunk| full_body << chunk }
+          # Create a response-like object with the body for handle_error
+          response.instance_variable_set(:@body, full_body)
+          response.instance_variable_set(:@read, true)
+          handle_error(response)
+        end
+        parse_sse_stream(response, &block)
+      end
+    end
     protected
+    def parse_sse_stream(response)
+      buffer = +""
+      response.read_body do |chunk|
+        buffer << chunk
+        while (idx = buffer.index("\n\n"))
+          raw_event = buffer.slice!(0, idx + 2)
+          event_type = nil
+          data_lines = []
+          raw_event.each_line do |line|
+            line = line.chomp
+            if line.start_with?("event:")
+              event_type = line.sub(/^event:\s*/, "")
+            elsif line.start_with?("data:")
+              data_lines << line.sub(/^data:\s*/, "")
+            end
+          end
+          next if data_lines.empty?
+          data_str = data_lines.join("\n")
+          next if data_str == "[DONE]"
+          data = begin
+            LlmGateway::Utils.deep_symbolize_keys(JSON.parse(data_str))
+          rescue JSON::ParserError
+            { raw: data_str }
+          end
+          yield({ event: event_type, data: data })
+        end
+      end
+    end
     def make_request(endpoint, method, params = nil, extra_headers = {})
       uri = URI(endpoint)
       http = Net::HTTP.new(uri.host, uri.port)
@@ -73,7 +135,6 @@ module LlmGateway
       headers = build_headers.merge(extra_headers)
       headers.each { |key, value| request[key] = value }
       request.body = params.to_json if params
       http.request(request)
     end