RubyGems - llm_gateway - Versions diffs - 0.4.0 → 0.6.0 - Mend

llm_gateway 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48)

checksums.yaml +4 -4
data/.pi/skills/live-provider-testing/SKILL.md +183 -0
data/.pi/skills/options-development/SKILL.md +131 -0
data/CHANGELOG.md +43 -0
data/README.md +110 -41
data/Rakefile +1 -0
data/docs/migration_guide_0.6.0.md +386 -0
data/lib/llm_gateway/adapters/adapter.rb +8 -44
data/lib/llm_gateway/adapters/anthropic/acts_like_messages.rb +0 -2
data/lib/llm_gateway/adapters/anthropic/input_mapper.rb +106 -27
data/lib/llm_gateway/adapters/anthropic/output_mapper.rb +0 -33
data/lib/llm_gateway/adapters/anthropic/stream_mapper.rb +59 -47
data/lib/llm_gateway/adapters/anthropic_option_mapper.rb +48 -6
data/lib/llm_gateway/adapters/groq/chat_completions_adapter.rb +3 -2
data/lib/llm_gateway/adapters/groq/input_mapper.rb +44 -0
data/lib/llm_gateway/adapters/groq/option_mapper.rb +89 -4
data/lib/llm_gateway/adapters/normalized_stream_accumulator.rb +336 -0
data/lib/llm_gateway/adapters/openai/acts_like_chat_completions.rb +0 -2
data/lib/llm_gateway/adapters/openai/acts_like_responses.rb +0 -6
data/lib/llm_gateway/adapters/openai/chat_completions/input_mapper.rb +135 -72
data/lib/llm_gateway/adapters/openai/chat_completions/option_mapper.rb +100 -10
data/lib/llm_gateway/adapters/openai/chat_completions/stream_mapper.rb +193 -170
data/lib/llm_gateway/adapters/openai/chat_completions_adapter.rb +0 -1
data/lib/llm_gateway/adapters/openai/responses/input_mapper.rb +128 -68
data/lib/llm_gateway/adapters/openai/responses/option_mapper.rb +99 -10
data/lib/llm_gateway/adapters/openai/responses/stream_mapper.rb +106 -275
data/lib/llm_gateway/adapters/openai/responses_adapter.rb +0 -1
data/lib/llm_gateway/adapters/openai_codex/input_mapper.rb +3 -3
data/lib/llm_gateway/adapters/openai_codex/responses_adapter.rb +0 -5
data/lib/llm_gateway/adapters/stream_mapper.rb +57 -0
data/lib/llm_gateway/adapters/structs.rb +102 -52
data/lib/llm_gateway/base_client.rb +2 -4
data/lib/llm_gateway/client.rb +10 -66
data/lib/llm_gateway/clients/anthropic.rb +5 -4
data/lib/llm_gateway/clients/groq.rb +18 -4
data/lib/llm_gateway/clients/openai.rb +20 -18
data/lib/llm_gateway/prompt.rb +35 -17
data/lib/llm_gateway/version.rb +1 -1
data/lib/llm_gateway.rb +5 -29
metadata +8 -10
data/lib/llm_gateway/adapters/anthropic/bidirectional_message_mapper.rb +0 -111
data/lib/llm_gateway/adapters/openai/chat_completions/bidirectional_message_mapper.rb +0 -110
data/lib/llm_gateway/adapters/openai/chat_completions/output_mapper.rb +0 -40
data/lib/llm_gateway/adapters/openai/responses/bidirectional_message_mapper.rb +0 -120
data/lib/llm_gateway/adapters/openai/responses/output_mapper.rb +0 -47
data/lib/llm_gateway/adapters/stream_accumulator.rb +0 -91
data/scripts/generate_handoff_live_fixture.rb +0 -169
data/scripts/generate_handoff_media_fixture.rb +0 -167

data/lib/llm_gateway/adapters/openai/responses/stream_mapper.rb CHANGED

@@ -1,337 +1,168 @@
 # frozen_string_literal: true
-require_relative "../../structs"
+require_relative "../../stream_mapper"
 module LlmGateway
   module Adapters
     module OpenAI
       module Responses
-        class StreamMapper
-          def map(chunk)
-            queued_event = shift_queued_event
-            return queued_event if queued_event
+        class StreamMapper < LlmGateway::Adapters::StreamMapper
+          def map(chunk, &block)
             event_type = chunk[:event]
             data = chunk[:data] || {}
             raise_stream_error!(data) if event_type == "error" || data[:error] || data[:type] == "error"
+            push_patches(patches_for(event_type, data), &block)
+          end
+          private
+          def patches_for(event_type, data)
             case event_type
             when "response.created"
-              stash_response(data[:response])
-              nil
+              response_created_patches(data[:response])
             when "response.output_item.added"
-              map_output_item_added(data)
-            when "response.output_item.done"
-              map_output_item_done(data)
+              output_item_added_patches(data)
             when "response.content_part.added"
-              map_content_part_added(data)
-            when "response.content_part.done", "response.output_text.done"
-              map_text_done(data)
+              content_part_added_patches(data)
+            when "response.content_part.done"
+              content_part_done_patches(data)
             when "response.output_text.delta"
-              AssistantStreamEvent.new(
-                type: :text_delta,
-                content_index: content_index_for(data[:output_index] || 0),
-                delta: data[:delta] || ""
-              )
+              [ { type: :text_delta, delta: data[:delta] || "" } ]
             when "response.function_call_arguments.delta"
-              AssistantStreamEvent.new(
-                type: :tool_delta,
-                content_index: content_index_for(data[:output_index] || 0),
-                delta: data[:delta] || ""
-              )
+              [ { type: :tool_delta, delta: data[:delta] || "" } ]
             when "response.function_call_arguments.done"
-              map_tool_done(data)
+              [ { type: :tool_end, delta: "" } ]
+            when "response.reasoning_summary_part.added"
+              [ { type: :reasoning_start, delta: "", signature: "" } ]
             when "response.reasoning_summary_text.delta"
-              output_index = data[:output_index] || 0
-              mark_reasoning_has_content(output_index)
-              AssistantStreamReasoningEvent.new(
-                type: :reasoning_delta,
-                content_index: content_index_for(output_index),
-                delta: data[:delta] || "",
-                signature: ""
-              )
+              [ { type: :reasoning_delta, delta: data[:delta] || "", signature: "" } ]
+            when "response.reasoning_summary_part.done"
+              [ { type: :reasoning_end, delta: "", signature: "" } ]
             when "response.completed"
-              map_response_completed(data[:response])
+              response_completed_patches(data[:response])
             else
-              nil
+              []
             end
           end
-          private
-          def map_output_item_added(data)
-            item = data[:item] || {}
-            output_index = data[:output_index] || 0
+          def response_created_patches(response)
+            response ||= {}
-            case item[:type]
-            when "reasoning"
-              mark_reasoning_started(output_index)
-              AssistantStreamReasoningEvent.new(
-                type: :reasoning_start,
-                content_index: register_content_index(output_index),
-                delta: "",
-                signature: ""
-              )
-            when "message"
-              register_content_index(output_index)
-              ensure_message_started(role: item[:role] || "assistant")
-            when "function_call"
-              stash_role("assistant")
-              mark_tool_started(output_index)
-              AssistantToolStartEvent.new(
-                type: :tool_start,
-                content_index: register_content_index(output_index),
-                delta: "",
-                id: item[:call_id] || item[:id],
-                name: item[:name]
-              )
-            else
-              nil
-            end
+            [
+              {
+                type: :message_start,
+                delta: {
+                  id: response[:id],
+                  model: response[:model],
+                  role: "assistant",
+                  timestamp: timestamp_milliseconds(response[:created_at])
+                }.compact
+              }
+            ]
           end
-          def map_output_item_done(data)
+          def output_item_added_patches(data)
             item = data[:item] || {}
-            output_index = data[:output_index] || 0
             case item[:type]
-            when "reasoning"
-              map_reasoning_done(output_index, item)
+            when "message"
+              return [] unless accumulator.message_hash.empty?
+              [
+                {
+                  type: :message_start,
+                  delta: { role: item[:role] || "assistant" }
+                }
+              ]
             when "function_call"
-              map_function_call_done(output_index, item)
-            else
-              nil
-            end
-          end
-          def map_reasoning_done(output_index, item)
-            content_index = content_index_for(output_index)
-            summary_text = extract_reasoning_summary_text(item)
-            if reasoning_started_without_content?(output_index) && !summary_text.empty?
-              queue_event(
-                AssistantStreamReasoningEvent.new(
-                  type: :reasoning_end,
-                  content_index:,
+              [
+                {
+                  type: :tool_start,
                   delta: "",
-                  signature: ""
-                )
-              )
-              mark_reasoning_completed(output_index)
-              return AssistantStreamReasoningEvent.new(
-                type: :reasoning_delta,
-                content_index:,
-                delta: summary_text,
-                signature: ""
-              )
+                  id: item[:call_id] || item[:id],
+                  name: item[:name]
+                }
+              ]
+            else
+              []
             end
-            mark_reasoning_completed(output_index)
-            AssistantStreamReasoningEvent.new(
-              type: :reasoning_end,
-              content_index:,
-              delta: "",
-              signature: ""
-            )
           end
-          def map_function_call_done(output_index, item)
-            return nil if tool_started?(output_index)
-            mark_tool_started(output_index)
-            queue_event(
-              AssistantStreamEvent.new(
-                type: :tool_end,
-                content_index: content_index_for(output_index),
-                delta: ""
-              )
-            )
+          def content_part_added_patches(data)
+            part = data[:part] || {}
+            return [] unless part[:type] == "output_text"
-            AssistantToolStartEvent.new(
-              type: :tool_start,
-              content_index: register_content_index(output_index),
-              delta: "",
-              id: item[:call_id] || item[:id],
-              name: item[:name]
-            )
+            [ { type: :text_start, delta: "" } ]
           end
-          def map_content_part_added(data)
+          def content_part_done_patches(data)
             part = data[:part] || {}
-            return nil unless part[:type] == "output_text"
+            return [] unless part.empty? || part[:type] == "output_text"
-            AssistantStreamEvent.new(
-              type: :text_start,
-              content_index: content_index_for(data[:output_index] || 0),
-              delta: ""
-            )
+            [ { type: :text_end, delta: "" } ]
           end
-          def map_text_done(data)
-            AssistantStreamEvent.new(
-              type: :text_end,
-              content_index: content_index_for(data[:output_index] || 0),
-              delta: ""
-            )
-          end
-          def map_tool_done(data)
-            AssistantStreamEvent.new(
-              type: :tool_end,
-              content_index: content_index_for(data[:output_index] || 0),
-              delta: ""
-            )
-          end
+          def response_completed_patches(response)
+            response ||= {}
+            patch = {
+              type: :message_delta,
+              delta: {
+                id: response[:id],
+                model: response[:model],
+                role: "assistant",
+                timestamp: timestamp_milliseconds(response[:created_at]),
+                stop_reason: stop_reason_for(response)
+              }.compact
+            }
+            patch[:usage] = usage(response) if response.key?(:usage)
-          def map_response_completed(response)
-            stash_response(response)
-            AssistantStreamMessageEvent.new(
-              type: message_started? ? :message_delta : :message_start,
-              delta: pending_message_attributes.merge(role: pending_message_attributes[:role] || "assistant", stop_reason: stop_reason_for(response)),
-              usage_increment: usage_increment(response)
-            ).tap do
-              @message_started = true
-              clear_pending_message_attributes
-            end
+            [
+              patch,
+              { type: :message_end }
+            ]
           end
-          def usage_increment(response)
+          def usage(response)
             usage = response[:usage] || {}
+            cache_read = token_count(usage.dig(:input_tokens_details, :cached_tokens))
+            cache_write = token_count(
+              usage.dig(:input_tokens_details, :cache_write_tokens),
+              usage[:cache_write_tokens]
+            )
+            input_tokens = token_count(usage[:input_tokens])
+            input = [ input_tokens - cache_read - cache_write, 0 ].max
+            output = token_count(usage[:output_tokens])
             {
-              input_tokens: usage[:input_tokens] || 0,
-              cache_creation_input_tokens: 0,
-              cache_read_input_tokens: usage.dig(:input_tokens_details, :cached_tokens) || 0,
-              output_tokens: usage[:output_tokens] || 0,
-              reasoning_tokens: usage.dig(:output_tokens_details, :reasoning_tokens) || 0
+              input:,
+              cache_write:,
+              cache_read:,
+              output:,
+              total: input + cache_write + cache_read + output,
+              raw: usage
             }
           end
-          def stop_reason_for(response)
-            output = response[:output] || []
-            last_item = output.last || {}
-            tool_state.any? || last_item[:type] == "function_call" ? "tool_use" : "stop"
-          end
-          def ensure_message_started(role: "assistant")
-            return nil if message_started?
-            @message_started = true
-            AssistantStreamMessageEvent.new(
-              type: :message_start,
-              delta: pending_message_attributes.merge(role: role).compact,
-              usage_increment: {}
-            ).tap do
-              clear_pending_message_attributes
-            end
-          end
-          def extract_reasoning_summary_text(item)
-            Array(item[:summary]).filter_map do |summary|
-              next summary[:text] if summary.is_a?(Hash) && summary[:text]
-              next summary[:summary] if summary.is_a?(Hash) && summary[:summary]
-              next summary if summary.is_a?(String)
-            end.join
-          end
-          def mark_reasoning_started(output_index)
-            reasoning_state[output_index] = :started
-          end
-          def mark_reasoning_has_content(output_index)
-            reasoning_state[output_index] = :has_content
-          end
-          def mark_reasoning_completed(output_index)
-            reasoning_state[output_index] = :completed
-          end
-          def reasoning_started_without_content?(output_index)
-            reasoning_state[output_index] == :started
-          end
-          def reasoning_state
-            @reasoning_state ||= {}
-          end
-          def mark_tool_started(output_index)
-            tool_state[output_index] = :started
-          end
-          def tool_started?(output_index)
-            tool_state[output_index] == :started
-          end
-          def tool_state
-            @tool_state ||= {}
-          end
-          def stash_response(response)
-            response ||= {}
-            @pending_message_attributes = pending_message_attributes.merge(
-              id: response[:id],
-              model: response[:model]
-            ).compact
-          end
-          def stash_role(role)
-            @pending_message_attributes = pending_message_attributes.merge(role:)
-          end
-          def pending_message_attributes
-            @pending_message_attributes ||= {}
-          end
-          def clear_pending_message_attributes
-            @pending_message_attributes = {}
-          end
-          def register_content_index(output_index)
-            content_index_map[output_index] ||= next_content_index!
-          end
-          def content_index_for(output_index)
-            content_index_map.fetch(output_index) { register_content_index(output_index) }
-          end
-          def next_content_index!
-            @next_content_index ||= 0
-            current = @next_content_index
-            @next_content_index += 1
-            current
-          end
-          def content_index_map
-            @content_index_map ||= {}
+          def token_count(*values)
+            values.compact.first.to_i
           end
-          def message_started?
-            @message_started ||= false
-          end
+          def timestamp_milliseconds(unix_seconds)
+            return nil if unix_seconds.nil?
-          def queue_event(event)
-            queued_events << event
+            (unix_seconds.to_f * 1000).to_i
           end
-          def shift_queued_event
-            queued_events.shift
-          end
+          def stop_reason_for(response)
+            output = response[:output] || []
+            last_item = output.last || {}
-          def queued_events
-            @queued_events ||= []
+            tool_seen? || last_item[:type] == "function_call" ? "tool_use" : "stop"
           end
-          def raise_stream_error!(data)
-            error = data[:error].is_a?(Hash) ? data[:error] : data
-            message = error[:message] || "Stream error"
-            code = error[:code] || error[:type]
-            if LlmGateway::Errors.context_overflow_message?(message)
-              raise LlmGateway::Errors::PromptTooLong.new(message, code)
-            end
-            raise LlmGateway::Errors::APIStatusError.new(message, code)
+          def tool_seen?
+            accumulator.blocks.any? { |content_block| content_block && content_block[:type] == "tool_use" }
           end
         end
       end

data/lib/llm_gateway/adapters/openai/responses_adapter.rb CHANGED

@@ -4,7 +4,6 @@ require_relative "../adapter"
 require_relative "acts_like_responses"
 require_relative "../input_message_sanitizer"
 require_relative "responses/input_mapper"
-require_relative "responses/output_mapper"
 require_relative "responses/option_mapper"
 require_relative "file_output_mapper"
 require_relative "responses/stream_mapper"

data/lib/llm_gateway/adapters/openai_codex/input_mapper.rb CHANGED

@@ -26,7 +26,7 @@ module LlmGateway
         def self.map_messages(messages)
           return messages unless messages.is_a?(Array)
-          mapper  = message_mapper
+          mapper = self
           stripped = strip_reasoning_blocks(messages)
           mapped = stripped.each_with_object([]) do |msg, acc|
@@ -85,7 +85,7 @@ module LlmGateway
         end
         # Ensure assistant messages carry "output_text" rather than "input_text".
-        # The BidirectionalMessageMapper maps plain text blocks to "input_text";
+        # The base Responses input mapper maps plain text blocks to "input_text";
         # Codex is strict about directionality and rejects "input_text" on the
         # assistant side.
         def self.normalize_assistant_content_types(messages)
@@ -114,7 +114,7 @@ module LlmGateway
         #                               signature *is* the serialised item)
         # - tool_use / function_call → top-level function_call item
         # - text / *_text variants   → output_text inside an assistant content block
-        # - anything else            → delegated to the BidirectionalMessageMapper
+        # - anything else            → delegated to the Responses input mapper
         def self.map_assistant_content(content, mapper)
           text_parts = []
           items      = []

data/lib/llm_gateway/adapters/openai_codex/responses_adapter.rb CHANGED

@@ -2,7 +2,6 @@
 require_relative "../adapter"
 require_relative "../openai/acts_like_responses"
-require_relative "../openai/responses/output_mapper"
 require_relative "option_mapper"
 require_relative "../openai/responses/stream_mapper"
 require_relative "../openai/file_output_mapper"
@@ -25,10 +24,6 @@ module LlmGateway
           OptionMapper
         end
-        def perform_chat(messages, tools:, system:, **options)
-          client.chat_codex(messages, tools: tools, system: system, **options)
-        end
         def perform_stream(messages, tools:, system:, **options, &block)
           client.stream_codex(messages, tools: tools, system: system, **options, &block)
         end

data/lib/llm_gateway/adapters/stream_mapper.rb ADDED

@@ -0,0 +1,57 @@
+# frozen_string_literal: true
+require_relative "normalized_stream_accumulator"
+module LlmGateway
+  module Adapters
+    class StreamMapper
+      def initialize(provider:, api:)
+        @provider = provider
+        @api = api
+      end
+      def result
+        accumulator.final_message
+      end
+      private
+      attr_reader :provider, :api
+      def accumulator
+        @accumulator ||= LlmGateway::Adapters::NormalizedStreamAccumulator.new(provider:, api:)
+      end
+      def push_patches(patches, &block)
+        patches.each do |patch|
+          accumulator.push(patch, &block)
+        end
+        nil
+      end
+      def raise_stream_error!(data, overload_codes: [])
+        error = stream_error_payload(data)
+        message = error[:message] || error["message"] || "Stream error"
+        code = error[:code] || error["code"] || error[:type] || error["type"]
+        if LlmGateway::Errors.context_overflow_message?(message)
+          raise LlmGateway::Errors::PromptTooLong.new(message, code)
+        end
+        if Array(overload_codes).any? { |overload_code| overload_code.to_s == code.to_s }
+          raise LlmGateway::Errors::OverloadError.new(message, code)
+        end
+        raise LlmGateway::Errors::APIStatusError.new(message, code)
+      end
+      def stream_error_payload(data)
+        data ||= {}
+        error = data[:error] || data["error"]
+        error.is_a?(Hash) ? error : data
+      end
+    end
+  end
+end