RubyGems - llm_gateway - Versions diffs - 0.5.0 → 0.7.0 - Mend

llm_gateway 0.5.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39)

checksums.yaml +4 -4
data/CHANGELOG.md +38 -0
data/README.md +350 -43
data/docs/migration_guide_0.6.0.md +386 -0
data/docs/migration_guide_0.7.0.md +193 -0
data/lib/llm_gateway/adapters/adapter.rb +8 -11
data/lib/llm_gateway/adapters/anthropic/input_mapper.rb +24 -0
data/lib/llm_gateway/adapters/anthropic/stream_mapper.rb +61 -11
data/lib/llm_gateway/adapters/anthropic_option_mapper.rb +1 -1
data/lib/llm_gateway/adapters/groq/option_mapper.rb +1 -1
data/lib/llm_gateway/adapters/input_message_sanitizer.rb +98 -7
data/lib/llm_gateway/adapters/normalized_stream_accumulator.rb +132 -39
data/lib/llm_gateway/adapters/openai/chat_completions/option_mapper.rb +1 -1
data/lib/llm_gateway/adapters/openai/chat_completions/stream_mapper.rb +40 -16
data/lib/llm_gateway/adapters/openai/responses/input_mapper.rb +47 -31
data/lib/llm_gateway/adapters/openai/responses/option_mapper.rb +1 -1
data/lib/llm_gateway/adapters/openai/responses/stream_mapper.rb +173 -24
data/lib/llm_gateway/adapters/stream_mapper.rb +9 -2
data/lib/llm_gateway/adapters/structs.rb +140 -55
data/lib/llm_gateway/agents/event.rb +105 -0
data/lib/llm_gateway/agents/file_session_manager.rb +100 -0
data/lib/llm_gateway/agents/harness.rb +176 -0
data/lib/llm_gateway/agents/in_memory_session_manager.rb +222 -0
data/lib/llm_gateway/agents/tools/bash_tool.rb +132 -0
data/lib/llm_gateway/agents/tools/edit_tool.rb +215 -0
data/lib/llm_gateway/agents/tools/read_tool.rb +143 -0
data/lib/llm_gateway/agents/tools/tool_utils.rb +164 -0
data/lib/llm_gateway/agents/tools/write_tool.rb +34 -0
data/lib/llm_gateway/base_client.rb +5 -7
data/lib/llm_gateway/clients/anthropic.rb +10 -9
data/lib/llm_gateway/clients/claude_code/oauth_flow.rb +2 -2
data/lib/llm_gateway/clients/groq.rb +8 -6
data/lib/llm_gateway/clients/openai.rb +22 -20
data/lib/llm_gateway/clients/openai_codex/oauth_flow.rb +4 -4
data/lib/llm_gateway/prompt.rb +107 -52
data/lib/llm_gateway/utils.rb +116 -13
data/lib/llm_gateway/version.rb +1 -1
data/lib/llm_gateway.rb +7 -21
metadata +13 -2

data/lib/llm_gateway/adapters/openai/chat_completions/stream_mapper.rb CHANGED

@@ -92,9 +92,9 @@ module LlmGateway
                 delta: {
                   id: data[:id],
                   model: data[:model],
-                  role: delta[:role] || "assistant"
-                }.compact,
-                usage_increment: {}
+                  role: delta[:role] || "assistant",
+                  timestamp: timestamp_milliseconds(data[:created])
+                }.compact
               }
             ]
           end
@@ -198,34 +198,58 @@ module LlmGateway
               *close_active_block_patches(active_block_type:),
               {
                 type: :message_delta,
-                delta: { stop_reason: normalize_stop_reason(finish_reason) },
-                usage_increment: {}
+                delta: { stop_reason: normalize_stop_reason(finish_reason) }
               }
             ]
           end
           def final_usage_patches(data)
+            patch = {
+              type: :message_delta,
+              delta: {}
+            }
+            patch[:usage] = usage(data) if data.key?(:usage)
             [
-              {
-                type: accumulator.message_hash.empty? ? :message_start : :message_delta,
-                delta: {},
-                usage_increment: usage_increment(data)
-              }
+              patch,
+              { type: :message_end }
             ]
           end
-          def usage_increment(data)
+          def usage(data)
             usage = data[:usage] || {}
+            cache_read = token_count(
+              usage.dig(:prompt_tokens_details, :cached_tokens),
+              usage[:prompt_cache_hit_tokens]
+            )
+            cache_write = token_count(
+              usage.dig(:prompt_tokens_details, :cache_write_tokens),
+              usage[:cache_write_tokens]
+            )
+            prompt_tokens = token_count(usage[:prompt_tokens])
+            input = [ prompt_tokens - cache_read - cache_write, 0 ].max
+            output = token_count(usage[:completion_tokens])
             {
-              input_tokens: usage[:prompt_tokens] || 0,
-              cache_creation_input_tokens: 0,
-              cache_read_input_tokens: usage.dig(:prompt_tokens_details, :cached_tokens) || 0,
-              output_tokens: usage[:completion_tokens] || 0,
-              reasoning_tokens: usage.dig(:completion_tokens_details, :reasoning_tokens) || 0
+              input:,
+              cache_write:,
+              cache_read:,
+              output:,
+              total: input + cache_write + cache_read + output,
+              raw: usage
             }
           end
+          def token_count(*values)
+            values.compact.first.to_i
+          end
+          def timestamp_milliseconds(unix_seconds)
+            return nil if unix_seconds.nil?
+            (unix_seconds.to_f * 1000).to_i
+          end
           def normalize_stop_reason(finish_reason)
             case finish_reason
             when "tool_calls"

data/lib/llm_gateway/adapters/openai/responses/input_mapper.rb CHANGED

@@ -37,22 +37,28 @@ module LlmGateway
               return tools unless tools
               tools.map do |tool|
-                mapped_tool = {
-                  type: "function",
-                  name: tool[:name],
-                  description: tool[:description],
-                  parameters: tool[:input_schema]
-                }
-                [ :contents, :content ].each do |key|
-                  next unless tool[key].is_a?(Array)
-                  mapped_tool[key] = tool[key].map do |entry|
-                    entry.is_a?(Hash) ? map_content(entry.transform_keys(&:to_sym)) : entry
+                tool = tool.transform_keys(&:to_sym)
+                if tool[:name].nil?
+                  tool
+                else
+                  mapped_tool = {
+                    type: "function",
+                    name: tool[:name],
+                    description: tool[:description],
+                    parameters: tool[:input_schema]
+                  }
+                  [ :contents, :content ].each do |key|
+                    next unless tool[key].is_a?(Array)
+                    mapped_tool[key] = tool[key].map do |entry|
+                      entry.is_a?(Hash) ? map_content(entry.transform_keys(&:to_sym)) : entry
+                    end
                   end
-                end
-                mapped_tool
+                  mapped_tool
+                end
               end
             end
@@ -85,30 +91,40 @@ module LlmGateway
             def map_assistant_history_message(msg)
               blocks = (msg[:content] || []).map { |b| b.transform_keys(&:to_sym) }
-              text_blocks = blocks.select { |b| b[:type] == "text" }
-              tool_use_blocks = blocks.select { |b| b[:type] == "tool_use" }
               result = []
-              if text_blocks.any?
-                result << {
-                  role: "assistant",
-                  content: text_blocks.map { |b| { type: "output_text", text: b[:text] } }
-                }
-              end
-              tool_use_blocks.each do |b|
-                result << {
-                  type: "function_call",
-                  call_id: b[:id],
-                  name: b[:name],
-                  arguments: b[:input].is_a?(Hash) ? b[:input].to_json : (b[:input] || {}).to_json
-                }
+              blocks.each do |block|
+                case block[:type]
+                when "text"
+                  result << {
+                    role: "assistant",
+                    content: [ { type: "output_text", text: block[:text] } ]
+                  }
+                when "tool_use"
+                  result << {
+                    type: "function_call",
+                    call_id: block[:id],
+                    name: block[:name],
+                    arguments: block[:input].is_a?(Hash) ? block[:input].to_json : (block[:input] || {}).to_json
+                  }
+                when "server_tool_use"
+                  result << map_server_tool_use_history_item(block)
+                end
               end
               result
             end
+            def map_server_tool_use_history_item(block)
+              input = block[:input].is_a?(Hash) ? block[:input] : {}
+              {
+                id: block[:id],
+                type: block[:name],
+                status: "completed"
+              }.merge(input)
+            end
             def map_messages_content(message)
               message[:content].map { |content| map_content(content) }
             end

data/lib/llm_gateway/adapters/openai/responses/option_mapper.rb CHANGED

@@ -58,7 +58,7 @@ module LlmGateway
           module_function
           def map(options)
-            mapped_options = options.reject { |key, _| MANAGED_OPTIONS.include?(key) }
+            mapped_options = options.except(*MANAGED_OPTIONS)
             mapped_options[:max_output_tokens] = options[:max_completion_tokens] || options[:max_output_tokens] || DEFAULT_MAX_OUTPUT_TOKENS
             cache_key = options[:cache_key]

data/lib/llm_gateway/adapters/openai/responses/stream_mapper.rb CHANGED

@@ -1,5 +1,7 @@
 # frozen_string_literal: true
+require "json"
 require_relative "../../stream_mapper"
 module LlmGateway
@@ -23,10 +25,16 @@ module LlmGateway
               response_created_patches(data[:response])
             when "response.output_item.added"
               output_item_added_patches(data)
+            when "response.output_item.done"
+              output_item_done_patches(data)
             when "response.content_part.added"
               content_part_added_patches(data)
-            when "response.content_part.done"
+            when "response.content_part.done", "response.output_text.done"
               content_part_done_patches(data)
+            when "response.code_interpreter_call_code.delta"
+              code_interpreter_code_delta_patches(data)
+            when "response.code_interpreter_call.in_progress", "response.code_interpreter_call.interpreting", "response.code_interpreter_call.completed", "response.code_interpreter_call_code.done"
+              []
             when "response.output_text.delta"
               [ { type: :text_delta, delta: data[:delta] || "" } ]
             when "response.function_call_arguments.delta"
@@ -55,9 +63,9 @@ module LlmGateway
                 delta: {
                   id: response[:id],
                   model: response[:model],
-                  role: "assistant"
-                }.compact,
-                usage_increment: {}
+                  role: "assistant",
+                  timestamp: timestamp_milliseconds(response[:created_at])
+                }.compact
               }
             ]
           end
@@ -72,8 +80,7 @@ module LlmGateway
               [
                 {
                   type: :message_start,
-                  delta: { role: item[:role] || "assistant" },
-                  usage_increment: {}
+                  delta: { role: item[:role] || "assistant" }
                 }
               ]
             when "function_call"
@@ -85,6 +92,38 @@ module LlmGateway
                   name: item[:name]
                 }
               ]
+            when "code_interpreter_call"
+              state = code_interpreter_state[data[:output_index] || 0] = {
+                id: item[:id],
+                container_id: item[:container_id],
+                outputs: item[:outputs],
+                input_opened: false,
+                input_closed: false
+              }
+              container_id_to_tool_id[state[:container_id]] = state[:id] if state[:container_id]
+              [
+                {
+                  type: :tool_start,
+                  delta: "",
+                  id: item[:id],
+                  name: "code_interpreter_call",
+                  tool_type: "server_tool_use"
+                }
+              ]
+            else
+              []
+            end
+          end
+          def output_item_done_patches(data)
+            item = data[:item] || {}
+            case item[:type]
+            when "code_interpreter_call"
+              code_interpreter_done_patches(data[:output_index] || 0, item)
+            when "message"
+              container_file_citation_patches(item)
             else
               []
             end
@@ -101,38 +140,136 @@ module LlmGateway
             part = data[:part] || {}
             return [] unless part.empty? || part[:type] == "output_text"
-            [ { type: :text_end, delta: "" } ]
+            citations = container_file_citation_patches(data)
+            return citations unless accumulator.active_block_type == :text
+            [ { type: :text_end, delta: "" } ] + citations
           end
-          def response_completed_patches(response)
-            response ||= {}
+          def code_interpreter_code_delta_patches(data)
+            output_index = data[:output_index] || 0
+            state = code_interpreter_state[output_index] ||= {
+              id: nil,
+              container_id: nil,
+              outputs: nil,
+              input_opened: false,
+              input_closed: false
+            }
+            delta = escape_json_string_fragment(data[:delta] || "")
+            delta = "{\"code\":\"#{delta}" unless state[:input_opened]
+            state[:input_opened] = true
+            [ { type: :tool_delta, delta: } ]
+          end
+          def code_interpreter_done_patches(output_index, item)
+            state = code_interpreter_state[output_index] ||= {}
+            state[:id] ||= item[:id]
+            state[:container_id] = item[:container_id] if item.key?(:container_id)
+            state[:outputs] = item[:outputs] if item.key?(:outputs)
+            container_id_to_tool_id[state[:container_id]] = state[:id] if state[:container_id] && state[:id]
+            return [] if state[:input_closed]
+            opening = state[:input_opened] ? "" : "{\"code\":\""
+            state[:input_opened] = true
+            closing = "\"," + JSON.generate(container_id: state[:container_id], outputs: state[:outputs])[1..]
+            state[:input_closed] = true
             [
+              { type: :tool_delta, delta: opening + closing },
+              { type: :tool_end, delta: "" }
+            ]
+          end
+          def container_file_citation_patches(data)
+            extract_annotations(data).filter_map do |annotation|
+              next unless annotation[:type] == "container_file_citation"
+              container_id = annotation[:container_id]
+              file_id = annotation[:file_id]
+              filename = annotation[:filename]
+              tool_id = container_id_to_tool_id[container_id]
+              next unless tool_id
+              key = [ tool_id, container_id, file_id, filename ]
+              next if emitted_citation_keys[key]
+              emitted_citation_keys[key] = true
               {
-                type: accumulator.message_hash.empty? ? :message_start : :message_delta,
-                delta: {
-                  id: response[:id],
-                  model: response[:model],
-                  role: "assistant",
-                  stop_reason: stop_reason_for(response)
-                }.compact,
-                usage_increment: usage_increment(response)
+                type: :tool_result_start,
+                delta: JSON.generate(container_id:, file_id:, filename:),
+                tool_use_id: tool_id,
+                name: "container_file_citation_tool_result"
               }
+            end.flat_map { |start| [ start, { type: :tool_result_end, delta: "" } ] }
+          end
+          def extract_annotations(data)
+            annotations = []
+            annotations.concat(Array(data[:annotations]))
+            annotations.concat(Array(data.dig(:part, :annotations)))
+            annotations.concat(Array(data.dig(:item, :annotations)))
+            Array(data.dig(:item, :content)).each do |content_part|
+              annotations.concat(Array(content_part[:annotations])) if content_part.is_a?(Hash)
+            end
+            annotations
+          end
+          def escape_json_string_fragment(value)
+            JSON.generate(value)[1...-1]
+          end
+          def response_completed_patches(response)
+            response ||= {}
+            patch = {
+              type: :message_delta,
+              delta: {
+                id: response[:id],
+                model: response[:model],
+                role: "assistant",
+                timestamp: timestamp_milliseconds(response[:created_at]),
+                stop_reason: stop_reason_for(response)
+              }.compact
+            }
+            patch[:usage] = usage(response) if response.key?(:usage)
+            [
+              patch,
+              { type: :message_end }
             ]
           end
-          def usage_increment(response)
+          def usage(response)
             usage = response[:usage] || {}
+            cache_read = token_count(usage.dig(:input_tokens_details, :cached_tokens))
+            cache_write = token_count(
+              usage.dig(:input_tokens_details, :cache_write_tokens),
+              usage[:cache_write_tokens]
+            )
+            input_tokens = token_count(usage[:input_tokens])
+            input = [ input_tokens - cache_read - cache_write, 0 ].max
+            output = token_count(usage[:output_tokens])
             {
-              input_tokens: usage[:input_tokens] || 0,
-              cache_creation_input_tokens: 0,
-              cache_read_input_tokens: usage.dig(:input_tokens_details, :cached_tokens) || 0,
-              output_tokens: usage[:output_tokens] || 0,
-              reasoning_tokens: usage.dig(:output_tokens_details, :reasoning_tokens) || 0
+              input:,
+              cache_write:,
+              cache_read:,
+              output:,
+              total: input + cache_write + cache_read + output,
+              raw: usage
             }
           end
+          def token_count(*values)
+            values.compact.first.to_i
+          end
+          def timestamp_milliseconds(unix_seconds)
+            return nil if unix_seconds.nil?
+            (unix_seconds.to_f * 1000).to_i
+          end
           def stop_reason_for(response)
             output = response[:output] || []
             last_item = output.last || {}
@@ -141,7 +278,19 @@ module LlmGateway
           end
           def tool_seen?
-            accumulator.blocks.any? { |content_block| content_block && content_block[:type] == "tool_use" }
+            accumulator.blocks.any? { |content_block| content_block && [ "tool_use", "server_tool_use" ].include?(content_block[:type]) }
+          end
+          def code_interpreter_state
+            @code_interpreter_state ||= {}
+          end
+          def container_id_to_tool_id
+            @container_id_to_tool_id ||= {}
+          end
+          def emitted_citation_keys
+            @emitted_citation_keys ||= {}
           end
         end
       end

data/lib/llm_gateway/adapters/stream_mapper.rb CHANGED

@@ -5,14 +5,21 @@ require_relative "normalized_stream_accumulator"
 module LlmGateway
   module Adapters
     class StreamMapper
+      def initialize(provider:, api:)
+        @provider = provider
+        @api = api
+      end
       def result
-        accumulator.result
+        accumulator.final_message
       end
       private
+      attr_reader :provider, :api
       def accumulator
-        @accumulator ||= LlmGateway::Adapters::NormalizedStreamAccumulator.new
+        @accumulator ||= LlmGateway::Adapters::NormalizedStreamAccumulator.new(provider:, api:)
       end
       def push_patches(patches, &block)