ruby_llm 1.9.2 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. checksums.yaml +4 -4
  2. data/README.md +5 -4
  3. data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +3 -0
  4. data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +1 -0
  5. data/lib/generators/ruby_llm/upgrade_to_v1_10/templates/add_v1_10_message_columns.rb.tt +19 -0
  6. data/lib/generators/ruby_llm/upgrade_to_v1_10/upgrade_to_v1_10_generator.rb +50 -0
  7. data/lib/ruby_llm/active_record/acts_as_legacy.rb +5 -1
  8. data/lib/ruby_llm/active_record/chat_methods.rb +12 -0
  9. data/lib/ruby_llm/active_record/message_methods.rb +41 -8
  10. data/lib/ruby_llm/aliases.json +4 -16
  11. data/lib/ruby_llm/chat.rb +10 -7
  12. data/lib/ruby_llm/configuration.rb +2 -1
  13. data/lib/ruby_llm/message.rb +37 -11
  14. data/lib/ruby_llm/models.json +1902 -1785
  15. data/lib/ruby_llm/models.rb +134 -12
  16. data/lib/ruby_llm/provider.rb +9 -4
  17. data/lib/ruby_llm/providers/anthropic/chat.rb +128 -13
  18. data/lib/ruby_llm/providers/anthropic/media.rb +2 -2
  19. data/lib/ruby_llm/providers/anthropic/streaming.rb +25 -1
  20. data/lib/ruby_llm/providers/bedrock/chat.rb +67 -15
  21. data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +59 -2
  22. data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +5 -0
  23. data/lib/ruby_llm/providers/gemini/chat.rb +69 -3
  24. data/lib/ruby_llm/providers/gemini/streaming.rb +32 -1
  25. data/lib/ruby_llm/providers/gemini/tools.rb +16 -3
  26. data/lib/ruby_llm/providers/gpustack/chat.rb +1 -1
  27. data/lib/ruby_llm/providers/mistral/chat.rb +58 -1
  28. data/lib/ruby_llm/providers/ollama/chat.rb +1 -1
  29. data/lib/ruby_llm/providers/openai/capabilities.rb +6 -2
  30. data/lib/ruby_llm/providers/openai/chat.rb +87 -3
  31. data/lib/ruby_llm/providers/openai/media.rb +1 -1
  32. data/lib/ruby_llm/providers/openai/streaming.rb +11 -3
  33. data/lib/ruby_llm/providers/openai/temperature.rb +28 -0
  34. data/lib/ruby_llm/providers/openai.rb +1 -1
  35. data/lib/ruby_llm/providers/openrouter/chat.rb +154 -0
  36. data/lib/ruby_llm/providers/openrouter/streaming.rb +74 -0
  37. data/lib/ruby_llm/providers/openrouter.rb +2 -0
  38. data/lib/ruby_llm/providers/vertexai.rb +5 -1
  39. data/lib/ruby_llm/providers/xai/chat.rb +15 -0
  40. data/lib/ruby_llm/providers/xai/models.rb +75 -0
  41. data/lib/ruby_llm/providers/xai.rb +28 -0
  42. data/lib/ruby_llm/stream_accumulator.rb +111 -14
  43. data/lib/ruby_llm/streaming.rb +54 -51
  44. data/lib/ruby_llm/thinking.rb +49 -0
  45. data/lib/ruby_llm/tokens.rb +47 -0
  46. data/lib/ruby_llm/tool_call.rb +6 -3
  47. data/lib/ruby_llm/version.rb +1 -1
  48. data/lib/ruby_llm.rb +10 -8
  49. data/lib/tasks/models.rake +20 -12
  50. metadata +15 -5
data/lib/ruby_llm/providers/openrouter/chat.rb
@@ -0,0 +1,154 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   module Providers
+     class OpenRouter
+       # Chat methods of the OpenRouter API integration
+       module Chat
+         module_function
+
+         def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil) # rubocop:disable Metrics/ParameterLists
+           payload = {
+             model: model.id,
+             messages: format_messages(messages),
+             stream: stream
+           }
+
+           payload[:temperature] = temperature unless temperature.nil?
+           payload[:tools] = tools.map { |_, tool| OpenAI::Tools.tool_for(tool) } if tools.any?
+
+           if schema
+             strict = schema[:strict] != false
+             payload[:response_format] = {
+               type: 'json_schema',
+               json_schema: {
+                 name: 'response',
+                 schema: schema,
+                 strict: strict
+               }
+             }
+           end
+
+           reasoning = build_reasoning(thinking)
+           payload[:reasoning] = reasoning if reasoning
+
+           payload[:stream_options] = { include_usage: true } if stream
+           payload
+         end
+
+         def parse_completion_response(response)
+           data = response.body
+           return if data.empty?
+
+           raise Error.new(response, data.dig('error', 'message')) if data.dig('error', 'message')
+
+           message_data = data.dig('choices', 0, 'message')
+           return unless message_data
+
+           usage = data['usage'] || {}
+           cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
+           thinking_tokens = usage.dig('completion_tokens_details', 'reasoning_tokens')
+           thinking_text = extract_thinking_text(message_data)
+           thinking_signature = extract_thinking_signature(message_data)
+
+           Message.new(
+             role: :assistant,
+             content: message_data['content'],
+             thinking: Thinking.build(text: thinking_text, signature: thinking_signature),
+             tool_calls: OpenAI::Tools.parse_tool_calls(message_data['tool_calls']),
+             input_tokens: usage['prompt_tokens'],
+             output_tokens: usage['completion_tokens'],
+             cached_tokens: cached_tokens,
+             cache_creation_tokens: 0,
+             thinking_tokens: thinking_tokens,
+             model_id: data['model'],
+             raw: response
+           )
+         end
+
+         def format_messages(messages)
+           messages.map do |msg|
+             {
+               role: format_role(msg.role),
+               content: OpenAI::Media.format_content(msg.content),
+               tool_calls: OpenAI::Tools.format_tool_calls(msg.tool_calls),
+               tool_call_id: msg.tool_call_id
+             }.compact.merge(format_thinking(msg))
+           end
+         end
+
+         def format_role(role)
+           case role
+           when :system
+             @config.openai_use_system_role ? 'system' : 'developer'
+           else
+             role.to_s
+           end
+         end
+
+         def build_reasoning(thinking)
+           return nil unless thinking&.enabled?
+
+           reasoning = {}
+           reasoning[:effort] = thinking.effort if thinking.respond_to?(:effort) && thinking.effort
+           reasoning[:max_tokens] = thinking.budget if thinking.respond_to?(:budget) && thinking.budget
+           reasoning[:enabled] = true if reasoning.empty?
+           reasoning
+         end
+
+         def format_thinking(msg)
+           thinking = msg.thinking
+           return {} unless thinking && msg.role == :assistant
+
+           details = []
+           if thinking.text
+             details << {
+               type: 'reasoning.text',
+               text: thinking.text,
+               signature: thinking.signature
+             }.compact
+           elsif thinking.signature
+             details << {
+               type: 'reasoning.encrypted',
+               data: thinking.signature
+             }
+           end
+
+           details.empty? ? {} : { reasoning_details: details }
+         end
+
+         def extract_thinking_text(message_data)
+           candidate = message_data['reasoning']
+           return candidate if candidate.is_a?(String)
+
+           details = message_data['reasoning_details']
+           return nil unless details.is_a?(Array)
+
+           text = details.filter_map do |detail|
+             case detail['type']
+             when 'reasoning.text'
+               detail['text']
+             when 'reasoning.summary'
+               detail['summary']
+             end
+           end.join
+
+           text.empty? ? nil : text
+         end
+
+         def extract_thinking_signature(message_data)
+           details = message_data['reasoning_details']
+           return nil unless details.is_a?(Array)
+
+           signature = details.filter_map do |detail|
+             detail['signature'] if detail['signature'].is_a?(String)
+           end.first
+           return signature if signature
+
+           encrypted = details.find { |detail| detail['type'] == 'reasoning.encrypted' && detail['data'].is_a?(String) }
+           encrypted&.dig('data')
+         end
+       end
+     end
+   end
+ end
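For orientation, a minimal sketch of the request payload render_payload assembles when thinking is enabled. The model id and effort value here are hypothetical, not taken from the diff:

    # Hypothetical payload shape; compare with render_payload above.
    payload = {
      model: 'openai/o3-mini',                     # assumed model id
      messages: [{ role: 'user', content: 'Hi' }],
      stream: false,
      reasoning: { effort: :high }                 # produced by build_reasoning
    }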
data/lib/ruby_llm/providers/openrouter/streaming.rb
@@ -0,0 +1,74 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   module Providers
+     class OpenRouter
+       # Streaming methods of the OpenRouter API integration
+       module Streaming
+         module_function
+
+         def stream_url
+           completion_url
+         end
+
+         def build_chunk(data)
+           usage = data['usage'] || {}
+           cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
+           delta = data.dig('choices', 0, 'delta') || {}
+
+           Chunk.new(
+             role: :assistant,
+             model_id: data['model'],
+             content: delta['content'],
+             thinking: Thinking.build(
+               text: extract_thinking_text(delta),
+               signature: extract_thinking_signature(delta)
+             ),
+             tool_calls: OpenAI::Tools.parse_tool_calls(delta['tool_calls'], parse_arguments: false),
+             input_tokens: usage['prompt_tokens'],
+             output_tokens: usage['completion_tokens'],
+             cached_tokens: cached_tokens,
+             cache_creation_tokens: 0,
+             thinking_tokens: usage.dig('completion_tokens_details', 'reasoning_tokens')
+           )
+         end
+
+         def parse_streaming_error(data)
+           OpenAI::Streaming.parse_streaming_error(data)
+         end
+
+         def extract_thinking_text(delta)
+           candidate = delta['reasoning']
+           return candidate if candidate.is_a?(String)
+
+           details = delta['reasoning_details']
+           return nil unless details.is_a?(Array)
+
+           text = details.filter_map do |detail|
+             case detail['type']
+             when 'reasoning.text'
+               detail['text']
+             when 'reasoning.summary'
+               detail['summary']
+             end
+           end.join
+
+           text.empty? ? nil : text
+         end
+
+         def extract_thinking_signature(delta)
+           details = delta['reasoning_details']
+           return nil unless details.is_a?(Array)
+
+           signature = details.filter_map do |detail|
+             detail['signature'] if detail['signature'].is_a?(String)
+           end.first
+           return signature if signature
+
+           encrypted = details.find { |detail| detail['type'] == 'reasoning.encrypted' && detail['data'].is_a?(String) }
+           encrypted&.dig('data')
+         end
+       end
+     end
+   end
+ end
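A sketch of the delta shape these extractors expect. The field names follow the reasoning_details handling above; the values are made up:

    # Hypothetical streamed delta. extract_thinking_text(delta) would return
    # 'Checking the docs...' and extract_thinking_signature(delta) 'sig-123'.
    delta = {
      'reasoning_details' => [
        { 'type' => 'reasoning.text', 'text' => 'Checking the docs...', 'signature' => 'sig-123' }
      ]
    }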
data/lib/ruby_llm/providers/openrouter.rb
@@ -4,7 +4,9 @@ module RubyLLM
  module Providers
    # OpenRouter API integration.
    class OpenRouter < OpenAI
+     include OpenRouter::Chat
      include OpenRouter::Models
+     include OpenRouter::Streaming

      def api_base
        'https://openrouter.ai/api/v1'
data/lib/ruby_llm/providers/vertexai.rb
@@ -16,7 +16,11 @@ module RubyLLM
    end

    def api_base
-     "https://#{@config.vertexai_location}-aiplatform.googleapis.com/v1beta1"
+     if @config.vertexai_location.to_s == 'global'
+       'https://aiplatform.googleapis.com/v1beta1'
+     else
+       "https://#{@config.vertexai_location}-aiplatform.googleapis.com/v1beta1"
+     end
    end

    def headers
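A quick check of the new branch, with assumed configuration values:

    # vertexai_location = 'global'      => 'https://aiplatform.googleapis.com/v1beta1'
    # vertexai_location = 'us-central1' => 'https://us-central1-aiplatform.googleapis.com/v1beta1'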
data/lib/ruby_llm/providers/xai/chat.rb
@@ -0,0 +1,15 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   module Providers
+     class XAI
+       # Chat implementation for xAI
+       # https://docs.x.ai/docs/api-reference#chat-completions
+       module Chat
+         def format_role(role)
+           role.to_s
+         end
+       end
+     end
+   end
+ end
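This override bypasses the OpenAI-style behavior, where format_role can map :system to 'developer' (see the OpenRouter chat module above); xAI receives role names verbatim:

    format_role(:system) # => 'system'
    format_role(:user)   # => 'user'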
data/lib/ruby_llm/providers/xai/models.rb
@@ -0,0 +1,75 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   module Providers
+     class XAI
+       # Models metadata for xAI list models.
+       module Models
+         module_function
+
+         IMAGE_MODELS = %w[grok-2-image-1212].freeze
+         VISION_MODELS = %w[
+           grok-2-vision-1212
+           grok-4-0709
+           grok-4-fast-non-reasoning
+           grok-4-fast-reasoning
+           grok-4-1-fast-non-reasoning
+           grok-4-1-fast-reasoning
+         ].freeze
+         REASONING_MODELS = %w[
+           grok-3-mini
+           grok-4-0709
+           grok-4-fast-reasoning
+           grok-4-1-fast-reasoning
+           grok-code-fast-1
+         ].freeze
+
+         def parse_list_models_response(response, slug, _capabilities)
+           Array(response.body['data']).map do |model_data|
+             model_id = model_data['id']
+
+             Model::Info.new(
+               id: model_id,
+               name: format_display_name(model_id),
+               provider: slug,
+               family: 'grok',
+               created_at: model_data['created'] ? Time.at(model_data['created']) : nil,
+               context_window: nil,
+               max_output_tokens: nil,
+               modalities: modalities_for(model_id),
+               capabilities: capabilities_for(model_id),
+               pricing: {},
+               metadata: {
+                 object: model_data['object'],
+                 owned_by: model_data['owned_by']
+               }.compact
+             )
+           end
+         end
+
+         def modalities_for(model_id)
+           if IMAGE_MODELS.include?(model_id)
+             { input: ['text'], output: ['image'] }
+           else
+             input = ['text']
+             input << 'image' if VISION_MODELS.include?(model_id)
+             { input: input, output: ['text'] }
+           end
+         end
+
+         def capabilities_for(model_id)
+           return [] if IMAGE_MODELS.include?(model_id)
+
+           capabilities = %w[streaming function_calling structured_output]
+           capabilities << 'reasoning' if REASONING_MODELS.include?(model_id)
+           capabilities << 'vision' if VISION_MODELS.include?(model_id)
+           capabilities
+         end
+
+         def format_display_name(model_id)
+           model_id.tr('-', ' ').split.map(&:capitalize).join(' ')
+         end
+       end
+     end
+   end
+ end
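The helpers above are easy to verify against the constant lists; for example:

    format_display_name('grok-4-fast-reasoning')
    # => 'Grok 4 Fast Reasoning'
    capabilities_for('grok-4-fast-reasoning')
    # => ['streaming', 'function_calling', 'structured_output', 'reasoning', 'vision']
    capabilities_for('grok-2-image-1212')
    # => []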
data/lib/ruby_llm/providers/xai.rb
@@ -0,0 +1,28 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   module Providers
+     # xAI API integration
+     class XAI < OpenAI
+       include XAI::Chat
+       include XAI::Models
+
+       def api_base
+         'https://api.x.ai/v1'
+       end
+
+       def headers
+         {
+           'Authorization' => "Bearer #{@config.xai_api_key}",
+           'Content-Type' => 'application/json'
+         }
+       end
+
+       class << self
+         def configuration_requirements
+           %i[xai_api_key]
+         end
+       end
+     end
+   end
+ end
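Wiring this up should follow the gem's usual configuration pattern; a sketch, assuming the standard RubyLLM.configure block and a model id taken from the Models constants above:

    RubyLLM.configure do |config|
      config.xai_api_key = ENV['XAI_API_KEY'] # required per configuration_requirements
    end

    chat = RubyLLM.chat(model: 'grok-3-mini', provider: :xai)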
data/lib/ruby_llm/stream_accumulator.rb
@@ -7,11 +7,16 @@ module RubyLLM

    def initialize
      @content = +''
+     @thinking_text = +''
+     @thinking_signature = nil
      @tool_calls = {}
      @input_tokens = nil
      @output_tokens = nil
      @cached_tokens = nil
      @cache_creation_tokens = nil
+     @thinking_tokens = nil
+     @inside_think_tag = false
+     @pending_think_tag = +''
      @latest_tool_call_id = nil
    end

@@ -19,12 +24,8 @@
      RubyLLM.logger.debug chunk.inspect if RubyLLM.config.log_stream_debug
      @model_id ||= chunk.model_id

-     if chunk.tool_call?
-       accumulate_tool_calls chunk.tool_calls
-     else
-       @content << (chunk.content || '')
-     end
-
+     handle_chunk_content(chunk)
+     append_thinking_from_chunk(chunk)
      count_tokens chunk
      RubyLLM.logger.debug inspect if RubyLLM.config.log_stream_debug
    end
@@ -33,12 +34,19 @@
      Message.new(
        role: :assistant,
        content: content.empty? ? nil : content,
+       thinking: Thinking.build(
+         text: @thinking_text.empty? ? nil : @thinking_text,
+         signature: @thinking_signature
+       ),
+       tokens: Tokens.build(
+         input: @input_tokens,
+         output: @output_tokens,
+         cached: @cached_tokens,
+         cache_creation: @cache_creation_tokens,
+         thinking: @thinking_tokens
+       ),
        model_id: model_id,
        tool_calls: tool_calls_from_stream,
-       input_tokens: @input_tokens,
-       output_tokens: @output_tokens,
-       cached_tokens: @cached_tokens,
-       cache_creation_tokens: @cache_creation_tokens,
        raw: response
      )
    end
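The removed per-keyword token arguments are now grouped into a single value object (see the new data/lib/ruby_llm/tokens.rb, +47 lines, in the file list above); a sketch of the equivalent call with made-up counts:

    Tokens.build(input: 120, output: 48, cached: 0, cache_creation: 0, thinking: 16)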
@@ -58,12 +66,13 @@
        ToolCall.new(
          id: tc.id,
          name: tc.name,
-         arguments: arguments
+         arguments: arguments,
+         thought_signature: tc.thought_signature
        )
      end
    end

-   def accumulate_tool_calls(new_tool_calls)
+   def accumulate_tool_calls(new_tool_calls) # rubocop:disable Metrics/PerceivedComplexity
      RubyLLM.logger.debug "Accumulating tool calls: #{new_tool_calls}" if RubyLLM.config.log_stream_debug
      new_tool_calls.each_value do |tool_call|
        if tool_call.id
@@ -72,12 +81,18 @@
          @tool_calls[tool_call.id] = ToolCall.new(
            id: tool_call_id,
            name: tool_call.name,
-           arguments: tool_call_arguments
+           arguments: tool_call_arguments,
+           thought_signature: tool_call.thought_signature
          )
          @latest_tool_call_id = tool_call.id
        else
          existing = @tool_calls[@latest_tool_call_id]
-         existing.arguments << tool_call.arguments if existing
+         if existing
+           existing.arguments << tool_call.arguments
+           if tool_call.thought_signature && existing.thought_signature.nil?
+             existing.thought_signature = tool_call.thought_signature
+           end
+         end
        end
      end
    end
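Streaming providers typically send a tool call's id and name once, then argument fragments without an id; the branches above stitch these back together. A hypothetical fragment sequence:

    # chunk 1: id 'call_1', name 'get_weather', arguments ''
    # chunk 2: id nil, arguments '{"city":'
    # chunk 3: id nil, arguments '"Berlin"}'
    # A late-arriving thought_signature is backfilled once, guarded by
    # existing.thought_signature.nil?.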
@@ -96,6 +111,88 @@
      @output_tokens = chunk.output_tokens if chunk.output_tokens
      @cached_tokens = chunk.cached_tokens if chunk.cached_tokens
      @cache_creation_tokens = chunk.cache_creation_tokens if chunk.cache_creation_tokens
+     @thinking_tokens = chunk.thinking_tokens if chunk.thinking_tokens
    end
+
+   def handle_chunk_content(chunk)
+     return accumulate_tool_calls(chunk.tool_calls) if chunk.tool_call?
+
+     content_text = chunk.content || ''
+     if content_text.is_a?(String)
+       append_text_with_thinking(content_text)
+     else
+       @content << content_text.to_s
+     end
+   end
+
+   def append_text_with_thinking(text)
+     content_chunk, thinking_chunk = extract_think_tags(text)
+     @content << content_chunk
+     @thinking_text << thinking_chunk if thinking_chunk
+   end
+
+   def append_thinking_from_chunk(chunk)
+     thinking = chunk.thinking
+     return unless thinking
+
+     @thinking_text << thinking.text.to_s if thinking.text
+     @thinking_signature ||= thinking.signature # rubocop:disable Naming/MemoizedInstanceVariableName
+   end
+
+   def extract_think_tags(text)
+     start_tag = '<think>'
+     end_tag = '</think>'
+     remaining = @pending_think_tag + text
+     @pending_think_tag = +''
+
+     output = +''
+     thinking = +''
+
+     until remaining.empty?
+       remaining = if @inside_think_tag
+                     consume_think_content(remaining, end_tag, thinking)
+                   else
+                     consume_non_think_content(remaining, start_tag, output)
+                   end
+     end
+
+     [output, thinking.empty? ? nil : thinking]
+   end
+
+   def consume_think_content(remaining, end_tag, thinking)
+     end_index = remaining.index(end_tag)
+     if end_index
+       thinking << remaining.slice(0, end_index)
+       @inside_think_tag = false
+       remaining.slice((end_index + end_tag.length)..) || +''
+     else
+       suffix_len = longest_suffix_prefix(remaining, end_tag)
+       thinking << remaining.slice(0, remaining.length - suffix_len)
+       @pending_think_tag = remaining.slice(-suffix_len, suffix_len)
+       +''
+     end
+   end
+
+   def consume_non_think_content(remaining, start_tag, output)
+     start_index = remaining.index(start_tag)
+     if start_index
+       output << remaining.slice(0, start_index)
+       @inside_think_tag = true
+       remaining.slice((start_index + start_tag.length)..) || +''
+     else
+       suffix_len = longest_suffix_prefix(remaining, start_tag)
+       output << remaining.slice(0, remaining.length - suffix_len)
+       @pending_think_tag = remaining.slice(-suffix_len, suffix_len)
+       +''
+     end
+   end
+
+   def longest_suffix_prefix(text, tag)
+     max = [text.length, tag.length - 1].min
+     max.downto(1) do |len|
+       return len if text.end_with?(tag[0, len])
+     end
+     0
+   end
  end
end
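The scanner has to cope with a <think> tag split across streaming chunks: when a chunk ends mid-tag, longest_suffix_prefix measures the partial match and extract_think_tags parks it in @pending_think_tag until the next chunk arrives. A worked example with assumed chunks:

    # chunk 1: 'Hello <thi'           => output 'Hello ', pending '<thi'
    # chunk 2: 'nk>plan</think>world' => '<thi' + chunk = '<think>plan</think>world'
    #                                    => thinking 'plan', output 'world'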