ruby_llm 1.9.2 → 1.10.0
This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -2
- data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +3 -0
- data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +1 -0
- data/lib/generators/ruby_llm/upgrade_to_v1_10/templates/add_v1_10_message_columns.rb.tt +19 -0
- data/lib/generators/ruby_llm/upgrade_to_v1_10/upgrade_to_v1_10_generator.rb +50 -0
- data/lib/ruby_llm/active_record/acts_as_legacy.rb +5 -1
- data/lib/ruby_llm/active_record/chat_methods.rb +12 -0
- data/lib/ruby_llm/active_record/message_methods.rb +41 -8
- data/lib/ruby_llm/aliases.json +0 -12
- data/lib/ruby_llm/chat.rb +10 -7
- data/lib/ruby_llm/configuration.rb +1 -1
- data/lib/ruby_llm/message.rb +37 -11
- data/lib/ruby_llm/models.json +1059 -857
- data/lib/ruby_llm/models.rb +134 -12
- data/lib/ruby_llm/provider.rb +4 -3
- data/lib/ruby_llm/providers/anthropic/chat.rb +128 -13
- data/lib/ruby_llm/providers/anthropic/streaming.rb +25 -1
- data/lib/ruby_llm/providers/bedrock/chat.rb +58 -15
- data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +59 -2
- data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +5 -0
- data/lib/ruby_llm/providers/gemini/chat.rb +69 -3
- data/lib/ruby_llm/providers/gemini/streaming.rb +32 -1
- data/lib/ruby_llm/providers/gemini/tools.rb +16 -3
- data/lib/ruby_llm/providers/gpustack/chat.rb +1 -1
- data/lib/ruby_llm/providers/mistral/chat.rb +58 -1
- data/lib/ruby_llm/providers/ollama/chat.rb +1 -1
- data/lib/ruby_llm/providers/openai/capabilities.rb +6 -2
- data/lib/ruby_llm/providers/openai/chat.rb +87 -3
- data/lib/ruby_llm/providers/openai/streaming.rb +11 -3
- data/lib/ruby_llm/providers/openai/temperature.rb +28 -0
- data/lib/ruby_llm/providers/openai.rb +1 -1
- data/lib/ruby_llm/providers/openrouter/chat.rb +154 -0
- data/lib/ruby_llm/providers/openrouter/streaming.rb +74 -0
- data/lib/ruby_llm/providers/openrouter.rb +2 -0
- data/lib/ruby_llm/providers/vertexai.rb +5 -1
- data/lib/ruby_llm/stream_accumulator.rb +111 -14
- data/lib/ruby_llm/streaming.rb +54 -51
- data/lib/ruby_llm/thinking.rb +49 -0
- data/lib/ruby_llm/tokens.rb +47 -0
- data/lib/ruby_llm/tool_call.rb +6 -3
- data/lib/ruby_llm/version.rb +1 -1
- data/lib/tasks/models.rake +19 -12
- metadata +12 -5
@@ -57,10 +57,15 @@ module RubyLLM
         role: :assistant,
         model_id: extract_model_id(data),
         content: extract_streaming_content(data),
+        thinking: Thinking.build(
+          text: extract_thinking_delta(data),
+          signature: extract_signature_delta(data)
+        ),
         input_tokens: extract_input_tokens(data),
         output_tokens: extract_output_tokens(data),
         cached_tokens: extract_cached_tokens(data),
         cache_creation_tokens: extract_cache_creation_tokens(data),
+        thinking_tokens: extract_thinking_tokens(data),
         tool_calls: extract_tool_calls(data)
       }
     end

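The `Thinking` value object itself lives in the new `data/lib/ruby_llm/thinking.rb` (+49 lines, not expanded in this diff), but the call sites above constrain its shape: `Thinking.build` accepts `text:` and `signature:` keywords, and consumers read `text`, `signature`, `effort`, `budget`, and `enabled?`. A minimal sketch consistent with those call sites, not the shipped implementation:

    # Hypothetical sketch of RubyLLM::Thinking, inferred from the call
    # sites visible in this diff; the real thinking.rb may differ.
    module RubyLLM
      class Thinking
        attr_reader :text, :signature, :effort, :budget

        def initialize(text: nil, signature: nil, effort: nil, budget: nil)
          @text = text
          @signature = signature
          @effort = effort
          @budget = budget
        end

        # Returning nil when there is nothing to carry lets callers pass
        # the result straight into Message.new or Chunk.new.
        def self.build(text: nil, signature: nil)
          return nil if text.nil? && signature.nil?

          new(text: text, signature: signature)
        end

        # Request-side configs count as enabled when effort or budget is set.
        def enabled?
          !effort.nil? || !budget.nil?
        end
      end
    end
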
@@ -14,7 +14,7 @@ module RubyLLM
           "models/#{@model}:generateContent"
         end
 
-        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
           @model = model.id
           payload = {
             contents: format_messages(messages),

@@ -24,11 +24,30 @@ module RubyLLM
           payload[:generationConfig][:temperature] = temperature unless temperature.nil?
 
           payload[:generationConfig].merge!(structured_output_config(schema, model)) if schema
+          payload[:generationConfig][:thinkingConfig] = build_thinking_config(model, thinking) if thinking&.enabled?
 
           payload[:tools] = format_tools(tools) if tools.any?
           payload
         end
 
+        def build_thinking_config(_model, thinking)
+          config = { includeThoughts: true }
+
+          config[:thinkingLevel] = resolve_effort_level(thinking) if thinking&.effort
+          config[:thinkingBudget] = resolve_budget(thinking) if thinking&.budget
+
+          config
+        end
+
+        def resolve_effort_level(thinking)
+          thinking.respond_to?(:effort) ? thinking.effort : thinking
+        end
+
+        def resolve_budget(thinking)
+          budget = thinking.respond_to?(:budget) ? thinking.budget : thinking
+          budget.is_a?(Integer) ? budget : nil
+        end
+
         private
 
         def format_messages(messages)

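With thinking enabled, the Gemini request gains a `thinkingConfig` block inside `generationConfig`. An illustrative payload fragment produced by `build_thinking_config` above (the budget value is made up for the example):

    # Illustrative: thinking enabled with an integer budget of 2048 tokens.
    payload[:generationConfig][:thinkingConfig]
    # => { includeThoughts: true, thinkingBudget: 2048 }
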
@@ -56,20 +75,43 @@ module RubyLLM
           elsif msg.tool_result?
             format_tool_result(msg)
           else
-
+            format_message_parts(msg)
           end
         end
 
+        def format_message_parts(msg)
+          parts = []
+
+          parts << build_thought_part(msg.thinking) if msg.role == :assistant && msg.thinking
+
+          content_parts = Media.format_content(msg.content)
+          parts.concat(content_parts.is_a?(Array) ? content_parts : [content_parts])
+          parts
+        end
+
+        def build_thought_part(thinking)
+          part = { thought: true }
+          part[:text] = thinking.text if thinking.text
+          part[:thoughtSignature] = thinking.signature if thinking.signature
+          part
+        end
+
         def parse_completion_response(response)
           data = response.body
+          parts = data.dig('candidates', 0, 'content', 'parts') || []
           tool_calls = extract_tool_calls(data)
 
           Message.new(
             role: :assistant,
-            content: parse_content(data),
+            content: extract_text_parts(parts) || parse_content(data),
+            thinking: Thinking.build(
+              text: extract_thought_parts(parts),
+              signature: extract_thought_signature(parts)
+            ),
             tool_calls: tool_calls,
             input_tokens: data.dig('usageMetadata', 'promptTokenCount'),
             output_tokens: calculate_output_tokens(data),
+            thinking_tokens: data.dig('usageMetadata', 'thoughtsTokenCount'),
             model_id: data['modelVersion'] || response.env.url.path.split('/')[3].split(':')[0],
             raw: response
           )

@@ -93,6 +135,30 @@ module RubyLLM
           build_response_content(parts)
         end
 
+        def extract_text_parts(parts)
+          text_parts = parts.reject { |p| p['thought'] }
+          content = text_parts.filter_map { |p| p['text'] }.join
+          content.empty? ? nil : content
+        end
+
+        def extract_thought_parts(parts)
+          thought_parts = parts.select { |p| p['thought'] }
+          thoughts = thought_parts.filter_map { |p| p['text'] }.join
+          thoughts.empty? ? nil : thoughts
+        end
+
+        def extract_thought_signature(parts)
+          parts.each do |part|
+            signature = part['thoughtSignature'] ||
+                        part['thought_signature'] ||
+                        part.dig('functionCall', 'thoughtSignature') ||
+                        part.dig('functionCall', 'thought_signature')
+            return signature if signature
+          end
+
+          nil
+        end
+
         def function_call?(candidate)
           parts = candidate.dig('content', 'parts')
           parts&.any? { |p| p['functionCall'] }

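These helpers split a Gemini `parts` array into visible text and thought summaries. Illustrative behavior on made-up data:

    # Illustrative parts array in the shape the Gemini API returns:
    parts = [
      { 'thought' => true, 'text' => 'First, compare the two options...' },
      { 'text' => 'Option B is the better choice.' }
    ]

    extract_text_parts(parts)        # => "Option B is the better choice."
    extract_thought_parts(parts)     # => "First, compare the two options..."
    extract_thought_signature(parts) # => nil (no signature present)
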
@@ -10,12 +10,19 @@ module RubyLLM
         end
 
         def build_chunk(data)
+          parts = data.dig('candidates', 0, 'content', 'parts') || []
+
           Chunk.new(
             role: :assistant,
             model_id: extract_model_id(data),
-            content:
+            content: extract_text_content(parts),
+            thinking: Thinking.build(
+              text: extract_thought_content(parts),
+              signature: extract_thought_signature(parts)
+            ),
             input_tokens: extract_input_tokens(data),
             output_tokens: extract_output_tokens(data),
+            thinking_tokens: data.dig('usageMetadata', 'thoughtsTokenCount'),
             tool_calls: extract_tool_calls(data)
           )
         end

@@ -26,6 +33,30 @@ module RubyLLM
           data['modelVersion']
         end
 
+        def extract_text_content(parts)
+          text_parts = parts.reject { |p| p['thought'] }
+          text = text_parts.filter_map { |p| p['text'] }.join
+          text.empty? ? nil : text
+        end
+
+        def extract_thought_content(parts)
+          thought_parts = parts.select { |p| p['thought'] }
+          thoughts = thought_parts.filter_map { |p| p['text'] }.join
+          thoughts.empty? ? nil : thoughts
+        end
+
+        def extract_thought_signature(parts)
+          parts.each do |part|
+            signature = part['thoughtSignature'] ||
+                        part['thought_signature'] ||
+                        part.dig('functionCall', 'thoughtSignature') ||
+                        part.dig('functionCall', 'thought_signature')
+            return signature if signature
+          end
+
+          nil
+        end
+
         def extract_content(data)
           return nil unless data['candidates']&.any?
 

@@ -13,7 +13,7 @@ module RubyLLM
           }]
         end
 
-        def format_tool_call(msg)
+        def format_tool_call(msg) # rubocop:disable Metrics/PerceivedComplexity
           parts = []
 
           if msg.content && !(msg.content.respond_to?(:empty?) && msg.content.empty?)

@@ -21,13 +21,24 @@ module RubyLLM
             parts.concat(formatted_content.is_a?(Array) ? formatted_content : [formatted_content])
           end
 
+          fallback_signature = msg.thinking&.signature
+          used_fallback = false
+
           msg.tool_calls.each_value do |tool_call|
-
+            part = {
               functionCall: {
                 name: tool_call.name,
                 args: tool_call.arguments
               }
             }
+
+            signature = tool_call.thought_signature
+            if signature.nil? && fallback_signature && !used_fallback
+              signature = fallback_signature
+              used_fallback = true
+            end
+            part[:thoughtSignature] = signature if signature
+            parts << part
           end
 
           parts

@@ -61,11 +72,13 @@ module RubyLLM
           next unless function_data
 
           id = SecureRandom.uuid
+          thought_signature = part['thoughtSignature'] || part['thought_signature']
 
           result[id] = ToolCall.new(
             id:,
             name: function_data['name'],
-            arguments: function_data['args'] || {}
+            arguments: function_data['args'] || {},
+            thought_signature: thought_signature
           )
         end
 

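Per the fallback logic above, each tool call's own `thought_signature` takes precedence, and a message-level signature from `msg.thinking` is attached to at most one functionCall part: the first one lacking its own. Illustrative outcome with two tool calls and only a message-level signature:

    # Illustrative: only the first functionCall part receives the fallback.
    parts.map { |part| part[:thoughtSignature] }
    # => ["<message-level signature>", nil]
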
@@ -11,13 +11,70 @@ module RubyLLM
           role.to_s
         end
 
+        def format_messages(messages)
+          messages.map do |msg|
+            {
+              role: format_role(msg.role),
+              content: format_content_with_thinking(msg),
+              tool_calls: OpenAI::Tools.format_tool_calls(msg.tool_calls),
+              tool_call_id: msg.tool_call_id
+            }.compact
+          end
+        end
+
         # rubocop:disable Metrics/ParameterLists
-        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil)
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil)
           payload = super
           payload.delete(:stream_options)
+          payload.delete(:reasoning_effort)
+          warn_on_unsupported_thinking(model, thinking)
           payload
         end
         # rubocop:enable Metrics/ParameterLists
+
+        def format_content_with_thinking(msg)
+          formatted_content = OpenAI::Media.format_content(msg.content)
+          return formatted_content unless msg.role == :assistant && msg.thinking
+
+          content_blocks = build_thinking_blocks(msg.thinking)
+          append_formatted_content(content_blocks, formatted_content)
+
+          content_blocks
+        end
+
+        def warn_on_unsupported_thinking(model, thinking)
+          return unless thinking&.enabled?
+          return if model.id.to_s.include?('magistral')
+
+          RubyLLM.logger.warn(
+            'Mistral thinking is only supported on Magistral models. ' \
+            "Ignoring thinking settings for #{model.id}."
+          )
+        end
+
+        def build_thinking_blocks(thinking)
+          return [] unless thinking
+
+          if thinking.text
+            [{
+              type: 'thinking',
+              thinking: [{ type: 'text', text: thinking.text }],
+              signature: thinking.signature
+            }.compact]
+          elsif thinking.signature
+            [{ type: 'thinking', signature: thinking.signature }]
+          else
+            []
+          end
+        end
+
+        def append_formatted_content(content_blocks, formatted_content)
+          if formatted_content.is_a?(Array)
+            content_blocks.concat(formatted_content)
+          elsif formatted_content
+            content_blocks << { type: 'text', text: formatted_content }
+          end
+        end
       end
     end
   end

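On the wire, replayed Magistral thinking becomes a content block ahead of the visible text. An illustrative formatted assistant message (data made up for the example; note the `signature` key is dropped by `.compact` when absent):

    {
      role: 'assistant',
      content: [
        { type: 'thinking', thinking: [{ type: 'text', text: 'Weigh both routes...' }] },
        { type: 'text', text: 'Take the coastal route.' }
      ]
    }
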
@@ -224,8 +224,8 @@ module RubyLLM
       end
 
       def self.normalize_temperature(temperature, model_id)
-        if model_id.match?(/^(o\d|gpt-5)/)
-          RubyLLM.logger.debug "Model #{model_id} requires temperature=1.0,
+        if model_id.match?(/^(o\d|gpt-5)/) && !temperature.nil? && !temperature_close_to_one?(temperature)
+          RubyLLM.logger.debug "Model #{model_id} requires temperature=1.0, setting that instead."
           1.0
         elsif model_id.match?(/-search/)
           RubyLLM.logger.debug "Model #{model_id} does not accept temperature parameter, removing"

@@ -235,6 +235,10 @@ module RubyLLM
         end
       end
 
+      def self.temperature_close_to_one?(temperature)
+        (temperature.to_f - 1.0).abs <= Float::EPSILON
+      end
+
       def modalities_for(model_id)
         modalities = {
           input: ['text'],

@@ -11,7 +11,7 @@ module RubyLLM
 
       module_function
 
-      def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists
+      def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil) # rubocop:disable Metrics/ParameterLists
        payload = {
          model: model.id,
          messages: format_messages(messages),

@@ -34,6 +34,9 @@ module RubyLLM
          }
        end
 
+        effort = resolve_effort(thinking)
+        payload[:reasoning_effort] = effort if effort
+
        payload[:stream_options] = { include_usage: true } if stream
        payload
      end

@@ -49,15 +52,21 @@ module RubyLLM
 
        usage = data['usage'] || {}
        cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
+        thinking_tokens = usage.dig('completion_tokens_details', 'reasoning_tokens')
+        content, thinking_from_blocks = extract_content_and_thinking(message_data['content'])
+        thinking_text = thinking_from_blocks || extract_thinking_text(message_data)
+        thinking_signature = extract_thinking_signature(message_data)
 
        Message.new(
          role: :assistant,
-          content:
+          content: content,
+          thinking: Thinking.build(text: thinking_text, signature: thinking_signature),
          tool_calls: parse_tool_calls(message_data['tool_calls']),
          input_tokens: usage['prompt_tokens'],
          output_tokens: usage['completion_tokens'],
          cached_tokens: cached_tokens,
          cache_creation_tokens: 0,
+          thinking_tokens: thinking_tokens,
          model_id: data['model'],
          raw: response
        )

@@ -70,7 +79,7 @@ module RubyLLM
          content: Media.format_content(msg.content),
          tool_calls: format_tool_calls(msg.tool_calls),
          tool_call_id: msg.tool_call_id
-        }.compact
+        }.compact.merge(format_thinking(msg))
      end
    end
 

@@ -82,6 +91,81 @@ module RubyLLM
          role.to_s
        end
      end
+
+      def resolve_effort(thinking)
+        return nil unless thinking
+
+        thinking.respond_to?(:effort) ? thinking.effort : thinking
+      end
+
+      def format_thinking(msg)
+        return {} unless msg.role == :assistant
+
+        thinking = msg.thinking
+        return {} unless thinking
+
+        payload = {}
+        if thinking.text
+          payload[:reasoning] = thinking.text
+          payload[:reasoning_content] = thinking.text
+        end
+        payload[:reasoning_signature] = thinking.signature if thinking.signature
+        payload
+      end
+
+      def extract_thinking_text(message_data)
+        candidate = message_data['reasoning_content'] || message_data['reasoning'] || message_data['thinking']
+        candidate.is_a?(String) ? candidate : nil
+      end
+
+      def extract_thinking_signature(message_data)
+        candidate = message_data['reasoning_signature'] || message_data['signature']
+        candidate.is_a?(String) ? candidate : nil
+      end
+
+      def extract_content_and_thinking(content)
+        return extract_think_tag_content(content) if content.is_a?(String)
+        return [content, nil] unless content.is_a?(Array)
+
+        text = extract_text_from_blocks(content)
+        thinking = extract_thinking_from_blocks(content)
+
+        [text.empty? ? nil : text, thinking.empty? ? nil : thinking]
+      end
+
+      def extract_text_from_blocks(blocks)
+        blocks.filter_map do |block|
+          block['text'] if block['type'] == 'text' && block['text'].is_a?(String)
+        end.join
+      end
+
+      def extract_thinking_from_blocks(blocks)
+        blocks.filter_map do |block|
+          next unless block['type'] == 'thinking'
+
+          extract_thinking_text_from_block(block)
+        end.join
+      end
+
+      def extract_thinking_text_from_block(block)
+        thinking_block = block['thinking']
+        return thinking_block if thinking_block.is_a?(String)
+
+        if thinking_block.is_a?(Array)
+          return thinking_block.filter_map { |item| item['text'] if item['type'] == 'text' }.join
+        end
+
+        block['text'] if block['text'].is_a?(String)
+      end
+
+      def extract_think_tag_content(text)
+        return [text, nil] unless text.include?('<think>')
+
+        thinking = text.scan(%r{<think>(.*?)</think>}m).join
+        content = text.gsub(%r{<think>.*?</think>}m, '').strip
+
+        [content.empty? ? nil : content, thinking.empty? ? nil : thinking]
+      end
    end
  end
end

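For providers that inline reasoning as `<think>` tags in a plain content string, `extract_think_tag_content` splits it out. Illustrative calls against the method above:

    extract_think_tag_content('<think>Check the units first.</think>12 km is 12,000 m.')
    # => ["12 km is 12,000 m.", "Check the units first."]

    extract_think_tag_content('No reasoning here.')
    # => ["No reasoning here.", nil]
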
@@ -14,16 +14,24 @@ module RubyLLM
      def build_chunk(data)
        usage = data['usage'] || {}
        cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
+        delta = data.dig('choices', 0, 'delta') || {}
+        content_source = delta['content'] || data.dig('choices', 0, 'message', 'content')
+        content, thinking_from_blocks = OpenAI::Chat.extract_content_and_thinking(content_source)
 
        Chunk.new(
          role: :assistant,
          model_id: data['model'],
-          content:
-
+          content: content,
+          thinking: Thinking.build(
+            text: thinking_from_blocks || delta['reasoning_content'] || delta['reasoning'],
+            signature: delta['reasoning_signature']
+          ),
+          tool_calls: parse_tool_calls(delta['tool_calls'], parse_arguments: false),
          input_tokens: usage['prompt_tokens'],
          output_tokens: usage['completion_tokens'],
          cached_tokens: cached_tokens,
-          cache_creation_tokens: 0
+          cache_creation_tokens: 0,
+          thinking_tokens: usage.dig('completion_tokens_details', 'reasoning_tokens')
        )
      end
 

@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class OpenAI
+      # Normalizes temperature for OpenAI models with provider-specific requirements.
+      module Temperature
+        module_function
+
+        def normalize(temperature, model_id)
+          if model_id.match?(/^(o\d|gpt-5)/) && !temperature.nil? && !temperature_close_to_one?(temperature)
+            RubyLLM.logger.debug "Model #{model_id} requires temperature=1.0, setting that instead."
+            1.0
+          elsif model_id.include?('-search')
+            RubyLLM.logger.debug "Model #{model_id} does not accept temperature parameter, removing"
+            nil
+          else
+            temperature
+          end
+        end
+
+        def temperature_close_to_one?(temperature)
+          (temperature.to_f - 1.0).abs <= Float::EPSILON
+        end
+      end
+    end
+  end
+end

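Based on the module above, normalization behaves roughly as follows (illustrative calls; the `-search` model id is hypothetical):

    temp = RubyLLM::Providers::OpenAI::Temperature

    temp.normalize(0.7, 'gpt-5')         # => 1.0 (forced, with a debug log)
    temp.normalize(1.0, 'gpt-5')         # => 1.0 (already compliant, no log)
    temp.normalize(0.7, 'gpt-4o-search') # => nil (parameter dropped)
    temp.normalize(0.7, 'gpt-4o')        # => 0.7 (unchanged)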