ruby_llm 1.9.1 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. checksums.yaml +4 -4
  2. data/README.md +3 -2
  3. data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +3 -0
  4. data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +1 -0
  5. data/lib/generators/ruby_llm/upgrade_to_v1_10/templates/add_v1_10_message_columns.rb.tt +19 -0
  6. data/lib/generators/ruby_llm/upgrade_to_v1_10/upgrade_to_v1_10_generator.rb +50 -0
  7. data/lib/generators/ruby_llm/upgrade_to_v1_7/templates/migration.rb.tt +1 -1
  8. data/lib/ruby_llm/active_record/acts_as_legacy.rb +5 -1
  9. data/lib/ruby_llm/active_record/chat_methods.rb +12 -0
  10. data/lib/ruby_llm/active_record/message_methods.rb +41 -8
  11. data/lib/ruby_llm/aliases.json +101 -21
  12. data/lib/ruby_llm/chat.rb +10 -7
  13. data/lib/ruby_llm/configuration.rb +1 -1
  14. data/lib/ruby_llm/message.rb +37 -11
  15. data/lib/ruby_llm/models.json +21119 -10230
  16. data/lib/ruby_llm/models.rb +271 -27
  17. data/lib/ruby_llm/models_schema.json +2 -2
  18. data/lib/ruby_llm/provider.rb +4 -3
  19. data/lib/ruby_llm/providers/anthropic/chat.rb +128 -13
  20. data/lib/ruby_llm/providers/anthropic/streaming.rb +25 -1
  21. data/lib/ruby_llm/providers/bedrock/chat.rb +58 -15
  22. data/lib/ruby_llm/providers/bedrock/models.rb +21 -15
  23. data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +59 -2
  24. data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +5 -0
  25. data/lib/ruby_llm/providers/gemini/chat.rb +69 -3
  26. data/lib/ruby_llm/providers/gemini/streaming.rb +32 -1
  27. data/lib/ruby_llm/providers/gemini/tools.rb +16 -3
  28. data/lib/ruby_llm/providers/gpustack/chat.rb +1 -1
  29. data/lib/ruby_llm/providers/mistral/chat.rb +58 -1
  30. data/lib/ruby_llm/providers/ollama/chat.rb +1 -1
  31. data/lib/ruby_llm/providers/openai/capabilities.rb +6 -2
  32. data/lib/ruby_llm/providers/openai/chat.rb +87 -3
  33. data/lib/ruby_llm/providers/openai/streaming.rb +11 -3
  34. data/lib/ruby_llm/providers/openai/temperature.rb +28 -0
  35. data/lib/ruby_llm/providers/openai.rb +1 -1
  36. data/lib/ruby_llm/providers/openrouter/chat.rb +154 -0
  37. data/lib/ruby_llm/providers/openrouter/streaming.rb +74 -0
  38. data/lib/ruby_llm/providers/openrouter.rb +2 -0
  39. data/lib/ruby_llm/providers/vertexai.rb +5 -1
  40. data/lib/ruby_llm/stream_accumulator.rb +111 -14
  41. data/lib/ruby_llm/streaming.rb +76 -54
  42. data/lib/ruby_llm/thinking.rb +49 -0
  43. data/lib/ruby_llm/tokens.rb +47 -0
  44. data/lib/ruby_llm/tool.rb +1 -1
  45. data/lib/ruby_llm/tool_call.rb +6 -3
  46. data/lib/ruby_llm/version.rb +1 -1
  47. data/lib/tasks/models.rake +20 -13
  48. metadata +12 -5
data/lib/ruby_llm/providers/bedrock/chat.rb
@@ -16,46 +16,89 @@ module RubyLLM
           Anthropic::Chat.parse_completion_response response
         end
 
-        def format_message(msg)
+        def format_message(msg, thinking: nil)
+          thinking_enabled = thinking&.enabled?
+
           if msg.tool_call?
-            Anthropic::Tools.format_tool_call(msg)
+            format_tool_call_with_thinking(msg, thinking_enabled)
           elsif msg.tool_result?
             Anthropic::Tools.format_tool_result(msg)
           else
-            format_basic_message(msg)
+            format_basic_message_with_thinking(msg, thinking_enabled)
           end
         end
 
-        def format_basic_message(msg)
-          {
-            role: Anthropic::Chat.convert_role(msg.role),
-            content: Media.format_content(msg.content)
-          }
-        end
-
         private
 
         def completion_url
           "model/#{@model_id}/invoke"
         end
 
-        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
           @model_id = model.id
 
           system_messages, chat_messages = Anthropic::Chat.separate_messages(messages)
           system_content = Anthropic::Chat.build_system_content(system_messages)
 
-          build_base_payload(chat_messages, model).tap do |payload|
+          build_base_payload(chat_messages, model, thinking).tap do |payload|
             Anthropic::Chat.add_optional_fields(payload, system_content:, tools:, temperature:)
           end
         end
 
-        def build_base_payload(chat_messages, model)
-          {
+        def build_base_payload(chat_messages, model, thinking)
+          payload = {
            anthropic_version: 'bedrock-2023-05-31',
-            messages: chat_messages.map { |msg| format_message(msg) },
+            messages: chat_messages.map { |msg| format_message(msg, thinking: thinking) },
            max_tokens: model.max_tokens || 4096
          }
+
+          thinking_payload = Anthropic::Chat.build_thinking_payload(thinking)
+          payload[:thinking] = thinking_payload if thinking_payload
+
+          payload
+        end
+
+        def format_basic_message_with_thinking(msg, thinking_enabled)
+          content_blocks = []
+
+          if msg.role == :assistant && thinking_enabled
+            thinking_block = Anthropic::Chat.build_thinking_block(msg.thinking)
+            content_blocks << thinking_block if thinking_block
+          end
+
+          Anthropic::Chat.append_formatted_content(content_blocks, msg.content)
+
+          {
+            role: Anthropic::Chat.convert_role(msg.role),
+            content: content_blocks
+          }
+        end
+
+        def format_tool_call_with_thinking(msg, thinking_enabled)
+          if msg.content.is_a?(RubyLLM::Content::Raw)
+            content_blocks = msg.content.value
+            content_blocks = [content_blocks] unless content_blocks.is_a?(Array)
+            content_blocks = Anthropic::Chat.prepend_thinking_block(content_blocks, msg, thinking_enabled)
+
+            return { role: 'assistant', content: content_blocks }
+          end
+
+          content_blocks = Anthropic::Chat.prepend_thinking_block([], msg, thinking_enabled)
+          content_blocks << Anthropic::Media.format_text(msg.content) unless msg.content.nil? || msg.content.empty?
+
+          msg.tool_calls.each_value do |tool_call|
+            content_blocks << {
+              type: 'tool_use',
+              id: tool_call.id,
+              name: tool_call.name,
+              input: tool_call.arguments
+            }
+          end
+
+          {
+            role: 'assistant',
+            content: content_blocks
+          }
         end
       end
     end
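Taken together, a thinking-enabled Bedrock request body would come out roughly like this. A minimal sketch, assuming build_thinking_payload emits Anthropic's documented extended-thinking parameter; the prompt and budget values are illustrative:

# Illustrative result of render_payload with thinking enabled (values are examples)
{
  anthropic_version: 'bedrock-2023-05-31',
  messages: [
    { role: 'user', content: [{ type: 'text', text: 'Why is the sky blue?' }] }
  ],
  max_tokens: 4096,
  thinking: { type: 'enabled', budget_tokens: 2048 } # assumed shape, per Anthropic's extended-thinking API
}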
data/lib/ruby_llm/providers/bedrock/models.rb
@@ -69,28 +69,34 @@ module RubyLLM
         end
 
         def model_id_with_region(model_id, model_data)
-          return model_id unless model_data['inferenceTypesSupported']&.include?('INFERENCE_PROFILE')
-          return model_id if model_data['inferenceTypesSupported']&.include?('ON_DEMAND')
+          normalize_inference_profile_id(
+            model_id,
+            model_data['inferenceTypesSupported'],
+            @config.bedrock_region
+          )
+        end
 
-          desired_region_prefix = inference_profile_region_prefix
+        def region_prefix(region)
+          region = region.to_s
+          return 'us' if region.empty?
 
-          # Return unchanged if model already has the correct region prefix
-          return model_id if model_id.start_with?("#{desired_region_prefix}.")
+          region[0, 2]
+        end
 
-          # Remove any existing region prefix (e.g., "us.", "eu.", "ap.")
-          clean_model_id = model_id.sub(/^[a-z]{2}\./, '')
+        def with_region_prefix(model_id, region)
+          desired_prefix = region_prefix(region)
+          return model_id if model_id.start_with?("#{desired_prefix}.")
 
-          # Apply the desired region prefix
-          "#{desired_region_prefix}.#{clean_model_id}"
+          clean_model_id = model_id.sub(/^[a-z]{2}\./, '')
+          "#{desired_prefix}.#{clean_model_id}"
         end
 
-        def inference_profile_region_prefix
-          # Extract region prefix from bedrock_region (e.g., "eu-west-3" -> "eu")
-          region = @config.bedrock_region.to_s
-          return 'us' if region.empty? # Default fallback
+        def normalize_inference_profile_id(model_id, inference_types, region)
+          types = Array(inference_types)
+          return model_id unless types.include?('INFERENCE_PROFILE')
+          return model_id if types.include?('ON_DEMAND')
 
-          # Take first two characters as the region prefix
-          region[0, 2]
+          with_region_prefix(model_id, region)
        end
      end
    end
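The refactor is behavior-preserving; it just splits the old method into testable pieces. A quick sketch of what normalize_inference_profile_id does (model IDs are illustrative):

# Inference-profile-only model: region prefix is normalized to the configured region
normalize_inference_profile_id('eu.anthropic.claude-sonnet-v1:0', ['INFERENCE_PROFILE'], 'us-east-1')
# => "us.anthropic.claude-sonnet-v1:0"

# Model that also supports ON_DEMAND: returned unchanged
normalize_inference_profile_id('anthropic.claude-haiku-v1:0', %w[INFERENCE_PROFILE ON_DEMAND], 'eu-west-3')
# => "anthropic.claude-haiku-v1:0"

# Empty region falls back to the 'us' prefix
normalize_inference_profile_id('eu.meta.llama3-v1:0', ['INFERENCE_PROFILE'], nil)
# => "us.meta.llama3-v1:0"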
data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb
@@ -16,6 +16,31 @@ module RubyLLM
           extract_content_by_type(data)
         end
 
+        def extract_thinking_delta(data)
+          return nil unless data.is_a?(Hash)
+
+          if data['type'] == 'content_block_delta' && data.dig('delta', 'type') == 'thinking_delta'
+            return data.dig('delta', 'thinking')
+          end
+
+          if data['type'] == 'content_block_start' && data.dig('content_block', 'type') == 'thinking'
+            return data.dig('content_block', 'thinking') || data.dig('content_block', 'text')
+          end
+
+          nil
+        end
+
+        def extract_signature_delta(data)
+          return nil unless data.is_a?(Hash)
+
+          signature = extract_signature_from_delta(data)
+          return signature if signature
+
+          return nil unless data['type'] == 'content_block_start'
+
+          extract_signature_from_block(data['content_block'])
+        end
+
         def extract_tool_calls(data)
           data.dig('message', 'tool_calls') || data['tool_calls']
         end
@@ -47,6 +72,17 @@ module RubyLLM
          breakdown.values.compact.sum
         end
 
+        def extract_thinking_tokens(data)
+          data.dig('message', 'usage', 'thinking_tokens') ||
+            data.dig('message', 'usage', 'output_tokens_details', 'thinking_tokens') ||
+            data.dig('usage', 'thinking_tokens') ||
+            data.dig('usage', 'output_tokens_details', 'thinking_tokens') ||
+            data.dig('message', 'usage', 'reasoning_tokens') ||
+            data.dig('message', 'usage', 'output_tokens_details', 'reasoning_tokens') ||
+            data.dig('usage', 'reasoning_tokens') ||
+            data.dig('usage', 'output_tokens_details', 'reasoning_tokens')
+        end
+
         private
 
         def extract_content_by_type(data)
@@ -58,11 +94,32 @@ module RubyLLM
        end
 
         def extract_block_start_content(data)
-          data.dig('content_block', 'text').to_s
+          content_block = data['content_block'] || {}
+          return '' if %w[thinking redacted_thinking].include?(content_block['type'])
+
+          content_block['text'].to_s
         end
 
         def extract_delta_content(data)
-          data.dig('delta', 'text').to_s
+          delta = data['delta'] || {}
+          return '' if %w[thinking_delta signature_delta].include?(delta['type'])
+
+          delta['text'].to_s
+        end
+
+        def extract_signature_from_delta(data)
+          return unless data['type'] == 'content_block_delta'
+          return unless data.dig('delta', 'type') == 'signature_delta'
+
+          data.dig('delta', 'signature')
+        end
+
+        def extract_signature_from_block(content_block)
+          block = content_block || {}
+          return block['signature'] if block['type'] == 'thinking' && block['signature']
+          return block['data'] if block['type'] == 'redacted_thinking'
+
+          nil
        end
      end
    end
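To see how the new extractors split a stream, here are two abridged Anthropic-style events (field values illustrative) and what each method returns:

thinking_event = {
  'type'  => 'content_block_delta',
  'delta' => { 'type' => 'thinking_delta', 'thinking' => 'Comparing both options...' }
}
extract_thinking_delta(thinking_event) # => "Comparing both options..."
extract_delta_content(thinking_event)  # => "" (thinking deltas no longer leak into content)

signature_event = {
  'type'  => 'content_block_delta',
  'delta' => { 'type' => 'signature_delta', 'signature' => 'EqQBCg...' } # abridged
}
extract_signature_delta(signature_event) # => "EqQBCg..."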
data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb
@@ -57,10 +57,15 @@ module RubyLLM
             role: :assistant,
             model_id: extract_model_id(data),
             content: extract_streaming_content(data),
+            thinking: Thinking.build(
+              text: extract_thinking_delta(data),
+              signature: extract_signature_delta(data)
+            ),
             input_tokens: extract_input_tokens(data),
             output_tokens: extract_output_tokens(data),
             cached_tokens: extract_cached_tokens(data),
             cache_creation_tokens: extract_cache_creation_tokens(data),
+            thinking_tokens: extract_thinking_tokens(data),
             tool_calls: extract_tool_calls(data)
           }
         end
data/lib/ruby_llm/providers/gemini/chat.rb
@@ -14,7 +14,7 @@ module RubyLLM
           "models/#{@model}:generateContent"
         end
 
-        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
           @model = model.id
           payload = {
             contents: format_messages(messages),
@@ -24,11 +24,30 @@ module RubyLLM
          payload[:generationConfig][:temperature] = temperature unless temperature.nil?
 
           payload[:generationConfig].merge!(structured_output_config(schema, model)) if schema
+          payload[:generationConfig][:thinkingConfig] = build_thinking_config(model, thinking) if thinking&.enabled?
 
           payload[:tools] = format_tools(tools) if tools.any?
           payload
         end
 
+        def build_thinking_config(_model, thinking)
+          config = { includeThoughts: true }
+
+          config[:thinkingLevel] = resolve_effort_level(thinking) if thinking&.effort
+          config[:thinkingBudget] = resolve_budget(thinking) if thinking&.budget
+
+          config
+        end
+
+        def resolve_effort_level(thinking)
+          thinking.respond_to?(:effort) ? thinking.effort : thinking
+        end
+
+        def resolve_budget(thinking)
+          budget = thinking.respond_to?(:budget) ? thinking.budget : thinking
+          budget.is_a?(Integer) ? budget : nil
+        end
+
         private
 
         def format_messages(messages)
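Concretely, the resulting thinkingConfig would look like one of these (values illustrative; thinkingLevel is only set when an effort is given, thinkingBudget only when the budget is an Integer):

build_thinking_config(model, thinking)
# => { includeThoughts: true, thinkingBudget: 1024 }   # budget-based request
# => { includeThoughts: true, thinkingLevel: 'high' }  # effort-based request (level value illustrative)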
@@ -56,20 +75,43 @@ module RubyLLM
          elsif msg.tool_result?
             format_tool_result(msg)
           else
-            Media.format_content(msg.content)
+            format_message_parts(msg)
           end
         end
 
+        def format_message_parts(msg)
+          parts = []
+
+          parts << build_thought_part(msg.thinking) if msg.role == :assistant && msg.thinking
+
+          content_parts = Media.format_content(msg.content)
+          parts.concat(content_parts.is_a?(Array) ? content_parts : [content_parts])
+          parts
+        end
+
+        def build_thought_part(thinking)
+          part = { thought: true }
+          part[:text] = thinking.text if thinking.text
+          part[:thoughtSignature] = thinking.signature if thinking.signature
+          part
+        end
+
         def parse_completion_response(response)
           data = response.body
+          parts = data.dig('candidates', 0, 'content', 'parts') || []
           tool_calls = extract_tool_calls(data)
 
           Message.new(
             role: :assistant,
-            content: parse_content(data),
+            content: extract_text_parts(parts) || parse_content(data),
+            thinking: Thinking.build(
+              text: extract_thought_parts(parts),
+              signature: extract_thought_signature(parts)
+            ),
             tool_calls: tool_calls,
             input_tokens: data.dig('usageMetadata', 'promptTokenCount'),
             output_tokens: calculate_output_tokens(data),
+            thinking_tokens: data.dig('usageMetadata', 'thoughtsTokenCount'),
             model_id: data['modelVersion'] || response.env.url.path.split('/')[3].split(':')[0],
             raw: response
           )
@@ -93,6 +135,30 @@ module RubyLLM
          build_response_content(parts)
         end
 
+        def extract_text_parts(parts)
+          text_parts = parts.reject { |p| p['thought'] }
+          content = text_parts.filter_map { |p| p['text'] }.join
+          content.empty? ? nil : content
+        end
+
+        def extract_thought_parts(parts)
+          thought_parts = parts.select { |p| p['thought'] }
+          thoughts = thought_parts.filter_map { |p| p['text'] }.join
+          thoughts.empty? ? nil : thoughts
+        end
+
+        def extract_thought_signature(parts)
+          parts.each do |part|
+            signature = part['thoughtSignature'] ||
+                        part['thought_signature'] ||
+                        part.dig('functionCall', 'thoughtSignature') ||
+                        part.dig('functionCall', 'thought_signature')
+            return signature if signature
+          end
+
+          nil
+        end
+
         def function_call?(candidate)
           parts = candidate.dig('content', 'parts')
           parts&.any? { |p| p['functionCall'] }
data/lib/ruby_llm/providers/gemini/streaming.rb
@@ -10,12 +10,19 @@ module RubyLLM
         end
 
         def build_chunk(data)
+          parts = data.dig('candidates', 0, 'content', 'parts') || []
+
           Chunk.new(
             role: :assistant,
             model_id: extract_model_id(data),
-            content: extract_content(data),
+            content: extract_text_content(parts),
+            thinking: Thinking.build(
+              text: extract_thought_content(parts),
+              signature: extract_thought_signature(parts)
+            ),
             input_tokens: extract_input_tokens(data),
             output_tokens: extract_output_tokens(data),
+            thinking_tokens: data.dig('usageMetadata', 'thoughtsTokenCount'),
             tool_calls: extract_tool_calls(data)
           )
         end
@@ -26,6 +33,30 @@ module RubyLLM
          data['modelVersion']
         end
 
+        def extract_text_content(parts)
+          text_parts = parts.reject { |p| p['thought'] }
+          text = text_parts.filter_map { |p| p['text'] }.join
+          text.empty? ? nil : text
+        end
+
+        def extract_thought_content(parts)
+          thought_parts = parts.select { |p| p['thought'] }
+          thoughts = thought_parts.filter_map { |p| p['text'] }.join
+          thoughts.empty? ? nil : thoughts
+        end
+
+        def extract_thought_signature(parts)
+          parts.each do |part|
+            signature = part['thoughtSignature'] ||
+                        part['thought_signature'] ||
+                        part.dig('functionCall', 'thoughtSignature') ||
+                        part.dig('functionCall', 'thought_signature')
+            return signature if signature
+          end
+
+          nil
+        end
+
         def extract_content(data)
           return nil unless data['candidates']&.any?
 
data/lib/ruby_llm/providers/gemini/tools.rb
@@ -13,7 +13,7 @@ module RubyLLM
           }]
         end
 
-        def format_tool_call(msg)
+        def format_tool_call(msg) # rubocop:disable Metrics/PerceivedComplexity
           parts = []
 
           if msg.content && !(msg.content.respond_to?(:empty?) && msg.content.empty?)
@@ -21,13 +21,24 @@ module RubyLLM
            parts.concat(formatted_content.is_a?(Array) ? formatted_content : [formatted_content])
           end
 
+          fallback_signature = msg.thinking&.signature
+          used_fallback = false
+
           msg.tool_calls.each_value do |tool_call|
-            parts << {
+            part = {
               functionCall: {
                 name: tool_call.name,
                 args: tool_call.arguments
               }
             }
+
+            signature = tool_call.thought_signature
+            if signature.nil? && fallback_signature && !used_fallback
+              signature = fallback_signature
+              used_fallback = true
+            end
+            part[:thoughtSignature] = signature if signature
+            parts << part
           end
 
           parts
@@ -61,11 +72,13 @@ module RubyLLM
            next unless function_data
 
             id = SecureRandom.uuid
+            thought_signature = part['thoughtSignature'] || part['thought_signature']
 
             result[id] = ToolCall.new(
               id:,
               name: function_data['name'],
-              arguments: function_data['args'] || {}
+              arguments: function_data['args'] || {},
+              thought_signature: thought_signature
             )
           end
 
data/lib/ruby_llm/providers/gpustack/chat.rb
@@ -14,7 +14,7 @@ module RubyLLM
             content: GPUStack::Media.format_content(msg.content),
             tool_calls: format_tool_calls(msg.tool_calls),
             tool_call_id: msg.tool_call_id
-          }.compact
+          }.compact.merge(OpenAI::Chat.format_thinking(msg))
           end
         end
 
data/lib/ruby_llm/providers/mistral/chat.rb
@@ -11,13 +11,70 @@ module RubyLLM
           role.to_s
         end
 
+        def format_messages(messages)
+          messages.map do |msg|
+            {
+              role: format_role(msg.role),
+              content: format_content_with_thinking(msg),
+              tool_calls: OpenAI::Tools.format_tool_calls(msg.tool_calls),
+              tool_call_id: msg.tool_call_id
+            }.compact
+          end
+        end
+
         # rubocop:disable Metrics/ParameterLists
-        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil)
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil)
           payload = super
           payload.delete(:stream_options)
+          payload.delete(:reasoning_effort)
+          warn_on_unsupported_thinking(model, thinking)
           payload
         end
         # rubocop:enable Metrics/ParameterLists
+
+        def format_content_with_thinking(msg)
+          formatted_content = OpenAI::Media.format_content(msg.content)
+          return formatted_content unless msg.role == :assistant && msg.thinking
+
+          content_blocks = build_thinking_blocks(msg.thinking)
+          append_formatted_content(content_blocks, formatted_content)
+
+          content_blocks
+        end
+
+        def warn_on_unsupported_thinking(model, thinking)
+          return unless thinking&.enabled?
+          return if model.id.to_s.include?('magistral')
+
+          RubyLLM.logger.warn(
+            'Mistral thinking is only supported on Magistral models. ' \
+            "Ignoring thinking settings for #{model.id}."
+          )
+        end
+
+        def build_thinking_blocks(thinking)
+          return [] unless thinking
+
+          if thinking.text
+            [{
+              type: 'thinking',
+              thinking: [{ type: 'text', text: thinking.text }],
+              signature: thinking.signature
+            }.compact]
+          elsif thinking.signature
+            [{ type: 'thinking', signature: thinking.signature }]
+          else
+            []
+          end
+        end
+
+        def append_formatted_content(content_blocks, formatted_content)
+          if formatted_content.is_a?(Array)
+            content_blocks.concat(formatted_content)
+          elsif formatted_content
+            content_blocks << { type: 'text', text: formatted_content }
+          end
+        end
      end
    end
  end
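Re-serialized for Mistral, an assistant turn with prior thinking would look roughly like this (text values illustrative; the signature key is dropped by .compact when nil):

{
  role: 'assistant',
  content: [
    { type: 'thinking', thinking: [{ type: 'text', text: 'Weighing both routes...' }] },
    { type: 'text', text: 'Take the A10; it is faster at this hour.' }
  ]
}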
data/lib/ruby_llm/providers/ollama/chat.rb
@@ -14,7 +14,7 @@ module RubyLLM
             content: Ollama::Media.format_content(msg.content),
             tool_calls: format_tool_calls(msg.tool_calls),
             tool_call_id: msg.tool_call_id
-          }.compact
+          }.compact.merge(OpenAI::Chat.format_thinking(msg))
           end
         end
 
data/lib/ruby_llm/providers/openai/capabilities.rb
@@ -224,8 +224,8 @@ module RubyLLM
         end
 
         def self.normalize_temperature(temperature, model_id)
-          if model_id.match?(/^(o\d|gpt-5)/)
-            RubyLLM.logger.debug "Model #{model_id} requires temperature=1.0, ignoring provided value"
+          if model_id.match?(/^(o\d|gpt-5)/) && !temperature.nil? && !temperature_close_to_one?(temperature)
+            RubyLLM.logger.debug "Model #{model_id} requires temperature=1.0, setting that instead."
             1.0
           elsif model_id.match?(/-search/)
             RubyLLM.logger.debug "Model #{model_id} does not accept temperature parameter, removing"
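The added guards silence a spurious debug line: previously every call for an o-series or gpt-5 model logged the override, even when the caller had already passed 1.0 or no temperature at all. Assuming the unchanged else branch still returns the value as-is:

normalize_temperature(0.2, 'gpt-5')  # logs, returns 1.0
normalize_temperature(1.0, 'gpt-5')  # returns 1.0, no log (new in 1.10.0)
normalize_temperature(nil, 'o3')     # returns nil, no log (new in 1.10.0)
normalize_temperature(0.2, 'gpt-4o') # returns 0.2 unchanged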
@@ -235,6 +235,10 @@ module RubyLLM
          end
         end
 
+        def self.temperature_close_to_one?(temperature)
+          (temperature.to_f - 1.0).abs <= Float::EPSILON
+        end
+
         def modalities_for(model_id)
           modalities = {
             input: ['text'],