ruby_llm 1.9.2 → 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -2
- data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +3 -0
- data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +1 -0
- data/lib/generators/ruby_llm/upgrade_to_v1_10/templates/add_v1_10_message_columns.rb.tt +19 -0
- data/lib/generators/ruby_llm/upgrade_to_v1_10/upgrade_to_v1_10_generator.rb +50 -0
- data/lib/ruby_llm/active_record/acts_as_legacy.rb +5 -1
- data/lib/ruby_llm/active_record/chat_methods.rb +12 -0
- data/lib/ruby_llm/active_record/message_methods.rb +41 -8
- data/lib/ruby_llm/aliases.json +0 -12
- data/lib/ruby_llm/chat.rb +10 -7
- data/lib/ruby_llm/configuration.rb +1 -1
- data/lib/ruby_llm/message.rb +37 -11
- data/lib/ruby_llm/models.json +1059 -857
- data/lib/ruby_llm/models.rb +134 -12
- data/lib/ruby_llm/provider.rb +4 -3
- data/lib/ruby_llm/providers/anthropic/chat.rb +128 -13
- data/lib/ruby_llm/providers/anthropic/streaming.rb +25 -1
- data/lib/ruby_llm/providers/bedrock/chat.rb +58 -15
- data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +59 -2
- data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +5 -0
- data/lib/ruby_llm/providers/gemini/chat.rb +69 -3
- data/lib/ruby_llm/providers/gemini/streaming.rb +32 -1
- data/lib/ruby_llm/providers/gemini/tools.rb +16 -3
- data/lib/ruby_llm/providers/gpustack/chat.rb +1 -1
- data/lib/ruby_llm/providers/mistral/chat.rb +58 -1
- data/lib/ruby_llm/providers/ollama/chat.rb +1 -1
- data/lib/ruby_llm/providers/openai/capabilities.rb +6 -2
- data/lib/ruby_llm/providers/openai/chat.rb +87 -3
- data/lib/ruby_llm/providers/openai/streaming.rb +11 -3
- data/lib/ruby_llm/providers/openai/temperature.rb +28 -0
- data/lib/ruby_llm/providers/openai.rb +1 -1
- data/lib/ruby_llm/providers/openrouter/chat.rb +154 -0
- data/lib/ruby_llm/providers/openrouter/streaming.rb +74 -0
- data/lib/ruby_llm/providers/openrouter.rb +2 -0
- data/lib/ruby_llm/providers/vertexai.rb +5 -1
- data/lib/ruby_llm/stream_accumulator.rb +111 -14
- data/lib/ruby_llm/streaming.rb +54 -51
- data/lib/ruby_llm/thinking.rb +49 -0
- data/lib/ruby_llm/tokens.rb +47 -0
- data/lib/ruby_llm/tool_call.rb +6 -3
- data/lib/ruby_llm/version.rb +1 -1
- data/lib/tasks/models.rake +19 -12
- metadata +12 -5
data/lib/ruby_llm/models.rb
CHANGED
@@ -38,24 +38,53 @@ module RubyLLM
       end
 
       def refresh!(remote_only: false)
-
-
-
+        existing_models = load_existing_models
+
+        provider_fetch = fetch_provider_models(remote_only: remote_only)
+        log_provider_fetch(provider_fetch)
+
+        models_dev_fetch = fetch_models_dev_models(existing_models)
+        log_models_dev_fetch(models_dev_fetch)
+
+        merged_models = merge_with_existing(existing_models, provider_fetch, models_dev_fetch)
         @instance = new(merged_models)
       end
 
-      def
+      def fetch_provider_models(remote_only: true) # rubocop:disable Metrics/PerceivedComplexity
         config = RubyLLM.config
+        provider_classes = remote_only ? Provider.remote_providers.values : Provider.providers.values
         configured_classes = if remote_only
                                Provider.configured_remote_providers(config)
                              else
                                Provider.configured_providers(config)
                              end
-        configured = configured_classes.
+        configured = configured_classes.select { |klass| provider_classes.include?(klass) }
+        result = {
+          models: [],
+          fetched_providers: [],
+          configured_names: configured.map(&:name),
+          failed: []
+        }
 
-
+        provider_classes.each do |provider_class|
+          next if remote_only && provider_class.local?
+          next unless provider_class.configured?(config)
+
+          begin
+            result[:models].concat(provider_class.new(config).list_models)
+            result[:fetched_providers] << provider_class.slug
+          rescue StandardError => e
+            result[:failed] << { name: provider_class.name, slug: provider_class.slug, error: e }
+          end
+        end
+
+        result[:fetched_providers].uniq!
+        result
+      end
 
-
+      # Backwards-compatible wrapper used by specs.
+      def fetch_from_providers(remote_only: true)
+        fetch_provider_models(remote_only: remote_only)[:models]
       end
 
       def resolve(model_id, provider: nil, assume_exists: false, config: nil) # rubocop:disable Metrics/PerceivedComplexity
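Net effect of this hunk: `refresh!` no longer works from a bare model list; `fetch_provider_models` now reports what it fetched and what failed so the later merge step can preserve cached entries. A rough sketch of the result shape (keys come from the code above; the values are invented for illustration):

    # Illustrative result of fetch_provider_models(remote_only: true).
    {
      models: [],                                  # Model::Info records from each Provider#list_models
      fetched_providers: ['openai', 'anthropic'],  # slugs whose list_models call succeeded
      configured_names: ['OpenAI', 'Anthropic'],   # names used in the "Fetching models" log line
      failed: [{ name: 'Gemini', slug: 'gemini', error: StandardError.new('timeout') }]
    }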
@@ -103,7 +132,7 @@ module RubyLLM
         instance.respond_to?(method, include_private) || super
       end
 
-      def
+      def fetch_models_dev_models(existing_models) # rubocop:disable Metrics/PerceivedComplexity
         RubyLLM.logger.info 'Fetching models from models.dev API...'
 
         connection = Connection.basic do |f|
@@ -121,7 +150,52 @@ module RubyLLM
             Model::Info.new(models_dev_model_to_info(model_data, provider_slug, provider_key.to_s))
           end
         end
-        models.reject { |model| model.provider.nil? || model.id.nil? }
+        { models: models.reject { |model| model.provider.nil? || model.id.nil? }, fetched: true }
+      rescue StandardError => e
+        RubyLLM.logger.warn("Failed to fetch models.dev (#{e.class}: #{e.message}). Keeping existing.")
+        {
+          models: existing_models.select { |model| model.metadata[:source] == 'models.dev' },
+          fetched: false
+        }
+      end
+
+      def load_existing_models
+        existing_models = instance&.all
+        existing_models = read_from_json if existing_models.nil? || existing_models.empty?
+        existing_models
+      end
+
+      def log_provider_fetch(provider_fetch)
+        RubyLLM.logger.info "Fetching models from providers: #{provider_fetch[:configured_names].join(', ')}"
+        provider_fetch[:failed].each do |failure|
+          RubyLLM.logger.warn(
+            "Failed to fetch #{failure[:name]} models (#{failure[:error].class}: #{failure[:error].message}). " \
+            'Keeping existing.'
+          )
+        end
+      end
+
+      def log_models_dev_fetch(models_dev_fetch)
+        return if models_dev_fetch[:fetched]
+
+        RubyLLM.logger.warn('Using cached models.dev data due to fetch failure.')
+      end
+
+      def merge_with_existing(existing_models, provider_fetch, models_dev_fetch)
+        existing_by_provider = existing_models.group_by(&:provider)
+        preserved_models = existing_by_provider
+                           .except(*provider_fetch[:fetched_providers])
+                           .values
+                           .flatten
+
+        provider_models = provider_fetch[:models] + preserved_models
+        models_dev_models = if models_dev_fetch[:fetched]
+                              models_dev_fetch[:models]
+                            else
+                              existing_models.select { |model| model.metadata[:source] == 'models.dev' }
+                            end
+
+        merge_models(provider_models, models_dev_models)
       end
 
       def merge_models(provider_models, models_dev_models)
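The practical consequence of `merge_with_existing` is that a provider whose fetch failed (or that simply wasn't configured) keeps its previously cached models instead of vanishing from the registry. A runnable sketch with stand-in records (ids are made up):

    require 'ostruct'

    # Stand-ins for Model::Info; only #provider and #id matter here.
    existing = [OpenStruct.new(provider: 'openai', id: 'gpt-4.1'),
                OpenStruct.new(provider: 'gemini', id: 'gemini-2.5-pro')]
    fetch    = { models: [OpenStruct.new(provider: 'openai', id: 'gpt-5')],
                 fetched_providers: ['openai'] }

    # Mirrors merge_with_existing: drop groups that were re-fetched, keep the rest.
    preserved = existing.group_by(&:provider)
                        .except(*fetch[:fetched_providers])
                        .values.flatten
    (fetch[:models] + preserved).map(&:id) # => ["gpt-5", "gemini-2.5-pro"]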
@@ -150,8 +224,23 @@ module RubyLLM
         # Direct match
         return models_dev_by_key[key] if models_dev_by_key[key]
 
-        # VertexAI uses same models as Gemini
         provider, model_id = key.split(':', 2)
+        if provider == 'bedrock'
+          normalized_id = model_id.sub(/^[a-z]{2}\./, '')
+          context_override = nil
+          normalized_id = normalized_id.gsub(/:(\d+)k\b/) do
+            context_override = Regexp.last_match(1).to_i * 1000
+            ''
+          end
+          bedrock_model = models_dev_by_key["bedrock:#{normalized_id}"]
+          if bedrock_model
+            data = bedrock_model.to_h.merge(id: model_id)
+            data[:context_window] = context_override if context_override
+            return Model::Info.new(data)
+          end
+        end
+
+        # VertexAI uses same models as Gemini
         return unless provider == 'vertexai'
 
         gemini_model = models_dev_by_key["gemini:#{model_id}"]
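The new Bedrock branch lets region-prefixed and context-suffixed Bedrock ids resolve to the same models.dev entry as the bare id, with a `:NNNk` suffix promoted to a context-window override. Tracing the two regexes on example ids (ids here are illustrative, not necessarily in the registry):

    'us.anthropic.claude-sonnet-4-20250514-v1:0'.sub(/^[a-z]{2}\./, '')
    # => "anthropic.claude-sonnet-4-20250514-v1:0"   (cross-region prefix stripped)

    context_override = nil
    'anthropic.claude-instant-v1:2:100k'.gsub(/:(\d+)k\b/) do
      context_override = Regexp.last_match(1).to_i * 1000
      ''
    end
    # => "anthropic.claude-instant-v1:2", with context_override == 100_000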
@@ -167,18 +256,48 @@ module RubyLLM
         end
       end
 
-      def add_provider_metadata(models_dev_model, provider_model)
+      def add_provider_metadata(models_dev_model, provider_model) # rubocop:disable Metrics/PerceivedComplexity
         data = models_dev_model.to_h
+        data[:name] = provider_model.name if blank_value?(data[:name])
+        data[:family] = provider_model.family if blank_value?(data[:family])
+        data[:created_at] = provider_model.created_at if blank_value?(data[:created_at])
+        data[:context_window] = provider_model.context_window if blank_value?(data[:context_window])
+        data[:max_output_tokens] = provider_model.max_output_tokens if blank_value?(data[:max_output_tokens])
+        data[:modalities] = provider_model.modalities.to_h if blank_value?(data[:modalities])
+        data[:pricing] = provider_model.pricing.to_h if blank_value?(data[:pricing])
         data[:metadata] = provider_model.metadata.merge(data[:metadata] || {})
         data[:capabilities] = (models_dev_model.capabilities + provider_model.capabilities).uniq
+        normalize_embedding_modalities(data)
         Model::Info.new(data)
       end
 
+      def normalize_embedding_modalities(data)
+        return unless data[:id].to_s.include?('embedding')
+
+        modalities = data[:modalities].to_h
+        modalities[:input] = ['text'] if modalities[:input].nil? || modalities[:input].empty?
+        modalities[:output] = ['embeddings']
+        data[:modalities] = modalities
+      end
+
+      def blank_value?(value)
+        return true if value.nil?
+        return value.empty? if value.is_a?(String) || value.is_a?(Array)
+
+        if value.is_a?(Hash)
+          return true if value.empty?
+
+          return value.values.all? { |nested| blank_value?(nested) }
+        end
+
+        false
+      end
+
       def models_dev_model_to_info(model_data, provider_slug, provider_key)
         modalities = normalize_models_dev_modalities(model_data[:modalities])
         capabilities = models_dev_capabilities(model_data, modalities)
 
-        {
+        data = {
           id: model_data[:id],
           name: model_data[:name] || model_data[:id],
           provider: provider_slug,
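`blank_value?` is what decides when provider data may overwrite models.dev data: a hash counts as blank when it is empty or when every nested value is recursively blank, so a pricing stanza full of nils gets replaced while a partially filled one is kept. For example (standalone copy of the predicate above):

    def blank_value?(value)
      return true if value.nil?
      return value.empty? if value.is_a?(String) || value.is_a?(Array)

      if value.is_a?(Hash)
        return true if value.empty?

        return value.values.all? { |nested| blank_value?(nested) }
      end

      false
    end

    blank_value?({ input: nil, output: [] })    # => true  (all nested values blank)
    blank_value?({ input: nil, output: 0.15 })  # => false (one real value suffices)
    blank_value?(0)                             # => false (numbers are never blank)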
@@ -192,6 +311,9 @@ module RubyLLM
           pricing: models_dev_pricing(model_data[:cost]),
           metadata: models_dev_metadata(model_data, provider_key)
         }
+
+        normalize_embedding_modalities(data)
+        data
       end
 
       def models_dev_capabilities(model_data, modalities)
data/lib/ruby_llm/provider.rb
CHANGED
@@ -37,7 +37,7 @@ module RubyLLM
       self.class.configuration_requirements
     end
 
-    def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, &) # rubocop:disable Metrics/ParameterLists
+    def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, thinking: nil, &) # rubocop:disable Metrics/ParameterLists
       normalized_temperature = maybe_normalize_temperature(temperature, model)
 
       payload = Utils.deep_merge(
@@ -47,7 +47,8 @@ module RubyLLM
           temperature: normalized_temperature,
           model: model,
           stream: block_given?,
-          schema: schema
+          schema: schema,
+          thinking: thinking
         ),
         params
       )
@@ -144,7 +145,7 @@ module RubyLLM
       end
 
       def capabilities
-
+        nil
       end
 
       def configuration_requirements
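`complete` now threads a `thinking:` value into every provider payload. The `Thinking` class itself lives in the new lib/ruby_llm/thinking.rb, which this diff listing doesn't expand; judging only from the call sites visible here (`Thinking.build(text:, signature:)`, `thinking&.enabled?`, `thinking.budget`, `thinking.text`, `thinking.signature`), a compatible shape might look like the following hypothetical sketch; the shipped class may well differ:

    module RubyLLM
      # HYPOTHETICAL sketch inferred from call sites in this diff,
      # not the actual lib/ruby_llm/thinking.rb source.
      class Thinking
        attr_reader :text, :signature, :budget

        def initialize(text: nil, signature: nil, budget: nil)
          @text = text
          @signature = signature
          @budget = budget
        end

        # Call sites expect nil when there is nothing to carry.
        def self.build(text: nil, signature: nil)
          return nil if text.nil? && signature.nil?

          new(text: text, signature: signature)
        end

        # Stand-in predicate; the real implementation presumably encodes
        # whether thinking was requested for the chat.
        def enabled?
          true
        end
      end
    end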
data/lib/ruby_llm/providers/anthropic/chat.rb
CHANGED
@@ -3,7 +3,7 @@
 module RubyLLM
   module Providers
     class Anthropic
-      # Chat methods
+      # Chat methods for the Anthropic API implementation
       module Chat
         module_function
 
@@ -11,11 +11,11 @@ module RubyLLM
           '/v1/messages'
         end
 
-        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
           system_messages, chat_messages = separate_messages(messages)
           system_content = build_system_content(system_messages)
 
-          build_base_payload(chat_messages, model, stream).tap do |payload|
+          build_base_payload(chat_messages, model, stream, thinking).tap do |payload|
             add_optional_fields(payload, system_content:, tools:, temperature:)
           end
         end
@@ -45,13 +45,18 @@ module RubyLLM
           end
         end
 
-        def build_base_payload(chat_messages, model, stream)
-          {
+        def build_base_payload(chat_messages, model, stream, thinking)
+          payload = {
             model: model.id,
-            messages: chat_messages.map { |msg| format_message(msg) },
+            messages: chat_messages.map { |msg| format_message(msg, thinking: thinking) },
             stream: stream,
             max_tokens: model.max_tokens || 4096
           }
+
+          thinking_payload = build_thinking_payload(thinking)
+          payload[:thinking] = thinking_payload if thinking_payload
+
+          payload
         end
 
         def add_optional_fields(payload, system_content:, tools:, temperature:)
@@ -65,9 +70,11 @@ module RubyLLM
           content_blocks = data['content'] || []
 
           text_content = extract_text_content(content_blocks)
+          thinking_content = extract_thinking_content(content_blocks)
+          thinking_signature = extract_thinking_signature(content_blocks)
           tool_use_blocks = Tools.find_tool_uses(content_blocks)
 
-          build_message(data, text_content, tool_use_blocks, response)
+          build_message(data, text_content, thinking_content, thinking_signature, tool_use_blocks, response)
         end
 
         def extract_text_content(blocks)
@@ -75,50 +82,158 @@ module RubyLLM
           text_blocks.map { |c| c['text'] }.join
         end
 
-        def
+        def extract_thinking_content(blocks)
+          thinking_blocks = blocks.select { |c| c['type'] == 'thinking' }
+          thoughts = thinking_blocks.map { |c| c['thinking'] || c['text'] }.join
+          thoughts.empty? ? nil : thoughts
+        end
+
+        def extract_thinking_signature(blocks)
+          thinking_block = blocks.find { |c| c['type'] == 'thinking' } ||
+                           blocks.find { |c| c['type'] == 'redacted_thinking' }
+          thinking_block&.dig('signature') || thinking_block&.dig('data')
+        end
+
+        def build_message(data, content, thinking, thinking_signature, tool_use_blocks, response) # rubocop:disable Metrics/ParameterLists
           usage = data['usage'] || {}
           cached_tokens = usage['cache_read_input_tokens']
           cache_creation_tokens = usage['cache_creation_input_tokens']
           if cache_creation_tokens.nil? && usage['cache_creation'].is_a?(Hash)
             cache_creation_tokens = usage['cache_creation'].values.compact.sum
           end
+          thinking_tokens = usage.dig('output_tokens_details', 'thinking_tokens') ||
+                            usage.dig('output_tokens_details', 'reasoning_tokens') ||
+                            usage['thinking_tokens'] ||
+                            usage['reasoning_tokens']
 
           Message.new(
             role: :assistant,
             content: content,
+            thinking: Thinking.build(text: thinking, signature: thinking_signature),
             tool_calls: Tools.parse_tool_calls(tool_use_blocks),
             input_tokens: usage['input_tokens'],
             output_tokens: usage['output_tokens'],
             cached_tokens: cached_tokens,
             cache_creation_tokens: cache_creation_tokens,
+            thinking_tokens: thinking_tokens,
             model_id: data['model'],
             raw: response
           )
         end
 
-        def format_message(msg)
+        def format_message(msg, thinking: nil)
+          thinking_enabled = thinking&.enabled?
+
           if msg.tool_call?
-
+            format_tool_call_with_thinking(msg, thinking_enabled)
           elsif msg.tool_result?
             Tools.format_tool_result(msg)
           else
-
+            format_basic_message_with_thinking(msg, thinking_enabled)
           end
         end
 
-        def
+        def format_basic_message_with_thinking(msg, thinking_enabled)
+          content_blocks = []
+
+          if msg.role == :assistant && thinking_enabled
+            thinking_block = build_thinking_block(msg.thinking)
+            content_blocks << thinking_block if thinking_block
+          end
+
+          append_formatted_content(content_blocks, msg.content)
+
           {
             role: convert_role(msg.role),
-            content:
+            content: content_blocks
           }
         end
 
+        def format_tool_call_with_thinking(msg, thinking_enabled)
+          if msg.content.is_a?(RubyLLM::Content::Raw)
+            content_blocks = msg.content.value
+            content_blocks = [content_blocks] unless content_blocks.is_a?(Array)
+            content_blocks = prepend_thinking_block(content_blocks, msg, thinking_enabled)
+
+            return { role: 'assistant', content: content_blocks }
+          end
+
+          content_blocks = prepend_thinking_block([], msg, thinking_enabled)
+          content_blocks << Media.format_text(msg.content) unless msg.content.nil? || msg.content.empty?
+
+          msg.tool_calls.each_value do |tool_call|
+            content_blocks << {
+              type: 'tool_use',
+              id: tool_call.id,
+              name: tool_call.name,
+              input: tool_call.arguments
+            }
+          end
+
+          {
+            role: 'assistant',
+            content: content_blocks
+          }
+        end
+
+        def prepend_thinking_block(content_blocks, msg, thinking_enabled)
+          return content_blocks unless thinking_enabled
+
+          thinking_block = build_thinking_block(msg.thinking)
+          content_blocks.unshift(thinking_block) if thinking_block
+
+          content_blocks
+        end
+
+        def build_thinking_block(thinking)
+          return nil unless thinking
+
+          if thinking.text
+            {
+              type: 'thinking',
+              thinking: thinking.text,
+              signature: thinking.signature
+            }.compact
+          elsif thinking.signature
+            {
+              type: 'redacted_thinking',
+              data: thinking.signature
+            }
+          end
+        end
+
+        def append_formatted_content(content_blocks, content)
+          formatted_content = Media.format_content(content)
+          if formatted_content.is_a?(Array)
+            content_blocks.concat(formatted_content)
+          else
+            content_blocks << formatted_content
+          end
+        end
+
         def convert_role(role)
           case role
           when :tool, :user then 'user'
           else 'assistant'
           end
         end
+
+        def build_thinking_payload(thinking)
+          return nil unless thinking&.enabled?
+
+          budget = resolve_budget(thinking)
+          raise ArgumentError, 'Anthropic thinking requires a budget' if budget.nil?
+
+          {
+            type: 'enabled',
+            budget_tokens: budget
+          }
+        end
+
+        def resolve_budget(thinking)
+          budget = thinking.respond_to?(:budget) ? thinking.budget : thinking
+          budget.is_a?(Integer) ? budget : nil
+        end
       end
     end
   end
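Put together, a thinking-enabled Anthropic request gains a top-level `thinking` object and replays prior signed thinking blocks ahead of the assistant text, matching Anthropic's extended-thinking wire format. A hand-assembled fragment of what `render_payload` would emit (model id and block contents are illustrative):

    {
      model: 'claude-sonnet-4-20250514',
      max_tokens: 4096,
      thinking: { type: 'enabled', budget_tokens: 2048 },   # from build_thinking_payload
      messages: [
        { role: 'assistant',
          content: [
            # build_thinking_block replays the signed thinking first...
            { type: 'thinking', thinking: 'Check the dates...', signature: 'EqQBC...' },
            # ...then the ordinary content blocks follow.
            { type: 'text', text: 'The meeting is on Tuesday.' }
          ] }
      ]
    }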
data/lib/ruby_llm/providers/anthropic/streaming.rb
CHANGED
@@ -12,10 +12,16 @@ module RubyLLM
         end
 
         def build_chunk(data)
+          delta_type = data.dig('delta', 'type')
+
           Chunk.new(
             role: :assistant,
             model_id: extract_model_id(data),
-            content: data
+            content: extract_content_delta(data, delta_type),
+            thinking: Thinking.build(
+              text: extract_thinking_delta(data, delta_type),
+              signature: extract_signature_delta(data, delta_type)
+            ),
             input_tokens: extract_input_tokens(data),
             output_tokens: extract_output_tokens(data),
             cached_tokens: extract_cached_tokens(data),
@@ -24,6 +30,24 @@ module RubyLLM
           )
         end
 
+        def extract_content_delta(data, delta_type)
+          return data.dig('delta', 'text') if delta_type == 'text_delta'
+
+          nil
+        end
+
+        def extract_thinking_delta(data, delta_type)
+          return data.dig('delta', 'thinking') if delta_type == 'thinking_delta'
+
+          nil
+        end
+
+        def extract_signature_delta(data, delta_type)
+          return data.dig('delta', 'signature') if delta_type == 'signature_delta'
+
+          nil
+        end
+
         def json_delta?(data)
           data['type'] == 'content_block_delta' && data.dig('delta', 'type') == 'input_json_delta'
         end
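The streaming branch maps each `delta.type` to exactly one field of the chunk, so a given event contributes either text, thinking, or a signature, never a mix. Hand-written event samples tracing the three extractors:

    text_evt  = { 'type' => 'content_block_delta',
                  'delta' => { 'type' => 'text_delta', 'text' => 'Hello' } }
    think_evt = { 'type' => 'content_block_delta',
                  'delta' => { 'type' => 'thinking_delta', 'thinking' => 'Hmm...' } }
    sig_evt   = { 'type' => 'content_block_delta',
                  'delta' => { 'type' => 'signature_delta', 'signature' => 'EqQBC...' } }

    # Applying the extractors above to each event:
    #   extract_content_delta(text_evt, 'text_delta')        # => "Hello"
    #   extract_thinking_delta(think_evt, 'thinking_delta')  # => "Hmm..."
    #   extract_signature_delta(sig_evt, 'signature_delta')  # => "EqQBC..."
    # and each returns nil for the other two event shapes.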
data/lib/ruby_llm/providers/bedrock/chat.rb
CHANGED
@@ -16,46 +16,89 @@ module RubyLLM
         Anthropic::Chat.parse_completion_response response
       end
 
-      def format_message(msg)
+      def format_message(msg, thinking: nil)
+        thinking_enabled = thinking&.enabled?
+
         if msg.tool_call?
-
+          format_tool_call_with_thinking(msg, thinking_enabled)
         elsif msg.tool_result?
           Anthropic::Tools.format_tool_result(msg)
         else
-
+          format_basic_message_with_thinking(msg, thinking_enabled)
         end
       end
 
-      def format_basic_message(msg)
-        {
-          role: Anthropic::Chat.convert_role(msg.role),
-          content: Media.format_content(msg.content)
-        }
-      end
-
       private
 
       def completion_url
         "model/#{@model_id}/invoke"
       end
 
-      def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
+      def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
         @model_id = model.id
 
         system_messages, chat_messages = Anthropic::Chat.separate_messages(messages)
         system_content = Anthropic::Chat.build_system_content(system_messages)
 
-        build_base_payload(chat_messages, model).tap do |payload|
+        build_base_payload(chat_messages, model, thinking).tap do |payload|
          Anthropic::Chat.add_optional_fields(payload, system_content:, tools:, temperature:)
        end
      end
 
-      def build_base_payload(chat_messages, model)
-        {
+      def build_base_payload(chat_messages, model, thinking)
+        payload = {
           anthropic_version: 'bedrock-2023-05-31',
-          messages: chat_messages.map { |msg| format_message(msg) },
+          messages: chat_messages.map { |msg| format_message(msg, thinking: thinking) },
           max_tokens: model.max_tokens || 4096
         }
+
+        thinking_payload = Anthropic::Chat.build_thinking_payload(thinking)
+        payload[:thinking] = thinking_payload if thinking_payload
+
+        payload
+      end
+
+      def format_basic_message_with_thinking(msg, thinking_enabled)
+        content_blocks = []
+
+        if msg.role == :assistant && thinking_enabled
+          thinking_block = Anthropic::Chat.build_thinking_block(msg.thinking)
+          content_blocks << thinking_block if thinking_block
+        end
+
+        Anthropic::Chat.append_formatted_content(content_blocks, msg.content)
+
+        {
+          role: Anthropic::Chat.convert_role(msg.role),
+          content: content_blocks
+        }
+      end
+
+      def format_tool_call_with_thinking(msg, thinking_enabled)
+        if msg.content.is_a?(RubyLLM::Content::Raw)
+          content_blocks = msg.content.value
+          content_blocks = [content_blocks] unless content_blocks.is_a?(Array)
+          content_blocks = Anthropic::Chat.prepend_thinking_block(content_blocks, msg, thinking_enabled)
+
+          return { role: 'assistant', content: content_blocks }
+        end
+
+        content_blocks = Anthropic::Chat.prepend_thinking_block([], msg, thinking_enabled)
+        content_blocks << Anthropic::Media.format_text(msg.content) unless msg.content.nil? || msg.content.empty?
+
+        msg.tool_calls.each_value do |tool_call|
+          content_blocks << {
+            type: 'tool_use',
+            id: tool_call.id,
+            name: tool_call.name,
+            input: tool_call.arguments
+          }
+        end
+
+        {
+          role: 'assistant',
+          content: content_blocks
+        }
      end
    end
  end
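Bedrock reuses the Anthropic helpers (`build_thinking_payload`, `build_thinking_block`, `prepend_thinking_block`), so the only payload differences are the `anthropic_version` pin and the missing `model`/`stream` keys; the model id rides in the invoke URL instead. Roughly (an illustrative body, not a captured request):

    # POSTed to "model/#{model_id}/invoke"
    {
      anthropic_version: 'bedrock-2023-05-31',
      max_tokens: 4096,
      thinking: { type: 'enabled', budget_tokens: 1024 },
      messages: [{ role: 'user', content: [{ type: 'text', text: 'Hi' }] }]
    }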
data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb
CHANGED
@@ -16,6 +16,31 @@ module RubyLLM
         extract_content_by_type(data)
       end
 
+      def extract_thinking_delta(data)
+        return nil unless data.is_a?(Hash)
+
+        if data['type'] == 'content_block_delta' && data.dig('delta', 'type') == 'thinking_delta'
+          return data.dig('delta', 'thinking')
+        end
+
+        if data['type'] == 'content_block_start' && data.dig('content_block', 'type') == 'thinking'
+          return data.dig('content_block', 'thinking') || data.dig('content_block', 'text')
+        end
+
+        nil
+      end
+
+      def extract_signature_delta(data)
+        return nil unless data.is_a?(Hash)
+
+        signature = extract_signature_from_delta(data)
+        return signature if signature
+
+        return nil unless data['type'] == 'content_block_start'
+
+        extract_signature_from_block(data['content_block'])
+      end
+
       def extract_tool_calls(data)
         data.dig('message', 'tool_calls') || data['tool_calls']
       end
@@ -47,6 +72,17 @@ module RubyLLM
         breakdown.values.compact.sum
       end
 
+      def extract_thinking_tokens(data)
+        data.dig('message', 'usage', 'thinking_tokens') ||
+          data.dig('message', 'usage', 'output_tokens_details', 'thinking_tokens') ||
+          data.dig('usage', 'thinking_tokens') ||
+          data.dig('usage', 'output_tokens_details', 'thinking_tokens') ||
+          data.dig('message', 'usage', 'reasoning_tokens') ||
+          data.dig('message', 'usage', 'output_tokens_details', 'reasoning_tokens') ||
+          data.dig('usage', 'reasoning_tokens') ||
+          data.dig('usage', 'output_tokens_details', 'reasoning_tokens')
+      end
+
       private
 
       def extract_content_by_type(data)
@@ -58,11 +94,32 @@ module RubyLLM
       end
 
       def extract_block_start_content(data)
-        data
+        content_block = data['content_block'] || {}
+        return '' if %w[thinking redacted_thinking].include?(content_block['type'])
+
+        content_block['text'].to_s
       end
 
       def extract_delta_content(data)
-        data
+        delta = data['delta'] || {}
+        return '' if %w[thinking_delta signature_delta].include?(delta['type'])
+
+        delta['text'].to_s
+      end
+
+      def extract_signature_from_delta(data)
+        return unless data['type'] == 'content_block_delta'
+        return unless data.dig('delta', 'type') == 'signature_delta'
+
+        data.dig('delta', 'signature')
+      end
+
+      def extract_signature_from_block(content_block)
+        block = content_block || {}
+        return block['signature'] if block['type'] == 'thinking' && block['signature']
+        return block['data'] if block['type'] == 'redacted_thinking'
+
+        nil
      end
    end
  end
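`extract_thinking_tokens` probes both `message.usage` and top-level `usage`, under both the `thinking_tokens` and `reasoning_tokens` spellings and inside `output_tokens_details`, because Bedrock event shapes nest usage differently. A sketched event (hand-made) that only the second probe matches:

    evt = { 'message' => { 'usage' => {
      'input_tokens' => 12,
      'output_tokens' => 240,
      'output_tokens_details' => { 'thinking_tokens' => 180 }
    } } }

    evt.dig('message', 'usage', 'thinking_tokens')                            # => nil
    evt.dig('message', 'usage', 'output_tokens_details', 'thinking_tokens')   # => 180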