RubyGems - dify_llm - Versions diffs - 1.9.1 → 1.14.1 - Mend

dify_llm 1.9.1 → 1.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (170) hide show

data/lib/ruby_llm/providers/openai/capabilities.rb CHANGED Viewed

@@ -3,13 +3,11 @@
 module RubyLLM
   module Providers
     class OpenAI
-      # Determines capabilities and pricing for OpenAI models
+      # Provider-level capability checks and narrow registry fallbacks.
       module Capabilities
         module_function
         MODEL_PATTERNS = {
-          dall_e: /^dall-e/,
-          chatgpt4o: /^chatgpt-4o/,
           gpt41: /^gpt-4\.1(?!-(?:mini|nano))/,
           gpt41_mini: /^gpt-4\.1-mini/,
           gpt41_nano: /^gpt-4\.1-nano/,
@@ -26,9 +24,9 @@ module RubyLLM
           gpt4o_realtime: /^gpt-4o-realtime/,
           gpt4o_search: /^gpt-4o-search/,
           gpt4o_transcribe: /^gpt-4o-transcribe/,
-          gpt5: /^gpt-5/,
-          gpt5_mini: /^gpt-5-mini/,
-          gpt5_nano: /^gpt-5-nano/,
+          gpt5: /^gpt-5(?!.*(?:mini|nano))/,
+          gpt5_mini: /^gpt-5.*mini/,
+          gpt5_nano: /^gpt-5.*nano/,
           o1: /^o1(?!-(?:mini|pro))/,
           o1_mini: /^o1-mini/,
           o1_pro: /^o1-pro/,
@@ -44,71 +42,6 @@ module RubyLLM
           moderation: /^(?:omni|text)-moderation/
         }.freeze
-        def context_window_for(model_id)
-          case model_family(model_id)
-          when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 1_047_576
-          when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'chatgpt4o', 'gpt4_turbo', 'gpt4o', 'gpt4o_audio', 'gpt4o_mini',
-               'gpt4o_mini_audio', 'gpt4o_mini_realtime', 'gpt4o_realtime',
-               'gpt4o_search', 'gpt4o_transcribe', 'gpt4o_mini_search', 'o1_mini' then 128_000
-          when 'gpt4' then 8_192
-          when 'gpt4o_mini_transcribe' then 16_000
-          when 'o1', 'o1_pro', 'o3_mini' then 200_000
-          when 'gpt35_turbo' then 16_385
-          when 'gpt4o_mini_tts', 'tts1', 'tts1_hd', 'whisper', 'moderation',
-               'embedding3_large', 'embedding3_small', 'embedding_ada' then nil
-          else 4_096
-          end
-        end
-        def max_tokens_for(model_id)
-          case model_family(model_id)
-          when 'gpt5', 'gpt5_mini', 'gpt5_nano' then 400_000
-          when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 32_768
-          when 'chatgpt4o', 'gpt4o', 'gpt4o_mini', 'gpt4o_mini_search' then 16_384
-          when 'babbage', 'davinci' then 16_384 # rubocop:disable Lint/DuplicateBranch
-          when 'gpt4' then 8_192
-          when 'gpt35_turbo' then 4_096
-          when 'gpt4_turbo', 'gpt4o_realtime', 'gpt4o_mini_realtime' then 4_096 # rubocop:disable Lint/DuplicateBranch
-          when 'gpt4o_mini_transcribe' then 2_000
-          when 'o1', 'o1_pro', 'o3_mini' then 100_000
-          when 'o1_mini' then 65_536
-          when 'gpt4o_mini_tts', 'tts1', 'tts1_hd', 'whisper', 'moderation',
-               'embedding3_large', 'embedding3_small', 'embedding_ada' then nil
-          else 16_384 # rubocop:disable Lint/DuplicateBranch
-          end
-        end
-        def supports_vision?(model_id)
-          case model_family(model_id)
-          when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'chatgpt4o', 'gpt4',
-               'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'moderation', 'gpt4o_search',
-               'gpt4o_mini_search' then true
-          else false
-          end
-        end
-        def supports_functions?(model_id)
-          case model_family(model_id)
-          when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'gpt4', 'gpt4_turbo', 'gpt4o',
-               'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini' then true
-          when 'chatgpt4o', 'gpt35_turbo', 'o1_mini', 'gpt4o_mini_tts',
-               'gpt4o_transcribe', 'gpt4o_search', 'gpt4o_mini_search' then false
-          else false # rubocop:disable Lint/DuplicateBranch
-          end
-        end
-        def supports_structured_output?(model_id)
-          case model_family(model_id)
-          when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'chatgpt4o', 'gpt4o',
-               'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini' then true
-          else false
-          end
-        end
-        def supports_json_mode?(model_id)
-          supports_structured_output?(model_id)
-        end
         PRICES = {
           gpt5: { input: 1.25, output: 10.0, cached_input: 0.125 },
           gpt5_mini: { input: 0.25, output: 2.0, cached_input: 0.025 },
@@ -116,21 +49,19 @@ module RubyLLM
           gpt41: { input: 2.0, output: 8.0, cached_input: 0.5 },
           gpt41_mini: { input: 0.4, output: 1.6, cached_input: 0.1 },
           gpt41_nano: { input: 0.1, output: 0.4 },
-          chatgpt4o: { input: 5.0, output: 15.0 },
           gpt4: { input: 10.0, output: 30.0 },
           gpt4_turbo: { input: 10.0, output: 30.0 },
-          gpt45: { input: 75.0, output: 150.0 },
           gpt35_turbo: { input: 0.5, output: 1.5 },
           gpt4o: { input: 2.5, output: 10.0 },
-          gpt4o_audio: { input: 2.5, output: 10.0, audio_input: 40.0, audio_output: 80.0 },
+          gpt4o_audio: { input: 2.5, output: 10.0 },
           gpt4o_mini: { input: 0.15, output: 0.6 },
-          gpt4o_mini_audio: { input: 0.15, output: 0.6, audio_input: 10.0, audio_output: 20.0 },
+          gpt4o_mini_audio: { input: 0.15, output: 0.6 },
           gpt4o_mini_realtime: { input: 0.6, output: 2.4 },
-          gpt4o_mini_transcribe: { input: 1.25, output: 5.0, audio_input: 3.0 },
+          gpt4o_mini_transcribe: { input: 1.25, output: 5.0 },
           gpt4o_mini_tts: { input: 0.6, output: 12.0 },
           gpt4o_realtime: { input: 5.0, output: 20.0 },
           gpt4o_search: { input: 2.5, output: 10.0 },
-          gpt4o_transcribe: { input: 2.5, output: 10.0, audio_input: 6.0 },
+          gpt4o_transcribe: { input: 2.5, output: 10.0 },
           o1: { input: 15.0, output: 60.0 },
           o1_mini: { input: 1.1, output: 4.4 },
           o1_pro: { input: 150.0, output: 600.0 },
@@ -146,153 +77,126 @@ module RubyLLM
           moderation: { price: 0.0 }
         }.freeze
-        def model_family(model_id)
-          MODEL_PATTERNS.each do |family, pattern|
-            return family.to_s if model_id.match?(pattern)
-          end
-          'other'
+        def supports_tool_choice?(_model_id)
+          true
         end
-        def input_price_for(model_id)
-          family = model_family(model_id).to_sym
-          prices = PRICES.fetch(family, { input: default_input_price })
-          prices[:input] || prices[:price] || default_input_price
-        end
-        def cached_input_price_for(model_id)
-          family = model_family(model_id).to_sym
-          prices = PRICES.fetch(family, {})
-          prices[:cached_input]
-        end
-        def output_price_for(model_id)
-          family = model_family(model_id).to_sym
-          prices = PRICES.fetch(family, { output: default_output_price })
-          prices[:output] || prices[:price] || default_output_price
+        def supports_tool_parallel_control?(_model_id)
+          true
         end
-        def model_type(model_id)
+        def context_window_for(model_id)
           case model_family(model_id)
-          when /embedding/ then 'embedding'
-          when /^tts|whisper|gpt4o_(?:mini_)?(?:transcribe|tts)$/ then 'audio'
-          when 'moderation' then 'moderation'
-          when /dall/ then 'image'
-          else 'chat'
+          when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 1_047_576
+          when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt4_turbo', 'gpt4o', 'gpt4o_audio', 'gpt4o_mini',
+               'gpt4o_mini_audio', 'gpt4o_mini_realtime', 'gpt4o_realtime', 'gpt4o_search',
+               'gpt4o_transcribe', 'o1_mini' then 128_000
+          when 'gpt4' then 8_192
+          when 'gpt4o_mini_transcribe' then 16_000
+          when 'o1', 'o1_pro', 'o3_mini' then 200_000
+          when 'gpt35_turbo' then 16_385
+          when 'gpt4o_mini_tts', 'tts1', 'tts1_hd', 'whisper', 'moderation',
+               'embedding3_large', 'embedding3_small', 'embedding_ada' then nil
+          else 4_096
           end
         end
-        def default_input_price
-          0.50
+        def max_tokens_for(model_id)
+          case model_family(model_id)
+          when 'gpt5', 'gpt5_mini', 'gpt5_nano' then 400_000
+          when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 32_768
+          when 'gpt4' then 8_192
+          when 'gpt35_turbo' then 4_096
+          when 'gpt4o_mini_transcribe' then 2_000
+          when 'o1', 'o1_pro', 'o3_mini' then 100_000
+          when 'o1_mini' then 65_536
+          when 'gpt4o_mini_tts', 'tts1', 'tts1_hd', 'whisper', 'moderation',
+               'embedding3_large', 'embedding3_small', 'embedding_ada' then nil
+          else 16_384
+          end
         end
-        def default_output_price
-          1.50
+        def critical_capabilities_for(model_id)
+          capabilities = []
+          capabilities << 'function_calling' if supports_functions?(model_id)
+          capabilities << 'structured_output' if supports_structured_output?(model_id)
+          capabilities << 'vision' if supports_vision?(model_id)
+          capabilities << 'reasoning' if model_id.match?(/o\d|gpt-5|codex/)
+          capabilities
         end
-        def format_display_name(model_id)
-          model_id.then { |id| humanize(id) }
-                  .then { |name| apply_special_formatting(name) }
-        end
+        def pricing_for(model_id)
+          standard_pricing = {
+            input_per_million: input_price_for(model_id),
+            output_per_million: output_price_for(model_id)
+          }
-        def humanize(id)
-          id.tr('-', ' ')
-            .split
-            .map(&:capitalize)
-            .join(' ')
-        end
+          cached_price = cached_input_price_for(model_id)
+          standard_pricing[:cached_input_per_million] = cached_price if cached_price
-        def apply_special_formatting(name)
-          name
-            .gsub(/(\d{4}) (\d{2}) (\d{2})/, '\1\2\3')
-            .gsub(/^(?:Gpt|Chatgpt|Tts|Dall E) /) { |m| special_prefix_format(m.strip) }
-            .gsub(/^O([13]) /, 'O\1-')
-            .gsub(/^O[13] Mini/, '\0'.tr(' ', '-'))
-            .gsub(/\d\.\d /, '\0'.sub(' ', '-'))
-            .gsub(/4o (?=Mini|Preview|Turbo|Audio|Realtime|Transcribe|Tts)/, '4o-')
-            .gsub(/\bHd\b/, 'HD')
-            .gsub(/(?:Omni|Text) Moderation/, '\0'.tr(' ', '-'))
-            .gsub('Text Embedding', 'text-embedding-')
+          { text_tokens: { standard: standard_pricing } }
         end
-        def special_prefix_format(prefix)
-          case prefix # rubocop:disable Style/HashLikeCase
-          when 'Gpt' then 'GPT-'
-          when 'Chatgpt' then 'ChatGPT-'
-          when 'Tts' then 'TTS-'
-          when 'Dall E' then 'DALL-E-'
+        def model_family(model_id)
+          MODEL_PATTERNS.each do |family, pattern|
+            return family.to_s if model_id.match?(pattern)
           end
+          'other'
         end
-        def self.normalize_temperature(temperature, model_id)
-          if model_id.match?(/^(o\d|gpt-5)/)
-            RubyLLM.logger.debug "Model #{model_id} requires temperature=1.0, ignoring provided value"
-            1.0
-          elsif model_id.match?(/-search/)
-            RubyLLM.logger.debug "Model #{model_id} does not accept temperature parameter, removing"
-            nil
+        def supports_vision?(model_id)
+          case model_family(model_id)
+          when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'gpt4',
+               'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'moderation', 'gpt4o_search'
+            true
           else
-            temperature
+            false
           end
         end
-        def modalities_for(model_id)
-          modalities = {
-            input: ['text'],
-            output: ['text']
-          }
-          # Vision support
-          modalities[:input] << 'image' if supports_vision?(model_id)
-          modalities[:input] << 'audio' if model_id.match?(/whisper|audio|tts|transcribe/)
-          modalities[:input] << 'pdf' if supports_vision?(model_id)
-          modalities[:output] << 'audio' if model_id.match?(/tts|audio/)
-          modalities[:output] << 'image' if model_id.match?(/dall-e|image/)
-          modalities[:output] << 'embeddings' if model_id.match?(/embedding/)
-          modalities[:output] << 'moderation' if model_id.match?(/moderation/)
-          modalities
+        def supports_functions?(model_id)
+          case model_family(model_id)
+          when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'gpt4',
+               'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini'
+            true
+          else
+            false
+          end
         end
-        def capabilities_for(model_id) # rubocop:disable Metrics/PerceivedComplexity
-          capabilities = []
-          capabilities << 'streaming' unless model_id.match?(/moderation|embedding/)
-          capabilities << 'function_calling' if supports_functions?(model_id)
-          capabilities << 'structured_output' if supports_json_mode?(model_id)
-          capabilities << 'batch' if model_id.match?(/embedding|batch/)
-          capabilities << 'reasoning' if model_id.match?(/o\d|gpt-5|codex/)
-          if model_id.match?(/gpt-4-turbo|gpt-4o/)
-            capabilities << 'image_generation' if model_id.match?(/vision/)
-            capabilities << 'speech_generation' if model_id.match?(/audio/)
-            capabilities << 'transcription' if model_id.match?(/audio/)
+        def supports_structured_output?(model_id)
+          case model_family(model_id)
+          when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'gpt4o',
+               'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini'
+            true
+          else
+            false
           end
-          capabilities
         end
-        def pricing_for(model_id)
-          standard_pricing = {
-            input_per_million: input_price_for(model_id),
-            output_per_million: output_price_for(model_id)
-          }
-          if respond_to?(:cached_input_price_for)
-            cached_price = cached_input_price_for(model_id)
-            standard_pricing[:cached_input_per_million] = cached_price if cached_price
-          end
+        def input_price_for(model_id)
+          price_for(model_id, :input, 0.50)
+        end
-          pricing = { text_tokens: { standard: standard_pricing } }
+        def output_price_for(model_id)
+          price_for(model_id, :output, 1.50)
+        end
-          if model_id.match?(/embedding|batch/)
-            pricing[:text_tokens][:batch] = {
-              input_per_million: standard_pricing[:input_per_million] * 0.5,
-              output_per_million: standard_pricing[:output_per_million] * 0.5
-            }
-          end
+        def cached_input_price_for(model_id)
+          family = model_family(model_id).to_sym
+          PRICES.fetch(family, {})[:cached_input]
+        end
-          pricing
+        def price_for(model_id, key, fallback)
+          family = model_family(model_id).to_sym
+          prices = PRICES.fetch(family, { key => fallback })
+          prices[key] || prices[:price] || fallback
         end
+        module_function :context_window_for, :max_tokens_for, :critical_capabilities_for, :pricing_for,
+                        :model_family, :supports_vision?, :supports_functions?, :supports_structured_output?,
+                        :input_price_for, :output_price_for, :cached_input_price_for, :price_for
       end
     end
   end

data/lib/ruby_llm/providers/openai/chat.rb CHANGED Viewed

@@ -11,7 +11,10 @@ module RubyLLM
         module_function
-        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists
+        # rubocop:disable Metrics/ParameterLists,Metrics/PerceivedComplexity
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil,
+                           thinking: nil, tool_prefs: nil)
+          tool_prefs ||= {}
           payload = {
             model: model.id,
             messages: format_messages(messages),
@@ -19,24 +22,34 @@ module RubyLLM
           }
           payload[:temperature] = temperature unless temperature.nil?
-          payload[:tools] = tools.map { |_, tool| tool_for(tool) } if tools.any?
+          if tools.any?
+            payload[:tools] = tools.map { |_, tool| tool_for(tool) }
+            payload[:tool_choice] = build_tool_choice(tool_prefs[:choice]) unless tool_prefs[:choice].nil?
+            payload[:parallel_tool_calls] = tool_prefs[:calls] == :many unless tool_prefs[:calls].nil?
+          end
           if schema
-            strict = schema[:strict] != false
+            schema_name = schema[:name]
+            schema_def = schema[:schema]
+            strict = schema[:strict]
             payload[:response_format] = {
               type: 'json_schema',
               json_schema: {
-                name: 'response',
-                schema: schema,
+                name: schema_name,
+                schema: schema_def,
                 strict: strict
               }
             }
           end
+          effort = resolve_effort(thinking)
+          payload[:reasoning_effort] = effort if effort
           payload[:stream_options] = { include_usage: true } if stream
           payload
         end
+        # rubocop:enable Metrics/ParameterLists,Metrics/PerceivedComplexity
         def parse_completion_response(response)
           data = response.body
@@ -49,15 +62,21 @@ module RubyLLM
           usage = data['usage'] || {}
           cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
+          thinking_tokens = usage.dig('completion_tokens_details', 'reasoning_tokens')
+          content, thinking_from_blocks = extract_content_and_thinking(message_data['content'])
+          thinking_text = thinking_from_blocks || extract_thinking_text(message_data)
+          thinking_signature = extract_thinking_signature(message_data)
           Message.new(
             role: :assistant,
-            content: message_data['content'],
+            content: content,
+            thinking: Thinking.build(text: thinking_text, signature: thinking_signature),
             tool_calls: parse_tool_calls(message_data['tool_calls']),
             input_tokens: usage['prompt_tokens'],
             output_tokens: usage['completion_tokens'],
             cached_tokens: cached_tokens,
             cache_creation_tokens: 0,
+            thinking_tokens: thinking_tokens,
             model_id: data['model'],
             raw: response
           )
@@ -70,7 +89,7 @@ module RubyLLM
               content: Media.format_content(msg.content),
               tool_calls: format_tool_calls(msg.tool_calls),
               tool_call_id: msg.tool_call_id
-            }.compact
+            }.compact.merge(format_thinking(msg))
           end
         end
@@ -82,6 +101,81 @@ module RubyLLM
             role.to_s
           end
         end
+        def resolve_effort(thinking)
+          return nil unless thinking
+          thinking.respond_to?(:effort) ? thinking.effort : thinking
+        end
+        def format_thinking(msg)
+          return {} unless msg.role == :assistant
+          thinking = msg.thinking
+          return {} unless thinking
+          payload = {}
+          if thinking.text
+            payload[:reasoning] = thinking.text
+            payload[:reasoning_content] = thinking.text
+          end
+          payload[:reasoning_signature] = thinking.signature if thinking.signature
+          payload
+        end
+        def extract_thinking_text(message_data)
+          candidate = message_data['reasoning_content'] || message_data['reasoning'] || message_data['thinking']
+          candidate.is_a?(String) ? candidate : nil
+        end
+        def extract_thinking_signature(message_data)
+          candidate = message_data['reasoning_signature'] || message_data['signature']
+          candidate.is_a?(String) ? candidate : nil
+        end
+        def extract_content_and_thinking(content)
+          return extract_think_tag_content(content) if content.is_a?(String)
+          return [content, nil] unless content.is_a?(Array)
+          text = extract_text_from_blocks(content)
+          thinking = extract_thinking_from_blocks(content)
+          [text.empty? ? nil : text, thinking.empty? ? nil : thinking]
+        end
+        def extract_text_from_blocks(blocks)
+          blocks.filter_map do |block|
+            block['text'] if block['type'] == 'text' && block['text'].is_a?(String)
+          end.join
+        end
+        def extract_thinking_from_blocks(blocks)
+          blocks.filter_map do |block|
+            next unless block['type'] == 'thinking'
+            extract_thinking_text_from_block(block)
+          end.join
+        end
+        def extract_thinking_text_from_block(block)
+          thinking_block = block['thinking']
+          return thinking_block if thinking_block.is_a?(String)
+          if thinking_block.is_a?(Array)
+            return thinking_block.filter_map { |item| item['text'] if item['type'] == 'text' }.join
+          end
+          block['text'] if block['text'].is_a?(String)
+        end
+        def extract_think_tag_content(text)
+          return [text, nil] unless text.include?('<think>')
+          thinking = text.scan(%r{<think>(.*?)</think>}m).join
+          content = text.gsub(%r{<think>.*?</think>}m, '').strip
+          [content.empty? ? nil : content, thinking.empty? ? nil : thinking]
+        end
       end
     end
   end

data/lib/ruby_llm/providers/openai/media.rb CHANGED Viewed

@@ -8,7 +8,10 @@ module RubyLLM
         module_function
         def format_content(content) # rubocop:disable Metrics/PerceivedComplexity
-          return content.value if content.is_a?(RubyLLM::Content::Raw)
+          if content.is_a?(RubyLLM::Content::Raw)
+            value = content.value
+            return value.is_a?(Hash) ? value.to_json : value
+          end
           return content.to_json if content.is_a?(Hash) || content.is_a?(Array)
           return content unless content.is_a?(Content)
@@ -37,7 +40,7 @@ module RubyLLM
           {
             type: 'image_url',
             image_url: {
-              url: image.url? ? image.source : image.for_llm
+              url: image.url? ? image.source.to_s : image.for_llm
             }
           }
         end

data/lib/ruby_llm/providers/openai/models.rb CHANGED Viewed

@@ -17,14 +17,12 @@ module RubyLLM
             Model::Info.new(
               id: model_id,
-              name: capabilities.format_display_name(model_id),
+              name: model_id,
               provider: slug,
-              family: capabilities.model_family(model_id),
               created_at: model_data['created'] ? Time.at(model_data['created']) : nil,
               context_window: capabilities.context_window_for(model_id),
               max_output_tokens: capabilities.max_tokens_for(model_id),
-              modalities: capabilities.modalities_for(model_id),
-              capabilities: capabilities.capabilities_for(model_id),
+              capabilities: capabilities.critical_capabilities_for(model_id),
               pricing: capabilities.pricing_for(model_id),
               metadata: {
                 object: model_data['object'],

data/lib/ruby_llm/providers/openai/streaming.rb CHANGED Viewed

@@ -14,16 +14,24 @@ module RubyLLM
         def build_chunk(data)
           usage = data['usage'] || {}
           cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
+          delta = data.dig('choices', 0, 'delta') || {}
+          content_source = delta['content'] || data.dig('choices', 0, 'message', 'content')
+          content, thinking_from_blocks = OpenAI::Chat.extract_content_and_thinking(content_source)
           Chunk.new(
             role: :assistant,
             model_id: data['model'],
-            content: data.dig('choices', 0, 'delta', 'content'),
-            tool_calls: parse_tool_calls(data.dig('choices', 0, 'delta', 'tool_calls'), parse_arguments: false),
+            content: content,
+            thinking: Thinking.build(
+              text: thinking_from_blocks || delta['reasoning_content'] || delta['reasoning'],
+              signature: delta['reasoning_signature']
+            ),
+            tool_calls: parse_tool_calls(delta['tool_calls'], parse_arguments: false),
             input_tokens: usage['prompt_tokens'],
             output_tokens: usage['completion_tokens'],
             cached_tokens: cached_tokens,
-            cache_creation_tokens: 0
+            cache_creation_tokens: 0,
+            thinking_tokens: usage.dig('completion_tokens_details', 'reasoning_tokens')
           )
         end

data/lib/ruby_llm/providers/openai/temperature.rb ADDED Viewed

@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+module RubyLLM
+  module Providers
+    class OpenAI
+      # Normalizes temperature for OpenAI models with provider-specific requirements.
+      module Temperature
+        module_function
+        def normalize(temperature, model_id)
+          if model_id.match?(/^(o\d|gpt-5)/) && !temperature.nil? && !temperature_close_to_one?(temperature)
+            RubyLLM.logger.debug { "Model #{model_id} requires temperature=1.0, setting that instead." }
+            1.0
+          elsif model_id.include?('-search')
+            RubyLLM.logger.debug { "Model #{model_id} does not accept temperature parameter, removing" }
+            nil
+          else
+            temperature
+          end
+        end
+        def temperature_close_to_one?(temperature)
+          (temperature.to_f - 1.0).abs <= Float::EPSILON
+        end
+      end
+    end
+  end
+end