ruby_llm 1.14.1 → 1.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -3
  3. data/lib/generators/ruby_llm/generator_helpers.rb +8 -0
  4. data/lib/generators/ruby_llm/tool/templates/tool.rb.tt +1 -1
  5. data/lib/ruby_llm/active_record/acts_as.rb +3 -0
  6. data/lib/ruby_llm/active_record/acts_as_legacy.rb +52 -25
  7. data/lib/ruby_llm/active_record/chat_methods.rb +39 -22
  8. data/lib/ruby_llm/active_record/message_methods.rb +17 -1
  9. data/lib/ruby_llm/active_record/model_methods.rb +7 -9
  10. data/lib/ruby_llm/active_record/payload_helpers.rb +3 -0
  11. data/lib/ruby_llm/active_record/tool_call_methods.rb +3 -0
  12. data/lib/ruby_llm/agent.rb +3 -2
  13. data/lib/ruby_llm/aliases.json +34 -4
  14. data/lib/ruby_llm/attachment.rb +11 -27
  15. data/lib/ruby_llm/chat.rb +62 -21
  16. data/lib/ruby_llm/cost.rb +224 -0
  17. data/lib/ruby_llm/image.rb +37 -4
  18. data/lib/ruby_llm/message.rb +20 -0
  19. data/lib/ruby_llm/model/info.rb +17 -0
  20. data/lib/ruby_llm/model/pricing_category.rb +13 -2
  21. data/lib/ruby_llm/models.json +25168 -20374
  22. data/lib/ruby_llm/models.rb +2 -1
  23. data/lib/ruby_llm/models_schema.json +3 -0
  24. data/lib/ruby_llm/provider.rb +10 -3
  25. data/lib/ruby_llm/providers/anthropic/tools.rb +4 -1
  26. data/lib/ruby_llm/providers/bedrock/chat.rb +24 -13
  27. data/lib/ruby_llm/providers/bedrock/streaming.rb +4 -1
  28. data/lib/ruby_llm/providers/gemini/chat.rb +8 -1
  29. data/lib/ruby_llm/providers/gemini/images.rb +2 -2
  30. data/lib/ruby_llm/providers/gemini/streaming.rb +4 -1
  31. data/lib/ruby_llm/providers/gemini/tools.rb +3 -1
  32. data/lib/ruby_llm/providers/mistral/capabilities.rb +6 -1
  33. data/lib/ruby_llm/providers/mistral/chat.rb +55 -4
  34. data/lib/ruby_llm/providers/openai/capabilities.rb +82 -12
  35. data/lib/ruby_llm/providers/openai/chat.rb +45 -6
  36. data/lib/ruby_llm/providers/openai/images.rb +58 -6
  37. data/lib/ruby_llm/providers/openai/streaming.rb +5 -6
  38. data/lib/ruby_llm/providers/openrouter/chat.rb +30 -6
  39. data/lib/ruby_llm/providers/openrouter/images.rb +2 -2
  40. data/lib/ruby_llm/providers/openrouter/models.rb +1 -1
  41. data/lib/ruby_llm/providers/openrouter/streaming.rb +5 -6
  42. data/lib/ruby_llm/railtie.rb +6 -0
  43. data/lib/ruby_llm/tokens.rb +8 -0
  44. data/lib/ruby_llm/tool.rb +24 -7
  45. data/lib/ruby_llm/version.rb +1 -1
  46. data/lib/ruby_llm.rb +2 -4
  47. data/lib/tasks/models.rake +13 -12
  48. metadata +19 -4
@@ -356,7 +356,8 @@ module RubyLLM
356
356
  text_standard = {
357
357
  input_per_million: cost[:input],
358
358
  output_per_million: cost[:output],
359
- cached_input_per_million: cost[:cache_read],
359
+ cache_read_input_per_million: cost[:cache_read],
360
+ cache_write_input_per_million: cost[:cache_write],
360
361
  reasoning_output_per_million: cost[:reasoning]
361
362
  }.compact
362
363
 
@@ -87,7 +87,10 @@
87
87
  "type": "object",
88
88
  "properties": {
89
89
  "input_per_million": {"type": "number", "minimum": 0},
90
+ "cache_read_input_per_million": {"type": "number", "minimum": 0},
91
+ "cache_write_input_per_million": {"type": "number", "minimum": 0},
90
92
  "cached_input_per_million": {"type": "number", "minimum": 0},
93
+ "cache_creation_input_per_million": {"type": "number", "minimum": 0},
91
94
  "output_per_million": {"type": "number", "minimum": 0},
92
95
  "reasoning_output_per_million": {"type": "number", "minimum": 0}
93
96
  }
@@ -81,9 +81,10 @@ module RubyLLM
81
81
  parse_moderation_response(response, model:)
82
82
  end
83
83
 
84
- def paint(prompt, model:, size:)
85
- payload = render_image_payload(prompt, model:, size:)
86
- response = @connection.post images_url, payload
84
+ def paint(prompt, model:, size:, with: nil, mask: nil, params: {}) # rubocop:disable Metrics/ParameterLists
85
+ validate_paint_inputs!(with:, mask:)
86
+ payload = render_image_payload(prompt, model:, size:, with:, mask:, params:)
87
+ response = @connection.post images_url(with:, mask:), payload
87
88
  parse_image_response(response, model:)
88
89
  end
89
90
 
@@ -225,6 +226,12 @@ module RubyLLM
225
226
 
226
227
  private
227
228
 
229
+ def validate_paint_inputs!(with:, mask:)
230
+ return if with.nil? && mask.nil?
231
+
232
+ raise UnsupportedAttachmentError, "#{name} does not support image references in paint"
233
+ end
234
+
228
235
  def build_audio_file_part(file_path)
229
236
  expanded_path = File.expand_path(file_path)
230
237
  mime_type = Marcel::MimeType.for(Pathname.new(expanded_path))
@@ -45,10 +45,13 @@ module RubyLLM
45
45
  end
46
46
 
47
47
  def format_tool_result_block(msg)
48
+ content = msg.content
49
+ content = '(no output)' if content.nil? || (content.respond_to?(:empty?) && content.empty?)
50
+
48
51
  {
49
52
  type: 'tool_result',
50
53
  tool_use_id: msg.tool_call_id,
51
- content: Media.format_content(msg.content)
54
+ content: Media.format_content(content)
52
55
  }
53
56
  end
54
57
 
@@ -56,7 +56,7 @@ module RubyLLM
56
56
  content: parse_text_content(content_blocks),
57
57
  thinking: Thinking.build(text: thinking_text, signature: thinking_signature),
58
58
  tool_calls: parse_tool_calls(content_blocks),
59
- input_tokens: usage['inputTokens'],
59
+ input_tokens: input_tokens(usage),
60
60
  output_tokens: usage['outputTokens'],
61
61
  cached_tokens: usage['cacheReadInputTokens'],
62
62
  cache_creation_tokens: usage['cacheWriteInputTokens'],
@@ -66,6 +66,13 @@ module RubyLLM
66
66
  )
67
67
  end
68
68
 
69
+ def input_tokens(usage)
70
+ input_tokens = usage['inputTokens']
71
+ return unless input_tokens
72
+
73
+ [input_tokens.to_i - usage['cacheReadInputTokens'].to_i - usage['cacheWriteInputTokens'].to_i, 0].max
74
+ end
75
+
69
76
  def render_messages(messages)
70
77
  rendered = []
71
78
  tool_result_blocks = []
@@ -154,19 +161,23 @@ module RubyLLM
154
161
 
155
162
  def render_tool_result_content(content)
156
163
  return render_raw_tool_result_content(content.value) if content.is_a?(RubyLLM::Content::Raw)
164
+ return [{ json: content }] if content.is_a?(Hash) || content.is_a?(Array)
165
+ return render_content_tool_result_content(content) if content.is_a?(RubyLLM::Content)
157
166
 
158
- if content.is_a?(Hash) || content.is_a?(Array)
159
- [{ json: content }]
160
- elsif content.is_a?(RubyLLM::Content)
161
- blocks = []
162
- blocks << { text: content.text } if content.text
163
- content.attachments.each do |attachment|
164
- blocks << { text: attachment.for_llm }
165
- end
166
- blocks
167
- else
168
- [{ text: content.to_s }]
169
- end
167
+ [text_tool_result_block(content)]
168
+ end
169
+
170
+ def render_content_tool_result_content(content)
171
+ blocks = []
172
+ blocks << text_tool_result_block(content.text) unless content.text.to_s.empty?
173
+ content.attachments.each { |attachment| blocks << text_tool_result_block(attachment.for_llm) }
174
+ blocks.empty? ? [text_tool_result_block(nil)] : blocks
175
+ end
176
+
177
+ def text_tool_result_block(text)
178
+ text = text.to_s
179
+ text = '(no output)' if text.empty?
180
+ { text: text }
170
181
  end
171
182
 
172
183
  def render_raw_tool_result_content(raw_value)
@@ -158,7 +158,10 @@ module RubyLLM
158
158
  end
159
159
 
160
160
  def extract_input_tokens(metadata_usage, usage, message_usage)
161
- metadata_usage['inputTokens'] || usage['inputTokens'] || message_usage['input_tokens']
161
+ bedrock_usage = metadata_usage['inputTokens'] ? metadata_usage : usage
162
+ return Bedrock::Chat.input_tokens(bedrock_usage) if bedrock_usage['inputTokens']
163
+
164
+ message_usage['input_tokens']
162
165
  end
163
166
 
164
167
  def extract_output_tokens(metadata_usage, usage)
@@ -118,7 +118,7 @@ module RubyLLM
118
118
  signature: extract_thought_signature(parts)
119
119
  ),
120
120
  tool_calls: tool_calls,
121
- input_tokens: data.dig('usageMetadata', 'promptTokenCount'),
121
+ input_tokens: input_tokens(data),
122
122
  output_tokens: calculate_output_tokens(data),
123
123
  cached_tokens: data.dig('usageMetadata', 'cachedContentTokenCount'),
124
124
  thinking_tokens: data.dig('usageMetadata', 'thoughtsTokenCount'),
@@ -127,6 +127,13 @@ module RubyLLM
127
127
  )
128
128
  end
129
129
 
130
+ def input_tokens(data)
131
+ prompt_tokens = data.dig('usageMetadata', 'promptTokenCount')
132
+ return unless prompt_tokens
133
+
134
+ [prompt_tokens.to_i - data.dig('usageMetadata', 'cachedContentTokenCount').to_i, 0].max
135
+ end
136
+
130
137
  def convert_schema_to_gemini(schema)
131
138
  return nil unless schema
132
139
 
@@ -5,11 +5,11 @@ module RubyLLM
5
5
  class Gemini
6
6
  # Image generation methods for the Gemini API implementation
7
7
  module Images
8
- def images_url
8
+ def images_url(with: nil, mask: nil) # rubocop:disable Lint/UnusedMethodArgument
9
9
  "models/#{@model}:predict"
10
10
  end
11
11
 
12
- def render_image_payload(prompt, model:, size:)
12
+ def render_image_payload(prompt, model:, size:, with: nil, mask: nil, params: {}) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
13
13
  RubyLLM.logger.debug { "Ignoring size #{size}. Gemini does not support image size customization." }
14
14
  @model = model
15
15
  {
@@ -70,7 +70,10 @@ module RubyLLM
70
70
  end
71
71
 
72
72
  def extract_input_tokens(data)
73
- data.dig('usageMetadata', 'promptTokenCount')
73
+ prompt_tokens = data.dig('usageMetadata', 'promptTokenCount')
74
+ return unless prompt_tokens
75
+
76
+ [prompt_tokens.to_i - data.dig('usageMetadata', 'cachedContentTokenCount').to_i, 0].max
74
77
  end
75
78
 
76
79
  def extract_output_tokens(data)
@@ -46,13 +46,15 @@ module RubyLLM
46
46
 
47
47
  def format_tool_result(msg, function_name = nil)
48
48
  function_name ||= msg.tool_call_id
49
+ content = msg.content
50
+ content = '(no output)' if content.nil? || (content.respond_to?(:empty?) && content.empty?)
49
51
 
50
52
  [{
51
53
  functionResponse: {
52
54
  name: function_name,
53
55
  response: {
54
56
  name: function_name,
55
- content: Media.format_content(msg.content)
57
+ content: Media.format_content(content)
56
58
  }
57
59
  }
58
60
  }]
@@ -31,6 +31,11 @@ module RubyLLM
31
31
  !model_id.match?(/embed|moderation|ocr|voxtral|transcriptions/) && supports_tools?(model_id)
32
32
  end
33
33
 
34
+ def supports_reasoning?(model_id)
35
+ model_id.match?(/magistral/) ||
36
+ model_id.match?(/\Amistral-(?:small-latest|medium-(?:3(?:[.-]5)?|latest))\z/)
37
+ end
38
+
34
39
  def format_display_name(model_id)
35
40
  case model_id
36
41
  when /mistral-large/ then 'Mistral Large'
@@ -101,7 +106,7 @@ module RubyLLM
101
106
  capabilities << 'structured_output' if supports_json_mode?(model_id)
102
107
  capabilities << 'vision' if supports_vision?(model_id)
103
108
 
104
- capabilities << 'reasoning' if model_id.match?(/magistral/)
109
+ capabilities << 'reasoning' if supports_reasoning?(model_id)
105
110
  capabilities << 'batch' unless model_id.match?(/voxtral|ocr|embed|moderation/)
106
111
  capabilities << 'fine_tuning' if model_id.match?(/mistral-(small|medium|large)|devstral/)
107
112
  capabilities << 'distillation' if model_id.match?(/ministral/)
@@ -27,12 +27,30 @@ module RubyLLM
27
27
  schema: nil, thinking: nil, tool_prefs: nil)
28
28
  payload = super
29
29
  payload.delete(:stream_options)
30
- payload.delete(:reasoning_effort)
31
- warn_on_unsupported_thinking(model, thinking)
30
+ configure_thinking_payload(payload, model, thinking)
31
+ normalize_required_tool_choice(payload)
32
32
  payload
33
33
  end
34
34
  # rubocop:enable Metrics/ParameterLists
35
35
 
36
+ def build_tool_choice(tool_choice)
37
+ return 'any' if tool_choice == :required
38
+
39
+ OpenAI::Tools.build_tool_choice(tool_choice)
40
+ end
41
+
42
+ def normalize_required_tool_choice(payload)
43
+ return unless payload[:tool_choice] == 'any' && Array(payload[:tools]).one?
44
+
45
+ function_name = payload.dig(:tools, 0, :function, :name)
46
+ return unless function_name
47
+
48
+ payload[:tool_choice] = {
49
+ type: 'function',
50
+ function: { name: function_name }
51
+ }
52
+ end
53
+
36
54
  def format_content_with_thinking(msg)
37
55
  formatted_content = OpenAI::Media.format_content(msg.content)
38
56
  return formatted_content unless msg.role == :assistant && msg.thinking
@@ -45,14 +63,47 @@ module RubyLLM
45
63
 
46
64
  def warn_on_unsupported_thinking(model, thinking)
47
65
  return unless thinking&.enabled?
48
- return if model.id.to_s.include?('magistral')
66
+ return if native_reasoning_model?(model.id) || adjustable_reasoning_model?(model.id)
49
67
 
50
68
  RubyLLM.logger.warn(
51
- 'Mistral thinking is only supported on Magistral models. ' \
69
+ 'Mistral thinking is only supported on Magistral and adjustable-reasoning models. ' \
52
70
  "Ignoring thinking settings for #{model.id}."
53
71
  )
54
72
  end
55
73
 
74
+ def configure_thinking_payload(payload, model, thinking)
75
+ return unless thinking&.enabled?
76
+
77
+ if native_reasoning_model?(model.id)
78
+ configure_native_reasoning_payload(payload, thinking)
79
+ elsif adjustable_reasoning_model?(model.id)
80
+ payload[:reasoning_effort] = reasoning_effort_for(thinking)
81
+ else
82
+ payload.delete(:reasoning_effort)
83
+ warn_on_unsupported_thinking(model, thinking)
84
+ end
85
+ end
86
+
87
+ def configure_native_reasoning_payload(payload, thinking)
88
+ payload.delete(:reasoning_effort)
89
+ payload[:prompt_mode] = thinking.effort == 'none' ? nil : 'reasoning'
90
+ end
91
+
92
+ def reasoning_effort_for(thinking)
93
+ effort = thinking.respond_to?(:effort) ? thinking.effort : nil
94
+ return effort if %w[high none].include?(effort)
95
+
96
+ 'high'
97
+ end
98
+
99
+ def native_reasoning_model?(model_id)
100
+ model_id.to_s.include?('magistral')
101
+ end
102
+
103
+ def adjustable_reasoning_model?(model_id)
104
+ model_id.to_s.match?(/\Amistral-(?:small-latest|medium-(?:3(?:[.-]5)?|latest))\z/)
105
+ end
106
+
56
107
  def build_thinking_blocks(thinking)
57
108
  return [] unless thinking
58
109
 
@@ -8,6 +8,9 @@ module RubyLLM
8
8
  module_function
9
9
 
10
10
  MODEL_PATTERNS = {
11
+ gpt_image15: /^gpt-image-1\.5/,
12
+ gpt_image_mini: /^gpt-image-1-mini/,
13
+ gpt_image: /^gpt-image-1(?:$|-)/,
11
14
  gpt41: /^gpt-4\.1(?!-(?:mini|nano))/,
12
15
  gpt41_mini: /^gpt-4\.1-mini/,
13
16
  gpt41_nano: /^gpt-4\.1-nano/,
@@ -43,6 +46,18 @@ module RubyLLM
43
46
  }.freeze
44
47
 
45
48
  PRICES = {
49
+ gpt_image: {
50
+ text: { input: 5.0, cached_input: 1.25 },
51
+ images: { input: 10.0, cached_input: 2.5, output: 40.0 }
52
+ },
53
+ gpt_image_mini: {
54
+ text: { input: 2.0, cached_input: 0.2 },
55
+ images: { input: 2.5, cached_input: 0.25, output: 8.0 }
56
+ },
57
+ gpt_image15: {
58
+ text: { input: 5.0, cached_input: 1.25, output: 10.0 },
59
+ images: { input: 8.0, cached_input: 2.0, output: 32.0 }
60
+ },
46
61
  gpt5: { input: 1.25, output: 10.0, cached_input: 0.125 },
47
62
  gpt5_mini: { input: 0.25, output: 2.0, cached_input: 0.025 },
48
63
  gpt5_nano: { input: 0.05, output: 0.4, cached_input: 0.005 },
@@ -77,6 +92,20 @@ module RubyLLM
77
92
  moderation: { price: 0.0 }
78
93
  }.freeze
79
94
 
95
+ NIL_LIMIT_FAMILIES = %w[
96
+ gpt_image
97
+ gpt_image_mini
98
+ gpt_image15
99
+ gpt4o_mini_tts
100
+ tts1
101
+ tts1_hd
102
+ whisper
103
+ moderation
104
+ embedding3_large
105
+ embedding3_small
106
+ embedding_ada
107
+ ].freeze
108
+
80
109
  def supports_tool_choice?(_model_id)
81
110
  true
82
111
  end
@@ -86,7 +115,10 @@ module RubyLLM
86
115
  end
87
116
 
88
117
  def context_window_for(model_id)
89
- case model_family(model_id)
118
+ family = model_family(model_id)
119
+ return nil if NIL_LIMIT_FAMILIES.include?(family)
120
+
121
+ case family
90
122
  when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 1_047_576
91
123
  when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt4_turbo', 'gpt4o', 'gpt4o_audio', 'gpt4o_mini',
92
124
  'gpt4o_mini_audio', 'gpt4o_mini_realtime', 'gpt4o_realtime', 'gpt4o_search',
@@ -95,14 +127,15 @@ module RubyLLM
95
127
  when 'gpt4o_mini_transcribe' then 16_000
96
128
  when 'o1', 'o1_pro', 'o3_mini' then 200_000
97
129
  when 'gpt35_turbo' then 16_385
98
- when 'gpt4o_mini_tts', 'tts1', 'tts1_hd', 'whisper', 'moderation',
99
- 'embedding3_large', 'embedding3_small', 'embedding_ada' then nil
100
130
  else 4_096
101
131
  end
102
132
  end
103
133
 
104
134
  def max_tokens_for(model_id)
105
- case model_family(model_id)
135
+ family = model_family(model_id)
136
+ return nil if NIL_LIMIT_FAMILIES.include?(family)
137
+
138
+ case family
106
139
  when 'gpt5', 'gpt5_mini', 'gpt5_nano' then 400_000
107
140
  when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 32_768
108
141
  when 'gpt4' then 8_192
@@ -110,8 +143,6 @@ module RubyLLM
110
143
  when 'gpt4o_mini_transcribe' then 2_000
111
144
  when 'o1', 'o1_pro', 'o3_mini' then 100_000
112
145
  when 'o1_mini' then 65_536
113
- when 'gpt4o_mini_tts', 'tts1', 'tts1_hd', 'whisper', 'moderation',
114
- 'embedding3_large', 'embedding3_small', 'embedding_ada' then nil
115
146
  else 16_384
116
147
  end
117
148
  end
@@ -126,13 +157,15 @@ module RubyLLM
126
157
  end
127
158
 
128
159
  def pricing_for(model_id)
160
+ return image_pricing_for(model_id) if image_model?(model_id)
161
+
129
162
  standard_pricing = {
130
163
  input_per_million: input_price_for(model_id),
131
164
  output_per_million: output_price_for(model_id)
132
165
  }
133
166
 
134
167
  cached_price = cached_input_price_for(model_id)
135
- standard_pricing[:cached_input_per_million] = cached_price if cached_price
168
+ standard_pricing[:cache_read_input_per_million] = cached_price if cached_price
136
169
 
137
170
  { text_tokens: { standard: standard_pricing } }
138
171
  end
@@ -147,8 +180,9 @@ module RubyLLM
147
180
 
148
181
  def supports_vision?(model_id)
149
182
  case model_family(model_id)
150
- when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'gpt4',
151
- 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'moderation', 'gpt4o_search'
183
+ when 'gpt_image', 'gpt_image_mini', 'gpt_image15', 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini',
184
+ 'gpt41_nano', 'gpt4', 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'moderation',
185
+ 'gpt4o_search'
152
186
  true
153
187
  else
154
188
  false
@@ -176,27 +210,63 @@ module RubyLLM
176
210
  end
177
211
 
178
212
  def input_price_for(model_id)
213
+ return family_prices(model_id).dig(:text, :input) if image_model?(model_id)
214
+
179
215
  price_for(model_id, :input, 0.50)
180
216
  end
181
217
 
182
218
  def output_price_for(model_id)
219
+ return family_prices(model_id).dig(:text, :output) if image_model?(model_id)
220
+
183
221
  price_for(model_id, :output, 1.50)
184
222
  end
185
223
 
186
224
  def cached_input_price_for(model_id)
225
+ return family_prices(model_id).dig(:text, :cached_input) if image_model?(model_id)
226
+
187
227
  family = model_family(model_id).to_sym
188
228
  PRICES.fetch(family, {})[:cached_input]
189
229
  end
190
230
 
231
+ def image_model?(model_id)
232
+ %w[gpt_image gpt_image_mini gpt_image15].include?(model_family(model_id))
233
+ end
234
+
235
+ def image_pricing_for(model_id)
236
+ text_pricing = {
237
+ input_per_million: input_price_for(model_id)
238
+ }
239
+ cached_text_price = cached_input_price_for(model_id)
240
+ text_pricing[:cache_read_input_per_million] = cached_text_price if cached_text_price
241
+
242
+ image_pricing = {
243
+ input_per_million: family_prices(model_id).dig(:images, :input),
244
+ output_per_million: family_prices(model_id).dig(:images, :output)
245
+ }
246
+ cached_image_price = family_prices(model_id).dig(:images, :cached_input)
247
+ image_pricing[:cache_read_input_per_million] = cached_image_price if cached_image_price
248
+
249
+ {
250
+ text_tokens: { standard: text_pricing },
251
+ images: { standard: image_pricing }
252
+ }
253
+ end
254
+
191
255
  def price_for(model_id, key, fallback)
192
- family = model_family(model_id).to_sym
193
- prices = PRICES.fetch(family, { key => fallback })
256
+ prices = family_prices(model_id)
257
+ prices = { key => fallback } if prices.empty?
194
258
  prices[key] || prices[:price] || fallback
195
259
  end
196
260
 
261
+ def family_prices(model_id)
262
+ family = model_family(model_id).to_sym
263
+ PRICES.fetch(family, {})
264
+ end
265
+
197
266
  module_function :context_window_for, :max_tokens_for, :critical_capabilities_for, :pricing_for,
198
267
  :model_family, :supports_vision?, :supports_functions?, :supports_structured_output?,
199
- :input_price_for, :output_price_for, :cached_input_price_for, :price_for
268
+ :input_price_for, :output_price_for, :cached_input_price_for, :image_model?,
269
+ :image_pricing_for, :price_for, :family_prices
200
270
  end
201
271
  end
202
272
  end
@@ -61,8 +61,7 @@ module RubyLLM
61
61
  return unless message_data
62
62
 
63
63
  usage = data['usage'] || {}
64
- cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
65
- thinking_tokens = usage.dig('completion_tokens_details', 'reasoning_tokens')
64
+ thinking_tokens = thinking_tokens(usage)
66
65
  content, thinking_from_blocks = extract_content_and_thinking(message_data['content'])
67
66
  thinking_text = thinking_from_blocks || extract_thinking_text(message_data)
68
67
  thinking_signature = extract_thinking_signature(message_data)
@@ -72,16 +71,56 @@ module RubyLLM
72
71
  content: content,
73
72
  thinking: Thinking.build(text: thinking_text, signature: thinking_signature),
74
73
  tool_calls: parse_tool_calls(message_data['tool_calls']),
75
- input_tokens: usage['prompt_tokens'],
76
- output_tokens: usage['completion_tokens'],
77
- cached_tokens: cached_tokens,
78
- cache_creation_tokens: 0,
74
+ input_tokens: input_tokens(usage),
75
+ output_tokens: output_tokens(usage),
76
+ cached_tokens: cache_read_tokens(usage),
77
+ cache_creation_tokens: cache_write_tokens(usage),
79
78
  thinking_tokens: thinking_tokens,
80
79
  model_id: data['model'],
81
80
  raw: response
82
81
  )
83
82
  end
84
83
 
84
+ def input_tokens(usage)
85
+ return usage['prompt_cache_miss_tokens'] if usage['prompt_cache_miss_tokens']
86
+
87
+ prompt_tokens = usage['prompt_tokens']
88
+ return unless prompt_tokens
89
+
90
+ [prompt_tokens.to_i - cache_read_tokens(usage).to_i - cache_write_tokens(usage).to_i, 0].max
91
+ end
92
+
93
+ def output_tokens(usage)
94
+ completion_tokens = usage['completion_tokens']
95
+ return unless completion_tokens
96
+
97
+ completion_tokens = completion_tokens.to_i
98
+ generated_tokens = generated_tokens_from_total(usage)
99
+ return completion_tokens unless generated_tokens && generated_tokens > completion_tokens
100
+
101
+ generated_tokens
102
+ end
103
+
104
+ def generated_tokens_from_total(usage)
105
+ prompt_tokens = usage['prompt_tokens']
106
+ total_tokens = usage['total_tokens']
107
+ return unless prompt_tokens && total_tokens
108
+
109
+ [total_tokens.to_i - prompt_tokens.to_i, 0].max
110
+ end
111
+
112
+ def cache_read_tokens(usage)
113
+ usage.dig('prompt_tokens_details', 'cached_tokens') || usage['prompt_cache_hit_tokens']
114
+ end
115
+
116
+ def cache_write_tokens(usage)
117
+ usage.dig('prompt_tokens_details', 'cache_write_tokens') || 0
118
+ end
119
+
120
+ def thinking_tokens(usage)
121
+ usage.dig('completion_tokens_details', 'reasoning_tokens') || usage['reasoning_tokens']
122
+ end
123
+
85
124
  def format_messages(messages)
86
125
  messages.map do |msg|
87
126
  {
@@ -7,31 +7,83 @@ module RubyLLM
7
7
  module Images
8
8
  module_function
9
9
 
10
- def images_url
11
- 'images/generations'
10
+ def images_url(with: nil, mask: nil)
11
+ editing?(with, mask) ? 'images/edits' : 'images/generations'
12
12
  end
13
13
 
14
- def render_image_payload(prompt, model:, size:)
14
+ def render_image_payload(prompt, model:, size:, with: nil, mask: nil, params: {}) # rubocop:disable Metrics/ParameterLists
15
+ return render_edit_payload(prompt, model:, with:, mask:, params:) if editing?(with, mask)
16
+
15
17
  {
16
18
  model: model,
17
19
  prompt: prompt,
18
20
  n: 1,
19
21
  size: size
20
- }
22
+ }.merge(params)
21
23
  end
22
24
 
23
25
  def parse_image_response(response, model:)
24
26
  data = response.body
25
- image_data = data['data'].first
27
+ image_data = Array(data['data']).first
28
+
29
+ raise Error.new(nil, 'Unexpected response format from OpenAI image API') unless image_data
26
30
 
27
31
  Image.new(
28
32
  url: image_data['url'],
29
33
  mime_type: 'image/png', # DALL-E typically returns PNGs
30
34
  revised_prompt: image_data['revised_prompt'],
31
35
  model_id: model,
32
- data: image_data['b64_json']
36
+ data: image_data['b64_json'],
37
+ usage: data['usage'] || {}
33
38
  )
34
39
  end
40
+
41
+ def validate_paint_inputs!(with:, mask:)
42
+ return unless editing?(with, mask)
43
+
44
+ raise ArgumentError, 'with: is required when mask: is provided' if mask && !attachments?(with)
45
+ end
46
+
47
+ def render_edit_payload(prompt, model:, with:, mask:, params:)
48
+ payload = params.merge(
49
+ model: model,
50
+ prompt: prompt,
51
+ image: build_upload_parts(with, label: 'images'),
52
+ n: 1
53
+ )
54
+ payload[:mask] = build_upload_part(mask, label: 'mask') if mask
55
+ payload
56
+ end
57
+
58
+ def build_upload_parts(sources, label:)
59
+ Array(sources).filter_map do |source|
60
+ next if blank_attachment?(source)
61
+
62
+ build_upload_part(source, label:)
63
+ end
64
+ end
65
+
66
+ def build_upload_part(source, label:)
67
+ attachment = Attachment.new(source)
68
+ unless attachment.image?
69
+ raise UnsupportedAttachmentError,
70
+ "OpenAI image editing only supports image attachments for #{label}"
71
+ end
72
+
73
+ Faraday::UploadIO.new(StringIO.new(attachment.content), attachment.mime_type, attachment.filename)
74
+ end
75
+
76
+ def editing?(with, mask)
77
+ attachments?(with) || !mask.nil?
78
+ end
79
+
80
+ def attachments?(value)
81
+ Array(value).any? { |item| !blank_attachment?(item) }
82
+ end
83
+
84
+ def blank_attachment?(value)
85
+ value.nil? || (value.is_a?(String) && value.strip.empty?)
86
+ end
35
87
  end
36
88
  end
37
89
  end