RubyGems - ruby_llm - Versions diffs - 1.14.0 → 1.15.0 - Mend

ruby_llm 1.14.0 → 1.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

checksums.yaml +4 -4
data/README.md +7 -5
data/lib/generators/ruby_llm/generator_helpers.rb +8 -0
data/lib/generators/ruby_llm/tool/templates/tool.rb.tt +1 -1
data/lib/ruby_llm/active_record/acts_as.rb +3 -0
data/lib/ruby_llm/active_record/acts_as_legacy.rb +52 -25
data/lib/ruby_llm/active_record/chat_methods.rb +47 -23
data/lib/ruby_llm/active_record/message_methods.rb +19 -14
data/lib/ruby_llm/active_record/model_methods.rb +7 -9
data/lib/ruby_llm/active_record/payload_helpers.rb +29 -0
data/lib/ruby_llm/active_record/tool_call_methods.rb +5 -15
data/lib/ruby_llm/agent.rb +3 -2
data/lib/ruby_llm/aliases.json +53 -14
data/lib/ruby_llm/attachment.rb +11 -27
data/lib/ruby_llm/chat.rb +62 -21
data/lib/ruby_llm/cost.rb +224 -0
data/lib/ruby_llm/image.rb +37 -4
data/lib/ruby_llm/message.rb +20 -0
data/lib/ruby_llm/model/info.rb +17 -0
data/lib/ruby_llm/model/pricing_category.rb +13 -2
data/lib/ruby_llm/models.json +26511 -24930
data/lib/ruby_llm/models.rb +2 -1
data/lib/ruby_llm/models_schema.json +3 -0
data/lib/ruby_llm/provider.rb +10 -3
data/lib/ruby_llm/providers/anthropic/capabilities.rb +1 -133
data/lib/ruby_llm/providers/anthropic/models.rb +2 -8
data/lib/ruby_llm/providers/anthropic/tools.rb +4 -1
data/lib/ruby_llm/providers/bedrock/chat.rb +24 -13
data/lib/ruby_llm/providers/bedrock/streaming.rb +4 -1
data/lib/ruby_llm/providers/deepseek/capabilities.rb +1 -119
data/lib/ruby_llm/providers/gemini/capabilities.rb +45 -215
data/lib/ruby_llm/providers/gemini/chat.rb +8 -1
data/lib/ruby_llm/providers/gemini/images.rb +2 -2
data/lib/ruby_llm/providers/gemini/models.rb +2 -4
data/lib/ruby_llm/providers/gemini/streaming.rb +4 -1
data/lib/ruby_llm/providers/gemini/tools.rb +3 -1
data/lib/ruby_llm/providers/mistral/capabilities.rb +6 -1
data/lib/ruby_llm/providers/mistral/chat.rb +55 -4
data/lib/ruby_llm/providers/openai/capabilities.rb +157 -195
data/lib/ruby_llm/providers/openai/chat.rb +45 -6
data/lib/ruby_llm/providers/openai/images.rb +58 -6
data/lib/ruby_llm/providers/openai/models.rb +2 -4
data/lib/ruby_llm/providers/openai/streaming.rb +5 -6
data/lib/ruby_llm/providers/openrouter/chat.rb +30 -6
data/lib/ruby_llm/providers/openrouter/images.rb +2 -2
data/lib/ruby_llm/providers/openrouter/models.rb +1 -1
data/lib/ruby_llm/providers/openrouter/streaming.rb +5 -6
data/lib/ruby_llm/providers/perplexity/capabilities.rb +34 -99
data/lib/ruby_llm/providers/perplexity/models.rb +12 -14
data/lib/ruby_llm/railtie.rb +6 -0
data/lib/ruby_llm/tokens.rb +8 -0
data/lib/ruby_llm/tool.rb +24 -7
data/lib/ruby_llm/version.rb +1 -1
data/lib/ruby_llm.rb +2 -4
data/lib/tasks/models.rake +13 -12
metadata +21 -5

data/lib/ruby_llm/providers/openai/capabilities.rb CHANGED Viewed

@@ -3,13 +3,14 @@
 module RubyLLM
   module Providers
     class OpenAI
-      # Determines capabilities and pricing for OpenAI models
+      # Provider-level capability checks and narrow registry fallbacks.
       module Capabilities
         module_function
         MODEL_PATTERNS = {
-          dall_e: /^dall-e/,
-          chatgpt4o: /^chatgpt-4o/,
+          gpt_image15: /^gpt-image-1\.5/,
+          gpt_image_mini: /^gpt-image-1-mini/,
+          gpt_image: /^gpt-image-1(?:$|-)/,
           gpt41: /^gpt-4\.1(?!-(?:mini|nano))/,
           gpt41_mini: /^gpt-4\.1-mini/,
           gpt41_nano: /^gpt-4\.1-nano/,
@@ -26,9 +27,9 @@ module RubyLLM
           gpt4o_realtime: /^gpt-4o-realtime/,
           gpt4o_search: /^gpt-4o-search/,
           gpt4o_transcribe: /^gpt-4o-transcribe/,
-          gpt5: /^gpt-5/,
-          gpt5_mini: /^gpt-5-mini/,
-          gpt5_nano: /^gpt-5-nano/,
+          gpt5: /^gpt-5(?!.*(?:mini|nano))/,
+          gpt5_mini: /^gpt-5.*mini/,
+          gpt5_nano: /^gpt-5.*nano/,
           o1: /^o1(?!-(?:mini|pro))/,
           o1_mini: /^o1-mini/,
           o1_pro: /^o1-pro/,
@@ -44,101 +45,38 @@ module RubyLLM
           moderation: /^(?:omni|text)-moderation/
         }.freeze
-        def context_window_for(model_id)
-          case model_family(model_id)
-          when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 1_047_576
-          when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'chatgpt4o', 'gpt4_turbo', 'gpt4o', 'gpt4o_audio', 'gpt4o_mini',
-               'gpt4o_mini_audio', 'gpt4o_mini_realtime', 'gpt4o_realtime',
-               'gpt4o_search', 'gpt4o_transcribe', 'gpt4o_mini_search', 'o1_mini' then 128_000
-          when 'gpt4' then 8_192
-          when 'gpt4o_mini_transcribe' then 16_000
-          when 'o1', 'o1_pro', 'o3_mini' then 200_000
-          when 'gpt35_turbo' then 16_385
-          when 'gpt4o_mini_tts', 'tts1', 'tts1_hd', 'whisper', 'moderation',
-               'embedding3_large', 'embedding3_small', 'embedding_ada' then nil
-          else 4_096
-          end
-        end
-        def max_tokens_for(model_id)
-          case model_family(model_id)
-          when 'gpt5', 'gpt5_mini', 'gpt5_nano' then 400_000
-          when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 32_768
-          when 'chatgpt4o', 'gpt4o', 'gpt4o_mini', 'gpt4o_mini_search' then 16_384
-          when 'babbage', 'davinci' then 16_384 # rubocop:disable Lint/DuplicateBranch
-          when 'gpt4' then 8_192
-          when 'gpt35_turbo' then 4_096
-          when 'gpt4_turbo', 'gpt4o_realtime', 'gpt4o_mini_realtime' then 4_096 # rubocop:disable Lint/DuplicateBranch
-          when 'gpt4o_mini_transcribe' then 2_000
-          when 'o1', 'o1_pro', 'o3_mini' then 100_000
-          when 'o1_mini' then 65_536
-          when 'gpt4o_mini_tts', 'tts1', 'tts1_hd', 'whisper', 'moderation',
-               'embedding3_large', 'embedding3_small', 'embedding_ada' then nil
-          else 16_384 # rubocop:disable Lint/DuplicateBranch
-          end
-        end
-        def supports_vision?(model_id)
-          case model_family(model_id)
-          when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'chatgpt4o', 'gpt4',
-               'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'moderation', 'gpt4o_search',
-               'gpt4o_mini_search' then true
-          else false
-          end
-        end
-        def supports_functions?(model_id)
-          case model_family(model_id)
-          when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'gpt4', 'gpt4_turbo', 'gpt4o',
-               'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini' then true
-          when 'chatgpt4o', 'gpt35_turbo', 'o1_mini', 'gpt4o_mini_tts',
-               'gpt4o_transcribe', 'gpt4o_search', 'gpt4o_mini_search' then false
-          else false # rubocop:disable Lint/DuplicateBranch
-          end
-        end
-        def supports_tool_choice?(_model_id)
-          true
-        end
-        def supports_tool_parallel_control?(_model_id)
-          true
-        end
-        def supports_structured_output?(model_id)
-          case model_family(model_id)
-          when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'chatgpt4o', 'gpt4o',
-               'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini' then true
-          else false
-          end
-        end
-        def supports_json_mode?(model_id)
-          supports_structured_output?(model_id)
-        end
         PRICES = {
+          gpt_image: {
+            text: { input: 5.0, cached_input: 1.25 },
+            images: { input: 10.0, cached_input: 2.5, output: 40.0 }
+          },
+          gpt_image_mini: {
+            text: { input: 2.0, cached_input: 0.2 },
+            images: { input: 2.5, cached_input: 0.25, output: 8.0 }
+          },
+          gpt_image15: {
+            text: { input: 5.0, cached_input: 1.25, output: 10.0 },
+            images: { input: 8.0, cached_input: 2.0, output: 32.0 }
+          },
           gpt5: { input: 1.25, output: 10.0, cached_input: 0.125 },
           gpt5_mini: { input: 0.25, output: 2.0, cached_input: 0.025 },
           gpt5_nano: { input: 0.05, output: 0.4, cached_input: 0.005 },
           gpt41: { input: 2.0, output: 8.0, cached_input: 0.5 },
           gpt41_mini: { input: 0.4, output: 1.6, cached_input: 0.1 },
           gpt41_nano: { input: 0.1, output: 0.4 },
-          chatgpt4o: { input: 5.0, output: 15.0 },
           gpt4: { input: 10.0, output: 30.0 },
           gpt4_turbo: { input: 10.0, output: 30.0 },
-          gpt45: { input: 75.0, output: 150.0 },
           gpt35_turbo: { input: 0.5, output: 1.5 },
           gpt4o: { input: 2.5, output: 10.0 },
-          gpt4o_audio: { input: 2.5, output: 10.0, audio_input: 40.0, audio_output: 80.0 },
+          gpt4o_audio: { input: 2.5, output: 10.0 },
           gpt4o_mini: { input: 0.15, output: 0.6 },
-          gpt4o_mini_audio: { input: 0.15, output: 0.6, audio_input: 10.0, audio_output: 20.0 },
+          gpt4o_mini_audio: { input: 0.15, output: 0.6 },
           gpt4o_mini_realtime: { input: 0.6, output: 2.4 },
-          gpt4o_mini_transcribe: { input: 1.25, output: 5.0, audio_input: 3.0 },
+          gpt4o_mini_transcribe: { input: 1.25, output: 5.0 },
           gpt4o_mini_tts: { input: 0.6, output: 12.0 },
           gpt4o_realtime: { input: 5.0, output: 20.0 },
           gpt4o_search: { input: 2.5, output: 10.0 },
-          gpt4o_transcribe: { input: 2.5, output: 10.0, audio_input: 6.0 },
+          gpt4o_transcribe: { input: 2.5, output: 10.0 },
           o1: { input: 15.0, output: 60.0 },
           o1_mini: { input: 1.1, output: 4.4 },
           o1_pro: { input: 150.0, output: 600.0 },
@@ -154,157 +92,181 @@ module RubyLLM
           moderation: { price: 0.0 }
         }.freeze
-        def model_family(model_id)
-          MODEL_PATTERNS.each do |family, pattern|
-            return family.to_s if model_id.match?(pattern)
-          end
-          'other'
-        end
+        NIL_LIMIT_FAMILIES = %w[
+          gpt_image
+          gpt_image_mini
+          gpt_image15
+          gpt4o_mini_tts
+          tts1
+          tts1_hd
+          whisper
+          moderation
+          embedding3_large
+          embedding3_small
+          embedding_ada
+        ].freeze
-        def input_price_for(model_id)
-          family = model_family(model_id).to_sym
-          prices = PRICES.fetch(family, { input: default_input_price })
-          prices[:input] || prices[:price] || default_input_price
+        def supports_tool_choice?(_model_id)
+          true
         end
-        def cached_input_price_for(model_id)
-          family = model_family(model_id).to_sym
-          prices = PRICES.fetch(family, {})
-          prices[:cached_input]
+        def supports_tool_parallel_control?(_model_id)
+          true
         end
-        def output_price_for(model_id)
-          family = model_family(model_id).to_sym
-          prices = PRICES.fetch(family, { output: default_output_price })
-          prices[:output] || prices[:price] || default_output_price
-        end
+        def context_window_for(model_id)
+          family = model_family(model_id)
+          return nil if NIL_LIMIT_FAMILIES.include?(family)
-        def model_type(model_id)
-          case model_family(model_id)
-          when /embedding/ then 'embedding'
-          when /^tts|whisper|gpt4o_(?:mini_)?(?:transcribe|tts)$/ then 'audio'
-          when 'moderation' then 'moderation'
-          when /dall/ then 'image'
-          else 'chat'
+          case family
+          when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 1_047_576
+          when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt4_turbo', 'gpt4o', 'gpt4o_audio', 'gpt4o_mini',
+               'gpt4o_mini_audio', 'gpt4o_mini_realtime', 'gpt4o_realtime', 'gpt4o_search',
+               'gpt4o_transcribe', 'o1_mini' then 128_000
+          when 'gpt4' then 8_192
+          when 'gpt4o_mini_transcribe' then 16_000
+          when 'o1', 'o1_pro', 'o3_mini' then 200_000
+          when 'gpt35_turbo' then 16_385
+          else 4_096
           end
         end
-        def default_input_price
-          0.50
-        end
+        def max_tokens_for(model_id)
+          family = model_family(model_id)
+          return nil if NIL_LIMIT_FAMILIES.include?(family)
-        def default_output_price
-          1.50
+          case family
+          when 'gpt5', 'gpt5_mini', 'gpt5_nano' then 400_000
+          when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 32_768
+          when 'gpt4' then 8_192
+          when 'gpt35_turbo' then 4_096
+          when 'gpt4o_mini_transcribe' then 2_000
+          when 'o1', 'o1_pro', 'o3_mini' then 100_000
+          when 'o1_mini' then 65_536
+          else 16_384
+          end
         end
-        def format_display_name(model_id)
-          model_id.then { |id| humanize(id) }
-                  .then { |name| apply_special_formatting(name) }
+        def critical_capabilities_for(model_id)
+          capabilities = []
+          capabilities << 'function_calling' if supports_functions?(model_id)
+          capabilities << 'structured_output' if supports_structured_output?(model_id)
+          capabilities << 'vision' if supports_vision?(model_id)
+          capabilities << 'reasoning' if model_id.match?(/o\d|gpt-5|codex/)
+          capabilities
         end
-        def humanize(id)
-          id.tr('-', ' ')
-            .split
-            .map(&:capitalize)
-            .join(' ')
-        end
+        def pricing_for(model_id)
+          return image_pricing_for(model_id) if image_model?(model_id)
+          standard_pricing = {
+            input_per_million: input_price_for(model_id),
+            output_per_million: output_price_for(model_id)
+          }
-        def apply_special_formatting(name)
-          name
-            .gsub(/(\d{4}) (\d{2}) (\d{2})/, '\1\2\3')
-            .gsub(/^(?:Gpt|Chatgpt|Tts|Dall E) /) { |m| special_prefix_format(m.strip) }
-            .gsub(/^O([13]) /, 'O\1-')
-            .gsub(/^O[13] Mini/, '\0'.tr(' ', '-'))
-            .gsub(/\d\.\d /, '\0'.sub(' ', '-'))
-            .gsub(/4o (?=Mini|Preview|Turbo|Audio|Realtime|Transcribe|Tts)/, '4o-')
-            .gsub(/\bHd\b/, 'HD')
-            .gsub(/(?:Omni|Text) Moderation/, '\0'.tr(' ', '-'))
-            .gsub('Text Embedding', 'text-embedding-')
+          cached_price = cached_input_price_for(model_id)
+          standard_pricing[:cache_read_input_per_million] = cached_price if cached_price
+          { text_tokens: { standard: standard_pricing } }
         end
-        def special_prefix_format(prefix)
-          case prefix # rubocop:disable Style/HashLikeCase
-          when 'Gpt' then 'GPT-'
-          when 'Chatgpt' then 'ChatGPT-'
-          when 'Tts' then 'TTS-'
-          when 'Dall E' then 'DALL-E-'
+        def model_family(model_id)
+          MODEL_PATTERNS.each do |family, pattern|
+            return family.to_s if model_id.match?(pattern)
           end
+          'other'
         end
-        def self.normalize_temperature(temperature, model_id)
-          if model_id.match?(/^(o\d|gpt-5)/) && !temperature.nil? && !temperature_close_to_one?(temperature)
-            RubyLLM.logger.debug { "Model #{model_id} requires temperature=1.0, setting that instead." }
-            1.0
-          elsif model_id.match?(/-search/)
-            RubyLLM.logger.debug { "Model #{model_id} does not accept temperature parameter, removing" }
-            nil
+        def supports_vision?(model_id)
+          case model_family(model_id)
+          when 'gpt_image', 'gpt_image_mini', 'gpt_image15', 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini',
+               'gpt41_nano', 'gpt4', 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'moderation',
+               'gpt4o_search'
+            true
           else
-            temperature
+            false
           end
         end
-        def self.temperature_close_to_one?(temperature)
-          (temperature.to_f - 1.0).abs <= Float::EPSILON
+        def supports_functions?(model_id)
+          case model_family(model_id)
+          when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'gpt4',
+               'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini'
+            true
+          else
+            false
+          end
         end
-        def modalities_for(model_id)
-          modalities = {
-            input: ['text'],
-            output: ['text']
-          }
+        def supports_structured_output?(model_id)
+          case model_family(model_id)
+          when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'gpt4o',
+               'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini'
+            true
+          else
+            false
+          end
+        end
-          # Vision support
-          modalities[:input] << 'image' if supports_vision?(model_id)
-          modalities[:input] << 'audio' if model_id.match?(/whisper|audio|tts|transcribe/)
-          modalities[:input] << 'pdf' if supports_vision?(model_id)
-          modalities[:output] << 'audio' if model_id.match?(/tts|audio/)
-          modalities[:output] << 'image' if model_id.match?(/dall-e|image/)
-          modalities[:output] << 'embeddings' if model_id.match?(/embedding/)
-          modalities[:output] << 'moderation' if model_id.match?(/moderation/)
+        def input_price_for(model_id)
+          return family_prices(model_id).dig(:text, :input) if image_model?(model_id)
-          modalities
+          price_for(model_id, :input, 0.50)
         end
-        def capabilities_for(model_id) # rubocop:disable Metrics/PerceivedComplexity
-          capabilities = []
+        def output_price_for(model_id)
+          return family_prices(model_id).dig(:text, :output) if image_model?(model_id)
-          capabilities << 'streaming' unless model_id.match?(/moderation|embedding/)
-          capabilities << 'function_calling' if supports_functions?(model_id)
-          capabilities << 'structured_output' if supports_json_mode?(model_id)
-          capabilities << 'batch' if model_id.match?(/embedding|batch/)
-          capabilities << 'reasoning' if model_id.match?(/o\d|gpt-5|codex/)
+          price_for(model_id, :output, 1.50)
+        end
-          if model_id.match?(/gpt-4-turbo|gpt-4o/)
-            capabilities << 'image_generation' if model_id.match?(/vision/)
-            capabilities << 'speech_generation' if model_id.match?(/audio/)
-            capabilities << 'transcription' if model_id.match?(/audio/)
-          end
+        def cached_input_price_for(model_id)
+          return family_prices(model_id).dig(:text, :cached_input) if image_model?(model_id)
-          capabilities
+          family = model_family(model_id).to_sym
+          PRICES.fetch(family, {})[:cached_input]
         end
-        def pricing_for(model_id)
-          standard_pricing = {
-            input_per_million: input_price_for(model_id),
-            output_per_million: output_price_for(model_id)
+        def image_model?(model_id)
+          %w[gpt_image gpt_image_mini gpt_image15].include?(model_family(model_id))
+        end
+        def image_pricing_for(model_id)
+          text_pricing = {
+            input_per_million: input_price_for(model_id)
           }
+          cached_text_price = cached_input_price_for(model_id)
+          text_pricing[:cache_read_input_per_million] = cached_text_price if cached_text_price
-          if respond_to?(:cached_input_price_for)
-            cached_price = cached_input_price_for(model_id)
-            standard_pricing[:cached_input_per_million] = cached_price if cached_price
-          end
+          image_pricing = {
+            input_per_million: family_prices(model_id).dig(:images, :input),
+            output_per_million: family_prices(model_id).dig(:images, :output)
+          }
+          cached_image_price = family_prices(model_id).dig(:images, :cached_input)
+          image_pricing[:cache_read_input_per_million] = cached_image_price if cached_image_price
-          pricing = { text_tokens: { standard: standard_pricing } }
+          {
+            text_tokens: { standard: text_pricing },
+            images: { standard: image_pricing }
+          }
+        end
-          if model_id.match?(/embedding|batch/)
-            pricing[:text_tokens][:batch] = {
-              input_per_million: standard_pricing[:input_per_million] * 0.5,
-              output_per_million: standard_pricing[:output_per_million] * 0.5
-            }
-          end
+        def price_for(model_id, key, fallback)
+          prices = family_prices(model_id)
+          prices = { key => fallback } if prices.empty?
+          prices[key] || prices[:price] || fallback
+        end
-          pricing
+        def family_prices(model_id)
+          family = model_family(model_id).to_sym
+          PRICES.fetch(family, {})
         end
+        module_function :context_window_for, :max_tokens_for, :critical_capabilities_for, :pricing_for,
+                        :model_family, :supports_vision?, :supports_functions?, :supports_structured_output?,
+                        :input_price_for, :output_price_for, :cached_input_price_for, :image_model?,
+                        :image_pricing_for, :price_for, :family_prices
       end
     end
   end

data/lib/ruby_llm/providers/openai/chat.rb CHANGED Viewed

@@ -61,8 +61,7 @@ module RubyLLM
           return unless message_data
           usage = data['usage'] || {}
-          cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
-          thinking_tokens = usage.dig('completion_tokens_details', 'reasoning_tokens')
+          thinking_tokens = thinking_tokens(usage)
           content, thinking_from_blocks = extract_content_and_thinking(message_data['content'])
           thinking_text = thinking_from_blocks || extract_thinking_text(message_data)
           thinking_signature = extract_thinking_signature(message_data)
@@ -72,16 +71,56 @@ module RubyLLM
             content: content,
             thinking: Thinking.build(text: thinking_text, signature: thinking_signature),
             tool_calls: parse_tool_calls(message_data['tool_calls']),
-            input_tokens: usage['prompt_tokens'],
-            output_tokens: usage['completion_tokens'],
-            cached_tokens: cached_tokens,
-            cache_creation_tokens: 0,
+            input_tokens: input_tokens(usage),
+            output_tokens: output_tokens(usage),
+            cached_tokens: cache_read_tokens(usage),
+            cache_creation_tokens: cache_write_tokens(usage),
             thinking_tokens: thinking_tokens,
             model_id: data['model'],
             raw: response
           )
         end
+        def input_tokens(usage)
+          return usage['prompt_cache_miss_tokens'] if usage['prompt_cache_miss_tokens']
+          prompt_tokens = usage['prompt_tokens']
+          return unless prompt_tokens
+          [prompt_tokens.to_i - cache_read_tokens(usage).to_i - cache_write_tokens(usage).to_i, 0].max
+        end
+        def output_tokens(usage)
+          completion_tokens = usage['completion_tokens']
+          return unless completion_tokens
+          completion_tokens = completion_tokens.to_i
+          generated_tokens = generated_tokens_from_total(usage)
+          return completion_tokens unless generated_tokens && generated_tokens > completion_tokens
+          generated_tokens
+        end
+        def generated_tokens_from_total(usage)
+          prompt_tokens = usage['prompt_tokens']
+          total_tokens = usage['total_tokens']
+          return unless prompt_tokens && total_tokens
+          [total_tokens.to_i - prompt_tokens.to_i, 0].max
+        end
+        def cache_read_tokens(usage)
+          usage.dig('prompt_tokens_details', 'cached_tokens') || usage['prompt_cache_hit_tokens']
+        end
+        def cache_write_tokens(usage)
+          usage.dig('prompt_tokens_details', 'cache_write_tokens') || 0
+        end
+        def thinking_tokens(usage)
+          usage.dig('completion_tokens_details', 'reasoning_tokens') || usage['reasoning_tokens']
+        end
         def format_messages(messages)
           messages.map do |msg|
             {

data/lib/ruby_llm/providers/openai/images.rb CHANGED Viewed

@@ -7,31 +7,83 @@ module RubyLLM
       module Images
         module_function
-        def images_url
-          'images/generations'
+        def images_url(with: nil, mask: nil)
+          editing?(with, mask) ? 'images/edits' : 'images/generations'
         end
-        def render_image_payload(prompt, model:, size:)
+        def render_image_payload(prompt, model:, size:, with: nil, mask: nil, params: {}) # rubocop:disable Metrics/ParameterLists
+          return render_edit_payload(prompt, model:, with:, mask:, params:) if editing?(with, mask)
           {
             model: model,
             prompt: prompt,
             n: 1,
             size: size
-          }
+          }.merge(params)
         end
         def parse_image_response(response, model:)
           data = response.body
-          image_data = data['data'].first
+          image_data = Array(data['data']).first
+          raise Error.new(nil, 'Unexpected response format from OpenAI image API') unless image_data
           Image.new(
             url: image_data['url'],
             mime_type: 'image/png', # DALL-E typically returns PNGs
             revised_prompt: image_data['revised_prompt'],
             model_id: model,
-            data: image_data['b64_json']
+            data: image_data['b64_json'],
+            usage: data['usage'] || {}
           )
         end
+        def validate_paint_inputs!(with:, mask:)
+          return unless editing?(with, mask)
+          raise ArgumentError, 'with: is required when mask: is provided' if mask && !attachments?(with)
+        end
+        def render_edit_payload(prompt, model:, with:, mask:, params:)
+          payload = params.merge(
+            model: model,
+            prompt: prompt,
+            image: build_upload_parts(with, label: 'images'),
+            n: 1
+          )
+          payload[:mask] = build_upload_part(mask, label: 'mask') if mask
+          payload
+        end
+        def build_upload_parts(sources, label:)
+          Array(sources).filter_map do |source|
+            next if blank_attachment?(source)
+            build_upload_part(source, label:)
+          end
+        end
+        def build_upload_part(source, label:)
+          attachment = Attachment.new(source)
+          unless attachment.image?
+            raise UnsupportedAttachmentError,
+                  "OpenAI image editing only supports image attachments for #{label}"
+          end
+          Faraday::UploadIO.new(StringIO.new(attachment.content), attachment.mime_type, attachment.filename)
+        end
+        def editing?(with, mask)
+          attachments?(with) || !mask.nil?
+        end
+        def attachments?(value)
+          Array(value).any? { |item| !blank_attachment?(item) }
+        end
+        def blank_attachment?(value)
+          value.nil? || (value.is_a?(String) && value.strip.empty?)
+        end
       end
     end
   end

data/lib/ruby_llm/providers/openai/models.rb CHANGED Viewed

@@ -17,14 +17,12 @@ module RubyLLM
             Model::Info.new(
               id: model_id,
-              name: capabilities.format_display_name(model_id),
+              name: model_id,
               provider: slug,
-              family: capabilities.model_family(model_id),
               created_at: model_data['created'] ? Time.at(model_data['created']) : nil,
               context_window: capabilities.context_window_for(model_id),
               max_output_tokens: capabilities.max_tokens_for(model_id),
-              modalities: capabilities.modalities_for(model_id),
-              capabilities: capabilities.capabilities_for(model_id),
+              capabilities: capabilities.critical_capabilities_for(model_id),
               pricing: capabilities.pricing_for(model_id),
               metadata: {
                 object: model_data['object'],

data/lib/ruby_llm/providers/openai/streaming.rb CHANGED Viewed

@@ -13,7 +13,6 @@ module RubyLLM
         def build_chunk(data)
           usage = data['usage'] || {}
-          cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
           delta = data.dig('choices', 0, 'delta') || {}
           content_source = delta['content'] || data.dig('choices', 0, 'message', 'content')
           content, thinking_from_blocks = OpenAI::Chat.extract_content_and_thinking(content_source)
@@ -27,11 +26,11 @@ module RubyLLM
               signature: delta['reasoning_signature']
             ),
             tool_calls: parse_tool_calls(delta['tool_calls'], parse_arguments: false),
-            input_tokens: usage['prompt_tokens'],
-            output_tokens: usage['completion_tokens'],
-            cached_tokens: cached_tokens,
-            cache_creation_tokens: 0,
-            thinking_tokens: usage.dig('completion_tokens_details', 'reasoning_tokens')
+            input_tokens: OpenAI::Chat.input_tokens(usage),
+            output_tokens: OpenAI::Chat.output_tokens(usage),
+            cached_tokens: OpenAI::Chat.cache_read_tokens(usage),
+            cache_creation_tokens: OpenAI::Chat.cache_write_tokens(usage),
+            thinking_tokens: OpenAI::Chat.thinking_tokens(usage)
           )
         end