RubyGems - ruby_llm - Versions diffs - 1.14.1 → 1.16.0 - Mend

ruby_llm 1.14.1 → 1.16.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (96)

checksums.yaml +4 -4
data/README.md +6 -7
data/lib/generators/ruby_llm/generator_helpers.rb +8 -0
data/lib/generators/ruby_llm/install/templates/initializer.rb.tt +1 -1
data/lib/generators/ruby_llm/tool/templates/tool.rb.tt +1 -1
data/lib/generators/ruby_llm/upgrade_to_v1_7/upgrade_to_v1_7_generator.rb +3 -3
data/lib/ruby_llm/active_record/acts_as.rb +4 -26
data/lib/ruby_llm/active_record/acts_as_legacy.rb +123 -29
data/lib/ruby_llm/active_record/chat_methods.rb +41 -24
data/lib/ruby_llm/active_record/message_methods.rb +87 -4
data/lib/ruby_llm/active_record/model_methods.rb +7 -9
data/lib/ruby_llm/active_record/payload_helpers.rb +3 -0
data/lib/ruby_llm/active_record/tool_call_methods.rb +3 -0
data/lib/ruby_llm/agent.rb +4 -2
data/lib/ruby_llm/aliases.json +108 -75
data/lib/ruby_llm/aliases.rb +3 -0
data/lib/ruby_llm/attachment.rb +41 -40
data/lib/ruby_llm/chat.rb +229 -59
data/lib/ruby_llm/configuration.rb +14 -1
data/lib/ruby_llm/connection.rb +36 -7
data/lib/ruby_llm/content.rb +15 -1
data/lib/ruby_llm/cost.rb +224 -0
data/lib/ruby_llm/deprecator.rb +24 -0
data/lib/ruby_llm/embedding.rb +31 -1
data/lib/ruby_llm/error.rb +11 -75
data/lib/ruby_llm/error_middleware.rb +81 -0
data/lib/ruby_llm/image.rb +39 -4
data/lib/ruby_llm/instrumentation.rb +36 -0
data/lib/ruby_llm/message.rb +20 -0
data/lib/ruby_llm/mime_type.rb +25 -0
data/lib/ruby_llm/model/info.rb +53 -2
data/lib/ruby_llm/model/pricing.rb +19 -9
data/lib/ruby_llm/model/pricing_category.rb +13 -2
data/lib/ruby_llm/model/pricing_tier.rb +20 -9
data/lib/ruby_llm/model_registry.rb +39 -0
data/lib/ruby_llm/models.json +17817 -13942
data/lib/ruby_llm/models.rb +97 -31
data/lib/ruby_llm/models_schema.json +3 -0
data/lib/ruby_llm/provider.rb +20 -4
data/lib/ruby_llm/providers/anthropic/chat.rb +49 -15
data/lib/ruby_llm/providers/anthropic/models.rb +2 -0
data/lib/ruby_llm/providers/anthropic/streaming.rb +2 -0
data/lib/ruby_llm/providers/anthropic/tools.rb +32 -3
data/lib/ruby_llm/providers/azure/media.rb +1 -1
data/lib/ruby_llm/providers/bedrock/auth.rb +1 -0
data/lib/ruby_llm/providers/bedrock/chat.rb +26 -13
data/lib/ruby_llm/providers/bedrock/media.rb +21 -3
data/lib/ruby_llm/providers/bedrock/models.rb +1 -1
data/lib/ruby_llm/providers/bedrock/streaming.rb +10 -1
data/lib/ruby_llm/providers/bedrock.rb +2 -2
data/lib/ruby_llm/providers/deepseek/capabilities.rb +43 -0
data/lib/ruby_llm/providers/deepseek/chat.rb +9 -0
data/lib/ruby_llm/providers/gemini/chat.rb +10 -4
data/lib/ruby_llm/providers/gemini/images.rb +2 -2
data/lib/ruby_llm/providers/gemini/media.rb +16 -9
data/lib/ruby_llm/providers/gemini/streaming.rb +6 -1
data/lib/ruby_llm/providers/gemini/tools.rb +5 -1
data/lib/ruby_llm/providers/gpustack/chat.rb +8 -1
data/lib/ruby_llm/providers/gpustack/models.rb +2 -0
data/lib/ruby_llm/providers/mistral/capabilities.rb +7 -2
data/lib/ruby_llm/providers/mistral/chat.rb +56 -5
data/lib/ruby_llm/providers/mistral/media.rb +55 -0
data/lib/ruby_llm/providers/mistral/models.rb +2 -0
data/lib/ruby_llm/providers/mistral.rb +2 -2
data/lib/ruby_llm/providers/ollama/chat.rb +8 -1
data/lib/ruby_llm/providers/openai/capabilities.rb +82 -12
data/lib/ruby_llm/providers/openai/chat.rb +61 -7
data/lib/ruby_llm/providers/openai/images.rb +58 -6
data/lib/ruby_llm/providers/openai/media.rb +40 -16
data/lib/ruby_llm/providers/openai/streaming.rb +7 -6
data/lib/ruby_llm/providers/openai/tools.rb +2 -0
data/lib/ruby_llm/providers/openai/transcription.rb +1 -0
data/lib/ruby_llm/providers/openrouter/chat.rb +36 -8
data/lib/ruby_llm/providers/openrouter/images.rb +2 -2
data/lib/ruby_llm/providers/openrouter/models.rb +1 -1
data/lib/ruby_llm/providers/openrouter/streaming.rb +5 -6
data/lib/ruby_llm/providers/perplexity/chat.rb +11 -0
data/lib/ruby_llm/providers/perplexity/media.rb +62 -0
data/lib/ruby_llm/providers/perplexity.rb +2 -2
data/lib/ruby_llm/providers/vertexai.rb +5 -1
data/lib/ruby_llm/providers/xai/chat.rb +9 -0
data/lib/ruby_llm/providers/xai/models.rb +15 -27
data/lib/ruby_llm/providers/xai.rb +2 -2
data/lib/ruby_llm/railtie.rb +11 -1
data/lib/ruby_llm/stream_accumulator.rb +45 -30
data/lib/ruby_llm/streaming.rb +4 -0
data/lib/ruby_llm/tokens.rb +8 -0
data/lib/ruby_llm/tool.rb +24 -7
data/lib/ruby_llm/tool_concurrency.rb +105 -0
data/lib/ruby_llm/transcription.rb +2 -1
data/lib/ruby_llm/utils.rb +39 -0
data/lib/ruby_llm/version.rb +1 -1
data/lib/ruby_llm.rb +11 -6
data/lib/tasks/models.rake +45 -16
data/lib/tasks/release.rake +50 -23
metadata +35 -13

data/lib/ruby_llm/providers/openai/capabilities.rb CHANGED Viewed

@@ -8,6 +8,9 @@ module RubyLLM
         module_function
         MODEL_PATTERNS = {
+          gpt_image15: /^gpt-image-1\.5/,
+          gpt_image_mini: /^gpt-image-1-mini/,
+          gpt_image: /^gpt-image-1(?:$|-)/,
           gpt41: /^gpt-4\.1(?!-(?:mini|nano))/,
           gpt41_mini: /^gpt-4\.1-mini/,
           gpt41_nano: /^gpt-4\.1-nano/,
@@ -43,6 +46,18 @@ module RubyLLM
         }.freeze
         PRICES = {
+          gpt_image: {
+            text: { input: 5.0, cached_input: 1.25 },
+            images: { input: 10.0, cached_input: 2.5, output: 40.0 }
+          },
+          gpt_image_mini: {
+            text: { input: 2.0, cached_input: 0.2 },
+            images: { input: 2.5, cached_input: 0.25, output: 8.0 }
+          },
+          gpt_image15: {
+            text: { input: 5.0, cached_input: 1.25, output: 10.0 },
+            images: { input: 8.0, cached_input: 2.0, output: 32.0 }
+          },
           gpt5: { input: 1.25, output: 10.0, cached_input: 0.125 },
           gpt5_mini: { input: 0.25, output: 2.0, cached_input: 0.025 },
           gpt5_nano: { input: 0.05, output: 0.4, cached_input: 0.005 },
@@ -77,6 +92,20 @@ module RubyLLM
           moderation: { price: 0.0 }
         }.freeze
+        NIL_LIMIT_FAMILIES = %w[
+          gpt_image
+          gpt_image_mini
+          gpt_image15
+          gpt4o_mini_tts
+          tts1
+          tts1_hd
+          whisper
+          moderation
+          embedding3_large
+          embedding3_small
+          embedding_ada
+        ].freeze
         def supports_tool_choice?(_model_id)
           true
         end
@@ -86,7 +115,10 @@ module RubyLLM
         end
         def context_window_for(model_id)
-          case model_family(model_id)
+          family = model_family(model_id)
+          return nil if NIL_LIMIT_FAMILIES.include?(family)
+          case family
           when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 1_047_576
           when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt4_turbo', 'gpt4o', 'gpt4o_audio', 'gpt4o_mini',
                'gpt4o_mini_audio', 'gpt4o_mini_realtime', 'gpt4o_realtime', 'gpt4o_search',
@@ -95,14 +127,15 @@ module RubyLLM
           when 'gpt4o_mini_transcribe' then 16_000
           when 'o1', 'o1_pro', 'o3_mini' then 200_000
           when 'gpt35_turbo' then 16_385
-          when 'gpt4o_mini_tts', 'tts1', 'tts1_hd', 'whisper', 'moderation',
-               'embedding3_large', 'embedding3_small', 'embedding_ada' then nil
           else 4_096
           end
         end
         def max_tokens_for(model_id)
-          case model_family(model_id)
+          family = model_family(model_id)
+          return nil if NIL_LIMIT_FAMILIES.include?(family)
+          case family
           when 'gpt5', 'gpt5_mini', 'gpt5_nano' then 400_000
           when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 32_768
           when 'gpt4' then 8_192
@@ -110,8 +143,6 @@ module RubyLLM
           when 'gpt4o_mini_transcribe' then 2_000
           when 'o1', 'o1_pro', 'o3_mini' then 100_000
           when 'o1_mini' then 65_536
-          when 'gpt4o_mini_tts', 'tts1', 'tts1_hd', 'whisper', 'moderation',
-               'embedding3_large', 'embedding3_small', 'embedding_ada' then nil
           else 16_384
           end
         end
@@ -126,13 +157,15 @@ module RubyLLM
         end
         def pricing_for(model_id)
+          return image_pricing_for(model_id) if image_model?(model_id)
           standard_pricing = {
             input_per_million: input_price_for(model_id),
             output_per_million: output_price_for(model_id)
           }
           cached_price = cached_input_price_for(model_id)
-          standard_pricing[:cached_input_per_million] = cached_price if cached_price
+          standard_pricing[:cache_read_input_per_million] = cached_price if cached_price
           { text_tokens: { standard: standard_pricing } }
         end
@@ -147,8 +180,9 @@ module RubyLLM
         def supports_vision?(model_id)
           case model_family(model_id)
-          when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'gpt4',
-               'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'moderation', 'gpt4o_search'
+          when 'gpt_image', 'gpt_image_mini', 'gpt_image15', 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini',
+               'gpt41_nano', 'gpt4', 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'moderation',
+               'gpt4o_search'
             true
           else
             false
@@ -176,27 +210,63 @@ module RubyLLM
         end
         def input_price_for(model_id)
+          return family_prices(model_id).dig(:text, :input) if image_model?(model_id)
           price_for(model_id, :input, 0.50)
         end
         def output_price_for(model_id)
+          return family_prices(model_id).dig(:text, :output) if image_model?(model_id)
           price_for(model_id, :output, 1.50)
         end
         def cached_input_price_for(model_id)
+          return family_prices(model_id).dig(:text, :cached_input) if image_model?(model_id)
           family = model_family(model_id).to_sym
           PRICES.fetch(family, {})[:cached_input]
         end
+        def image_model?(model_id)
+          %w[gpt_image gpt_image_mini gpt_image15].include?(model_family(model_id))
+        end
+        def image_pricing_for(model_id)
+          text_pricing = {
+            input_per_million: input_price_for(model_id)
+          }
+          cached_text_price = cached_input_price_for(model_id)
+          text_pricing[:cache_read_input_per_million] = cached_text_price if cached_text_price
+          image_pricing = {
+            input_per_million: family_prices(model_id).dig(:images, :input),
+            output_per_million: family_prices(model_id).dig(:images, :output)
+          }
+          cached_image_price = family_prices(model_id).dig(:images, :cached_input)
+          image_pricing[:cache_read_input_per_million] = cached_image_price if cached_image_price
+          {
+            text_tokens: { standard: text_pricing },
+            images: { standard: image_pricing }
+          }
+        end
         def price_for(model_id, key, fallback)
-          family = model_family(model_id).to_sym
-          prices = PRICES.fetch(family, { key => fallback })
+          prices = family_prices(model_id)
+          prices = { key => fallback } if prices.empty?
           prices[key] || prices[:price] || fallback
         end
+        def family_prices(model_id)
+          family = model_family(model_id).to_sym
+          PRICES.fetch(family, {})
+        end
         module_function :context_window_for, :max_tokens_for, :critical_capabilities_for, :pricing_for,
                         :model_family, :supports_vision?, :supports_functions?, :supports_structured_output?,
-                        :input_price_for, :output_price_for, :cached_input_price_for, :price_for
+                        :input_price_for, :output_price_for, :cached_input_price_for, :image_model?,
+                        :image_pricing_for, :price_for, :family_prices
       end
     end
   end

data/lib/ruby_llm/providers/openai/chat.rb CHANGED Viewed

@@ -61,8 +61,7 @@ module RubyLLM
           return unless message_data
           usage = data['usage'] || {}
-          cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
-          thinking_tokens = usage.dig('completion_tokens_details', 'reasoning_tokens')
+          thinking_tokens = thinking_tokens(usage)
           content, thinking_from_blocks = extract_content_and_thinking(message_data['content'])
           thinking_text = thinking_from_blocks || extract_thinking_text(message_data)
           thinking_signature = extract_thinking_signature(message_data)
@@ -72,27 +71,82 @@ module RubyLLM
             content: content,
             thinking: Thinking.build(text: thinking_text, signature: thinking_signature),
             tool_calls: parse_tool_calls(message_data['tool_calls']),
-            input_tokens: usage['prompt_tokens'],
-            output_tokens: usage['completion_tokens'],
-            cached_tokens: cached_tokens,
-            cache_creation_tokens: 0,
+            input_tokens: input_tokens(usage),
+            output_tokens: output_tokens(usage),
+            cached_tokens: cache_read_tokens(usage),
+            cache_creation_tokens: cache_write_tokens(usage),
             thinking_tokens: thinking_tokens,
             model_id: data['model'],
             raw: response
           )
         end
+        def input_tokens(usage)
+          return usage['prompt_cache_miss_tokens'] if usage['prompt_cache_miss_tokens']
+          prompt_tokens = usage['prompt_tokens']
+          return unless prompt_tokens
+          [prompt_tokens.to_i - cache_read_tokens(usage).to_i - cache_write_tokens(usage).to_i, 0].max
+        end
+        def output_tokens(usage)
+          completion_tokens = usage['completion_tokens']
+          return unless completion_tokens
+          completion_tokens = completion_tokens.to_i
+          generated_tokens = generated_tokens_from_total(usage)
+          return completion_tokens unless generated_tokens && generated_tokens > completion_tokens
+          generated_tokens
+        end
+        def generated_tokens_from_total(usage)
+          prompt_tokens = usage['prompt_tokens']
+          total_tokens = usage['total_tokens']
+          return unless prompt_tokens && total_tokens
+          [total_tokens.to_i - prompt_tokens.to_i, 0].max
+        end
+        def cache_read_tokens(usage)
+          usage.dig('prompt_tokens_details', 'cached_tokens') || usage['prompt_cache_hit_tokens']
+        end
+        def cache_write_tokens(usage)
+          usage.dig('prompt_tokens_details', 'cache_write_tokens') || 0
+        end
+        def thinking_tokens(usage)
+          usage.dig('completion_tokens_details', 'reasoning_tokens') || usage['reasoning_tokens']
+        end
         def format_messages(messages)
           messages.map do |msg|
             {
               role: format_role(msg.role),
-              content: Media.format_content(msg.content),
+              content: format_message_content(msg),
               tool_calls: format_tool_calls(msg.tool_calls),
               tool_call_id: msg.tool_call_id
             }.compact.merge(format_thinking(msg))
           end
         end
+        def format_message_content(msg)
+          content = format_content(msg.content)
+          return '' if content.nil? && thinking_only_assistant_message?(msg)
+          content
+        end
+        def thinking_only_assistant_message?(msg)
+          msg.role == :assistant && msg.thinking && !msg.tool_call?
+        end
+        def format_content(content)
+          Media.format_content(content)
+        end
         def format_role(role)
           case role
           when :system

data/lib/ruby_llm/providers/openai/images.rb CHANGED Viewed

@@ -1,5 +1,8 @@
 # frozen_string_literal: true
+require 'faraday'
+require 'stringio'
 module RubyLLM
   module Providers
     class OpenAI
@@ -7,30 +10,79 @@ module RubyLLM
       module Images
         module_function
-        def images_url
-          'images/generations'
+        def images_url(with: nil, mask: nil)
+          editing?(with, mask) ? 'images/edits' : 'images/generations'
         end
-        def render_image_payload(prompt, model:, size:)
+        def render_image_payload(prompt, model:, size:, with: nil, mask: nil, params: {}) # rubocop:disable Metrics/ParameterLists
+          return render_edit_payload(prompt, model:, with:, mask:, params:) if editing?(with, mask)
           {
             model: model,
             prompt: prompt,
             n: 1,
             size: size
-          }
+          }.merge(params)
         end
         def parse_image_response(response, model:)
           data = response.body
-          image_data = data['data'].first
+          image_data = Array(data['data']).first
+          raise Error.new(nil, 'Unexpected response format from OpenAI image API') unless image_data
           Image.new(
             url: image_data['url'],
             mime_type: 'image/png', # DALL-E typically returns PNGs
             revised_prompt: image_data['revised_prompt'],
             model_id: model,
-            data: image_data['b64_json']
+            data: image_data['b64_json'],
+            usage: data['usage'] || {}
+          )
+        end
+        def validate_paint_inputs!(with:, mask:)
+          return unless editing?(with, mask)
+          raise ArgumentError, 'with: is required when mask: is provided' if mask && !attachments?(with)
+        end
+        def render_edit_payload(prompt, model:, with:, mask:, params:)
+          payload = params.merge(
+            model: model,
+            prompt: prompt,
+            image: build_upload_parts(with),
+            n: 1
           )
+          payload[:mask] = build_upload_part(mask) if mask
+          payload
+        end
+        def build_upload_parts(sources)
+          Array(sources).filter_map do |source|
+            next if blank_attachment?(source)
+            build_upload_part(source)
+          end
+        end
+        def build_upload_part(source)
+          attachment = Attachment.new(source)
+          raise UnsupportedAttachmentError, attachment.mime_type unless attachment.image?
+          Faraday::UploadIO.new(StringIO.new(attachment.content), attachment.mime_type, attachment.filename)
+        end
+        def editing?(with, mask)
+          attachments?(with) || !mask.nil?
+        end
+        def attachments?(value)
+          Array(value).any? { |item| !blank_attachment?(item) }
+        end
+        def blank_attachment?(value)
+          value.nil? || (value.is_a?(String) && value.strip.empty?)
         end
       end
     end

data/lib/ruby_llm/providers/openai/media.rb CHANGED Viewed

@@ -7,7 +7,7 @@ module RubyLLM
       module Media
         module_function
-        def format_content(content) # rubocop:disable Metrics/PerceivedComplexity
+        def format_content(content, document_attachments: :pdf, image_attachments: true, audio_attachments: true)
           if content.is_a?(RubyLLM::Content::Raw)
             value = content.value
             return value.is_a?(Hash) ? value.to_json : value
@@ -19,23 +19,36 @@ module RubyLLM
           parts << format_text(content.text) if content.text
           content.attachments.each do |attachment|
-            case attachment.type
-            when :image
-              parts << format_image(attachment)
-            when :pdf
-              parts << format_pdf(attachment)
-            when :audio
-              parts << format_audio(attachment)
-            when :text
-              parts << format_text_file(attachment)
-            else
-              raise UnsupportedAttachmentError, attachment.type
-            end
+            parts << format_attachment(
+              attachment,
+              document_attachments:,
+              image_attachments:,
+              audio_attachments:
+            )
           end
           parts
         end
+        def format_attachment(attachment, document_attachments:, image_attachments:, audio_attachments:)
+          case attachment.type
+          when :image
+            raise UnsupportedAttachmentError, attachment.mime_type unless image_attachments
+            format_image(attachment)
+          when :audio
+            raise UnsupportedAttachmentError, attachment.mime_type unless audio_attachments
+            format_audio(attachment)
+          when :pdf, :document
+            format_document_attachment(attachment, document_attachments)
+          when :text
+            format_text_file(attachment)
+          else
+            raise UnsupportedAttachmentError, attachment.mime_type
+          end
+        end
         def format_image(image)
           {
             type: 'image_url',
@@ -45,16 +58,20 @@ module RubyLLM
           }
         end
-        def format_pdf(pdf)
+        def format_document(document)
           {
             type: 'file',
             file: {
-              filename: pdf.filename,
-              file_data: pdf.for_llm
+              filename: document.filename,
+              file_data: document.for_llm
             }
           }
         end
+        def format_pdf(pdf)
+          format_document(pdf)
+        end
         def format_text_file(text_file)
           {
             type: 'text',
@@ -78,6 +95,13 @@ module RubyLLM
             text: text
           }
         end
+        def format_document_attachment(attachment, strategy)
+          return format_document(attachment) if strategy == :all
+          return format_document(attachment) if strategy == :pdf && attachment.pdf?
+          raise UnsupportedAttachmentError, attachment.mime_type
+        end
       end
     end
   end

data/lib/ruby_llm/providers/openai/streaming.rb CHANGED Viewed

@@ -1,5 +1,7 @@
 # frozen_string_literal: true
+require 'json'
 module RubyLLM
   module Providers
     class OpenAI
@@ -13,7 +15,6 @@ module RubyLLM
         def build_chunk(data)
           usage = data['usage'] || {}
-          cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
           delta = data.dig('choices', 0, 'delta') || {}
           content_source = delta['content'] || data.dig('choices', 0, 'message', 'content')
           content, thinking_from_blocks = OpenAI::Chat.extract_content_and_thinking(content_source)
@@ -27,11 +28,11 @@ module RubyLLM
               signature: delta['reasoning_signature']
             ),
             tool_calls: parse_tool_calls(delta['tool_calls'], parse_arguments: false),
-            input_tokens: usage['prompt_tokens'],
-            output_tokens: usage['completion_tokens'],
-            cached_tokens: cached_tokens,
-            cache_creation_tokens: 0,
-            thinking_tokens: usage.dig('completion_tokens_details', 'reasoning_tokens')
+            input_tokens: OpenAI::Chat.input_tokens(usage),
+            output_tokens: OpenAI::Chat.output_tokens(usage),
+            cached_tokens: OpenAI::Chat.cache_read_tokens(usage),
+            cache_creation_tokens: OpenAI::Chat.cache_write_tokens(usage),
+            thinking_tokens: OpenAI::Chat.thinking_tokens(usage)
           )
         end

data/lib/ruby_llm/providers/openai/tools.rb CHANGED Viewed

@@ -1,5 +1,7 @@
 # frozen_string_literal: true
+require 'json'
 module RubyLLM
   module Providers
     class OpenAI

data/lib/ruby_llm/providers/openai/transcription.rb CHANGED Viewed

@@ -60,6 +60,7 @@ module RubyLLM
             language: data['language'],
             duration: data['duration'],
             segments: data['segments'],
+            words: data['words'],
             input_tokens: usage['input_tokens'] || usage['prompt_tokens'],
             output_tokens: usage['output_tokens'] || usage['completion_tokens']
           )

data/lib/ruby_llm/providers/openrouter/chat.rb CHANGED Viewed

@@ -52,7 +52,7 @@ module RubyLLM
         def parse_completion_response(response)
           data = response.body
-          return if data.empty?
+          return if data.nil? || data.empty?
           raise Error.new(response, data.dig('error', 'message')) if data.dig('error', 'message')
@@ -60,8 +60,7 @@ module RubyLLM
           return unless message_data
           usage = data['usage'] || {}
-          cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
-          thinking_tokens = usage.dig('completion_tokens_details', 'reasoning_tokens')
+          thinking_tokens = thinking_tokens(usage)
           thinking_text = extract_thinking_text(message_data)
           thinking_signature = extract_thinking_signature(message_data)
@@ -70,27 +69,56 @@ module RubyLLM
             content: message_data['content'],
             thinking: Thinking.build(text: thinking_text, signature: thinking_signature),
             tool_calls: OpenAI::Tools.parse_tool_calls(message_data['tool_calls']),
-            input_tokens: usage['prompt_tokens'],
-            output_tokens: usage['completion_tokens'],
-            cached_tokens: cached_tokens,
-            cache_creation_tokens: 0,
+            input_tokens: input_tokens(usage),
+            output_tokens: output_tokens(usage),
+            cached_tokens: cache_read_tokens(usage),
+            cache_creation_tokens: cache_write_tokens(usage),
             thinking_tokens: thinking_tokens,
             model_id: data['model'],
             raw: response
           )
         end
+        def input_tokens(usage)
+          return usage['prompt_cache_miss_tokens'] if usage['prompt_cache_miss_tokens']
+          prompt_tokens = usage['prompt_tokens']
+          return unless prompt_tokens
+          [prompt_tokens.to_i - cache_read_tokens(usage).to_i - cache_write_tokens(usage).to_i, 0].max
+        end
+        def output_tokens(usage)
+          OpenAI::Chat.output_tokens(usage)
+        end
+        def cache_read_tokens(usage)
+          usage.dig('prompt_tokens_details', 'cached_tokens') || usage['prompt_cache_hit_tokens']
+        end
+        def cache_write_tokens(usage)
+          usage.dig('prompt_tokens_details', 'cache_write_tokens') || 0
+        end
+        def thinking_tokens(usage)
+          OpenAI::Chat.thinking_tokens(usage)
+        end
         def format_messages(messages)
           messages.map do |msg|
             {
               role: format_role(msg.role),
-              content: OpenAI::Media.format_content(msg.content),
+              content: format_content(msg.content),
               tool_calls: OpenAI::Tools.format_tool_calls(msg.tool_calls),
               tool_call_id: msg.tool_call_id
             }.compact.merge(format_thinking(msg))
           end
         end
+        def format_content(content)
+          OpenAI::Media.format_content(content)
+        end
         def format_role(role)
           case role
           when :system

data/lib/ruby_llm/providers/openrouter/images.rb CHANGED Viewed

@@ -9,11 +9,11 @@ module RubyLLM
       module Images
         module_function
-        def images_url
+        def images_url(with: nil, mask: nil) # rubocop:disable Lint/UnusedMethodArgument
           'chat/completions'
         end
-        def render_image_payload(prompt, model:, size:)
+        def render_image_payload(prompt, model:, size:, with: nil, mask: nil, params: {}) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
           RubyLLM.logger.debug { "Ignoring size #{size}. OpenRouter image generation does not support size parameter." }
           {
             model: model,

data/lib/ruby_llm/providers/openrouter/models.rb CHANGED Viewed

@@ -23,7 +23,7 @@ module RubyLLM
             pricing_types = {
               prompt: :input_per_million,
               completion: :output_per_million,
-              input_cache_read: :cached_input_per_million,
+              input_cache_read: :cache_read_input_per_million,
               internal_reasoning: :reasoning_output_per_million
             }

data/lib/ruby_llm/providers/openrouter/streaming.rb CHANGED Viewed

@@ -13,7 +13,6 @@ module RubyLLM
         def build_chunk(data)
           usage = data['usage'] || {}
-          cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
           delta = data.dig('choices', 0, 'delta') || {}
           Chunk.new(
@@ -25,11 +24,11 @@ module RubyLLM
               signature: extract_thinking_signature(delta)
             ),
             tool_calls: OpenAI::Tools.parse_tool_calls(delta['tool_calls'], parse_arguments: false),
-            input_tokens: usage['prompt_tokens'],
-            output_tokens: usage['completion_tokens'],
-            cached_tokens: cached_tokens,
-            cache_creation_tokens: 0,
-            thinking_tokens: usage.dig('completion_tokens_details', 'reasoning_tokens')
+            input_tokens: OpenRouter::Chat.input_tokens(usage),
+            output_tokens: OpenRouter::Chat.output_tokens(usage),
+            cached_tokens: OpenRouter::Chat.cache_read_tokens(usage),
+            cache_creation_tokens: OpenRouter::Chat.cache_write_tokens(usage),
+            thinking_tokens: OpenRouter::Chat.thinking_tokens(usage)
           )
         end

data/lib/ruby_llm/providers/perplexity/chat.rb CHANGED Viewed

@@ -10,6 +10,17 @@ module RubyLLM
         def format_role(role)
           role.to_s
         end
+        def format_messages(messages)
+          messages.map do |msg|
+            {
+              role: format_role(msg.role),
+              content: Perplexity::Media.format_content(msg.content),
+              tool_calls: OpenAI::Tools.format_tool_calls(msg.tool_calls),
+              tool_call_id: msg.tool_call_id
+            }.compact.merge(OpenAI::Chat.format_thinking(msg))
+          end
+        end
       end
     end
   end