ruby_llm_community 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/ruby_llm/chat.rb +7 -0
- data/lib/ruby_llm/configuration.rb +0 -2
- data/lib/ruby_llm/message.rb +7 -2
- data/lib/ruby_llm/models.rb +0 -4
- data/lib/ruby_llm/provider.rb +3 -1
- data/lib/ruby_llm/providers/anthropic/chat.rb +34 -20
- data/lib/ruby_llm/providers/anthropic/media.rb +70 -46
- data/lib/ruby_llm/providers/anthropic/models.rb +8 -0
- data/lib/ruby_llm/providers/anthropic/streaming.rb +2 -0
- data/lib/ruby_llm/providers/bedrock/chat.rb +21 -10
- data/lib/ruby_llm/providers/bedrock/media.rb +37 -25
- data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +8 -0
- data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +2 -0
- data/lib/ruby_llm/providers/gemini/chat.rb +9 -2
- data/lib/ruby_llm/providers/gemini/streaming.rb +9 -1
- data/lib/ruby_llm/providers/mistral/chat.rb +1 -1
- data/lib/ruby_llm/providers/openai/chat.rb +2 -1
- data/lib/ruby_llm/providers/openai/response.rb +2 -1
- data/lib/ruby_llm/providers/openai/streaming.rb +2 -1
- data/lib/ruby_llm/providers/openai.rb +3 -3
- data/lib/ruby_llm/stream_accumulator.rb +6 -0
- data/lib/ruby_llm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 2a8638f12f0c6d0e811f078bdf311d3dde1bc969f6881ed64cf1a5133256b574
+  data.tar.gz: 4bba1ef73b4624fca8ef83c4cd661d9bdb3a15f209da11a3a962cab24b4fdb80
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e083eaa68a50b78854d780b3ddd2dc2467652aceb62fd9633b1b9a75f48b89bb8ad885800f65cb376b4f2997336284d27baf3508f3fe2bbfe920cac064f85df5
+  data.tar.gz: c068573609da8e4755201536aea1084b08b819e6d928c8624090f3a20133e253a86688d4ad5db206254be86c5795d66e3f12ffe945e308dc5cf24af479c8d403
data/README.md
CHANGED
@@ -6,7 +6,7 @@
 **One *beautiful* Ruby API for GPT, Claude, Gemini, and more.** Easily build chatbots, AI agents, RAG applications, and content generators. Features chat (text, images, audio, PDFs), image generation, embeddings, tools (function calling), structured output, Rails integration, and streaming. Works with OpenAI, Anthropic, Google Gemini, AWS Bedrock, DeepSeek, Mistral, Ollama (local models), OpenRouter, Perplexity, GPUStack, and any OpenAI-compatible API.
 
 <div class="badge-container">
-  <a href="https://badge.fury.io/rb/ruby_llm"><img src="https://badge.fury.io/rb/ruby_llm.svg?a=
+  <a href="https://badge.fury.io/rb/ruby_llm"><img src="https://badge.fury.io/rb/ruby_llm.svg?a=5" alt="Gem Version" /></a>
   <a href="https://github.com/testdouble/standard"><img src="https://img.shields.io/badge/code_style-standard-brightgreen.svg" alt="Ruby Style Guide" /></a>
   <a href="https://rubygems.org/gems/ruby_llm"><img alt="Gem Downloads" src="https://img.shields.io/gem/dt/ruby_llm"></a>
   <a href="https://codecov.io/gh/crmne/ruby_llm"><img src="https://codecov.io/gh/crmne/ruby_llm/branch/main/graph/badge.svg" alt="codecov" /></a>
data/lib/ruby_llm/chat.rb
CHANGED
@@ -25,6 +25,7 @@ module RubyLLM
       @temperature = 0.7
       @messages = []
       @tools = {}
+      @cache_prompts = { system: false, user: false, tools: false }
       @params = {}
       @headers = {}
       @schema = nil
@@ -127,12 +128,18 @@ module RubyLLM
       messages.each(&)
     end
 
+    def cache_prompts(system: false, user: false, tools: false)
+      @cache_prompts = { system: system, user: user, tools: tools }
+      self
+    end
+
     def complete(&) # rubocop:disable Metrics/PerceivedComplexity
       response = @provider.complete(
         messages,
         tools: @tools,
         temperature: @temperature,
         model: @model.id,
+        cache_prompts: @cache_prompts.dup,
         params: @params,
         headers: @headers,
         schema: @schema,
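For orientation, a hedged sketch of how the new chainable cache_prompts API might be used from a chat. RubyLLM.chat, with_instructions, and ask are existing RubyLLM chat methods; the model name, file path, and behavior of the cached-token readers are assumptions for illustration, not taken from this diff.

    # Hypothetical usage of the cache_prompts flags added above.
    long_system_prompt = File.read('docs/big_context.md')   # placeholder path
    chat = RubyLLM.chat(model: 'claude-3-5-sonnet')          # placeholder model name
                  .with_instructions(long_system_prompt)
                  .cache_prompts(system: true, user: true, tools: true)

    reply = chat.ask('Answer using the cached context above.')
    reply.cached_tokens          # tokens served from the provider's prompt cache, if any
    reply.cache_creation_tokens  # tokens written to the cache on this call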
data/lib/ruby_llm/configuration.rb
CHANGED
@@ -44,7 +44,6 @@ module RubyLLM
       :logger,
       :log_file,
       :log_level,
-      :log_assume_model_exists,
       :log_stream_debug
 
     def initialize
@@ -64,7 +63,6 @@ module RubyLLM
       # Logging configuration
       @log_file = $stdout
       @log_level = ENV['RUBYLLM_DEBUG'] ? Logger::DEBUG : Logger::INFO
-      @log_assume_model_exists = true
       @log_stream_debug = ENV['RUBYLLM_STREAM_DEBUG'] == 'true'
     end
 
data/lib/ruby_llm/message.rb
CHANGED
@@ -7,7 +7,8 @@ module RubyLLM
   class Message
     ROLES = %i[system user assistant tool].freeze
 
-    attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :raw
+    attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :raw,
+                :cached_tokens, :cache_creation_tokens
     attr_writer :content
 
     def initialize(options = {})
@@ -18,6 +19,8 @@ module RubyLLM
       @output_tokens = options[:output_tokens]
       @model_id = options[:model_id]
       @tool_call_id = options[:tool_call_id]
+      @cached_tokens = options[:cached_tokens]
+      @cache_creation_tokens = options[:cache_creation_tokens]
       @raw = options[:raw]
 
       ensure_valid_role
@@ -51,7 +54,9 @@ module RubyLLM
       tool_call_id: tool_call_id,
       input_tokens: input_tokens,
       output_tokens: output_tokens,
-      model_id: model_id
+      model_id: model_id,
+      cache_creation_tokens: cache_creation_tokens,
+      cached_tokens: cached_tokens
     }.compact
   end
 
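A small, self-contained sketch of the new token-accounting fields on RubyLLM::Message; the option values below are invented for illustration.

    # The two new readers and to_h keys added above, with made-up numbers.
    msg = RubyLLM::Message.new(
      role: :assistant,
      content: 'Hello!',
      input_tokens: 1200,
      output_tokens: 42,
      cached_tokens: 1024,         # read back from the provider's prompt cache
      cache_creation_tokens: 0     # nothing new written to the cache
    )

    msg.cached_tokens        # => 1024
    msg.to_h[:cached_tokens] # => 1024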
data/lib/ruby_llm/models.rb
CHANGED
@@ -70,10 +70,6 @@ module RubyLLM
         modalities: { input: %w[text image], output: %w[text] },
         metadata: { warning: 'Assuming model exists, capabilities may not be accurate' }
       )
-      if RubyLLM.config.log_assume_model_exists
-        RubyLLM.logger.warn "Assuming model '#{model_id}' exists for provider '#{provider}'. " \
-                            'Capabilities may not be accurately reflected.'
-      end
     else
       model = Models.find model_id, provider
       provider_class = Provider.providers[model.provider.to_sym] || raise(Error,
data/lib/ruby_llm/provider.rb
CHANGED
@@ -40,7 +40,8 @@ module RubyLLM
       self.class.configuration_requirements
     end
 
-    def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil,
+    def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, # rubocop:disable Metrics/ParameterLists
+                 cache_prompts: { system: false, user: false, tools: false }, &)
       normalized_temperature = maybe_normalize_temperature(temperature, model)
 
       payload = Utils.deep_merge(
@@ -50,6 +51,7 @@ module RubyLLM
           tools: tools,
           temperature: normalized_temperature,
           model: model,
+          cache_prompts: cache_prompts,
           stream: block_given?,
           schema: schema
         )
data/lib/ruby_llm/providers/anthropic/chat.rb
CHANGED
@@ -11,12 +11,14 @@ module RubyLLM
       '/v1/messages'
     end
 
-    def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil
+    def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
+                       cache_prompts: { system: false, user: false, tools: false })
       system_messages, chat_messages = separate_messages(messages)
-      system_content = build_system_content(system_messages)
+      system_content = build_system_content(system_messages, cache: cache_prompts[:system])
 
-      build_base_payload(chat_messages, model, stream).tap do |payload|
-        add_optional_fields(payload, system_content:, tools:, temperature
+      build_base_payload(chat_messages, model, stream, cache: cache_prompts[:user]).tap do |payload|
+        add_optional_fields(payload, system_content:, tools:, temperature:,
+                            cache_tools: cache_prompts[:tools])
       end
     end
 
@@ -24,28 +24,34 @@ module RubyLLM
       messages.partition { |msg| msg.role == :system }
     end
 
-    def build_system_content(system_messages)
-
-
-
-        'Multiple system messages will be combined into one.'
-      )
+    def build_system_content(system_messages, cache: false)
+      system_messages.flat_map.with_index do |msg, idx|
+        message_cache = cache if idx == system_messages.size - 1
+        format_system_message(msg, cache: message_cache)
       end
-
-      system_messages.map(&:content).join("\n\n")
     end
 
-    def build_base_payload(chat_messages, model, stream)
+    def build_base_payload(chat_messages, model, stream, cache: false)
+      messages = chat_messages.map.with_index do |msg, idx|
+        message_cache = cache if idx == chat_messages.size - 1
+        format_message(msg, cache: message_cache)
+      end
+
       {
         model: model,
-        messages
+        messages:,
         stream: stream,
        max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096
       }
     end
 
-    def add_optional_fields(payload, system_content:, tools:, temperature:)
-
+    def add_optional_fields(payload, system_content:, tools:, temperature:, cache_tools: false)
+      if tools.any?
+        tool_definitions = tools.values.map { |t| Tools.function_for(t) }
+        tool_definitions[-1][:cache_control] = { type: 'ephemeral' } if cache_tools
+        payload[:tools] = tool_definitions
+      end
+
       payload[:system] = system_content unless system_content.empty?
       payload[:temperature] = temperature unless temperature.nil?
     end
@@ -73,24 +81,30 @@ module RubyLLM
       input_tokens: data.dig('usage', 'input_tokens'),
       output_tokens: data.dig('usage', 'output_tokens'),
       model_id: data['model'],
+      cache_creation_tokens: data.dig('usage', 'cache_creation_input_tokens'),
+      cached_tokens: data.dig('usage', 'cache_read_input_tokens'),
       raw: response
     )
   end
 
-    def format_message(msg)
+    def format_message(msg, cache: false)
      if msg.tool_call?
        Tools.format_tool_call(msg)
      elsif msg.tool_result?
        Tools.format_tool_result(msg)
      else
-        format_basic_message(msg)
+        format_basic_message(msg, cache:)
      end
    end
 
-    def
+    def format_system_message(msg, cache: false)
+      Media.format_content(msg.content, cache:)
+    end
+
+    def format_basic_message(msg, cache: false)
      {
        role: convert_role(msg.role),
-        content: Media.format_content(msg.content)
+        content: Media.format_content(msg.content, cache:)
      }
    end
 
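To make the effect concrete, here is an illustrative sketch (not a captured request) of the Anthropic payload shape the updated render_payload would produce with cache_prompts set to { system: true, user: true, tools: true }: the last system block, the content of the last chat message, and the last tool definition carry the ephemeral cache_control marker. All field values and the tool shape below are invented for illustration.

    {
      model: 'claude-3-5-sonnet',
      system: [
        { type: 'text', text: 'Long system prompt...', cache_control: { type: 'ephemeral' } }
      ],
      messages: [
        { role: 'user', content: [{ type: 'text', text: 'Earlier question' }] },
        { role: 'assistant', content: [{ type: 'text', text: 'Earlier answer' }] },
        { role: 'user',
          content: [{ type: 'text', text: 'Latest question', cache_control: { type: 'ephemeral' } }] }
      ],
      tools: [
        { name: 'search', description: 'Example tool', input_schema: { type: 'object' },
          cache_control: { type: 'ephemeral' } }
      ],
      stream: false,
      max_tokens: 4096
    }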
data/lib/ruby_llm/providers/anthropic/media.rb
CHANGED
@@ -7,13 +7,13 @@ module RubyLLM
   module Media
     module_function
 
-    def format_content(content)
+    def format_content(content, cache: false)
       # Convert Hash/Array back to JSON string for API
-      return [format_text(content.to_json)] if content.is_a?(Hash) || content.is_a?(Array)
-      return [format_text(content)] unless content.is_a?(Content)
+      return [format_text(content.to_json, cache:)] if content.is_a?(Hash) || content.is_a?(Array)
+      return [format_text(content, cache:)] unless content.is_a?(Content)
 
       parts = []
-      parts << format_text(content.text) if content.text
+      parts << format_text(content.text, cache:) if content.text
 
       content.attachments.each do |attachment|
         case attachment.type
@@ -31,60 +31,84 @@ module RubyLLM
       parts
     end
 
-    def format_text(text)
-
-
-
-
+    def format_text(text, cache: false)
+      with_cache_control(
+        {
+          type: 'text',
+          text: text
+        },
+        cache:
+      )
     end
 
-    def format_image(image)
+    def format_image(image, cache: false)
       if image.url?
-
-
-
-
-
-
-
+        with_cache_control(
+          {
+            type: 'image',
+            source: {
+              type: 'url',
+              url: image.source
+            }
+          },
+          cache:
+        )
       else
-
-
-
-
-
-
-
-
+        with_cache_control(
+          {
+            type: 'image',
+            source: {
+              type: 'base64',
+              media_type: image.mime_type,
+              data: image.encoded
+            }
+          },
+          cache:
+        )
       end
     end
 
-    def format_pdf(pdf)
+    def format_pdf(pdf, cache: false)
       if pdf.url?
-
-
-
-
-
-
-
+        with_cache_control(
+          {
+            type: 'document',
+            source: {
+              type: 'url',
+              url: pdf.source
+            }
+          },
+          cache:
+        )
       else
-
-
-
-
-
-
-
-
+        with_cache_control(
+          {
+            type: 'document',
+            source: {
+              type: 'base64',
+              media_type: pdf.mime_type,
+              data: pdf.encoded
+            }
+          },
+          cache:
+        )
       end
     end
 
-    def format_text_file(text_file)
-
-
-
-
+    def format_text_file(text_file, cache: false)
+      with_cache_control(
+        {
+          type: 'text',
+          text: Utils.format_text_file_for_llm(text_file)
+        },
+        cache:
+      )
+    end
+
+    def with_cache_control(hash, cache: false)
+      return hash unless cache
+
+      hash.merge(cache_control: { type: 'ephemeral' })
     end
   end
 end
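The new helper is easy to exercise on its own; a self-contained sketch of the with_cache_control behavior added above (plain Ruby, runnable without the gem):

    # Mirrors the helper above: the cache flag merges Anthropic's ephemeral
    # cache_control marker into a content block, otherwise returns it untouched.
    def with_cache_control(hash, cache: false)
      return hash unless cache

      hash.merge(cache_control: { type: 'ephemeral' })
    end

    with_cache_control({ type: 'text', text: 'hi' })
    # => { type: 'text', text: 'hi' }
    with_cache_control({ type: 'text', text: 'hi' }, cache: true)
    # => { type: 'text', text: 'hi', cache_control: { type: 'ephemeral' } }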
data/lib/ruby_llm/providers/anthropic/models.rb
CHANGED
@@ -42,6 +42,14 @@ module RubyLLM
     def extract_output_tokens(data)
       data.dig('message', 'usage', 'output_tokens') || data.dig('usage', 'output_tokens')
     end
+
+    def extract_cached_tokens(data)
+      data.dig('message', 'usage', 'cache_read_input_tokens')
+    end
+
+    def extract_cache_creation_tokens(data)
+      data.dig('message', 'usage', 'cache_creation_input_tokens')
+    end
   end
  end
 end
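A quick self-contained check of the two new extractors against a hand-written event hash whose keys follow the dig paths above (the numbers are invented, not from a real response):

    event = {
      'message' => {
        'usage' => {
          'input_tokens' => 12,
          'cache_read_input_tokens' => 2048,
          'cache_creation_input_tokens' => 0
        }
      }
    }

    event.dig('message', 'usage', 'cache_read_input_tokens')      # => 2048
    event.dig('message', 'usage', 'cache_creation_input_tokens')  # => 0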
data/lib/ruby_llm/providers/anthropic/streaming.rb
CHANGED
@@ -18,6 +18,8 @@ module RubyLLM
       content: data.dig('delta', 'text'),
       input_tokens: extract_input_tokens(data),
       output_tokens: extract_output_tokens(data),
+      cached_tokens: extract_cached_tokens(data),
+      cache_creation_tokens: extract_cache_creation_tokens(data),
       tool_calls: extract_tool_calls(data)
     )
   end
data/lib/ruby_llm/providers/bedrock/chat.rb
CHANGED
@@ -17,20 +17,20 @@ module RubyLLM
       Anthropic::Chat.parse_completion_response response
     end
 
-    def format_message(msg)
+    def format_message(msg, cache: false)
       if msg.tool_call?
         Anthropic::Tools.format_tool_call(msg)
       elsif msg.tool_result?
         Anthropic::Tools.format_tool_result(msg)
       else
-        format_basic_message(msg)
+        format_basic_message(msg, cache:)
       end
     end
 
-    def format_basic_message(msg)
+    def format_basic_message(msg, cache: false)
       {
         role: Anthropic::Chat.convert_role(msg.role),
-        content: Media.format_content(msg.content)
+        content: Media.format_content(msg.content, cache:)
       }
     end
 
@@ -40,22 +40,33 @@ module RubyLLM
       "model/#{@model_id}/invoke"
     end
 
-    def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil
+    def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
+                       cache_prompts: { system: false, user: false, tools: false })
       # Hold model_id in instance variable for use in completion_url and stream_url
       @model_id = model
 
       system_messages, chat_messages = Anthropic::Chat.separate_messages(messages)
-      system_content = Anthropic::Chat.build_system_content(system_messages)
+      system_content = Anthropic::Chat.build_system_content(system_messages, cache: cache_prompts[:system])
 
-      build_base_payload(chat_messages, model).tap do |payload|
-        Anthropic::Chat.add_optional_fields(
+      build_base_payload(chat_messages, model, cache: cache_prompts[:user]).tap do |payload|
+        Anthropic::Chat.add_optional_fields(
+          payload,
+          system_content:,
+          tools:,
+          temperature:,
+          cache_tools: cache_prompts[:tools]
+        )
       end
     end
 
-    def build_base_payload(chat_messages, model)
+    def build_base_payload(chat_messages, model, cache: false)
+      messages = chat_messages.map.with_index do |msg, idx|
+        message_cache = cache if idx == chat_messages.size - 1
+        format_message(msg, cache: message_cache)
+      end
       {
         anthropic_version: 'bedrock-2023-05-31',
-        messages:
+        messages: messages,
         max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096
       }
     end
data/lib/ruby_llm/providers/bedrock/media.rb
CHANGED
@@ -10,22 +10,22 @@ module RubyLLM
 
     module_function
 
-    def format_content(content)
+    def format_content(content, cache: false)
       # Convert Hash/Array back to JSON string for API
-      return [Anthropic::Media.format_text(content.to_json)] if content.is_a?(Hash) || content.is_a?(Array)
-      return [Anthropic::Media.format_text(content)] unless content.is_a?(Content)
+      return [Anthropic::Media.format_text(content.to_json, cache:)] if content.is_a?(Hash) || content.is_a?(Array)
+      return [Anthropic::Media.format_text(content, cache:)] unless content.is_a?(Content)
 
       parts = []
-      parts << Anthropic::Media.format_text(content.text) if content.text
+      parts << Anthropic::Media.format_text(content.text, cache:) if content.text
 
       content.attachments.each do |attachment|
         case attachment.type
         when :image
-          parts << format_image(attachment)
+          parts << format_image(attachment, cache:)
         when :pdf
-          parts << format_pdf(attachment)
+          parts << format_pdf(attachment, cache:)
         when :text
-          parts << Anthropic::Media.format_text_file(attachment)
+          parts << Anthropic::Media.format_text_file(attachment, cache:)
         else
           raise UnsupportedAttachmentError, attachment.type
         end
@@ -34,26 +34,38 @@ module RubyLLM
       parts
     end
 
-    def format_image(image)
-
-
-
-
-
-
-
-
+    def format_image(image, cache: false)
+      with_cache_control(
+        {
+          type: 'image',
+          source: {
+            type: 'base64',
+            media_type: image.mime_type,
+            data: image.encoded
+          }
+        },
+        cache:
+      )
     end
 
-    def format_pdf(pdf)
-
-
-
-
-
-
-
-
+    def format_pdf(pdf, cache: false)
+      with_cache_control(
+        {
+          type: 'document',
+          source: {
+            type: 'base64',
+            media_type: pdf.mime_type,
+            data: pdf.encoded
+          }
+        },
+        cache:
+      )
+    end
+
+    def with_cache_control(hash, cache: false)
+      return hash unless cache
+
+      hash.merge(cache_control: { type: 'ephemeral' })
     end
   end
 end
data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb
CHANGED
@@ -39,6 +39,14 @@ module RubyLLM
       data.dig('message', 'usage', 'output_tokens') || data.dig('usage', 'output_tokens')
     end
 
+    def extract_cached_tokens(data)
+      data.dig('message', 'usage', 'cache_read_input_tokens')
+    end
+
+    def extract_cache_creation_tokens(data)
+      data.dig('message', 'usage', 'cache_creation_input_tokens')
+    end
+
     private
 
     def extract_content_by_type(data)
data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb
CHANGED
@@ -71,6 +71,8 @@ module RubyLLM
       content: extract_streaming_content(data),
       input_tokens: extract_input_tokens(data),
       output_tokens: extract_output_tokens(data),
+      cached_tokens: extract_cached_tokens(data),
+      cache_creation_tokens: extract_cache_creation_tokens(data),
       tool_calls: extract_tool_calls(data)
     }
   end
data/lib/ruby_llm/providers/gemini/chat.rb
CHANGED
@@ -11,7 +11,7 @@ module RubyLLM
       "models/#{@model}:generateContent"
     end
 
-    def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
+    def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, cache_prompts: {}) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
       @model = model # Store model for completion_url/stream_url
       payload = {
         contents: format_messages(messages),
@@ -80,7 +80,8 @@ module RubyLLM
       content: extract_content(data),
       tool_calls: tool_calls,
       input_tokens: data.dig('usageMetadata', 'promptTokenCount'),
-      output_tokens: data
+      output_tokens: calculate_output_tokens(data),
+      cached_tokens: data.dig('usageMetadata', 'cacheTokensDetails', 0, 'tokenCount') || 0,
       model_id: data['modelVersion'] || response.env.url.path.split('/')[3].split(':')[0],
       raw: response
     )
@@ -133,6 +134,12 @@ module RubyLLM
       parts = candidate.dig('content', 'parts')
       parts&.any? { |p| p['functionCall'] }
     end
+
+    def calculate_output_tokens(data)
+      candidates = data.dig('usageMetadata', 'candidatesTokenCount') || 0
+      thoughts = data.dig('usageMetadata', 'thoughtsTokenCount') || 0
+      candidates + thoughts
+    end
   end
  end
 end
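The new calculate_output_tokens folds Gemini's reported thinking tokens into the output count; a standalone check with invented usageMetadata values:

    data = { 'usageMetadata' => { 'candidatesTokenCount' => 150, 'thoughtsTokenCount' => 90 } }

    candidates = data.dig('usageMetadata', 'candidatesTokenCount') || 0
    thoughts   = data.dig('usageMetadata', 'thoughtsTokenCount') || 0
    candidates + thoughts # => 240, reported as the message's output_tokens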
data/lib/ruby_llm/providers/gemini/streaming.rb
CHANGED
@@ -16,6 +16,7 @@ module RubyLLM
       content: extract_content(data),
       input_tokens: extract_input_tokens(data),
       output_tokens: extract_output_tokens(data),
+      cached_tokens: extract_cached_tokens(data),
       tool_calls: extract_tool_calls(data)
     )
   end
@@ -42,7 +43,14 @@ module RubyLLM
     end
 
     def extract_output_tokens(data)
-      data.dig('usageMetadata', 'candidatesTokenCount')
+      candidates = data.dig('usageMetadata', 'candidatesTokenCount') || 0
+      thoughts = data.dig('usageMetadata', 'thoughtsTokenCount') || 0
+      total = candidates + thoughts
+      total.positive? ? total : nil
+    end
+
+    def extract_cached_tokens(data)
+      data.dig('usageMetadata', 'cachedContentTokenCount')
     end
 
     def parse_streaming_error(data)
data/lib/ruby_llm/providers/mistral/chat.rb
CHANGED
@@ -13,7 +13,7 @@ module RubyLLM
     end
 
     # rubocop:disable Metrics/ParameterLists
-    def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil)
+    def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, cache_prompts: {}) # rubocop:disable Metrics/ParameterLists
       payload = super
       # Mistral doesn't support stream_options
       payload.delete(:stream_options)
data/lib/ruby_llm/providers/openai/chat.rb
CHANGED
@@ -11,7 +11,7 @@ module RubyLLM
 
     module_function
 
-    def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists
+    def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, cache_prompts: {}) # rubocop:disable Lint/UnusedMethodArgument, Metrics/ParameterLists
       payload = {
         model: model,
         messages: format_messages(messages),
@@ -56,6 +56,7 @@ module RubyLLM
       tool_calls: parse_tool_calls(message_data['tool_calls']),
       input_tokens: data['usage']['prompt_tokens'],
       output_tokens: data['usage']['completion_tokens'],
+      cached_tokens: data.dig('usage', 'prompt_tokens_details', 'cached_tokens'),
       model_id: data['model'],
       raw: response
     )
data/lib/ruby_llm/providers/openai/response.rb
CHANGED
@@ -11,7 +11,7 @@ module RubyLLM
 
     module_function
 
-    def render_response_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists
+    def render_response_payload(messages, tools:, temperature:, model:, cache_prompts:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
       payload = {
         model: model,
         input: format_input(messages),
@@ -97,6 +97,7 @@ module RubyLLM
       tool_calls: parse_response_tool_calls(outputs),
       input_tokens: data['usage']['input_tokens'],
       output_tokens: data['usage']['output_tokens'],
+      cached_tokens: data.dig('usage', 'input_tokens_details', 'cached_tokens'),
       model_id: data['model'],
       raw: response
     )
data/lib/ruby_llm/providers/openai/streaming.rb
CHANGED
@@ -87,7 +87,8 @@ module RubyLLM
       content: data.dig('choices', 0, 'delta', 'content'),
       tool_calls: parse_tool_calls(data.dig('choices', 0, 'delta', 'tool_calls'), parse_arguments: false),
       input_tokens: data.dig('usage', 'prompt_tokens'),
-      output_tokens: data.dig('usage', 'completion_tokens')
+      output_tokens: data.dig('usage', 'completion_tokens'),
+      cached_tokens: data.dig('usage', 'cached_tokens')
     )
   end
 
data/lib/ruby_llm/providers/openai.rb
CHANGED
@@ -17,12 +17,12 @@ module RubyLLM
       end
     end
 
-    def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists
+    def render_payload(messages, tools:, temperature:, model:, cache_prompts:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists
       @using_responses_api = !audio_input?(messages)
 
       if @using_responses_api
-        render_response_payload(messages, tools: tools, temperature: temperature, model: model,
-                                schema:
+        render_response_payload(messages, tools: tools, temperature: temperature, model: model,
+                                cache_prompts:, stream:, schema:)
       else
         super
       end
data/lib/ruby_llm/stream_accumulator.rb
CHANGED
@@ -12,6 +12,8 @@ module RubyLLM
     @tool_calls = {}
     @input_tokens = 0
     @output_tokens = 0
+    @cached_tokens = 0
+    @cache_creation_tokens = 0
     @latest_tool_call_id = nil
   end
 
@@ -37,6 +39,8 @@ module RubyLLM
       tool_calls: tool_calls_from_stream,
       input_tokens: @input_tokens.positive? ? @input_tokens : nil,
      output_tokens: @output_tokens.positive? ? @output_tokens : nil,
+      cached_tokens: @cached_tokens.positive? ? @cached_tokens : nil,
+      cache_creation_tokens: @cache_creation_tokens.positive? ? @cache_creation_tokens : nil,
       raw: response
     )
   end
@@ -92,6 +96,8 @@ module RubyLLM
   def count_tokens(chunk)
     @input_tokens = chunk.input_tokens if chunk.input_tokens
     @output_tokens = chunk.output_tokens if chunk.output_tokens
+    @cached_tokens = chunk.cached_tokens if chunk.cached_tokens
+    @cache_creation_tokens = chunk.cache_creation_tokens if chunk.cache_creation_tokens
   end
  end
 end
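A hedged sketch of how the accumulator surfaces the new counters during streaming. The block form of ask is existing RubyLLM behavior; the model name, prompt, and numbers are placeholders, and the counters are nil when no provider reported cache activity.

    # Hypothetical streaming call: chunks may carry cached_tokens and
    # cache_creation_tokens, which the accumulator folds into the final message.
    chat = RubyLLM.chat(model: 'claude-3-5-sonnet')   # placeholder model name
                  .cache_prompts(system: true)

    final = chat.ask('Stream me an answer') do |chunk|
      print chunk.content
    end

    final.cached_tokens          # => e.g. 1024, or nil if nothing was read from cache
    final.cache_creation_tokens  # => e.g. 0, or nil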
data/lib/ruby_llm/version.rb
CHANGED