RubyGems - ruby_llm_community - Versions diffs - 0.0.2 → 0.0.4 - Mend

ruby_llm_community 0.0.2 → 0.0.4

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (27)

checksums.yaml +4 -4
data/README.md +2 -2
data/lib/ruby_llm/chat.rb +7 -0
data/lib/ruby_llm/configuration.rb +0 -2
data/lib/ruby_llm/message.rb +7 -2
data/lib/ruby_llm/models.rb +0 -4
data/lib/ruby_llm/provider.rb +3 -1
data/lib/ruby_llm/providers/anthropic/chat.rb +34 -20
data/lib/ruby_llm/providers/anthropic/media.rb +70 -46
data/lib/ruby_llm/providers/anthropic/models.rb +8 -0
data/lib/ruby_llm/providers/anthropic/streaming.rb +2 -0
data/lib/ruby_llm/providers/bedrock/chat.rb +21 -10
data/lib/ruby_llm/providers/bedrock/media.rb +37 -25
data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +8 -0
data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +2 -0
data/lib/ruby_llm/providers/gemini/chat.rb +9 -2
data/lib/ruby_llm/providers/gemini/streaming.rb +9 -1
data/lib/ruby_llm/providers/mistral/chat.rb +1 -1
data/lib/ruby_llm/providers/openai/chat.rb +2 -1
data/lib/ruby_llm/providers/openai/response.rb +2 -1
data/lib/ruby_llm/providers/openai/streaming.rb +2 -1
data/lib/ruby_llm/providers/openai.rb +3 -3
data/lib/ruby_llm/stream_accumulator.rb +6 -0
data/lib/ruby_llm/version.rb +1 -1
data/lib/{ruby_llm.rb → ruby_llm_community.rb} +6 -1
data/lib/shims/ruby_llm.rb +3 -0
metadata +4 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: d6c4123843627ce84d78bf83efb1c1a22392536bb6e4e6563c0580741e1792ce
-  data.tar.gz: 5276ad2720e554ff373629c0743a5d5e663f10298c3334814be25569e45e446f
+  metadata.gz: 532c33922c77aa1376befb9d9ad5425c10b3dee2016fbb24ce3be431d09b3461
+  data.tar.gz: 8339735878bfe54c68524d42ec8d67c0e59d53174c6648cdbef272f2e13f5f54
 SHA512:
-  metadata.gz: 274c92a9613ad4fc0f0a384831d986338abde474aa2de176f75302a246d1e504e7be8c956db3e8aaad6a34e2bcd86551461358912634ede6b32e05296d7957c3
-  data.tar.gz: 2417dad579114f19579a94a69fe2b7f57759590332d683dc7b6d0946e6564f6c6f3e60010e278ab60928acc7aa5c899dba5ede08acf1f02eafb358a915405a11
+  metadata.gz: 3daddc9fb0eb2f4271352cc7158e8e2e1f8c39d83ddd97484aedb1d4a9fc4ae7edc61dae6bdc2603745a3e277fc426107dbfa33c2475c0f6ae319400dbf4827b
+  data.tar.gz: 1a9bd7a2b2bb3358bcd4c83b00e9c9aacf02c4e3b2ffe37dc368929fb5ae886f311bd108abfe939f55cd8b5d6038eeefcf2f802eac15220dad726beff41ee9d8

data/README.md CHANGED Viewed

@@ -6,7 +6,7 @@
 **One *beautiful* Ruby API for GPT, Claude, Gemini, and more.** Easily build chatbots, AI agents, RAG applications, and content generators. Features chat (text, images, audio, PDFs), image generation, embeddings, tools (function calling), structured output, Rails integration, and streaming. Works with OpenAI, Anthropic, Google Gemini, AWS Bedrock, DeepSeek, Mistral, Ollama (local models), OpenRouter, Perplexity, GPUStack, and any OpenAI-compatible API.
 <div class="badge-container">
-  <a href="https://badge.fury.io/rb/ruby_llm"><img src="https://badge.fury.io/rb/ruby_llm.svg?a=4" alt="Gem Version" /></a>
+  <a href="https://badge.fury.io/rb/ruby_llm"><img src="https://badge.fury.io/rb/ruby_llm.svg?a=5" alt="Gem Version" /></a>
   <a href="https://github.com/testdouble/standard"><img src="https://img.shields.io/badge/code_style-standard-brightgreen.svg" alt="Ruby Style Guide" /></a>
   <a href="https://rubygems.org/gems/ruby_llm"><img alt="Gem Downloads" src="https://img.shields.io/gem/dt/ruby_llm"></a>
   <a href="https://codecov.io/gh/crmne/ruby_llm"><img src="https://codecov.io/gh/crmne/ruby_llm/branch/main/graph/badge.svg" alt="codecov" /></a>
@@ -99,7 +99,7 @@ response = chat.with_schema(ProductSchema)
 Add to your Gemfile:
 ```ruby
-gem 'ruby_llm'
+gem 'ruby_llm_community'
 ```
 Then `bundle install`.

data/lib/ruby_llm/chat.rb CHANGED Viewed

@@ -25,6 +25,7 @@ module RubyLLM
       @temperature = 0.7
       @messages = []
       @tools = {}
+      @cache_prompts = { system: false, user: false, tools: false }
       @params = {}
       @headers = {}
       @schema = nil
@@ -127,12 +128,18 @@ module RubyLLM
       messages.each(&)
     end
+    def cache_prompts(system: false, user: false, tools: false)
+      @cache_prompts = { system: system, user: user, tools: tools }
+      self
+    end
     def complete(&) # rubocop:disable Metrics/PerceivedComplexity
       response = @provider.complete(
         messages,
         tools: @tools,
         temperature: @temperature,
         model: @model.id,
+        cache_prompts: @cache_prompts.dup,
         params: @params,
         headers: @headers,
         schema: @schema,

data/lib/ruby_llm/configuration.rb CHANGED Viewed

@@ -44,7 +44,6 @@ module RubyLLM
                   :logger,
                   :log_file,
                   :log_level,
-                  :log_assume_model_exists,
                   :log_stream_debug
     def initialize
@@ -64,7 +63,6 @@ module RubyLLM
       # Logging configuration
       @log_file = $stdout
       @log_level = ENV['RUBYLLM_DEBUG'] ? Logger::DEBUG : Logger::INFO
-      @log_assume_model_exists = true
       @log_stream_debug = ENV['RUBYLLM_STREAM_DEBUG'] == 'true'
     end

data/lib/ruby_llm/message.rb CHANGED Viewed

@@ -7,7 +7,8 @@ module RubyLLM
   class Message
     ROLES = %i[system user assistant tool].freeze
-    attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :raw
+    attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :raw,
+                :cached_tokens, :cache_creation_tokens
     attr_writer :content
     def initialize(options = {})
@@ -18,6 +19,8 @@ module RubyLLM
       @output_tokens = options[:output_tokens]
       @model_id = options[:model_id]
       @tool_call_id = options[:tool_call_id]
+      @cached_tokens = options[:cached_tokens]
+      @cache_creation_tokens = options[:cache_creation_tokens]
       @raw = options[:raw]
       ensure_valid_role
@@ -51,7 +54,9 @@ module RubyLLM
         tool_call_id: tool_call_id,
         input_tokens: input_tokens,
         output_tokens: output_tokens,
-        model_id: model_id
+        model_id: model_id,
+        cache_creation_tokens: cache_creation_tokens,
+        cached_tokens: cached_tokens
       }.compact
     end

data/lib/ruby_llm/models.rb CHANGED Viewed

@@ -70,10 +70,6 @@ module RubyLLM
             modalities: { input: %w[text image], output: %w[text] },
             metadata: { warning: 'Assuming model exists, capabilities may not be accurate' }
           )
-          if RubyLLM.config.log_assume_model_exists
-            RubyLLM.logger.warn "Assuming model '#{model_id}' exists for provider '#{provider}'. " \
-                                'Capabilities may not be accurately reflected.'
-          end
         else
           model = Models.find model_id, provider
           provider_class = Provider.providers[model.provider.to_sym] || raise(Error,

data/lib/ruby_llm/provider.rb CHANGED Viewed

@@ -40,7 +40,8 @@ module RubyLLM
       self.class.configuration_requirements
     end
-    def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, &) # rubocop:disable Metrics/ParameterLists
+    def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, # rubocop:disable Metrics/ParameterLists
+                 cache_prompts: { system: false, user: false, tools: false }, &)
       normalized_temperature = maybe_normalize_temperature(temperature, model)
       payload = Utils.deep_merge(
@@ -50,6 +51,7 @@ module RubyLLM
           tools: tools,
           temperature: normalized_temperature,
           model: model,
+          cache_prompts: cache_prompts,
           stream: block_given?,
           schema: schema
         )

data/lib/ruby_llm/providers/anthropic/chat.rb CHANGED Viewed

@@ -11,12 +11,14 @@ module RubyLLM
           '/v1/messages'
         end
-        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
+                           cache_prompts: { system: false, user: false, tools: false })
           system_messages, chat_messages = separate_messages(messages)
-          system_content = build_system_content(system_messages)
+          system_content = build_system_content(system_messages, cache: cache_prompts[:system])
-          build_base_payload(chat_messages, model, stream).tap do |payload|
-            add_optional_fields(payload, system_content:, tools:, temperature:)
+          build_base_payload(chat_messages, model, stream, cache: cache_prompts[:user]).tap do |payload|
+            add_optional_fields(payload, system_content:, tools:, temperature:,
+                                         cache_tools: cache_prompts[:tools])
           end
         end
@@ -24,28 +26,34 @@ module RubyLLM
           messages.partition { |msg| msg.role == :system }
         end
-        def build_system_content(system_messages)
-          if system_messages.length > 1
-            RubyLLM.logger.warn(
-              "Anthropic's Claude implementation only supports a single system message. " \
-              'Multiple system messages will be combined into one.'
-            )
+        def build_system_content(system_messages, cache: false)
+          system_messages.flat_map.with_index do |msg, idx|
+            message_cache = cache if idx == system_messages.size - 1
+            format_system_message(msg, cache: message_cache)
           end
-          system_messages.map(&:content).join("\n\n")
         end
-        def build_base_payload(chat_messages, model, stream)
+        def build_base_payload(chat_messages, model, stream, cache: false)
+          messages = chat_messages.map.with_index do |msg, idx|
+            message_cache = cache if idx == chat_messages.size - 1
+            format_message(msg, cache: message_cache)
+          end
           {
             model: model,
-            messages: chat_messages.map { |msg| format_message(msg) },
+            messages:,
             stream: stream,
             max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096
           }
         end
-        def add_optional_fields(payload, system_content:, tools:, temperature:)
-          payload[:tools] = tools.values.map { |t| Tools.function_for(t) } if tools.any?
+        def add_optional_fields(payload, system_content:, tools:, temperature:, cache_tools: false)
+          if tools.any?
+            tool_definitions = tools.values.map { |t| Tools.function_for(t) }
+            tool_definitions[-1][:cache_control] = { type: 'ephemeral' } if cache_tools
+            payload[:tools] = tool_definitions
+          end
           payload[:system] = system_content unless system_content.empty?
           payload[:temperature] = temperature unless temperature.nil?
         end
@@ -73,24 +81,30 @@ module RubyLLM
             input_tokens: data.dig('usage', 'input_tokens'),
             output_tokens: data.dig('usage', 'output_tokens'),
             model_id: data['model'],
+            cache_creation_tokens: data.dig('usage', 'cache_creation_input_tokens'),
+            cached_tokens: data.dig('usage', 'cache_read_input_tokens'),
             raw: response
           )
         end
-        def format_message(msg)
+        def format_message(msg, cache: false)
           if msg.tool_call?
             Tools.format_tool_call(msg)
           elsif msg.tool_result?
             Tools.format_tool_result(msg)
           else
-            format_basic_message(msg)
+            format_basic_message(msg, cache:)
           end
         end
-        def format_basic_message(msg)
+        def format_system_message(msg, cache: false)
+          Media.format_content(msg.content, cache:)
+        end
+        def format_basic_message(msg, cache: false)
           {
             role: convert_role(msg.role),
-            content: Media.format_content(msg.content)
+            content: Media.format_content(msg.content, cache:)
           }
         end

data/lib/ruby_llm/providers/anthropic/media.rb CHANGED Viewed

@@ -7,13 +7,13 @@ module RubyLLM
       module Media
         module_function
-        def format_content(content)
+        def format_content(content, cache: false)
           # Convert Hash/Array back to JSON string for API
-          return [format_text(content.to_json)] if content.is_a?(Hash) || content.is_a?(Array)
-          return [format_text(content)] unless content.is_a?(Content)
+          return [format_text(content.to_json, cache:)] if content.is_a?(Hash) || content.is_a?(Array)
+          return [format_text(content, cache:)] unless content.is_a?(Content)
           parts = []
-          parts << format_text(content.text) if content.text
+          parts << format_text(content.text, cache:) if content.text
           content.attachments.each do |attachment|
             case attachment.type
@@ -31,60 +31,84 @@ module RubyLLM
           parts
         end
-        def format_text(text)
-          {
-            type: 'text',
-            text: text
-          }
+        def format_text(text, cache: false)
+          with_cache_control(
+            {
+              type: 'text',
+              text: text
+            },
+            cache:
+          )
         end
-        def format_image(image)
+        def format_image(image, cache: false)
           if image.url?
-            {
-              type: 'image',
-              source: {
-                type: 'url',
-                url: image.source
-              }
-            }
+            with_cache_control(
+              {
+                type: 'image',
+                source: {
+                  type: 'url',
+                  url: image.source
+                }
+              },
+              cache:
+            )
           else
-            {
-              type: 'image',
-              source: {
-                type: 'base64',
-                media_type: image.mime_type,
-                data: image.encoded
-              }
-            }
+            with_cache_control(
+              {
+                type: 'image',
+                source: {
+                  type: 'base64',
+                  media_type: image.mime_type,
+                  data: image.encoded
+                }
+              },
+              cache:
+            )
           end
         end
-        def format_pdf(pdf)
+        def format_pdf(pdf, cache: false)
           if pdf.url?
-            {
-              type: 'document',
-              source: {
-                type: 'url',
-                url: pdf.source
-              }
-            }
+            with_cache_control(
+              {
+                type: 'document',
+                source: {
+                  type: 'url',
+                  url: pdf.source
+                }
+              },
+              cache:
+            )
           else
-            {
-              type: 'document',
-              source: {
-                type: 'base64',
-                media_type: pdf.mime_type,
-                data: pdf.encoded
-              }
-            }
+            with_cache_control(
+              {
+                type: 'document',
+                source: {
+                  type: 'base64',
+                  media_type: pdf.mime_type,
+                  data: pdf.encoded
+                }
+              },
+              cache:
+            )
           end
         end
-        def format_text_file(text_file)
-          {
-            type: 'text',
-            text: Utils.format_text_file_for_llm(text_file)
-          }
+        def format_text_file(text_file, cache: false)
+          with_cache_control(
+            {
+              type: 'text',
+              text: Utils.format_text_file_for_llm(text_file)
+            },
+            cache:
+          )
+        end
+        def with_cache_control(hash, cache: false)
+          return hash unless cache
+          hash.merge(cache_control: { type: 'ephemeral' })
         end
       end
     end

data/lib/ruby_llm/providers/anthropic/models.rb CHANGED Viewed

@@ -42,6 +42,14 @@ module RubyLLM
         def extract_output_tokens(data)
           data.dig('message', 'usage', 'output_tokens') || data.dig('usage', 'output_tokens')
         end
+        def extract_cached_tokens(data)
+          data.dig('message', 'usage', 'cache_read_input_tokens')
+        end
+        def extract_cache_creation_tokens(data)
+          data.dig('message', 'usage', 'cache_creation_input_tokens')
+        end
       end
     end
   end

data/lib/ruby_llm/providers/anthropic/streaming.rb CHANGED Viewed

@@ -18,6 +18,8 @@ module RubyLLM
             content: data.dig('delta', 'text'),
             input_tokens: extract_input_tokens(data),
             output_tokens: extract_output_tokens(data),
+            cached_tokens: extract_cached_tokens(data),
+            cache_creation_tokens: extract_cache_creation_tokens(data),
             tool_calls: extract_tool_calls(data)
           )
         end

data/lib/ruby_llm/providers/bedrock/chat.rb CHANGED Viewed

@@ -17,20 +17,20 @@ module RubyLLM
           Anthropic::Chat.parse_completion_response response
         end
-        def format_message(msg)
+        def format_message(msg, cache: false)
           if msg.tool_call?
             Anthropic::Tools.format_tool_call(msg)
           elsif msg.tool_result?
             Anthropic::Tools.format_tool_result(msg)
           else
-            format_basic_message(msg)
+            format_basic_message(msg, cache:)
           end
         end
-        def format_basic_message(msg)
+        def format_basic_message(msg, cache: false)
           {
             role: Anthropic::Chat.convert_role(msg.role),
-            content: Media.format_content(msg.content)
+            content: Media.format_content(msg.content, cache:)
           }
         end
@@ -40,22 +40,33 @@ module RubyLLM
           "model/#{@model_id}/invoke"
         end
-        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
+                           cache_prompts: { system: false, user: false, tools: false })
           # Hold model_id in instance variable for use in completion_url and stream_url
           @model_id = model
           system_messages, chat_messages = Anthropic::Chat.separate_messages(messages)
-          system_content = Anthropic::Chat.build_system_content(system_messages)
+          system_content = Anthropic::Chat.build_system_content(system_messages, cache: cache_prompts[:system])
-          build_base_payload(chat_messages, model).tap do |payload|
-            Anthropic::Chat.add_optional_fields(payload, system_content:, tools:, temperature:)
+          build_base_payload(chat_messages, model, cache: cache_prompts[:user]).tap do |payload|
+            Anthropic::Chat.add_optional_fields(
+              payload,
+              system_content:,
+              tools:,
+              temperature:,
+              cache_tools: cache_prompts[:tools]
+            )
           end
         end
-        def build_base_payload(chat_messages, model)
+        def build_base_payload(chat_messages, model, cache: false)
+          messages = chat_messages.map.with_index do |msg, idx|
+            message_cache = cache if idx == chat_messages.size - 1
+            format_message(msg, cache: message_cache)
+          end
           {
             anthropic_version: 'bedrock-2023-05-31',
-            messages: chat_messages.map { |msg| format_message(msg) },
+            messages: messages,
             max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096
           }
         end

data/lib/ruby_llm/providers/bedrock/media.rb CHANGED Viewed

@@ -10,22 +10,22 @@ module RubyLLM
         module_function
-        def format_content(content)
+        def format_content(content, cache: false)
           # Convert Hash/Array back to JSON string for API
-          return [Anthropic::Media.format_text(content.to_json)] if content.is_a?(Hash) || content.is_a?(Array)
-          return [Anthropic::Media.format_text(content)] unless content.is_a?(Content)
+          return [Anthropic::Media.format_text(content.to_json, cache:)] if content.is_a?(Hash) || content.is_a?(Array)
+          return [Anthropic::Media.format_text(content, cache:)] unless content.is_a?(Content)
           parts = []
-          parts << Anthropic::Media.format_text(content.text) if content.text
+          parts << Anthropic::Media.format_text(content.text, cache:) if content.text
           content.attachments.each do |attachment|
             case attachment.type
             when :image
-              parts << format_image(attachment)
+              parts << format_image(attachment, cache:)
             when :pdf
-              parts << format_pdf(attachment)
+              parts << format_pdf(attachment, cache:)
             when :text
-              parts << Anthropic::Media.format_text_file(attachment)
+              parts << Anthropic::Media.format_text_file(attachment, cache:)
             else
               raise UnsupportedAttachmentError, attachment.type
             end
@@ -34,26 +34,38 @@ module RubyLLM
           parts
         end
-        def format_image(image)
-          {
-            type: 'image',
-            source: {
-              type: 'base64',
-              media_type: image.mime_type,
-              data: image.encoded
-            }
-          }
+        def format_image(image, cache: false)
+          with_cache_control(
+            {
+              type: 'image',
+              source: {
+                type: 'base64',
+                media_type: image.mime_type,
+                data: image.encoded
+              }
+            },
+            cache:
+          )
         end
-        def format_pdf(pdf)
-          {
-            type: 'document',
-            source: {
-              type: 'base64',
-              media_type: pdf.mime_type,
-              data: pdf.encoded
-            }
-          }
+        def format_pdf(pdf, cache: false)
+          with_cache_control(
+            {
+              type: 'document',
+              source: {
+                type: 'base64',
+                media_type: pdf.mime_type,
+                data: pdf.encoded
+              }
+            },
+            cache:
+          )
+        end
+        def with_cache_control(hash, cache: false)
+          return hash unless cache
+          hash.merge(cache_control: { type: 'ephemeral' })
         end
       end
     end

data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb CHANGED Viewed

@@ -39,6 +39,14 @@ module RubyLLM
             data.dig('message', 'usage', 'output_tokens') || data.dig('usage', 'output_tokens')
           end
+          def extract_cached_tokens(data)
+            data.dig('message', 'usage', 'cache_read_input_tokens')
+          end
+          def extract_cache_creation_tokens(data)
+            data.dig('message', 'usage', 'cache_creation_input_tokens')
+          end
           private
           def extract_content_by_type(data)

data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb CHANGED Viewed

@@ -71,6 +71,8 @@ module RubyLLM
               content: extract_streaming_content(data),
               input_tokens: extract_input_tokens(data),
               output_tokens: extract_output_tokens(data),
+              cached_tokens: extract_cached_tokens(data),
+              cache_creation_tokens: extract_cache_creation_tokens(data),
               tool_calls: extract_tool_calls(data)
             }
           end

data/lib/ruby_llm/providers/gemini/chat.rb CHANGED Viewed

@@ -11,7 +11,7 @@ module RubyLLM
           "models/#{@model}:generateContent"
         end
-        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, cache_prompts: {}) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
           @model = model # Store model for completion_url/stream_url
           payload = {
             contents: format_messages(messages),
@@ -80,7 +80,8 @@ module RubyLLM
             content: extract_content(data),
             tool_calls: tool_calls,
             input_tokens: data.dig('usageMetadata', 'promptTokenCount'),
-            output_tokens: data.dig('usageMetadata', 'candidatesTokenCount'),
+            output_tokens: calculate_output_tokens(data),
+            cached_tokens: data.dig('usageMetadata', 'cacheTokensDetails', 0, 'tokenCount') || 0,
             model_id: data['modelVersion'] || response.env.url.path.split('/')[3].split(':')[0],
             raw: response
           )
@@ -133,6 +134,12 @@ module RubyLLM
           parts = candidate.dig('content', 'parts')
           parts&.any? { |p| p['functionCall'] }
         end
+        def calculate_output_tokens(data)
+          candidates = data.dig('usageMetadata', 'candidatesTokenCount') || 0
+          thoughts = data.dig('usageMetadata', 'thoughtsTokenCount') || 0
+          candidates + thoughts
+        end
       end
     end
   end

data/lib/ruby_llm/providers/gemini/streaming.rb CHANGED Viewed

@@ -16,6 +16,7 @@ module RubyLLM
             content: extract_content(data),
             input_tokens: extract_input_tokens(data),
             output_tokens: extract_output_tokens(data),
+            cached_tokens: extract_cached_tokens(data),
             tool_calls: extract_tool_calls(data)
           )
         end
@@ -42,7 +43,14 @@ module RubyLLM
         end
         def extract_output_tokens(data)
-          data.dig('usageMetadata', 'candidatesTokenCount')
+          candidates = data.dig('usageMetadata', 'candidatesTokenCount') || 0
+          thoughts = data.dig('usageMetadata', 'thoughtsTokenCount') || 0
+          total = candidates + thoughts
+          total.positive? ? total : nil
+        end
+        def extract_cached_tokens(data)
+          data.dig('usageMetadata', 'cachedContentTokenCount')
         end
         def parse_streaming_error(data)

data/lib/ruby_llm/providers/mistral/chat.rb CHANGED Viewed

@@ -13,7 +13,7 @@ module RubyLLM
         end
         # rubocop:disable Metrics/ParameterLists
-        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil)
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, cache_prompts: {}) # rubocop:disable Metrics/ParameterLists
           payload = super
           # Mistral doesn't support stream_options
           payload.delete(:stream_options)

data/lib/ruby_llm/providers/openai/chat.rb CHANGED Viewed

@@ -11,7 +11,7 @@ module RubyLLM
         module_function
-        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, cache_prompts: {}) # rubocop:disable Lint/UnusedMethodArgument, Metrics/ParameterLists
           payload = {
             model: model,
             messages: format_messages(messages),
@@ -56,6 +56,7 @@ module RubyLLM
             tool_calls: parse_tool_calls(message_data['tool_calls']),
             input_tokens: data['usage']['prompt_tokens'],
             output_tokens: data['usage']['completion_tokens'],
+            cached_tokens: data.dig('usage', 'prompt_tokens_details', 'cached_tokens'),
             model_id: data['model'],
             raw: response
           )

data/lib/ruby_llm/providers/openai/response.rb CHANGED Viewed

@@ -11,7 +11,7 @@ module RubyLLM
         module_function
-        def render_response_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists
+        def render_response_payload(messages, tools:, temperature:, model:, cache_prompts:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
           payload = {
             model: model,
             input: format_input(messages),
@@ -97,6 +97,7 @@ module RubyLLM
             tool_calls: parse_response_tool_calls(outputs),
             input_tokens: data['usage']['input_tokens'],
             output_tokens: data['usage']['output_tokens'],
+            cached_tokens: data.dig('usage', 'input_tokens_details', 'cached_tokens'),
             model_id: data['model'],
             raw: response
           )

data/lib/ruby_llm/providers/openai/streaming.rb CHANGED Viewed

@@ -87,7 +87,8 @@ module RubyLLM
             content: data.dig('choices', 0, 'delta', 'content'),
             tool_calls: parse_tool_calls(data.dig('choices', 0, 'delta', 'tool_calls'), parse_arguments: false),
             input_tokens: data.dig('usage', 'prompt_tokens'),
-            output_tokens: data.dig('usage', 'completion_tokens')
+            output_tokens: data.dig('usage', 'completion_tokens'),
+            cached_tokens: data.dig('usage', 'cached_tokens')
           )
         end

data/lib/ruby_llm/providers/openai.rb CHANGED Viewed

@@ -17,12 +17,12 @@ module RubyLLM
         end
       end
-      def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists
+      def render_payload(messages, tools:, temperature:, model:, cache_prompts:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists
         @using_responses_api = !audio_input?(messages)
         if @using_responses_api
-          render_response_payload(messages, tools: tools, temperature: temperature, model: model, stream: stream,
-                                            schema: schema)
+          render_response_payload(messages, tools: tools, temperature: temperature, model: model,
+                                            cache_prompts:, stream:, schema:)
         else
           super
         end

data/lib/ruby_llm/stream_accumulator.rb CHANGED Viewed

@@ -12,6 +12,8 @@ module RubyLLM
       @tool_calls = {}
       @input_tokens = 0
       @output_tokens = 0
+      @cached_tokens = 0
+      @cache_creation_tokens = 0
       @latest_tool_call_id = nil
     end
@@ -37,6 +39,8 @@ module RubyLLM
         tool_calls: tool_calls_from_stream,
         input_tokens: @input_tokens.positive? ? @input_tokens : nil,
         output_tokens: @output_tokens.positive? ? @output_tokens : nil,
+        cached_tokens: @cached_tokens.positive? ? @cached_tokens : nil,
+        cache_creation_tokens: @cache_creation_tokens.positive? ? @cache_creation_tokens : nil,
         raw: response
       )
     end
@@ -92,6 +96,8 @@ module RubyLLM
     def count_tokens(chunk)
       @input_tokens = chunk.input_tokens if chunk.input_tokens
       @output_tokens = chunk.output_tokens if chunk.output_tokens
+      @cached_tokens = chunk.cached_tokens if chunk.cached_tokens
+      @cache_creation_tokens = chunk.cache_creation_tokens if chunk.cache_creation_tokens
     end
   end
 end

data/lib/ruby_llm/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module RubyLLM
-  VERSION = '0.0.2'
+  VERSION = '0.0.4'
 end

data/lib/{ruby_llm.rb → ruby_llm_community.rb} RENAMED Viewed

@@ -9,7 +9,7 @@ require 'logger'
 require 'securerandom'
 require 'zeitwerk'
-loader = Zeitwerk::Loader.for_gem
+loader = Zeitwerk::Loader.for_gem(warn_on_extra_files: false)
 loader.inflector.inflect(
   'ruby_llm' => 'RubyLLM',
   'llm' => 'LLM',
@@ -24,12 +24,17 @@ loader.inflector.inflect(
   'mistral' => 'Mistral',
   'pdf' => 'PDF'
 )
+loader.ignore("#{__dir__}/shims")
 loader.ignore("#{__dir__}/tasks")
 loader.ignore("#{__dir__}/ruby_llm/railtie")
 loader.ignore("#{__dir__}/ruby_llm/active_record")
 loader.ignore("#{__dir__}/generators")
 loader.setup
+# This is a shim for the RubyLLM gem.
+module RubyLlmCommunity
+end
 # A delightful Ruby interface to modern AI language models.
 # Provides a unified way to interact with models from OpenAI, Anthropic and others
 # with a focus on developer happiness and convention over configuration.

data/lib/shims/ruby_llm.rb ADDED Viewed

@@ -0,0 +1,3 @@
+# frozen_string_literal: true
+require 'ruby_llm_community'

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: ruby_llm_community
 version: !ruby/object:Gem::Version
-  version: 0.0.2
+  version: 0.0.4
 platform: ruby
 authors:
 - Paul Shippy
@@ -145,7 +145,6 @@ files:
 - lib/generators/ruby_llm/install/templates/message_model.rb.tt
 - lib/generators/ruby_llm/install/templates/tool_call_model.rb.tt
 - lib/generators/ruby_llm/install_generator.rb
-- lib/ruby_llm.rb
 - lib/ruby_llm/active_record/acts_as.rb
 - lib/ruby_llm/aliases.json
 - lib/ruby_llm/aliases.rb
@@ -239,6 +238,8 @@ files:
 - lib/ruby_llm/tool_call.rb
 - lib/ruby_llm/utils.rb
 - lib/ruby_llm/version.rb
+- lib/ruby_llm_community.rb
+- lib/shims/ruby_llm.rb
 - lib/tasks/aliases.rake
 - lib/tasks/models_docs.rake
 - lib/tasks/models_update.rake
@@ -257,6 +258,7 @@ metadata:
 rdoc_options: []
 require_paths:
 - lib
+- lib/shims
 required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="