RubyGems - ruby_llm - Versions diffs - 1.12.0 → 1.14.1 - Mend

ruby_llm 1.12.0 → 1.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (141)

data/lib/ruby_llm/providers/bedrock.rb CHANGED Viewed

@@ -18,12 +18,15 @@ module RubyLLM
         {}
       end
-      def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, thinking: nil, &) # rubocop:disable Metrics/ParameterLists
+      # rubocop:disable Metrics/ParameterLists
+      def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, thinking: nil,
+                   tool_prefs: nil, &)
         normalized_params = normalize_params(params, model:)
         super(
           messages,
           tools: tools,
+          tool_prefs: tool_prefs,
           temperature: temperature,
           model: model,
           params: normalized_params,
@@ -33,6 +36,7 @@ module RubyLLM
           &
         )
       end
+      # rubocop:enable Metrics/ParameterLists
       def parse_error(response)
         return if response.body.nil? || response.body.empty?
@@ -49,6 +53,10 @@ module RubyLLM
       end
       class << self
+        def configuration_options
+          %i[bedrock_api_key bedrock_secret_key bedrock_region bedrock_session_token]
+        end
         def configuration_requirements
           %i[bedrock_api_key bedrock_secret_key bedrock_region]
         end

data/lib/ruby_llm/providers/deepseek/capabilities.rb CHANGED Viewed

@@ -3,127 +3,17 @@
 module RubyLLM
   module Providers
     class DeepSeek
-      # Determines capabilities and pricing for DeepSeek models
+      # Provider-level capability checks used outside the model registry.
       module Capabilities
         module_function
-        def context_window_for(model_id)
-          case model_id
-          when /deepseek-(?:chat|reasoner)/ then 64_000
-          else 32_768
-          end
+        def supports_tool_choice?(_model_id)
+          true
         end
-        def max_tokens_for(model_id)
-          case model_id
-          when /deepseek-(?:chat|reasoner)/ then 8_192
-          else 4_096
-          end
-        end
-        def input_price_for(model_id)
-          PRICES.dig(model_family(model_id), :input_miss) || default_input_price
-        end
-        def output_price_for(model_id)
-          PRICES.dig(model_family(model_id), :output) || default_output_price
-        end
-        def cache_hit_price_for(model_id)
-          PRICES.dig(model_family(model_id), :input_hit) || default_cache_hit_price
-        end
-        def supports_vision?(_model_id)
+        def supports_tool_parallel_control?(_model_id)
           false
         end
-        def supports_functions?(model_id)
-          model_id.match?(/deepseek-chat/)
-        end
-        def supports_json_mode?(_model_id)
-          false
-        end
-        def format_display_name(model_id)
-          case model_id
-          when 'deepseek-chat' then 'DeepSeek V3'
-          when 'deepseek-reasoner' then 'DeepSeek R1'
-          else
-            model_id.split('-')
-                    .map(&:capitalize)
-                    .join(' ')
-          end
-        end
-        def model_type(_model_id)
-          'chat'
-        end
-        def model_family(model_id)
-          case model_id
-          when /deepseek-reasoner/ then :reasoner
-          else :chat
-          end
-        end
-        PRICES = {
-          chat: {
-            input_hit: 0.07,
-            input_miss: 0.27,
-            output: 1.10
-          },
-          reasoner: {
-            input_hit: 0.14,
-            input_miss: 0.55,
-            output: 2.19
-          }
-        }.freeze
-        def default_input_price
-          0.27
-        end
-        def default_output_price
-          1.10
-        end
-        def default_cache_hit_price
-          0.07
-        end
-        def modalities_for(_model_id)
-          {
-            input: ['text'],
-            output: ['text']
-          }
-        end
-        def capabilities_for(model_id)
-          capabilities = ['streaming']
-          capabilities << 'function_calling' if model_id.match?(/deepseek-chat/)
-          capabilities
-        end
-        def pricing_for(model_id)
-          family = model_family(model_id)
-          prices = PRICES.fetch(family, { input_miss: default_input_price, output: default_output_price })
-          standard_pricing = {
-            input_per_million: prices[:input_miss],
-            output_per_million: prices[:output]
-          }
-          standard_pricing[:cached_input_per_million] = prices[:input_hit] if prices[:input_hit]
-          {
-            text_tokens: {
-              standard: standard_pricing
-            }
-          }
-        end
       end
     end
   end

data/lib/ruby_llm/providers/deepseek.rb CHANGED Viewed

@@ -7,7 +7,7 @@ module RubyLLM
       include DeepSeek::Chat
       def api_base
-        'https://api.deepseek.com'
+        @config.deepseek_api_base || 'https://api.deepseek.com'
       end
       def headers
@@ -21,6 +21,10 @@ module RubyLLM
           DeepSeek::Capabilities
         end
+        def configuration_options
+          %i[deepseek_api_key deepseek_api_base]
+        end
         def configuration_requirements
           %i[deepseek_api_key]
         end

data/lib/ruby_llm/providers/gemini/capabilities.rb CHANGED Viewed

@@ -3,13 +3,35 @@
 module RubyLLM
   module Providers
     class Gemini
-      # Determines capabilities and pricing for Google Gemini models
+      # Provider-level capability checks and narrow registry fallbacks.
       module Capabilities
         module_function
+        PRICES = {
+          flash_2: { input: 0.10, output: 0.40 }, # rubocop:disable Naming/VariableNumber
+          flash_lite_2: { input: 0.075, output: 0.30 }, # rubocop:disable Naming/VariableNumber
+          flash: { input: 0.075, output: 0.30 },
+          flash_8b: { input: 0.0375, output: 0.15 },
+          pro: { input: 1.25, output: 5.0 },
+          pro_2_5: { input: 0.12, output: 0.50 }, # rubocop:disable Naming/VariableNumber
+          gemini_embedding: { input: 0.002, output: 0.004 },
+          embedding: { input: 0.00, output: 0.00 },
+          imagen: { price: 0.03 },
+          aqa: { input: 0.00, output: 0.00 }
+        }.freeze
+        def supports_tool_choice?(_model_id)
+          true
+        end
+        def supports_tool_parallel_control?(_model_id)
+          false
+        end
         def context_window_for(model_id)
           case model_id
-          when /gemini-2\.5-pro-exp-03-25/, /gemini-2\.0-flash/, /gemini-2\.0-flash-lite/, /gemini-1\.5-flash/, /gemini-1\.5-flash-8b/ # rubocop:disable Layout/LineLength
+          when /gemini-2\.5-pro-exp-03-25/, /gemini-2\.0-flash/, /gemini-2\.0-flash-lite/, /gemini-1\.5-flash/,
+               /gemini-1\.5-flash-8b/
             1_048_576
           when /gemini-1\.5-pro/ then 2_097_152
           when /gemini-embedding-exp/ then 8_192
@@ -23,7 +45,8 @@ module RubyLLM
         def max_tokens_for(model_id)
           case model_id
           when /gemini-2\.5-pro-exp-03-25/ then 64_000
-          when /gemini-2\.0-flash/, /gemini-2\.0-flash-lite/, /gemini-1\.5-flash/, /gemini-1\.5-flash-8b/, /gemini-1\.5-pro/ # rubocop:disable Layout/LineLength
+          when /gemini-2\.0-flash/, /gemini-2\.0-flash-lite/, /gemini-1\.5-flash/, /gemini-1\.5-flash-8b/,
+               /gemini-1\.5-pro/
             8_192
           when /gemini-embedding-exp/ then nil
           when /text-embedding-004/, /embedding-001/ then 768
@@ -32,18 +55,24 @@ module RubyLLM
           end
         end
-        def input_price_for(model_id)
-          base_price = PRICES.dig(pricing_family(model_id), :input) || default_input_price
-          return base_price unless long_context_model?(model_id)
-          context_window_for(model_id) > 128_000 ? base_price * 2 : base_price
+        def critical_capabilities_for(model_id)
+          capabilities = []
+          capabilities << 'function_calling' if supports_functions?(model_id)
+          capabilities << 'structured_output' if supports_structured_output?(model_id)
+          capabilities << 'vision' if supports_vision?(model_id)
+          capabilities
         end
-        def output_price_for(model_id)
-          base_price = PRICES.dig(pricing_family(model_id), :output) || default_output_price
-          return base_price unless long_context_model?(model_id)
-          context_window_for(model_id) > 128_000 ? base_price * 2 : base_price
+        def pricing_for(model_id)
+          prices = PRICES.fetch(pricing_family(model_id), { input: 0.075, output: 0.30 })
+          {
+            text_tokens: {
+              standard: {
+                input_per_million: prices[:input] || prices[:price] || 0.075,
+                output_per_million: prices[:output] || prices[:price] || 0.30
+              }
+            }
+          }
         end
         def supports_vision?(model_id)
@@ -52,17 +81,13 @@ module RubyLLM
           model_id.match?(/gemini|flash|pro|imagen/)
         end
-        def supports_video?(model_id)
-          model_id.match?(/gemini/)
-        end
         def supports_functions?(model_id)
           return false if model_id.match?(/text-embedding|embedding-001|aqa|flash-lite|imagen|gemini-2\.0-flash-lite/)
           model_id.match?(/gemini|pro|flash/)
         end
-        def supports_json_mode?(model_id)
+        def supports_structured_output?(model_id)
           if model_id.match?(/text-embedding|embedding-001|aqa|imagen|gemini-2\.0-flash-lite|gemini-2\.5-pro-exp-03-25/)
             return false
           end
@@ -70,59 +95,6 @@ module RubyLLM
           model_id.match?(/gemini|pro|flash/)
         end
-        def format_display_name(model_id)
-          model_id
-            .delete_prefix('models/')
-            .split('-')
-            .map(&:capitalize)
-            .join(' ')
-            .gsub(/(\d+\.\d+)/, ' \1')
-            .gsub(/\s+/, ' ')
-            .gsub('Aqa', 'AQA')
-            .strip
-        end
-        def supports_caching?(model_id)
-          if model_id.match?(/flash-lite|gemini-2\.5-pro-exp-03-25|aqa|imagen|text-embedding|embedding-001/)
-            return false
-          end
-          model_id.match?(/gemini|pro|flash/)
-        end
-        def supports_tuning?(model_id)
-          model_id.match?(/gemini-1\.5-flash|gemini-1\.5-flash-8b/)
-        end
-        def supports_audio?(model_id)
-          model_id.match?(/gemini|pro|flash/)
-        end
-        def model_type(model_id)
-          case model_id
-          when /text-embedding|embedding|gemini-embedding/ then 'embedding'
-          when /imagen/ then 'image'
-          else 'chat'
-          end
-        end
-        def model_family(model_id)
-          case model_id
-          when /gemini-2\.5-pro-exp-03-25/ then 'gemini25_pro_exp'
-          when /gemini-2\.0-flash-lite/ then 'gemini20_flash_lite'
-          when /gemini-2\.0-flash/ then 'gemini20_flash'
-          when /gemini-1\.5-flash-8b/ then 'gemini15_flash_8b'
-          when /gemini-1\.5-flash/ then 'gemini15_flash'
-          when /gemini-1\.5-pro/ then 'gemini15_pro'
-          when /gemini-embedding-exp/ then 'gemini_embedding_exp'
-          when /text-embedding-004/ then 'embedding4'
-          when /embedding-001/ then 'embedding1'
-          when /aqa/ then 'aqa'
-          when /imagen-3/ then 'imagen3'
-          else 'other'
-          end
-        end
         def pricing_family(model_id)
           case model_id
           when /gemini-2\.5-pro-exp-03-25/ then :pro_2_5 # rubocop:disable Naming/VariableNumber
@@ -139,142 +111,8 @@ module RubyLLM
           end
         end
-        def long_context_model?(model_id)
-          model_id.match?(/gemini-1\.5-(?:pro|flash)|gemini-1\.5-flash-8b/)
-        end
-        def context_length(model_id)
-          context_window_for(model_id)
-        end
-        PRICES = {
-          flash_2: { # rubocop:disable Naming/VariableNumber
-            input: 0.10,
-            output: 0.40,
-            audio_input: 0.70,
-            cache: 0.025,
-            cache_storage: 1.00,
-            grounding_search: 35.00
-          },
-          flash_lite_2: { # rubocop:disable Naming/VariableNumber
-            input: 0.075,
-            output: 0.30
-          },
-          flash: {
-            input: 0.075,
-            output: 0.30,
-            cache: 0.01875,
-            cache_storage: 1.00,
-            grounding_search: 35.00
-          },
-          flash_8b: {
-            input: 0.0375,
-            output: 0.15,
-            cache: 0.01,
-            cache_storage: 0.25,
-            grounding_search: 35.00
-          },
-          pro: {
-            input: 1.25,
-            output: 5.0,
-            cache: 0.3125,
-            cache_storage: 4.50,
-            grounding_search: 35.00
-          },
-          pro_2_5: { # rubocop:disable Naming/VariableNumber
-            input: 0.12,
-            output: 0.50
-          },
-          gemini_embedding: {
-            input: 0.002,
-            output: 0.004
-          },
-          embedding: {
-            input: 0.00,
-            output: 0.00
-          },
-          imagen: {
-            price: 0.03
-          },
-          aqa: {
-            input: 0.00,
-            output: 0.00
-          }
-        }.freeze
-        def default_input_price
-          0.075
-        end
-        def default_output_price
-          0.30
-        end
-        def modalities_for(model_id)
-          modalities = {
-            input: ['text'],
-            output: ['text']
-          }
-          if supports_vision?(model_id)
-            modalities[:input] << 'image'
-            modalities[:input] << 'pdf'
-          end
-          modalities[:input] << 'video' if supports_video?(model_id)
-          modalities[:input] << 'audio' if model_id.match?(/audio/)
-          modalities[:output] << 'embeddings' if model_id.match?(/embedding|gemini-embedding/)
-          modalities[:output] = ['image'] if model_id.match?(/imagen/)
-          modalities
-        end
-        def capabilities_for(model_id)
-          capabilities = ['streaming']
-          capabilities << 'function_calling' if supports_functions?(model_id)
-          capabilities << 'structured_output' if supports_json_mode?(model_id)
-          capabilities << 'batch' if model_id.match?(/embedding|flash/)
-          capabilities << 'caching' if supports_caching?(model_id)
-          capabilities << 'fine_tuning' if supports_tuning?(model_id)
-          capabilities
-        end
-        def pricing_for(model_id)
-          family = pricing_family(model_id)
-          prices = PRICES.fetch(family, { input: default_input_price, output: default_output_price })
-          standard_pricing = {
-            input_per_million: prices[:input],
-            output_per_million: prices[:output]
-          }
-          standard_pricing[:cached_input_per_million] = prices[:input_hit] if prices[:input_hit]
-          batch_pricing = {
-            input_per_million: (standard_pricing[:input_per_million] || 0) * 0.5,
-            output_per_million: (standard_pricing[:output_per_million] || 0) * 0.5
-          }
-          if standard_pricing[:cached_input_per_million]
-            batch_pricing[:cached_input_per_million] = standard_pricing[:cached_input_per_million] * 0.5
-          end
-          pricing = {
-            text_tokens: {
-              standard: standard_pricing,
-              batch: batch_pricing
-            }
-          }
-          if model_id.match?(/embedding|gemini-embedding/)
-            pricing[:embeddings] = {
-              standard: { input_per_million: prices[:price] || 0.002 }
-            }
-          end
-          pricing
-        end
+        module_function :context_window_for, :max_tokens_for, :critical_capabilities_for, :pricing_for,
+                        :supports_vision?, :supports_functions?, :supports_structured_output?, :pricing_family
       end
     end
   end

data/lib/ruby_llm/providers/gemini/chat.rb CHANGED Viewed

@@ -14,7 +14,10 @@ module RubyLLM
           "models/#{@model}:generateContent"
         end
-        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
+        # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil,
+                           thinking: nil, tool_prefs: nil)
+          tool_prefs ||= {}
           @model = model.id
           payload = {
             contents: format_messages(messages),
@@ -26,9 +29,15 @@ module RubyLLM
           payload[:generationConfig].merge!(structured_output_config(schema, model)) if schema
           payload[:generationConfig][:thinkingConfig] = build_thinking_config(model, thinking) if thinking&.enabled?
-          payload[:tools] = format_tools(tools) if tools.any?
+          if tools.any?
+            payload[:tools] = format_tools(tools)
+            # Gemini doesn't support controlling parallel tool calls
+            payload[:toolConfig] = build_tool_config(tool_prefs[:choice]) unless tool_prefs[:choice].nil?
+          end
           payload
         end
+        # rubocop:enable Metrics/ParameterLists,Lint/UnusedMethodArgument
         def build_thinking_config(_model, thinking)
           config = { includeThoughts: true }
@@ -111,6 +120,7 @@ module RubyLLM
             tool_calls: tool_calls,
             input_tokens: data.dig('usageMetadata', 'promptTokenCount'),
             output_tokens: calculate_output_tokens(data),
+            cached_tokens: data.dig('usageMetadata', 'cachedContentTokenCount'),
             thinking_tokens: data.dig('usageMetadata', 'thoughtsTokenCount'),
             model_id: data['modelVersion'] || response.env.url.path.split('/')[3].split(':')[0],
             raw: response
@@ -120,6 +130,9 @@ module RubyLLM
         def convert_schema_to_gemini(schema)
           return nil unless schema
+          # Extract inner schema if wrapper format (e.g., from RubyLLM::Schema.to_json_schema)
+          schema = schema[:schema] || schema
           GeminiSchema.new(schema).to_h
         end
@@ -132,7 +145,10 @@ module RubyLLM
           parts = candidate.dig('content', 'parts')
           return '' unless parts&.any?
-          build_response_content(parts)
+          non_thought_parts = parts.reject { |part| part['thought'] }
+          return '' unless non_thought_parts.any?
+          build_response_content(non_thought_parts)
         end
         def extract_text_parts(parts)
@@ -176,7 +192,7 @@ module RubyLLM
         end
         def build_json_schema(schema)
-          normalized = RubyLLM::Utils.deep_dup(schema)
+          normalized = RubyLLM::Utils.deep_dup(schema[:schema])
           normalized.delete(:strict)
           normalized.delete('strict')
           RubyLLM::Utils.deep_stringify_keys(normalized)

data/lib/ruby_llm/providers/gemini/images.rb CHANGED Viewed

@@ -10,7 +10,7 @@ module RubyLLM
         end
         def render_image_payload(prompt, model:, size:)
-          RubyLLM.logger.debug "Ignoring size #{size}. Gemini does not support image size customization."
+          RubyLLM.logger.debug { "Ignoring size #{size}. Gemini does not support image size customization." }
           @model = model
           {
             instances: [

data/lib/ruby_llm/providers/gemini/models.rb CHANGED Viewed

@@ -17,14 +17,12 @@ module RubyLLM
             Model::Info.new(
               id: model_id,
-              name: model_data['displayName'],
+              name: model_data['displayName'] || model_id,
               provider: slug,
-              family: capabilities.model_family(model_id),
               created_at: nil,
               context_window: model_data['inputTokenLimit'] || capabilities.context_window_for(model_id),
               max_output_tokens: model_data['outputTokenLimit'] || capabilities.max_tokens_for(model_id),
-              modalities: capabilities.modalities_for(model_id),
-              capabilities: capabilities.capabilities_for(model_id),
+              capabilities: capabilities.critical_capabilities_for(model_id),
               pricing: capabilities.pricing_for(model_id),
               metadata: {
                 version: model_data['version'],

data/lib/ruby_llm/providers/gemini/streaming.rb CHANGED Viewed

@@ -22,6 +22,7 @@ module RubyLLM
             ),
             input_tokens: extract_input_tokens(data),
             output_tokens: extract_output_tokens(data),
+            cached_tokens: data.dig('usageMetadata', 'cachedContentTokenCount'),
             thinking_tokens: data.dig('usageMetadata', 'thoughtsTokenCount'),
             tool_calls: extract_tool_calls(data)
           )
@@ -83,7 +84,7 @@ module RubyLLM
           error_data = JSON.parse(data)
           [error_data['error']['code'], error_data['error']['message']]
         rescue JSON::ParserError => e
-          RubyLLM.logger.debug "Failed to parse streaming error: #{e.message}"
+          RubyLLM.logger.debug { "Failed to parse streaming error: #{e.message}" }
           [500, "Failed to parse error: #{data}"]
         end
       end

data/lib/ruby_llm/providers/gemini/tools.rb CHANGED Viewed

@@ -205,6 +205,25 @@ module RubyLLM
           else 'STRING'
           end
         end
+        def build_tool_config(tool_choice)
+          {
+            functionCallingConfig: {
+              mode: forced_tool_choice?(tool_choice) ? 'any' : tool_choice
+            }.tap do |config|
+              # Use allowedFunctionNames to simulate specific tool choice
+              config[:allowedFunctionNames] = [tool_choice] if specific_tool_choice?(tool_choice)
+            end
+          }
+        end
+        def forced_tool_choice?(tool_choice)
+          tool_choice == :required || specific_tool_choice?(tool_choice)
+        end
+        def specific_tool_choice?(tool_choice)
+          !%i[auto none required].include?(tool_choice)
+        end
       end
     end
   end

data/lib/ruby_llm/providers/gemini.rb CHANGED Viewed

@@ -28,6 +28,10 @@ module RubyLLM
           Gemini::Capabilities
         end
+        def configuration_options
+          %i[gemini_api_key gemini_api_base]
+        end
         def configuration_requirements
           %i[gemini_api_key]
         end

data/lib/ruby_llm/providers/gpustack/capabilities.rb ADDED Viewed

@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+module RubyLLM
+  module Providers
+    class GPUStack
+      # Determines capabilities for GPUStack models
+      module Capabilities
+        module_function
+        def supports_tool_choice?(_model_id)
+          false
+        end
+        def supports_tool_parallel_control?(_model_id)
+          false
+        end
+      end
+    end
+  end
+end

data/lib/ruby_llm/providers/gpustack.rb CHANGED Viewed

@@ -21,6 +21,10 @@ module RubyLLM
       end
       class << self
+        def configuration_options
+          %i[gpustack_api_base gpustack_api_key]
+        end
         def local?
           true
         end
@@ -28,6 +32,10 @@ module RubyLLM
         def configuration_requirements
           %i[gpustack_api_base]
         end
+        def capabilities
+          GPUStack::Capabilities
+        end
       end
     end
   end