ruby_llm_community 1.2.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. checksums.yaml +4 -4
  2. data/README.md +13 -9
  3. data/lib/generators/ruby_llm/chat_ui/chat_ui_generator.rb +127 -67
  4. data/lib/generators/ruby_llm/chat_ui/templates/controllers/chats_controller.rb.tt +12 -12
  5. data/lib/generators/ruby_llm/chat_ui/templates/controllers/messages_controller.rb.tt +7 -7
  6. data/lib/generators/ruby_llm/chat_ui/templates/controllers/models_controller.rb.tt +4 -4
  7. data/lib/generators/ruby_llm/chat_ui/templates/jobs/chat_response_job.rb.tt +6 -6
  8. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/_chat.html.erb.tt +4 -4
  9. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/_form.html.erb.tt +5 -5
  10. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/index.html.erb.tt +5 -5
  11. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/new.html.erb.tt +4 -4
  12. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/show.html.erb.tt +8 -8
  13. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_content.html.erb.tt +1 -0
  14. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_form.html.erb.tt +5 -5
  15. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_message.html.erb.tt +9 -6
  16. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_tool_calls.html.erb.tt +7 -0
  17. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/create.turbo_stream.erb.tt +5 -5
  18. data/lib/generators/ruby_llm/chat_ui/templates/views/models/_model.html.erb.tt +9 -9
  19. data/lib/generators/ruby_llm/chat_ui/templates/views/models/index.html.erb.tt +4 -6
  20. data/lib/generators/ruby_llm/chat_ui/templates/views/models/show.html.erb.tt +11 -11
  21. data/lib/generators/ruby_llm/generator_helpers.rb +152 -87
  22. data/lib/generators/ruby_llm/install/install_generator.rb +75 -79
  23. data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +3 -0
  24. data/lib/generators/ruby_llm/install/templates/create_models_migration.rb.tt +5 -0
  25. data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +7 -1
  26. data/lib/generators/ruby_llm/install/templates/initializer.rb.tt +1 -1
  27. data/lib/generators/ruby_llm/upgrade_to_v1_7/upgrade_to_v1_7_generator.rb +88 -85
  28. data/lib/generators/ruby_llm/upgrade_to_v1_9/templates/add_v1_9_message_columns.rb.tt +15 -0
  29. data/lib/generators/ruby_llm/upgrade_to_v1_9/upgrade_to_v1_9_generator.rb +49 -0
  30. data/lib/ruby_llm/active_record/acts_as.rb +23 -16
  31. data/lib/ruby_llm/active_record/chat_methods.rb +41 -13
  32. data/lib/ruby_llm/active_record/message_methods.rb +11 -2
  33. data/lib/ruby_llm/active_record/model_methods.rb +1 -1
  34. data/lib/ruby_llm/aliases.json +61 -32
  35. data/lib/ruby_llm/attachment.rb +42 -11
  36. data/lib/ruby_llm/chat.rb +13 -2
  37. data/lib/ruby_llm/configuration.rb +6 -1
  38. data/lib/ruby_llm/connection.rb +4 -4
  39. data/lib/ruby_llm/content.rb +23 -0
  40. data/lib/ruby_llm/message.rb +17 -9
  41. data/lib/ruby_llm/model/info.rb +4 -0
  42. data/lib/ruby_llm/models.json +7157 -6089
  43. data/lib/ruby_llm/models.rb +14 -22
  44. data/lib/ruby_llm/provider.rb +27 -5
  45. data/lib/ruby_llm/providers/anthropic/chat.rb +18 -5
  46. data/lib/ruby_llm/providers/anthropic/content.rb +44 -0
  47. data/lib/ruby_llm/providers/anthropic/media.rb +6 -5
  48. data/lib/ruby_llm/providers/anthropic/models.rb +9 -2
  49. data/lib/ruby_llm/providers/anthropic/tools.rb +20 -18
  50. data/lib/ruby_llm/providers/bedrock/media.rb +2 -1
  51. data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +9 -2
  52. data/lib/ruby_llm/providers/gemini/chat.rb +353 -72
  53. data/lib/ruby_llm/providers/gemini/media.rb +59 -1
  54. data/lib/ruby_llm/providers/gemini/tools.rb +146 -25
  55. data/lib/ruby_llm/providers/gemini/transcription.rb +116 -0
  56. data/lib/ruby_llm/providers/gemini.rb +2 -1
  57. data/lib/ruby_llm/providers/gpustack/media.rb +1 -0
  58. data/lib/ruby_llm/providers/ollama/media.rb +1 -0
  59. data/lib/ruby_llm/providers/openai/capabilities.rb +15 -7
  60. data/lib/ruby_llm/providers/openai/chat.rb +7 -3
  61. data/lib/ruby_llm/providers/openai/media.rb +2 -1
  62. data/lib/ruby_llm/providers/openai/streaming.rb +7 -3
  63. data/lib/ruby_llm/providers/openai/tools.rb +34 -12
  64. data/lib/ruby_llm/providers/openai/transcription.rb +70 -0
  65. data/lib/ruby_llm/providers/openai_base.rb +1 -0
  66. data/lib/ruby_llm/providers/vertexai/transcription.rb +16 -0
  67. data/lib/ruby_llm/providers/vertexai.rb +11 -11
  68. data/lib/ruby_llm/railtie.rb +24 -22
  69. data/lib/ruby_llm/stream_accumulator.rb +8 -12
  70. data/lib/ruby_llm/tool.rb +126 -0
  71. data/lib/ruby_llm/transcription.rb +35 -0
  72. data/lib/ruby_llm/utils.rb +46 -0
  73. data/lib/ruby_llm/version.rb +1 -1
  74. data/lib/ruby_llm_community.rb +7 -1
  75. metadata +27 -3
data/lib/ruby_llm/providers/gemini/tools.rb

@@ -13,7 +13,41 @@ module RubyLLM
           }]
         end
 
-        def extract_tool_calls(data)
+        def format_tool_call(msg)
+          parts = []
+
+          if msg.content && !(msg.content.respond_to?(:empty?) && msg.content.empty?)
+            formatted_content = Media.format_content(msg.content)
+            parts.concat(formatted_content.is_a?(Array) ? formatted_content : [formatted_content])
+          end
+
+          msg.tool_calls.each_value do |tool_call|
+            parts << {
+              functionCall: {
+                name: tool_call.name,
+                args: tool_call.arguments
+              }
+            }
+          end
+
+          parts
+        end
+
+        def format_tool_result(msg, function_name = nil)
+          function_name ||= msg.tool_call_id
+
+          [{
+            functionResponse: {
+              name: function_name,
+              response: {
+                name: function_name,
+                content: Media.format_content(msg.content)
+              }
+            }
+          }]
+        end
+
+        def extract_tool_calls(data) # rubocop:disable Metrics/PerceivedComplexity
           return nil unless data
 
           candidate = data.is_a?(Hash) ? data.dig('candidates', 0) : nil
@@ -22,49 +56,136 @@ module RubyLLM
          parts = candidate.dig('content', 'parts')
          return nil unless parts.is_a?(Array)
 
-          function_call_part = parts.find { |p| p['functionCall'] }
-          return nil unless function_call_part
-
-          function_data = function_call_part['functionCall']
-          return nil unless function_data
+          tool_calls = parts.each_with_object({}) do |part, result|
+            function_data = part['functionCall']
+            next unless function_data
 
-          id = SecureRandom.uuid
+            id = SecureRandom.uuid
 
-          {
-            id => ToolCall.new(
-              id: id,
+            result[id] = ToolCall.new(
+              id:,
              name: function_data['name'],
-              arguments: function_data['args']
+              arguments: function_data['args'] || {}
            )
-          }
+          end
+
+          tool_calls.empty? ? nil : tool_calls
        end
 
        private
 
        def function_declaration_for(tool)
-          {
+          parameters_schema = tool.params_schema ||
+                              RubyLLM::Tool::SchemaDefinition.from_parameters(tool.parameters)&.json_schema
+
+          declaration = {
            name: tool.name,
-            description: tool.description,
-            parameters: tool.parameters.any? ? format_parameters(tool.parameters) : nil
-          }.compact
+            description: tool.description
+          }
+
+          declaration[:parameters] = convert_tool_schema_to_gemini(parameters_schema) if parameters_schema
+
+          return declaration if tool.provider_params.empty?
+
+          RubyLLM::Utils.deep_merge(declaration, tool.provider_params)
        end
 
-        def format_parameters(parameters)
+        def convert_tool_schema_to_gemini(schema)
+          return nil unless schema
+
+          schema = RubyLLM::Utils.deep_stringify_keys(schema)
+
+          raise ArgumentError, 'Gemini tool parameters must be objects' unless schema['type'] == 'object'
+
          {
            type: 'OBJECT',
-            properties: parameters.transform_values do |param|
-              {
-                type: param_type_for_gemini(param.type),
-                description: param.description
-              }.compact
-            end,
-            required: parameters.select { |_, p| p.required }.keys.map(&:to_s)
+            properties: schema.fetch('properties', {}).transform_values { |property| convert_property(property) },
+            required: (schema['required'] || []).map(&:to_s)
          }
        end
 
+        def convert_property(property_schema) # rubocop:disable Metrics/PerceivedComplexity
+          normalized_schema = normalize_any_of_schema(property_schema)
+          working_schema = normalized_schema || property_schema
+
+          type = param_type_for_gemini(working_schema['type'])
+
+          property = {
+            type: type
+          }
+
+          copy_common_attributes(property, property_schema)
+          copy_common_attributes(property, working_schema)
+
+          case type
+          when 'ARRAY'
+            items_schema = working_schema['items'] || property_schema['items'] || { 'type' => 'string' }
+            property[:items] = convert_property(items_schema)
+            copy_tool_attributes(property, working_schema, %w[minItems maxItems])
+            copy_tool_attributes(property, property_schema, %w[minItems maxItems])
+          when 'OBJECT'
+            nested_properties = working_schema.fetch('properties', {}).transform_values do |child|
+              convert_property(child)
+            end
+            property[:properties] = nested_properties
+            required = working_schema['required'] || property_schema['required']
+            property[:required] = required.map(&:to_s) if required
+          end
+
+          property
+        end
+
+        def copy_common_attributes(target, source)
+          copy_tool_attributes(target, source, %w[description enum format nullable maximum minimum multipleOf])
+        end
+
+        def copy_tool_attributes(target, source, attributes)
+          attributes.each do |attribute|
+            value = schema_value(source, attribute)
+            next if value.nil?
+
+            target[attribute.to_sym] = value
+          end
+        end
+
+        def normalize_any_of_schema(schema) # rubocop:disable Metrics/PerceivedComplexity
+          any_of = schema['anyOf'] || schema[:anyOf]
+          return nil unless any_of.is_a?(Array) && any_of.any?
+
+          null_entries, non_null_entries = any_of.partition { |entry| schema_type(entry).to_s == 'null' }
+
+          if non_null_entries.size == 1 && null_entries.any?
+            normalized = RubyLLM::Utils.deep_dup(non_null_entries.first)
+            normalized['nullable'] = true
+            normalized
+          elsif non_null_entries.any?
+            RubyLLM::Utils.deep_dup(non_null_entries.first)
+          else
+            { 'type' => 'string', 'nullable' => true }
+          end
+        end
+
+        def schema_type(schema)
+          schema['type'] || schema[:type]
+        end
+
+        def schema_value(source, attribute) # rubocop:disable Metrics/PerceivedComplexity
+          case attribute
+          when 'multipleOf'
+            source['multipleOf'] || source[:multipleOf] || source['multiple_of'] || source[:multiple_of]
+          when 'minItems'
+            source['minItems'] || source[:minItems] || source['min_items'] || source[:min_items]
+          when 'maxItems'
+            source['maxItems'] || source[:maxItems] || source['max_items'] || source[:max_items]
+          else
+            source[attribute] || source[attribute.to_sym]
+          end
+        end
+
        def param_type_for_gemini(type)
          case type.to_s.downcase
-          when 'integer', 'number', 'float' then 'NUMBER'
+          when 'integer' then 'INTEGER'
+          when 'number', 'float', 'double' then 'NUMBER'
          when 'boolean' then 'BOOLEAN'
          when 'array' then 'ARRAY'
          when 'object' then 'OBJECT'
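
Note on the new schema conversion: convert_tool_schema_to_gemini walks an ordinary JSON Schema and emits Gemini's uppercase typed schema, folding anyOf-with-null into a nullable flag along the way. A worked sketch on a hypothetical input (it assumes the unchanged else branch of param_type_for_gemini, not visible in this hunk, maps remaining types such as 'string' to 'STRING'):

    input = {
      'type' => 'object',
      'properties' => {
        'tags'  => { 'type' => 'array', 'items' => { 'type' => 'string' }, 'minItems' => 1 },
        'limit' => { 'anyOf' => [{ 'type' => 'integer' }, { 'type' => 'null' }] }
      },
      'required' => ['tags']
    }

    convert_tool_schema_to_gemini(input)
    # => {
    #      type: 'OBJECT',
    #      properties: {
    #        'tags'  => { type: 'ARRAY', items: { type: 'STRING' }, minItems: 1 },
    #        'limit' => { type: 'INTEGER', nullable: true }   # anyOf [integer, null] normalized
    #      },
    #      required: ['tags']
    #    }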
data/lib/ruby_llm/providers/gemini/transcription.rb (new file)

@@ -0,0 +1,116 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class Gemini
+      # Audio transcription helpers for the Gemini API implementation
+      module Transcription
+        DEFAULT_PROMPT = 'Transcribe the provided audio and respond with only the transcript text.'
+
+        def transcribe(audio_file, model:, language:, **options)
+          attachment = Attachment.new(audio_file)
+          payload = render_transcription_payload(attachment, language:, **options)
+          response = @connection.post(transcription_url(model), payload)
+          parse_transcription_response(response, model:)
+        end
+
+        private
+
+        def transcription_url(model)
+          "models/#{model}:generateContent"
+        end
+
+        def render_transcription_payload(attachment, language:, **options)
+          prompt = build_prompt(options[:prompt], language)
+          audio_part = format_audio_part(attachment)
+
+          raise UnsupportedAttachmentError, attachment.mime_type unless attachment.audio?
+
+          payload = {
+            contents: [
+              {
+                role: 'user',
+                parts: [
+                  { text: prompt },
+                  audio_part
+                ]
+              }
+            ]
+          }
+
+          generation_config = build_generation_config(options)
+          payload[:generationConfig] = generation_config unless generation_config.empty?
+          payload[:safetySettings] = options[:safety_settings] if options[:safety_settings]
+
+          payload
+        end
+
+        def build_generation_config(options)
+          config = {}
+          response_mime_type = options.fetch(:response_mime_type, 'text/plain')
+
+          config[:responseMimeType] = response_mime_type if response_mime_type
+          config[:temperature] = options[:temperature] if options.key?(:temperature)
+          config[:maxOutputTokens] = options[:max_output_tokens] if options[:max_output_tokens]
+
+          config
+        end
+
+        def build_prompt(custom_prompt, language)
+          prompt = DEFAULT_PROMPT
+          prompt += " Respond in the #{language} language." if language
+          prompt += " #{custom_prompt}" if custom_prompt
+          prompt
+        end
+
+        def format_audio_part(attachment)
+          {
+            inline_data: {
+              mime_type: attachment.mime_type,
+              data: attachment.encoded
+            }
+          }
+        end
+
+        def parse_transcription_response(response, model:)
+          data = response.body
+          text = extract_text(data)
+
+          usage = extract_usage(data)
+
+          RubyLLM::Transcription.new(
+            text: text,
+            model: model,
+            input_tokens: usage[:input_tokens],
+            output_tokens: usage[:output_tokens]
+          )
+        end
+
+        def extract_text(data)
+          candidate = data.is_a?(Hash) ? data.dig('candidates', 0) : nil
+          return unless candidate
+
+          parts = candidate.dig('content', 'parts') || []
+          texts = parts.filter_map { |part| part['text'] }
+          texts.join if texts.any?
+        end
+
+        def extract_usage(data)
+          metadata = data.is_a?(Hash) ? data['usageMetadata'] : nil
+          return { input_tokens: nil, output_tokens: nil } unless metadata
+
+          {
+            input_tokens: metadata['promptTokenCount'],
+            output_tokens: sum_output_tokens(metadata)
+          }
+        end
+
+        def sum_output_tokens(metadata)
+          candidates = metadata['candidatesTokenCount'] || 0
+          thoughts = metadata['thoughtsTokenCount'] || 0
+          candidates + thoughts
+        end
+      end
+    end
+  end
+end
data/lib/ruby_llm/providers/gemini.rb

@@ -8,12 +8,13 @@ module RubyLLM
      include Gemini::Embeddings
      include Gemini::Images
      include Gemini::Models
+      include Gemini::Transcription
      include Gemini::Streaming
      include Gemini::Tools
      include Gemini::Media
 
      def api_base
-        'https://generativelanguage.googleapis.com/v1beta'
+        @config.gemini_api_base || 'https://generativelanguage.googleapis.com/v1beta'
      end
 
      def headers
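
The api_base change makes the Gemini endpoint overridable, e.g. for proxies or regional mirrors. A sketch, assuming the standard RubyLLM.configure block (the gemini_api_base setting is added to configuration.rb in this same release; the proxy URL is hypothetical):

    RubyLLM.configure do |config|
      config.gemini_api_key  = ENV['GEMINI_API_KEY']
      config.gemini_api_base = 'https://llm-proxy.internal/v1beta' # nil falls back to the Google endpoint
    end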
data/lib/ruby_llm/providers/gpustack/media.rb

@@ -10,6 +10,7 @@ module RubyLLM
        module_function
 
        def format_content(content)
+          return content.value if content.is_a?(RubyLLM::Content::Raw)
          return content.to_json if content.is_a?(Hash) || content.is_a?(Array)
          return content unless content.is_a?(Content)
 
data/lib/ruby_llm/providers/ollama/media.rb

@@ -10,6 +10,7 @@ module RubyLLM
        module_function
 
        def format_content(content)
+          return content.value if content.is_a?(RubyLLM::Content::Raw)
          return content.to_json if content.is_a?(Hash) || content.is_a?(Array)
          return content unless content.is_a?(Content)
 
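
Both media hunks add the same escape hatch: a RubyLLM::Content::Raw value bypasses all formatting and is sent verbatim. The Raw class itself is added in data/lib/ruby_llm/content.rb in this release and is not shown here, so the constructor below is an assumption:

    raw = RubyLLM::Content::Raw.new([{ 'type' => 'text', 'text' => 'hi' }]) # assumed constructor
    RubyLLM::Providers::Ollama::Media.format_content(raw)
    # => [{ 'type' => 'text', 'text' => 'hi' }]   -- returned untouched via raw.value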
data/lib/ruby_llm/providers/openai/capabilities.rb

@@ -27,6 +27,9 @@ module RubyLLM
        gpt4o_realtime: /^gpt-4o-realtime/,
        gpt4o_search: /^gpt-4o-search/,
        gpt4o_transcribe: /^gpt-4o-transcribe/,
+        gpt5: /^gpt-5/,
+        gpt5_mini: /^gpt-5-mini/,
+        gpt5_nano: /^gpt-5-nano/,
        o1: /^o1(?!-(?:mini|pro))/,
        o1_mini: /^o1-mini/,
        o1_pro: /^o1-pro/,
@@ -45,7 +48,7 @@ module RubyLLM
      def context_window_for(model_id)
        case model_family(model_id)
        when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 1_047_576
-        when 'chatgpt4o', 'gpt4_turbo', 'gpt4o', 'gpt4o_audio', 'gpt4o_mini',
+        when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'chatgpt4o', 'gpt4_turbo', 'gpt4o', 'gpt4o_audio', 'gpt4o_mini',
             'gpt4o_mini_audio', 'gpt4o_mini_realtime', 'gpt4o_realtime',
             'gpt4o_search', 'gpt4o_transcribe', 'gpt4o_mini_search', 'o1_mini' then 128_000
        when 'gpt4' then 8_192
@@ -60,6 +63,7 @@ module RubyLLM
 
      def max_tokens_for(model_id)
        case model_family(model_id)
+        when 'gpt5', 'gpt5_mini', 'gpt5_nano' then 400_000
        when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 32_768
        when 'chatgpt4o', 'gpt4o', 'gpt4o_mini', 'gpt4o_mini_search' then 16_384
        when 'babbage', 'davinci' then 16_384 # rubocop:disable Lint/DuplicateBranch
@@ -77,16 +81,17 @@ module RubyLLM
 
      def supports_vision?(model_id)
        case model_family(model_id)
-        when 'gpt41', 'gpt41_mini', 'gpt41_nano', 'chatgpt4o', 'gpt4', 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1',
-             'o1_pro', 'moderation', 'gpt4o_search', 'gpt4o_mini_search' then true
+        when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'chatgpt4o', 'gpt4',
+             'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'moderation', 'gpt4o_search',
+             'gpt4o_mini_search' then true
        else false
        end
      end
 
      def supports_functions?(model_id)
        case model_family(model_id)
-        when 'gpt41', 'gpt41_mini', 'gpt41_nano', 'gpt4', 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro',
-             'o3_mini' then true
+        when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'gpt4', 'gpt4_turbo', 'gpt4o',
+             'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini' then true
        when 'chatgpt4o', 'gpt35_turbo', 'o1_mini', 'gpt4o_mini_tts',
             'gpt4o_transcribe', 'gpt4o_search', 'gpt4o_mini_search' then false
        else false # rubocop:disable Lint/DuplicateBranch
@@ -95,8 +100,8 @@ module RubyLLM
 
      def supports_structured_output?(model_id)
        case model_family(model_id)
-        when 'gpt41', 'gpt41_mini', 'gpt41_nano', 'chatgpt4o', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro',
-             'o3_mini' then true
+        when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'chatgpt4o', 'gpt4o',
+             'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini' then true
        else false
        end
      end
@@ -107,6 +112,9 @@ module RubyLLM
 
      PRICES = {
        gpt_image_1: { input_text: 5.0, input_image: 10.0, output: 8.0, cached_input: 0.5 }, # rubocop:disable Naming/VariableNumber
+        gpt5: { input: 1.25, output: 10.0, cached_input: 0.125 },
+        gpt5_mini: { input: 0.25, output: 2.0, cached_input: 0.025 },
+        gpt5_nano: { input: 0.05, output: 0.4, cached_input: 0.005 },
        gpt41: { input: 2.0, output: 8.0, cached_input: 0.5 },
        gpt41_mini: { input: 0.4, output: 1.6, cached_input: 0.1 },
        gpt41_nano: { input: 0.1, output: 0.4 },
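
For scale, a rough cost sketch from the new gpt5 row, assuming (as with the neighbouring entries) prices are USD per million tokens and that cached prompt tokens bill at the cached_input rate instead of the full input rate:

    prices = { input: 1.25, output: 10.0, cached_input: 0.125 } # gpt5 row above
    input_tokens, cached_tokens, output_tokens = 50_000, 20_000, 4_000
    cost = ((input_tokens - cached_tokens) * prices[:input] +
            cached_tokens * prices[:cached_input] +
            output_tokens * prices[:output]) / 1_000_000.0
    # => 0.08 (USD) for this hypothetical request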
data/lib/ruby_llm/providers/openai/chat.rb

@@ -48,13 +48,17 @@ module RubyLLM
          message_data = data.dig('choices', 0, 'message')
          return unless message_data
 
+          usage = data['usage'] || {}
+          cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
+
          Message.new(
            role: :assistant,
            content: message_data['content'],
            tool_calls: parse_tool_calls(message_data['tool_calls']),
-            input_tokens: data['usage']['prompt_tokens'],
-            output_tokens: data['usage']['completion_tokens'],
-            cached_tokens: data.dig('usage', 'prompt_tokens_details', 'cached_tokens'),
+            input_tokens: usage['prompt_tokens'],
+            output_tokens: usage['completion_tokens'],
+            cached_tokens: cached_tokens,
+            cache_creation_tokens: 0,
            model_id: data['model'],
            raw: response
          )
data/lib/ruby_llm/providers/openai/media.rb

@@ -7,7 +7,8 @@ module RubyLLM
      module Media
        module_function
 
-        def format_content(content)
+        def format_content(content) # rubocop:disable Metrics/PerceivedComplexity
+          return content.value if content.is_a?(RubyLLM::Content::Raw)
          return content.to_json if content.is_a?(Hash) || content.is_a?(Array)
          return content unless content.is_a?(Content)
 
data/lib/ruby_llm/providers/openai/streaming.rb

@@ -86,14 +86,18 @@ module RubyLLM
        end
 
        def build_chat_completions_chunk(data)
+          usage = data['usage'] || {}
+          cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
+
          Chunk.new(
            role: :assistant,
            model_id: data['model'],
            content: data.dig('choices', 0, 'delta', 'content'),
            tool_calls: parse_tool_calls(data.dig('choices', 0, 'delta', 'tool_calls'), parse_arguments: false),
-            input_tokens: data.dig('usage', 'prompt_tokens'),
-            output_tokens: data.dig('usage', 'completion_tokens'),
-            cached_tokens: data.dig('usage', 'cached_tokens')
+            input_tokens: usage['prompt_tokens'],
+            output_tokens: usage['completion_tokens'],
+            cached_tokens: cached_tokens,
+            cache_creation_tokens: 0
          )
        end
 
data/lib/ruby_llm/providers/openai/tools.rb

@@ -7,24 +7,54 @@ module RubyLLM
      module Tools
        module_function
 
+        EMPTY_PARAMETERS_SCHEMA = {
+          'type' => 'object',
+          'properties' => {},
+          'required' => [],
+          'additionalProperties' => false,
+          'strict' => true
+        }.freeze
+
+        def parameters_schema_for(tool)
+          tool.params_schema ||
+            schema_from_parameters(tool.parameters)
+        end
+
+        def schema_from_parameters(parameters)
+          schema_definition = RubyLLM::Tool::SchemaDefinition.from_parameters(parameters)
+          schema_definition&.json_schema || EMPTY_PARAMETERS_SCHEMA
+        end
+
        def chat_tool_for(tool)
-          {
+          parameters_schema = parameters_schema_for(tool)
+
+          definition = {
            type: 'function',
            function: {
              name: tool.name,
              description: tool.description,
-              parameters: tool_parameters_for(tool)
+              parameters: parameters_schema
            }
          }
+
+          return definition if tool.provider_params.empty?
+
+          RubyLLM::Utils.deep_merge(definition, tool.provider_params)
        end
 
        def response_tool_for(tool)
-          {
+          parameters_schema = parameters_schema_for(tool)
+
+          definition = {
            type: 'function',
            name: tool.name,
            description: tool.description,
-            parameters: tool_parameters_for(tool)
+            parameters: parameters_schema
          }
+
+          return definition if tool.provider_params.empty?
+
+          RubyLLM::Utils.deep_merge(definition, tool.provider_params)
        end
 
        def param_schema(param)
@@ -34,14 +64,6 @@ module RubyLLM
          }.compact
        end
 
-        def tool_parameters_for(tool)
-          {
-            type: 'object',
-            properties: tool.parameters.transform_values { |param| param_schema(param) },
-            required: tool.parameters.select { |_, p| p.required }.keys
-          }
-        end
-
        def format_tool_calls(tool_calls)
          return nil unless tool_calls&.any?
 
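
The net effect of chat_tool_for is easiest to see on a tool with no declared parameters and a provider_params override (the params_schema and provider_params accessors live in data/lib/ruby_llm/tool.rb, which this section does not show). A hypothetical result:

    # Given tool.name == 'ping', tool.description == 'Health check',
    # no parameters, and tool.provider_params == { function: { strict: true } }:
    chat_tool_for(tool)
    # => {
    #      type: 'function',
    #      function: {
    #        name: 'ping',
    #        description: 'Health check',
    #        parameters: EMPTY_PARAMETERS_SCHEMA,
    #        strict: true   # injected by RubyLLM::Utils.deep_merge
    #      }
    #    }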
data/lib/ruby_llm/providers/openai/transcription.rb (new file)

@@ -0,0 +1,70 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class OpenAI
+      # Audio transcription methods for the OpenAI API integration
+      module Transcription
+        module_function
+
+        def transcription_url
+          'audio/transcriptions'
+        end
+
+        def render_transcription_payload(file_part, model:, language:, **options)
+          {
+            model: model,
+            file: file_part,
+            language: language,
+            chunking_strategy: (options[:chunking_strategy] || 'auto' if supports_chunking_strategy?(model, options)),
+            response_format: response_format_for(model, options),
+            prompt: options[:prompt],
+            temperature: options[:temperature],
+            timestamp_granularities: options[:timestamp_granularities],
+            known_speaker_names: options[:speaker_names],
+            known_speaker_references: encode_speaker_references(options[:speaker_references])
+          }.compact
+        end
+
+        def encode_speaker_references(references)
+          return nil unless references
+
+          references.map do |ref|
+            Attachment.new(ref).for_llm
+          end
+        end
+
+        def response_format_for(model, options)
+          return options[:response_format] if options.key?(:response_format)
+
+          'diarized_json' if model.include?('diarize')
+        end
+
+        def supports_chunking_strategy?(model, options)
+          return false if model.start_with?('whisper')
+          return true if options.key?(:chunking_strategy)
+
+          model.include?('diarize')
+        end
+
+        def parse_transcription_response(response, model:)
+          data = response.body
+
+          return RubyLLM::Transcription.new(text: data, model: model) if data.is_a?(String)
+
+          usage = data['usage'] || {}
+
+          RubyLLM::Transcription.new(
+            text: data['text'],
+            model: model,
+            language: data['language'],
+            duration: data['duration'],
+            segments: data['segments'],
+            input_tokens: usage['input_tokens'] || usage['prompt_tokens'],
+            output_tokens: usage['output_tokens'] || usage['completion_tokens']
+          )
+        end
+      end
+    end
+  end
+end
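
Because the module uses module_function, the model-dependent rules above can be exercised directly. A sketch (the model ids are illustrative):

    t = RubyLLM::Providers::OpenAI::Transcription
    t.supports_chunking_strategy?('whisper-1', {})                 # => false, whisper models never chunk
    t.supports_chunking_strategy?('gpt-4o-transcribe-diarize', {}) # => true
    t.response_format_for('gpt-4o-transcribe-diarize', {})         # => 'diarized_json'
    t.response_format_for('gpt-4o-transcribe', {})                 # => nil, server default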
data/lib/ruby_llm/providers/openai_base.rb

@@ -14,6 +14,7 @@ module RubyLLM
      include OpenAI::Tools
      include OpenAI::Images
      include OpenAI::Media
+      include OpenAI::Transcription
 
      def api_base
        @config.openai_api_base || 'https://api.openai.com/v1'
data/lib/ruby_llm/providers/vertexai/transcription.rb (new file)

@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class VertexAI
+      # Vertex AI specific helpers for audio transcription
+      module Transcription
+        private
+
+        def transcription_url(model)
+          "projects/#{@config.vertexai_project_id}/locations/#{@config.vertexai_location}/publishers/google/models/#{model}:generateContent" # rubocop:disable Layout/LineLength
+        end
+      end
+    end
+  end
+end
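
The Vertex AI variant only swaps the request path; the shared Gemini transcription flow does the rest. With hypothetical configuration values, the private helper resolves to:

    project, location, model = 'my-project', 'us-central1', 'gemini-2.5-flash'
    "projects/#{project}/locations/#{location}/publishers/google/models/#{model}:generateContent"
    # => "projects/my-project/locations/us-central1/publishers/google/models/gemini-2.5-flash:generateContent"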