dify_llm 1.8.1 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. checksums.yaml +4 -4
  2. data/README.md +12 -7
  3. data/lib/generators/ruby_llm/chat_ui/chat_ui_generator.rb +117 -69
  4. data/lib/generators/ruby_llm/chat_ui/templates/controllers/chats_controller.rb.tt +12 -12
  5. data/lib/generators/ruby_llm/chat_ui/templates/controllers/messages_controller.rb.tt +7 -7
  6. data/lib/generators/ruby_llm/chat_ui/templates/controllers/models_controller.rb.tt +4 -4
  7. data/lib/generators/ruby_llm/chat_ui/templates/jobs/chat_response_job.rb.tt +6 -6
  8. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/_chat.html.erb.tt +4 -4
  9. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/_form.html.erb.tt +5 -5
  10. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/index.html.erb.tt +5 -5
  11. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/new.html.erb.tt +4 -4
  12. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/show.html.erb.tt +8 -8
  13. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_form.html.erb.tt +5 -5
  14. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_message.html.erb.tt +9 -6
  15. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_tool_calls.html.erb.tt +7 -0
  16. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/create.turbo_stream.erb.tt +5 -5
  17. data/lib/generators/ruby_llm/chat_ui/templates/views/models/_model.html.erb.tt +9 -9
  18. data/lib/generators/ruby_llm/chat_ui/templates/views/models/index.html.erb.tt +4 -6
  19. data/lib/generators/ruby_llm/chat_ui/templates/views/models/show.html.erb.tt +11 -11
  20. data/lib/generators/ruby_llm/generator_helpers.rb +131 -87
  21. data/lib/generators/ruby_llm/install/install_generator.rb +75 -79
  22. data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +3 -0
  23. data/lib/generators/ruby_llm/install/templates/initializer.rb.tt +1 -1
  24. data/lib/generators/ruby_llm/upgrade_to_v1_7/upgrade_to_v1_7_generator.rb +88 -85
  25. data/lib/generators/ruby_llm/upgrade_to_v1_9/templates/add_v1_9_message_columns.rb.tt +15 -0
  26. data/lib/generators/ruby_llm/upgrade_to_v1_9/upgrade_to_v1_9_generator.rb +49 -0
  27. data/lib/ruby_llm/active_record/acts_as.rb +17 -8
  28. data/lib/ruby_llm/active_record/chat_methods.rb +41 -13
  29. data/lib/ruby_llm/active_record/message_methods.rb +11 -2
  30. data/lib/ruby_llm/active_record/model_methods.rb +1 -1
  31. data/lib/ruby_llm/aliases.json +62 -20
  32. data/lib/ruby_llm/attachment.rb +8 -0
  33. data/lib/ruby_llm/chat.rb +13 -2
  34. data/lib/ruby_llm/configuration.rb +6 -1
  35. data/lib/ruby_llm/connection.rb +4 -4
  36. data/lib/ruby_llm/content.rb +23 -0
  37. data/lib/ruby_llm/message.rb +11 -6
  38. data/lib/ruby_llm/model/info.rb +4 -0
  39. data/lib/ruby_llm/models.json +9410 -7793
  40. data/lib/ruby_llm/models.rb +14 -22
  41. data/lib/ruby_llm/provider.rb +23 -1
  42. data/lib/ruby_llm/providers/anthropic/chat.rb +22 -3
  43. data/lib/ruby_llm/providers/anthropic/content.rb +44 -0
  44. data/lib/ruby_llm/providers/anthropic/media.rb +2 -1
  45. data/lib/ruby_llm/providers/anthropic/models.rb +15 -0
  46. data/lib/ruby_llm/providers/anthropic/streaming.rb +2 -0
  47. data/lib/ruby_llm/providers/anthropic/tools.rb +20 -18
  48. data/lib/ruby_llm/providers/bedrock/media.rb +2 -1
  49. data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +15 -0
  50. data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +2 -0
  51. data/lib/ruby_llm/providers/dify/chat.rb +16 -5
  52. data/lib/ruby_llm/providers/gemini/chat.rb +352 -69
  53. data/lib/ruby_llm/providers/gemini/media.rb +59 -1
  54. data/lib/ruby_llm/providers/gemini/tools.rb +146 -25
  55. data/lib/ruby_llm/providers/gemini/transcription.rb +116 -0
  56. data/lib/ruby_llm/providers/gemini.rb +2 -1
  57. data/lib/ruby_llm/providers/gpustack/media.rb +1 -0
  58. data/lib/ruby_llm/providers/ollama/media.rb +1 -0
  59. data/lib/ruby_llm/providers/openai/chat.rb +7 -2
  60. data/lib/ruby_llm/providers/openai/media.rb +2 -1
  61. data/lib/ruby_llm/providers/openai/streaming.rb +7 -2
  62. data/lib/ruby_llm/providers/openai/tools.rb +26 -6
  63. data/lib/ruby_llm/providers/openai/transcription.rb +70 -0
  64. data/lib/ruby_llm/providers/openai.rb +1 -0
  65. data/lib/ruby_llm/providers/vertexai/transcription.rb +16 -0
  66. data/lib/ruby_llm/providers/vertexai.rb +3 -0
  67. data/lib/ruby_llm/stream_accumulator.rb +10 -4
  68. data/lib/ruby_llm/tool.rb +126 -0
  69. data/lib/ruby_llm/transcription.rb +35 -0
  70. data/lib/ruby_llm/utils.rb +46 -0
  71. data/lib/ruby_llm/version.rb +1 -1
  72. data/lib/ruby_llm.rb +6 -0
  73. metadata +25 -3
data/lib/ruby_llm/providers/gemini/tools.rb
@@ -13,7 +13,41 @@ module RubyLLM
         }]
       end
 
-      def extract_tool_calls(data)
+      def format_tool_call(msg)
+        parts = []
+
+        if msg.content && !(msg.content.respond_to?(:empty?) && msg.content.empty?)
+          formatted_content = Media.format_content(msg.content)
+          parts.concat(formatted_content.is_a?(Array) ? formatted_content : [formatted_content])
+        end
+
+        msg.tool_calls.each_value do |tool_call|
+          parts << {
+            functionCall: {
+              name: tool_call.name,
+              args: tool_call.arguments
+            }
+          }
+        end
+
+        parts
+      end
+
+      def format_tool_result(msg, function_name = nil)
+        function_name ||= msg.tool_call_id
+
+        [{
+          functionResponse: {
+            name: function_name,
+            response: {
+              name: function_name,
+              content: Media.format_content(msg.content)
+            }
+          }
+        }]
+      end
+
+      def extract_tool_calls(data) # rubocop:disable Metrics/PerceivedComplexity
         return nil unless data
 
         candidate = data.is_a?(Hash) ? data.dig('candidates', 0) : nil
@@ -22,49 +56,136 @@ module RubyLLM
         parts = candidate.dig('content', 'parts')
         return nil unless parts.is_a?(Array)
 
-        function_call_part = parts.find { |p| p['functionCall'] }
-        return nil unless function_call_part
-
-        function_data = function_call_part['functionCall']
-        return nil unless function_data
+        tool_calls = parts.each_with_object({}) do |part, result|
+          function_data = part['functionCall']
+          next unless function_data
 
-        id = SecureRandom.uuid
+          id = SecureRandom.uuid
 
-        {
-          id => ToolCall.new(
-            id: id,
+          result[id] = ToolCall.new(
+            id:,
             name: function_data['name'],
-            arguments: function_data['args']
+            arguments: function_data['args'] || {}
           )
-        }
+        end
+
+        tool_calls.empty? ? nil : tool_calls
       end
 
       private
 
       def function_declaration_for(tool)
-        {
+        parameters_schema = tool.params_schema ||
+                            RubyLLM::Tool::SchemaDefinition.from_parameters(tool.parameters)&.json_schema
+
+        declaration = {
           name: tool.name,
-          description: tool.description,
-          parameters: tool.parameters.any? ? format_parameters(tool.parameters) : nil
-        }.compact
+          description: tool.description
+        }
+
+        declaration[:parameters] = convert_tool_schema_to_gemini(parameters_schema) if parameters_schema
+
+        return declaration if tool.provider_params.empty?
+
+        RubyLLM::Utils.deep_merge(declaration, tool.provider_params)
      end
 
-      def format_parameters(parameters)
+      def convert_tool_schema_to_gemini(schema)
+        return nil unless schema
+
+        schema = RubyLLM::Utils.deep_stringify_keys(schema)
+
+        raise ArgumentError, 'Gemini tool parameters must be objects' unless schema['type'] == 'object'
+
         {
           type: 'OBJECT',
-          properties: parameters.transform_values do |param|
-            {
-              type: param_type_for_gemini(param.type),
-              description: param.description
-            }.compact
-          end,
-          required: parameters.select { |_, p| p.required }.keys.map(&:to_s)
+          properties: schema.fetch('properties', {}).transform_values { |property| convert_property(property) },
+          required: (schema['required'] || []).map(&:to_s)
         }
       end
 
+      def convert_property(property_schema) # rubocop:disable Metrics/PerceivedComplexity
+        normalized_schema = normalize_any_of_schema(property_schema)
+        working_schema = normalized_schema || property_schema
+
+        type = param_type_for_gemini(working_schema['type'])
+
+        property = {
+          type: type
+        }
+
+        copy_common_attributes(property, property_schema)
+        copy_common_attributes(property, working_schema)
+
+        case type
+        when 'ARRAY'
+          items_schema = working_schema['items'] || property_schema['items'] || { 'type' => 'string' }
+          property[:items] = convert_property(items_schema)
+          copy_tool_attributes(property, working_schema, %w[minItems maxItems])
+          copy_tool_attributes(property, property_schema, %w[minItems maxItems])
+        when 'OBJECT'
+          nested_properties = working_schema.fetch('properties', {}).transform_values do |child|
+            convert_property(child)
+          end
+          property[:properties] = nested_properties
+          required = working_schema['required'] || property_schema['required']
+          property[:required] = required.map(&:to_s) if required
+        end
+
+        property
+      end
+
+      def copy_common_attributes(target, source)
+        copy_tool_attributes(target, source, %w[description enum format nullable maximum minimum multipleOf])
+      end
+
+      def copy_tool_attributes(target, source, attributes)
+        attributes.each do |attribute|
+          value = schema_value(source, attribute)
+          next if value.nil?
+
+          target[attribute.to_sym] = value
+        end
+      end
+
+      def normalize_any_of_schema(schema) # rubocop:disable Metrics/PerceivedComplexity
+        any_of = schema['anyOf'] || schema[:anyOf]
+        return nil unless any_of.is_a?(Array) && any_of.any?
+
+        null_entries, non_null_entries = any_of.partition { |entry| schema_type(entry).to_s == 'null' }
+
+        if non_null_entries.size == 1 && null_entries.any?
+          normalized = RubyLLM::Utils.deep_dup(non_null_entries.first)
+          normalized['nullable'] = true
+          normalized
+        elsif non_null_entries.any?
+          RubyLLM::Utils.deep_dup(non_null_entries.first)
+        else
+          { 'type' => 'string', 'nullable' => true }
+        end
+      end
+
+      def schema_type(schema)
+        schema['type'] || schema[:type]
+      end
+
+      def schema_value(source, attribute) # rubocop:disable Metrics/PerceivedComplexity
+        case attribute
+        when 'multipleOf'
+          source['multipleOf'] || source[:multipleOf] || source['multiple_of'] || source[:multiple_of]
+        when 'minItems'
+          source['minItems'] || source[:minItems] || source['min_items'] || source[:min_items]
+        when 'maxItems'
+          source['maxItems'] || source[:maxItems] || source['max_items'] || source[:max_items]
+        else
+          source[attribute] || source[attribute.to_sym]
+        end
+      end
+
       def param_type_for_gemini(type)
         case type.to_s.downcase
-        when 'integer', 'number', 'float' then 'NUMBER'
+        when 'integer' then 'INTEGER'
+        when 'number', 'float', 'double' then 'NUMBER'
         when 'boolean' then 'BOOLEAN'
         when 'array' then 'ARRAY'
         when 'object' then 'OBJECT'
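
To make the new schema path concrete, here is a hedged sketch of what convert_tool_schema_to_gemini produces for a small JSON Schema. The sample tool schema is invented, but the transformation follows the converters in the hunk above: anyOf-with-null collapses to nullable, types are upcased, and integer now maps to INTEGER rather than NUMBER.

    # Invented example schema for a hypothetical tool's parameter object.
    schema = {
      'type' => 'object',
      'properties' => {
        'count' => { 'type' => 'integer', 'description' => 'How many results to return' },
        'tag' => { 'anyOf' => [{ 'type' => 'string' }, { 'type' => 'null' }] }
      },
      'required' => ['count']
    }

    convert_tool_schema_to_gemini(schema)
    # => {
    #      type: 'OBJECT',
    #      properties: {
    #        'count' => { type: 'INTEGER', description: 'How many results to return' },
    #        'tag'   => { type: 'STRING', nullable: true }
    #      },
    #      required: ['count']
    #    }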
data/lib/ruby_llm/providers/gemini/transcription.rb
@@ -0,0 +1,116 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class Gemini
+      # Audio transcription helpers for the Gemini API implementation
+      module Transcription
+        DEFAULT_PROMPT = 'Transcribe the provided audio and respond with only the transcript text.'
+
+        def transcribe(audio_file, model:, language:, **options)
+          attachment = Attachment.new(audio_file)
+          payload = render_transcription_payload(attachment, language:, **options)
+          response = @connection.post(transcription_url(model), payload)
+          parse_transcription_response(response, model:)
+        end
+
+        private
+
+        def transcription_url(model)
+          "models/#{model}:generateContent"
+        end
+
+        def render_transcription_payload(attachment, language:, **options)
+          prompt = build_prompt(options[:prompt], language)
+          audio_part = format_audio_part(attachment)
+
+          raise UnsupportedAttachmentError, attachment.mime_type unless attachment.audio?
+
+          payload = {
+            contents: [
+              {
+                role: 'user',
+                parts: [
+                  { text: prompt },
+                  audio_part
+                ]
+              }
+            ]
+          }
+
+          generation_config = build_generation_config(options)
+          payload[:generationConfig] = generation_config unless generation_config.empty?
+          payload[:safetySettings] = options[:safety_settings] if options[:safety_settings]
+
+          payload
+        end
+
+        def build_generation_config(options)
+          config = {}
+          response_mime_type = options.fetch(:response_mime_type, 'text/plain')
+
+          config[:responseMimeType] = response_mime_type if response_mime_type
+          config[:temperature] = options[:temperature] if options.key?(:temperature)
+          config[:maxOutputTokens] = options[:max_output_tokens] if options[:max_output_tokens]
+
+          config
+        end
+
+        def build_prompt(custom_prompt, language)
+          prompt = DEFAULT_PROMPT
+          prompt += " Respond in the #{language} language." if language
+          prompt += " #{custom_prompt}" if custom_prompt
+          prompt
+        end
+
+        def format_audio_part(attachment)
+          {
+            inline_data: {
+              mime_type: attachment.mime_type,
+              data: attachment.encoded
+            }
+          }
+        end
+
+        def parse_transcription_response(response, model:)
+          data = response.body
+          text = extract_text(data)
+
+          usage = extract_usage(data)
+
+          RubyLLM::Transcription.new(
+            text: text,
+            model: model,
+            input_tokens: usage[:input_tokens],
+            output_tokens: usage[:output_tokens]
+          )
+        end
+
+        def extract_text(data)
+          candidate = data.is_a?(Hash) ? data.dig('candidates', 0) : nil
+          return unless candidate
+
+          parts = candidate.dig('content', 'parts') || []
+          texts = parts.filter_map { |part| part['text'] }
+          texts.join if texts.any?
+        end
+
+        def extract_usage(data)
+          metadata = data.is_a?(Hash) ? data['usageMetadata'] : nil
+          return { input_tokens: nil, output_tokens: nil } unless metadata
+
+          {
+            input_tokens: metadata['promptTokenCount'],
+            output_tokens: sum_output_tokens(metadata)
+          }
+        end
+
+        def sum_output_tokens(metadata)
+          candidates = metadata['candidatesTokenCount'] || 0
+          thoughts = metadata['thoughtsTokenCount'] || 0
+          candidates + thoughts
+        end
+      end
+    end
+  end
+end
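
Tracing render_transcription_payload above, a call such as transcribe('meeting.ogg', model: 'gemini-2.5-flash', language: 'fr') would post roughly the following generateContent body. The file name, model, and language are illustrative, and the base64 audio is elided:

    {
      contents: [
        {
          role: 'user',
          parts: [
            { text: 'Transcribe the provided audio and respond with only the transcript text. Respond in the fr language.' },
            { inline_data: { mime_type: 'audio/ogg', data: '<base64-encoded audio>' } }
          ]
        }
      ],
      generationConfig: { responseMimeType: 'text/plain' } # default unless response_mime_type is overridden
    }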
data/lib/ruby_llm/providers/gemini.rb
@@ -8,12 +8,13 @@ module RubyLLM
      include Gemini::Embeddings
      include Gemini::Images
      include Gemini::Models
+     include Gemini::Transcription
      include Gemini::Streaming
      include Gemini::Tools
      include Gemini::Media
 
      def api_base
-       'https://generativelanguage.googleapis.com/v1beta'
+       @config.gemini_api_base || 'https://generativelanguage.googleapis.com/v1beta'
      end
 
      def headers
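
Making api_base read @config.gemini_api_base first means the Gemini endpoint can now be pointed at a proxy or regional mirror. A minimal sketch, assuming the gem's usual RubyLLM.configure block; the proxy URL is an example:

    RubyLLM.configure do |config|
      config.gemini_api_key  = ENV['GEMINI_API_KEY']
      # New in 1.9.0: overrides https://generativelanguage.googleapis.com/v1beta when set.
      config.gemini_api_base = 'https://llm-proxy.internal.example.com/v1beta'
    end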
data/lib/ruby_llm/providers/gpustack/media.rb
@@ -10,6 +10,7 @@ module RubyLLM
        module_function
 
        def format_content(content)
+         return content.value if content.is_a?(RubyLLM::Content::Raw)
          return content.to_json if content.is_a?(Hash) || content.is_a?(Array)
          return content unless content.is_a?(Content)
 
data/lib/ruby_llm/providers/ollama/media.rb
@@ -10,6 +10,7 @@ module RubyLLM
        module_function
 
        def format_content(content)
+         return content.value if content.is_a?(RubyLLM::Content::Raw)
          return content.to_json if content.is_a?(Hash) || content.is_a?(Array)
          return content unless content.is_a?(Content)
 
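Both media hunks add the same early return for RubyLLM::Content::Raw, introduced in data/lib/ruby_llm/content.rb in this release. A hedged sketch of the intent, assuming Raw simply wraps the provider-native payload it later exposes as #value:

    # Assumed constructor: Raw wraps an already provider-formatted parts array.
    raw = RubyLLM::Content::Raw.new([{ 'type' => 'text', 'text' => 'pre-formatted part' }])

    # format_content now passes the wrapped value through untouched instead of
    # serializing it to JSON or treating it as a plain Content object.
    Media.format_content(raw) # => [{ 'type' => 'text', 'text' => 'pre-formatted part' }]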
data/lib/ruby_llm/providers/openai/chat.rb
@@ -47,12 +47,17 @@ module RubyLLM
        message_data = data.dig('choices', 0, 'message')
        return unless message_data
 
+       usage = data['usage'] || {}
+       cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
+
        Message.new(
          role: :assistant,
          content: message_data['content'],
          tool_calls: parse_tool_calls(message_data['tool_calls']),
-         input_tokens: data['usage']['prompt_tokens'],
-         output_tokens: data['usage']['completion_tokens'],
+         input_tokens: usage['prompt_tokens'],
+         output_tokens: usage['completion_tokens'],
+         cached_tokens: cached_tokens,
+         cache_creation_tokens: 0,
          model_id: data['model'],
          raw: response
        )
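
Besides surfacing cached tokens, the usage = data['usage'] || {} guard fixes a latent NoMethodError when a response carries no usage key. The lookups behave like this (sample numbers invented):

    usage = { 'prompt_tokens' => 1200, 'completion_tokens' => 80,
              'prompt_tokens_details' => { 'cached_tokens' => 1024 } }
    usage.dig('prompt_tokens_details', 'cached_tokens') # => 1024

    usage = {} # response without a usage object
    usage['prompt_tokens']                              # => nil (the old code raised NoMethodError)
    usage.dig('prompt_tokens_details', 'cached_tokens') # => nil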
data/lib/ruby_llm/providers/openai/media.rb
@@ -7,7 +7,8 @@ module RubyLLM
      module Media
        module_function
 
-       def format_content(content)
+       def format_content(content) # rubocop:disable Metrics/PerceivedComplexity
+         return content.value if content.is_a?(RubyLLM::Content::Raw)
          return content.to_json if content.is_a?(Hash) || content.is_a?(Array)
          return content unless content.is_a?(Content)
 
data/lib/ruby_llm/providers/openai/streaming.rb
@@ -12,13 +12,18 @@ module RubyLLM
        end
 
        def build_chunk(data)
+         usage = data['usage'] || {}
+         cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
+
          Chunk.new(
            role: :assistant,
            model_id: data['model'],
            content: data.dig('choices', 0, 'delta', 'content'),
            tool_calls: parse_tool_calls(data.dig('choices', 0, 'delta', 'tool_calls'), parse_arguments: false),
-           input_tokens: data.dig('usage', 'prompt_tokens'),
-           output_tokens: data.dig('usage', 'completion_tokens')
+           input_tokens: usage['prompt_tokens'],
+           output_tokens: usage['completion_tokens'],
+           cached_tokens: cached_tokens,
+           cache_creation_tokens: 0
          )
        end
 
data/lib/ruby_llm/providers/openai/tools.rb
@@ -7,19 +7,39 @@ module RubyLLM
      module Tools
        module_function
 
+       EMPTY_PARAMETERS_SCHEMA = {
+         'type' => 'object',
+         'properties' => {},
+         'required' => [],
+         'additionalProperties' => false,
+         'strict' => true
+       }.freeze
+
+       def parameters_schema_for(tool)
+         tool.params_schema ||
+           schema_from_parameters(tool.parameters)
+       end
+
+       def schema_from_parameters(parameters)
+         schema_definition = RubyLLM::Tool::SchemaDefinition.from_parameters(parameters)
+         schema_definition&.json_schema || EMPTY_PARAMETERS_SCHEMA
+       end
+
        def tool_for(tool)
-         {
+         parameters_schema = parameters_schema_for(tool)
+
+         definition = {
            type: 'function',
            function: {
              name: tool.name,
              description: tool.description,
-             parameters: {
-               type: 'object',
-               properties: tool.parameters.transform_values { |param| param_schema(param) },
-               required: tool.parameters.select { |_, p| p.required }.keys
-             }
+             parameters: parameters_schema
            }
          }
+
+         return definition if tool.provider_params.empty?
+
+         RubyLLM::Utils.deep_merge(definition, tool.provider_params)
        end
 
        def param_schema(param)
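
tool_for now layers tool.provider_params over the generated definition with RubyLLM::Utils.deep_merge (added to data/lib/ruby_llm/utils.rb in this release). A hedged sketch of the effect; the strict flag is an invented example of a provider-specific override:

    definition = {
      type: 'function',
      function: {
        name: 'lookup_order',
        description: 'Find an order by id',
        parameters: { 'type' => 'object', 'properties' => {}, 'required' => [] }
      }
    }

    # Nested hashes merge recursively, so only function[:strict] is added;
    # name, description, and parameters are preserved.
    RubyLLM::Utils.deep_merge(definition, { function: { strict: true } })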
data/lib/ruby_llm/providers/openai/transcription.rb
@@ -0,0 +1,70 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class OpenAI
+      # Audio transcription methods for the OpenAI API integration
+      module Transcription
+        module_function
+
+        def transcription_url
+          'audio/transcriptions'
+        end
+
+        def render_transcription_payload(file_part, model:, language:, **options)
+          {
+            model: model,
+            file: file_part,
+            language: language,
+            chunking_strategy: (options[:chunking_strategy] || 'auto' if supports_chunking_strategy?(model, options)),
+            response_format: response_format_for(model, options),
+            prompt: options[:prompt],
+            temperature: options[:temperature],
+            timestamp_granularities: options[:timestamp_granularities],
+            known_speaker_names: options[:speaker_names],
+            known_speaker_references: encode_speaker_references(options[:speaker_references])
+          }.compact
+        end
+
+        def encode_speaker_references(references)
+          return nil unless references
+
+          references.map do |ref|
+            Attachment.new(ref).for_llm
+          end
+        end
+
+        def response_format_for(model, options)
+          return options[:response_format] if options.key?(:response_format)
+
+          'diarized_json' if model.include?('diarize')
+        end
+
+        def supports_chunking_strategy?(model, options)
+          return false if model.start_with?('whisper')
+          return true if options.key?(:chunking_strategy)
+
+          model.include?('diarize')
+        end
+
+        def parse_transcription_response(response, model:)
+          data = response.body
+
+          return RubyLLM::Transcription.new(text: data, model: model) if data.is_a?(String)
+
+          usage = data['usage'] || {}
+
+          RubyLLM::Transcription.new(
+            text: data['text'],
+            model: model,
+            language: data['language'],
+            duration: data['duration'],
+            segments: data['segments'],
+            input_tokens: usage['input_tokens'] || usage['prompt_tokens'],
+            output_tokens: usage['output_tokens'] || usage['completion_tokens']
+          )
+        end
+      end
+    end
+  end
+end
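
The model-name checks gate the newer diarization parameters. Under supports_chunking_strategy? and response_format_for above, the defaults work out as follows; the model strings are just examples of the substrings being matched:

    supports_chunking_strategy?('whisper-1', {})                 # => false (whisper models opt out)
    response_format_for('whisper-1', {})                         # => nil   (API default format)

    supports_chunking_strategy?('gpt-4o-transcribe-diarize', {}) # => true  ('diarize' in the name)
    response_format_for('gpt-4o-transcribe-diarize', {})         # => 'diarized_json'

    # An explicit option always wins:
    response_format_for('whisper-1', { response_format: 'verbose_json' }) # => 'verbose_json'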
data/lib/ruby_llm/providers/openai.rb
@@ -12,6 +12,7 @@ module RubyLLM
      include OpenAI::Tools
      include OpenAI::Images
      include OpenAI::Media
+     include OpenAI::Transcription
 
      def api_base
        @config.openai_api_base || 'https://api.openai.com/v1'
data/lib/ruby_llm/providers/vertexai/transcription.rb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class VertexAI
+      # Vertex AI specific helpers for audio transcription
+      module Transcription
+        private
+
+        def transcription_url(model)
+          "projects/#{@config.vertexai_project_id}/locations/#{@config.vertexai_location}/publishers/google/models/#{model}:generateContent" # rubocop:disable Layout/LineLength
+        end
+      end
+    end
+  end
+end
data/lib/ruby_llm/providers/vertexai.rb
@@ -8,6 +8,7 @@ module RubyLLM
      include VertexAI::Streaming
      include VertexAI::Embeddings
      include VertexAI::Models
+     include VertexAI::Transcription
 
      def initialize(config)
        super
@@ -37,6 +38,8 @@ module RubyLLM
 
        initialize_authorizer unless @authorizer
        @authorizer.fetch_access_token!['access_token']
+     rescue Google::Auth::AuthorizationError => e
+       raise UnauthorizedError.new(nil, "Invalid Google Cloud credentials for Vertex AI: #{e.message}")
      end
 
      def initialize_authorizer
data/lib/ruby_llm/stream_accumulator.rb
@@ -8,8 +8,10 @@ module RubyLLM
    def initialize
      @content = +''
      @tool_calls = {}
-     @input_tokens = 0
-     @output_tokens = 0
+     @input_tokens = nil
+     @output_tokens = nil
+     @cached_tokens = nil
+     @cache_creation_tokens = nil
      @latest_tool_call_id = nil
    end
 
@@ -35,8 +37,10 @@ module RubyLLM
        model_id: model_id,
        conversation_id: conversation_id,
        tool_calls: tool_calls_from_stream,
-       input_tokens: @input_tokens.positive? ? @input_tokens : nil,
-       output_tokens: @output_tokens.positive? ? @output_tokens : nil,
+       input_tokens: @input_tokens,
+       output_tokens: @output_tokens,
+       cached_tokens: @cached_tokens,
+       cache_creation_tokens: @cache_creation_tokens,
        raw: response
      )
    end
@@ -92,6 +96,8 @@ module RubyLLM
    def count_tokens(chunk)
      @input_tokens = chunk.input_tokens if chunk.input_tokens
      @output_tokens = chunk.output_tokens if chunk.output_tokens
+     @cached_tokens = chunk.cached_tokens if chunk.cached_tokens
+     @cache_creation_tokens = chunk.cache_creation_tokens if chunk.cache_creation_tokens
    end
  end
end
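
Initializing the counters to nil instead of 0 lets the accumulator distinguish "provider reported zero tokens" from "provider never sent usage"; the old .positive? guards collapsed both cases to nil. A small illustration:

    # Before: the .positive? guard discarded a genuine zero.
    input_tokens = 0
    input_tokens.positive? ? input_tokens : nil # => nil, the reported 0 is lost

    # After: counters start at nil ("no usage seen") and keep any reported
    # value, zero included, since assignment is guarded only by presence.
    input_tokens = nil
    reported = 0
    input_tokens = reported if reported # 0 is truthy in Ruby
    input_tokens # => 0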