RubyGems - ruby_llm - Versions diffs - 0.1.0.pre41 → 0.1.0.pre43 - Mend

ruby_llm 0.1.0.pre41 → 0.1.0.pre43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

checksums.yaml +4 -4
data/.rspec_status +5 -4
data/README.md +82 -364
data/lib/ruby_llm/content.rb +1 -9
data/lib/ruby_llm/models.json +345 -452
data/lib/ruby_llm/provider.rb +1 -2
data/lib/ruby_llm/providers/anthropic/media.rb +5 -2
data/lib/ruby_llm/providers/gemini/chat.rb +140 -0
data/lib/ruby_llm/providers/gemini/embeddings.rb +53 -0
data/lib/ruby_llm/providers/gemini/images.rb +51 -0
data/lib/ruby_llm/providers/gemini/media.rb +136 -0
data/lib/ruby_llm/providers/gemini/models.rb +41 -6
data/lib/ruby_llm/providers/gemini/streaming.rb +99 -0
data/lib/ruby_llm/providers/gemini/tools.rb +88 -0
data/lib/ruby_llm/providers/gemini.rb +10 -4
data/lib/ruby_llm/providers/openai/images.rb +0 -2
data/lib/ruby_llm/stream_accumulator.rb +1 -1
data/lib/ruby_llm/version.rb +1 -1
metadata +8 -2

data/lib/ruby_llm/provider.rb CHANGED Viewed

@@ -106,7 +106,7 @@ module RubyLLM
         end
       end
-      def to_json_stream(&block) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
+      def to_json_stream(&block) # rubocop:disable Metrics/MethodLength
         buffer = String.new
         parser = EventStreamParser::Parser.new
@@ -126,7 +126,6 @@ module RubyLLM
             parser.feed(chunk) do |_type, data|
               unless data == '[DONE]'
                 parsed_data = JSON.parse(data)
-                RubyLLM.logger.debug "chunk: #{parsed_data}"
                 block.call(parsed_data)
               end
             end

data/lib/ruby_llm/providers/anthropic/media.rb CHANGED Viewed

@@ -34,10 +34,13 @@ module RubyLLM
           source = part[:source]
           if source.start_with?('http')
-            # For URLs
+            # For URLs - the source hash carries an explicit type: 'url'
             {
               type: 'document',
-              source: { url: source }
+              source: {
+                type: 'url', # marks the source as URL-based; sent together with url
+                url: source
+              }
             }
           else
             # For local files

data/lib/ruby_llm/providers/gemini/chat.rb ADDED Viewed

@@ -0,0 +1,140 @@
+# frozen_string_literal: true
+module RubyLLM
+  module Providers
+    module Gemini
+      # Chat methods for the Gemini API implementation
+      module Chat # rubocop:disable Metrics/ModuleLength
+        # Must be public for Provider to use
+        # Runs a chat completion against the Gemini API.
+        #
+        # Builds the request payload from +messages+ and +temperature+, adds the
+        # formatted tool declarations when any tools are given, and remembers the
+        # tools in @tools. Streams via stream_completion when a block is supplied;
+        # otherwise issues a single request through generate_completion.
+        def complete(messages, tools:, temperature:, model:, &block) # rubocop:disable Metrics/MethodLength
+          payload = {
+            contents: format_messages(messages),
+            generationConfig: { temperature: temperature }
+          }
+          payload[:tools] = format_tools(tools) if tools.any?
+          # Keep the tools around: stream_completion reads @tools after the stream ends
+          @tools = tools
+          return stream_completion(model, payload, &block) if block_given?
+          generate_completion(model, payload)
+        end
+        # Format methods can be private
+        private
+        # Performs a non-streaming generateContent request for +model+ with the
+        # given +payload+ and returns the result of parse_completion_response.
+        def generate_completion(model, payload)
+          response = post("models/#{model}:generateContent", payload)
+          parse_completion_response(response)
+        end
+        # Converts every message into a Gemini content entry: a role (format_role)
+        # plus its parts array (format_parts).
+        def format_messages(messages)
+          messages.map do |message|
+            role = format_role(message.role)
+            { role: role, parts: format_parts(message) }
+          end
+        end
+        # Maps a RubyLLM role symbol to the role name sent to Gemini.
+        # :assistant becomes 'model'; :system and :tool are sent as 'user';
+        # any other role is passed through as a string.
+        def format_role(role)
+          return 'model' if role == :assistant
+          # Gemini has no system role, and function responses also use the user role
+          return 'user' if %i[system tool].include?(role)
+          role.to_s
+        end
+        # Builds the Gemini parts array for one message.
+        #
+        # - tool-call messages become a single functionCall part (first tool call only)
+        # - tool-result messages become a single functionResponse part keyed by the
+        #   message's tool_call_id
+        # - Array content is formatted part by part via format_part
+        # - anything else is sent as one text part
+        def format_parts(msg) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
+          if msg.tool_call?
+            call = msg.tool_calls.values.first
+            return [{ functionCall: { name: call.name, args: call.arguments } }]
+          end
+          if msg.tool_result?
+            call_id = msg.tool_call_id
+            response = { name: call_id, content: msg.content }
+            return [{ functionResponse: { name: call_id, response: response } }]
+          end
+          return msg.content.map { |part| format_part(part) } if msg.content.is_a?(Array)
+          [{ text: msg.content.to_s }]
+        end
+        # Translates one entry of multi-part content into a Gemini part.
+        # Text entries map to { text: ... }; image, pdf and audio entries are handed
+        # to the Media formatters; any other entry is stringified and sent as text.
+        def format_part(part) # rubocop:disable Metrics/MethodLength
+          case part[:type]
+          when 'text' then { text: part[:text] }
+          when 'image' then Media.format_image(part)
+          when 'pdf' then Media.format_pdf(part)
+          when 'audio' then Media.format_audio(part)
+          else { text: part.to_s }
+          end
+        end
+        # Turns a generateContent response into an assistant Message carrying the
+        # extracted text, tool calls, token usage and model id.
+        def parse_completion_response(response)
+          body = response.body
+          # Prefer the version reported in the body; otherwise take the fourth path
+          # segment of the request URL and drop everything after the colon
+          model_id = body['modelVersion'] || response.env.url.path.split('/')[3].split(':')[0]
+          Message.new(
+            role: :assistant,
+            content: extract_content(body),
+            tool_calls: extract_tool_calls(body),
+            input_tokens: body.dig('usageMetadata', 'promptTokenCount'),
+            output_tokens: body.dig('usageMetadata', 'candidatesTokenCount'),
+            model_id: model_id
+          )
+        end
+        # Returns the concatenated text of the first candidate's parts.
+        # Yields an empty string when there is no candidate, when any part is a
+        # function call, or when no part carries text.
+        def extract_content(data) # rubocop:disable Metrics/CyclomaticComplexity
+          candidate = data.dig('candidates', 0)
+          return '' unless candidate
+          parts = candidate.dig('content', 'parts')
+          # Function-call responses are treated as having no text content
+          return '' if parts&.any? { |part| part['functionCall'] }
+          parts.to_a.filter_map { |part| part['text'] }.join
+        end
+        # Tells whether any part of the candidate's content is a function call.
+        # Returns nil (falsy) when the candidate has no parts.
+        def function_call?(candidate)
+          candidate.dig('content', 'parts')&.any? { |part| part['functionCall'] }
+        end
+      end
+    end
+  end
+end

data/lib/ruby_llm/providers/gemini/embeddings.rb ADDED Viewed

@@ -0,0 +1,53 @@
+# frozen_string_literal: true
+module RubyLLM
+  module Providers
+    module Gemini
+      # Embeddings methods for the Gemini API integration
+      module Embeddings
+        # Must be public for Provider module
+        # Embeds +text+ with the given Gemini +model+ via the embedContent endpoint.
+        #
+        # text  - a single value or an Array of values; each is stringified.
+        # model - model id interpolated into the request path.
+        #
+        # Returns an Embedding. For a single text, vectors is that text's embedding
+        # values; for an Array, vectors is a list with one entry per text, in order.
+        # Exactly one request is sent per text (no extra combined request for
+        # arrays). input_tokens is the sum of the promptTokenCount values found in
+        # the responses, counting a response without usage metadata as 0.
+        def embed(text, model:) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
+          url = "models/#{model}:embedContent"
+          batch = text.is_a?(Array)
+          inputs = batch ? text : [text]
+          responses = inputs.map do |input|
+            post(url, { content: { parts: format_text_for_embedding(input) } })
+          end
+          vectors = responses.map { |response| response.body.dig('embedding', 'values') }
+          Embedding.new(
+            vectors: batch ? vectors : vectors.first,
+            model: model,
+            input_tokens: responses.sum { |response| response.body.dig('usageMetadata', 'promptTokenCount') || 0 }
+          )
+        end
+        private
+        # Wraps the input in Gemini text parts: one { text: ... } hash per element
+        # for an Array, or a single-element list for any other value.
+        def format_text_for_embedding(text)
+          entries = text.is_a?(Array) ? text : [text]
+          entries.map { |entry| { text: entry.to_s } }
+        end
+      end
+    end
+  end
+end

data/lib/ruby_llm/providers/gemini/images.rb ADDED Viewed

@@ -0,0 +1,51 @@
+# frozen_string_literal: true
+module RubyLLM
+  module Providers
+    module Gemini
+      # Image generation methods for the Gemini API implementation
+      module Images
+        # Relative endpoint path used for image generation with the given model.
+        def images_url(model:) = "models/#{model}:predict"
+        # Generates an image for +prompt+ with +model+ and returns the parsed result.
+        # The +size+ argument is accepted but not used by this implementation.
+        def paint(prompt, model:, size:) # rubocop:disable Lint/UnusedMethodArgument
+          parse_image_response(post(images_url(model:), render_image_payload(prompt)))
+        end
+        # Request body for image generation: one instance holding the prompt and a
+        # fixed sample count of 1.
+        def render_image_payload(prompt)
+          instance = { prompt: prompt }
+          { instances: [instance], parameters: { sampleCount: 1 } }
+        end
+        # Converts an image generation response into an Image whose url is a
+        # base64 data URI built from the first prediction.
+        #
+        # Raises Error when the first prediction is missing or has no
+        # 'bytesBase64Encoded' key. The MIME type defaults to image/png.
+        def parse_image_response(response) # rubocop:disable Metrics/MethodLength
+          prediction = response.body['predictions']&.first
+          usable = prediction&.key?('bytesBase64Encoded')
+          raise Error, 'Unexpected response format from Gemini image generation API' unless usable
+          mime_type = prediction['mimeType'] || 'image/png'
+          Image.new(
+            url: "data:#{mime_type};base64,#{prediction['bytesBase64Encoded']}",
+            revised_prompt: '', # Imagen doesn't return revised prompts
+            model_id: ''
+          )
+        end
+      end
+    end
+  end
+end

data/lib/ruby_llm/providers/gemini/media.rb ADDED Viewed

@@ -0,0 +1,136 @@
+# frozen_string_literal: true
+module RubyLLM
+  module Providers
+    module Gemini
+      # Media handling methods for the Gemini API integration
+      module Media # rubocop:disable Metrics/ModuleLength
+        module_function
+        # Builds a Gemini inline_data part for an image.
+        #
+        # part[:source] may be:
+        # - a String starting with 'http': the image is fetched and encoded
+        # - any other String: treated as a file path and encoded from disk
+        # - a Hash with :url: fetched and encoded; :media_type overrides the
+        #   MIME type otherwise derived from the URL
+        # - a Hash without :url: :data is passed through as-is, with :media_type
+        #   defaulting to image/jpeg
+        # Returns nil for any other kind of source.
+        def format_image(part) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/PerceivedComplexity
+          source = part[:source]
+          if source.is_a?(String)
+            mime_type = mime_type_for_image(source)
+            data = source.start_with?('http') ? fetch_and_encode_image(source) : encode_image_file(source)
+          elsif source.is_a?(Hash)
+            url = source[:url]
+            mime_type = source[:media_type] || (url ? mime_type_for_image(url) : 'image/jpeg')
+            data = url ? fetch_and_encode_image(url) : source[:data]
+          else
+            return
+          end
+          { inline_data: { mime_type: mime_type, data: data } }
+        end
+        # Builds a Gemini inline_data part for a PDF.
+        # An http(s) source is fetched and encoded; otherwise part[:content] is
+        # Base64-encoded when present, and failing that the source is read from
+        # disk as a file path.
+        def format_pdf(part) # rubocop:disable Metrics/MethodLength
+          source = part[:source]
+          data =
+            if source.is_a?(String) && source.start_with?('http')
+              fetch_and_encode_pdf(source)
+            elsif part[:content]
+              Base64.strict_encode64(part[:content])
+            else
+              encode_pdf_file(source)
+            end
+          { inline_data: { mime_type: 'application/pdf', data: data } }
+        end
+        # Builds a Gemini part for audio.
+        #
+        # An http(s) source is referenced by URI in a file_data part. Otherwise the
+        # audio is sent inline: part[:content] when given, else the bytes of the
+        # file at part[:source], Base64-encoded. The MIME type is derived from the
+        # source's extension and falls back to the default of mime_type_for_audio
+        # when no source is given.
+        def format_audio(part) # rubocop:disable Metrics/MethodLength
+          source = part[:source]
+          if source.is_a?(String) && source.start_with?('http')
+            return { file_data: { mime_type: mime_type_for_audio(source), file_uri: source } }
+          end
+          # Audio is binary data, so read the file in binary mode
+          content = part[:content] || File.binread(source)
+          {
+            inline_data: {
+              # source is nil when only :content is supplied; to_s keeps the
+              # extension lookup from raising on nil
+              mime_type: mime_type_for_audio(source.to_s),
+              data: Base64.strict_encode64(content)
+            }
+          }
+        end
+        # Maps an image path or URL to a MIME type based on its file extension.
+        # For http(s) URLs the query string and fragment are ignored, so
+        # "https://host/pic.png?size=2" resolves to image/png instead of falling
+        # through to the default. Unknown or missing extensions yield image/jpeg.
+        def mime_type_for_image(path)
+          location = path.to_s
+          # Only URLs are trimmed; '?' and '#' are legal characters in local file names
+          location = location.sub(/[?#].*\z/m, '') if location.start_with?('http')
+          case File.extname(location).downcase.delete('.')
+          when 'png' then 'image/png'
+          when 'gif' then 'image/gif'
+          when 'webp' then 'image/webp'
+          else 'image/jpeg'
+          end
+        end
+        # Maps an audio path or URL to a MIME type based on its file extension.
+        # For http(s) URLs the query string and fragment are ignored. A nil path,
+        # or an unknown or missing extension, yields audio/wav.
+        def mime_type_for_audio(path)
+          location = path.to_s
+          # Only URLs are trimmed; '?' and '#' are legal characters in local file names
+          location = location.sub(/[?#].*\z/m, '') if location.start_with?('http')
+          case File.extname(location).downcase.delete('.')
+          when 'mp3' then 'audio/mpeg'
+          when 'ogg' then 'audio/ogg'
+          else 'audio/wav'
+          end
+        end
+        # Downloads the image at +url+ with Faraday and returns the response body
+        # Base64-encoded. The response status is not inspected.
+        def fetch_and_encode_image(url)
+          Base64.strict_encode64(Faraday.get(url).body)
+        end
+        # Downloads the PDF at +url+ with Faraday and returns the response body
+        # Base64-encoded. The response status is not inspected.
+        def fetch_and_encode_pdf(url)
+          Base64.strict_encode64(Faraday.get(url).body)
+        end
+        # Returns the Base64 (strict) encoding of the image file at +path+.
+        # The file is read in binary mode so its bytes are never subject to
+        # text-mode newline or end-of-file translation.
+        def encode_image_file(path)
+          Base64.strict_encode64(File.binread(path))
+        end
+        # Returns the Base64 (strict) encoding of the PDF file at +path+.
+        # The file is read in binary mode so its bytes are never subject to
+        # text-mode newline or end-of-file translation.
+        def encode_pdf_file(path)
+          Base64.strict_encode64(File.binread(path))
+        end
+      end
+    end
+  end
+end

data/lib/ruby_llm/providers/gemini/models.rb CHANGED Viewed

@@ -3,16 +3,51 @@
 module RubyLLM
   module Providers
     module Gemini
-      # Models methods of the Gemini API integration
+      # Models methods for the Gemini API integration
       module Models
-        module_function
+        # Methods needed by Provider - must be public
+        def models_url
+          'models'
+        end
-        def parse_list_models_response(response, slug, capabilities)
-          response.body['data']&.each do |model|
-            model['id'] = model['id'].delete_prefix('models/')
+        def list_models
+          response = connection.get("models?key=#{RubyLLM.config.gemini_api_key}") do |req|
+            req.headers.merge! headers
           end
-          OpenAI::Models.parse_list_models_response(response, slug, capabilities)
+          parse_list_models_response(response, slug, capabilities)
+        end
+        private
+        def parse_list_models_response(response, slug, capabilities) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
+          (response.body['models'] || []).map do |model|
+            # Extract model ID without "models/" prefix
+            model_id = model['name'].gsub('models/', '')
+            ModelInfo.new(
+              id: model_id,
+              created_at: nil,
+              display_name: model['displayName'],
+              provider: slug,
+              type: capabilities.model_type(model_id),
+              family: capabilities.model_family(model_id),
+              metadata: {
+                version: model['version'],
+                description: model['description'],
+                input_token_limit: model['inputTokenLimit'],
+                output_token_limit: model['outputTokenLimit'],
+                supported_generation_methods: model['supportedGenerationMethods']
+              },
+              context_window: model['inputTokenLimit'] || capabilities.context_window_for(model_id),
+              max_tokens: model['outputTokenLimit'] || capabilities.max_tokens_for(model_id),
+              supports_vision: capabilities.supports_vision?(model_id),
+              supports_functions: capabilities.supports_functions?(model_id),
+              supports_json_mode: capabilities.supports_json_mode?(model_id),
+              input_price_per_million: capabilities.input_price_for(model_id),
+              output_price_per_million: capabilities.output_price_for(model_id)
+            )
+          end
         end
       end
     end

data/lib/ruby_llm/providers/gemini/streaming.rb ADDED Viewed

@@ -0,0 +1,99 @@
+# frozen_string_literal: true
+module RubyLLM
+  module Providers
+    module Gemini
+      # Streaming methods for the Gemini API implementation
+      module Streaming
+        # Need to make stream_completion public for chat.rb to access
+        # Streams a completion for +model+ and returns the accumulated message.
+        #
+        # Posts +payload+ to the streamGenerateContent endpoint (SSE mode) and feeds
+        # every parsed chunk to both the accumulator and the caller's block. When the
+        # accumulated message is a tool call with empty content and a matching tool
+        # is found in @tools, the tool is invoked here and one extra chunk containing
+        # "The result is <tool_result>" is added and yielded.
+        #
+        # Relies on @tools having been set beforehand (complete assigns it).
+        def stream_completion(model, payload, &block) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
+          url = "models/#{model}:streamGenerateContent?alt=sse"
+          accumulator = StreamAccumulator.new
+          post(url, payload) do |req|
+            # Chunks are handled as they arrive rather than after the response completes
+            req.options.on_data = stream_handler(accumulator, &block)
+          end
+          # If this is a tool call, immediately execute it and include the result
+          message = accumulator.to_message
+          if message.tool_call? && message.content.to_s.empty? && @tools && !@tools.empty?
+            # Only the first tool call is considered; tools are looked up by symbolized name
+            tool_call = message.tool_calls.values.first
+            tool = @tools[tool_call.name.to_sym]
+            if tool
+              # NOTE(review): stream_handler builds tool calls with a JSON string for
+              # arguments - confirm what the accumulator hands back and what tool.call expects
+              tool_result = tool.call(tool_call.arguments)
+              # Create a new chunk with the result
+              result_chunk = Chunk.new(
+                role: :assistant,
+                content: "The result is #{tool_result}",
+                model_id: message.model_id,
+                input_tokens: message.input_tokens,
+                output_tokens: message.output_tokens,
+                tool_calls: message.tool_calls
+              )
+              # Add to accumulator and call the block
+              accumulator.add(result_chunk)
+              block.call(result_chunk)
+            end
+          end
+          # Re-read the message so it includes the tool-result chunk when one was added
+          accumulator.to_message
+        end
+        private
+        # Handle streaming
+        # Returns the on_data callback used while streaming (built by to_json_stream).
+        #
+        # For every parsed event that contains at least one candidate, a Chunk is
+        # built from the first candidate - its text parts joined together, an
+        # optional tool call, the model version and the token usage - then added to
+        # +accumulator+ and passed to the caller's block. Events without candidates
+        # are skipped.
+        def stream_handler(accumulator, &block) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
+          to_json_stream do |data| # rubocop:disable Metrics/BlockLength
+            next unless data['candidates']&.any?
+            # Only the first candidate is used
+            candidate = data['candidates'][0]
+            parts = candidate.dig('content', 'parts')
+            model_id = data['modelVersion']
+            # First attempt to extract tool calls
+            tool_calls = nil
+            # Check if any part contains a functionCall
+            if parts&.any? { |p| p['functionCall'] }
+              # Only the first functionCall part is turned into a tool call
+              function_part = parts.find { |p| p['functionCall'] }
+              function_data = function_part['functionCall']
+              if function_data && function_data['name']
+                # Create a tool call with proper structure - convert args to JSON string
+                # The id is generated locally with SecureRandom
+                id = SecureRandom.uuid
+                tool_calls = {
+                  id => ToolCall.new(
+                    id: id,
+                    name: function_data['name'],
+                    arguments: JSON.generate(function_data['args']) # Convert Hash to JSON string
+                  )
+                }
+              end
+            end
+            # Extract text content (if any)
+            text = nil
+            if parts
+              text_parts = parts.select { |p| p['text'] }
+              text = text_parts.map { |p| p['text'] }.join if text_parts.any?
+            end
+            # content stays nil when the event carried no text parts
+            chunk = Chunk.new(
+              role: :assistant,
+              content: text,
+              model_id: model_id,
+              input_tokens: data.dig('usageMetadata', 'promptTokenCount'),
+              output_tokens: data.dig('usageMetadata', 'candidatesTokenCount'),
+              tool_calls: tool_calls
+            )
+            accumulator.add(chunk)
+            block.call(chunk)
+          end
+        end
+      end
+    end
+  end
+end

data/lib/ruby_llm/providers/gemini/tools.rb ADDED Viewed

@@ -0,0 +1,88 @@
+# frozen_string_literal: true
+module RubyLLM
+  module Providers
+    module Gemini
+      # Tools methods for the Gemini API implementation
+      module Tools
+        # Format tools for Gemini API
+        # Wraps the declarations of all tools in the single-element array sent as
+        # the request's tools field; returns [] when there are no tools.
+        def format_tools(tools)
+          return [] if tools.empty?
+          declarations = tools.values.map { |tool| function_declaration_for(tool) }
+          [{ functionDeclarations: declarations }]
+        end
+        # Extract tool calls from response data
+        # Extracts the first function call of the first candidate from response data.
+        #
+        # Returns nil when data is not a Hash, has no candidate, has no parts array,
+        # or contains no functionCall part. Otherwise returns a one-entry Hash that
+        # maps a freshly generated UUID to a ToolCall with the function's name and
+        # its args as received.
+        def extract_tool_calls(data) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength
+          candidate = data.dig('candidates', 0) if data.is_a?(Hash)
+          return nil unless candidate
+          parts = candidate.dig('content', 'parts')
+          return nil unless parts.is_a?(Array)
+          function_part = parts.find { |part| part['functionCall'] }
+          function_data = function_part && function_part['functionCall']
+          return nil unless function_data
+          # The id is generated locally and doubles as the hash key
+          call_id = SecureRandom.uuid
+          {
+            call_id => ToolCall.new(id: call_id, name: function_data['name'], arguments: function_data['args'])
+          }
+        end
+        private
+        # Format a single tool for Gemini API
+        # Builds the function declaration for one tool: its name, description and
+        # an OBJECT schema listing the formatted parameters and the names of the
+        # required ones (as strings).
+        def function_declaration_for(tool)
+          params = tool.parameters
+          required_names = params.select { |_, param| param.required }.keys.map(&:to_s)
+          schema = { type: 'OBJECT', properties: format_parameters(params), required: required_names }
+          { name: tool.name, description: tool.description, parameters: schema }
+        end
+        # Format tool parameters for Gemini API
+        # Maps each parameter to a schema hash with its Gemini type and description;
+        # a nil description is left out.
+        def format_parameters(parameters)
+          parameters.to_h do |key, param|
+            schema = { type: param_type_for_gemini(param.type), description: param.description }
+            [key, schema.compact]
+          end
+        end
+        # Convert RubyLLM param types to Gemini API types
+        # Translates a RubyLLM parameter type (case-insensitive) to a Gemini schema
+        # type. integer, number and float all map to NUMBER; boolean, array and
+        # object map to their upper-case names; everything else maps to STRING.
+        def param_type_for_gemini(type)
+          normalized = type.to_s.downcase
+          return 'NUMBER' if %w[integer number float].include?(normalized)
+          return normalized.upcase if %w[boolean array object].include?(normalized)
+          'STRING'
+        end
+      end
+    end
+  end
+end

data/lib/ruby_llm/providers/gemini.rb CHANGED Viewed

@@ -2,20 +2,26 @@
 module RubyLLM
   module Providers
-    # Gemini API integration.
+    # Native Gemini API implementation
     module Gemini
-      extend OpenAI
+      extend Provider
+      extend Gemini::Chat
+      extend Gemini::Embeddings
+      extend Gemini::Images
       extend Gemini::Models
+      extend Gemini::Streaming
+      extend Gemini::Tools
+      extend Gemini::Media
       module_function
       def api_base
-        'https://generativelanguage.googleapis.com/v1beta/openai'
+        'https://generativelanguage.googleapis.com/v1beta'
       end
       def headers
         {
-          'Authorization' => "Bearer #{RubyLLM.config.gemini_api_key}"
+          'x-goog-api-key' => RubyLLM.config.gemini_api_key
         }
       end